This source file includes the following definitions:
- goya_get_fixed_properties
- goya_pci_bars_map
- goya_set_ddr_bar_base
- goya_init_iatu
- goya_early_init
- goya_early_fini
- goya_mmu_prepare_reg
- goya_qman0_set_security
- goya_fetch_psoc_frequency
- goya_late_init
- goya_late_fini
- goya_sw_init
- goya_sw_fini
- goya_init_dma_qman
- goya_init_dma_ch
- goya_init_dma_qmans
- goya_disable_external_queues
- goya_stop_queue
- goya_stop_external_queues
- goya_init_cpu_queues
- goya_set_pll_refclk
- goya_disable_clk_rlx
- _goya_tpc_mbist_workaround
- goya_tpc_mbist_workaround
- goya_init_golden_registers
- goya_init_mme_qman
- goya_init_mme_cmdq
- goya_init_mme_qmans
- goya_init_tpc_qman
- goya_init_tpc_cmdq
- goya_init_tpc_qmans
- goya_disable_internal_queues
- goya_stop_internal_queues
- goya_dma_stall
- goya_tpc_stall
- goya_mme_stall
- goya_enable_msix
- goya_sync_irqs
- goya_disable_msix
- goya_enable_timestamp
- goya_disable_timestamp
- goya_halt_engines
- goya_push_uboot_to_device
- goya_push_linux_to_device
- goya_pldm_init_cpu
- goya_read_device_fw_version
- goya_init_cpu
- goya_mmu_update_asid_hop0_addr
- goya_mmu_init
- goya_hw_init
- goya_hw_fini
- goya_suspend
- goya_resume
- goya_cb_mmap
- goya_ring_doorbell
- goya_pqe_write
- goya_dma_alloc_coherent
- goya_dma_free_coherent
- goya_get_int_queue_base
- goya_send_job_on_qman0
- goya_send_cpu_message
- goya_test_queue
- goya_test_cpu_queue
- goya_test_queues
- goya_dma_pool_zalloc
- goya_dma_pool_free
- goya_cpu_accessible_dma_pool_alloc
- goya_cpu_accessible_dma_pool_free
- goya_dma_map_sg
- goya_dma_unmap_sg
- goya_get_dma_desc_list_size
- goya_pin_memory_before_cs
- goya_validate_dma_pkt_host
- goya_validate_dma_pkt_no_host
- goya_validate_dma_pkt_no_mmu
- goya_validate_dma_pkt_mmu
- goya_validate_wreg32
- goya_validate_cb
- goya_patch_dma_packet
- goya_patch_cb
- goya_parse_cb_mmu
- goya_parse_cb_no_mmu
- goya_parse_cb_no_ext_queue
- goya_cs_parser
- goya_add_end_of_cb_packets
- goya_update_eq_ci
- goya_restore_phase_topology
- goya_clear_sm_regs
- goya_debugfs_read32
- goya_debugfs_write32
- goya_read_pte
- goya_write_pte
- _goya_get_event_desc
- goya_get_event_desc
- goya_print_razwi_info
- goya_print_mmu_error_info
- goya_print_irq_info
- goya_unmask_irq_arr
- goya_soft_reset_late_init
- goya_unmask_irq
- goya_handle_eqe
- goya_get_events_stat
- goya_memset_device_memory
- goya_context_switch
- goya_mmu_clear_pgt_range
- goya_mmu_set_dram_default_page
- goya_mmu_add_mappings_for_device_cpu
- goya_mmu_remove_device_cpu_mappings
- goya_mmu_prepare
- goya_mmu_invalidate_cache
- goya_mmu_invalidate_cache_range
- goya_send_heartbeat
- goya_armcp_info_get
- goya_is_device_idle
- goya_hw_queues_lock
- goya_hw_queues_unlock
- goya_get_pci_id
- goya_get_eeprom_data
- goya_get_hw_state
- goya_set_asic_funcs
#include "goyaP.h"
#include "include/hw_ip/mmu/mmu_general.h"
#include "include/hw_ip/mmu/mmu_v1_0.h"
#include "include/goya/asic_reg/goya_masks.h"
#include "include/goya/goya_reg_map.h"

#include <linux/pci.h>
#include <linux/genalloc.h>
#include <linux/hwmon.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>
#define GOYA_MMU_REGS_NUM		63

#define GOYA_DMA_POOL_BLK_SIZE		0x100

#define GOYA_RESET_TIMEOUT_MSEC		500
#define GOYA_PLDM_RESET_TIMEOUT_MSEC	20000
#define GOYA_RESET_WAIT_MSEC		1
#define GOYA_CPU_RESET_WAIT_MSEC	100
#define GOYA_PLDM_RESET_WAIT_MSEC	1000
#define GOYA_TEST_QUEUE_WAIT_USEC	100000
#define GOYA_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GOYA_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)

#define GOYA_QMAN0_FENCE_VAL		0xD169B243

#define GOYA_MAX_STRING_LEN		20

#define GOYA_CB_POOL_CB_CNT		512
#define GOYA_CB_POOL_CB_SIZE		0x20000

#define IS_QM_IDLE(engine, qm_glbl_sts0) \
	(((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK)
#define IS_DMA_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(DMA, qm_glbl_sts0)
#define IS_TPC_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(TPC, qm_glbl_sts0)
#define IS_MME_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(MME, qm_glbl_sts0)

#define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \
	(((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \
			engine##_CMDQ_IDLE_MASK)
#define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \
	IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0)
#define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \
	IS_CMDQ_IDLE(MME, cmdq_glbl_sts0)

#define IS_DMA_IDLE(dma_core_sts0) \
	!((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK)

#define IS_TPC_IDLE(tpc_cfg_sts) \
	(((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK)

#define IS_MME_IDLE(mme_arch_sts) \
	(((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)

static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
	"goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
	"goya cq 4", "goya cpu eq"
};

static u16 goya_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop)
};

static u64 goya_mmu_regs[GOYA_MMU_REGS_NUM] = {
	mmDMA_QM_0_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_1_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_2_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_3_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_4_GLBL_NON_SECURE_PROPS,
	mmTPC0_QM_GLBL_SECURE_PROPS,
	mmTPC0_QM_GLBL_NON_SECURE_PROPS,
	mmTPC0_CMDQ_GLBL_SECURE_PROPS,
	mmTPC0_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC0_CFG_ARUSER,
	mmTPC0_CFG_AWUSER,
	mmTPC1_QM_GLBL_SECURE_PROPS,
	mmTPC1_QM_GLBL_NON_SECURE_PROPS,
	mmTPC1_CMDQ_GLBL_SECURE_PROPS,
	mmTPC1_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC1_CFG_ARUSER,
	mmTPC1_CFG_AWUSER,
	mmTPC2_QM_GLBL_SECURE_PROPS,
	mmTPC2_QM_GLBL_NON_SECURE_PROPS,
	mmTPC2_CMDQ_GLBL_SECURE_PROPS,
	mmTPC2_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC2_CFG_ARUSER,
	mmTPC2_CFG_AWUSER,
	mmTPC3_QM_GLBL_SECURE_PROPS,
	mmTPC3_QM_GLBL_NON_SECURE_PROPS,
	mmTPC3_CMDQ_GLBL_SECURE_PROPS,
	mmTPC3_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC3_CFG_ARUSER,
	mmTPC3_CFG_AWUSER,
	mmTPC4_QM_GLBL_SECURE_PROPS,
	mmTPC4_QM_GLBL_NON_SECURE_PROPS,
	mmTPC4_CMDQ_GLBL_SECURE_PROPS,
	mmTPC4_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC4_CFG_ARUSER,
	mmTPC4_CFG_AWUSER,
	mmTPC5_QM_GLBL_SECURE_PROPS,
	mmTPC5_QM_GLBL_NON_SECURE_PROPS,
	mmTPC5_CMDQ_GLBL_SECURE_PROPS,
	mmTPC5_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC5_CFG_ARUSER,
	mmTPC5_CFG_AWUSER,
	mmTPC6_QM_GLBL_SECURE_PROPS,
	mmTPC6_QM_GLBL_NON_SECURE_PROPS,
	mmTPC6_CMDQ_GLBL_SECURE_PROPS,
	mmTPC6_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC6_CFG_ARUSER,
	mmTPC6_CFG_AWUSER,
	mmTPC7_QM_GLBL_SECURE_PROPS,
	mmTPC7_QM_GLBL_NON_SECURE_PROPS,
	mmTPC7_CMDQ_GLBL_SECURE_PROPS,
	mmTPC7_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC7_CFG_ARUSER,
	mmTPC7_CFG_AWUSER,
	mmMME_QM_GLBL_SECURE_PROPS,
	mmMME_QM_GLBL_NON_SECURE_PROPS,
	mmMME_CMDQ_GLBL_SECURE_PROPS,
	mmMME_CMDQ_GLBL_NON_SECURE_PROPS,
	mmMME_SBA_CONTROL_DATA,
	mmMME_SBB_CONTROL_DATA,
	mmMME_SBC_CONTROL_DATA,
	mmMME_WBC_CONTROL_DATA,
	mmPCIE_WRAP_PSOC_ARUSER,
	mmPCIE_WRAP_PSOC_AWUSER
};

static u32 goya_all_events[] = {
	GOYA_ASYNC_EVENT_ID_PCIE_IF,
	GOYA_ASYNC_EVENT_ID_TPC0_ECC,
	GOYA_ASYNC_EVENT_ID_TPC1_ECC,
	GOYA_ASYNC_EVENT_ID_TPC2_ECC,
	GOYA_ASYNC_EVENT_ID_TPC3_ECC,
	GOYA_ASYNC_EVENT_ID_TPC4_ECC,
	GOYA_ASYNC_EVENT_ID_TPC5_ECC,
	GOYA_ASYNC_EVENT_ID_TPC6_ECC,
	GOYA_ASYNC_EVENT_ID_TPC7_ECC,
	GOYA_ASYNC_EVENT_ID_MME_ECC,
	GOYA_ASYNC_EVENT_ID_MME_ECC_EXT,
	GOYA_ASYNC_EVENT_ID_MMU_ECC,
	GOYA_ASYNC_EVENT_ID_DMA_MACRO,
	GOYA_ASYNC_EVENT_ID_DMA_ECC,
	GOYA_ASYNC_EVENT_ID_CPU_IF_ECC,
	GOYA_ASYNC_EVENT_ID_PSOC_MEM,
	GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT,
	GOYA_ASYNC_EVENT_ID_SRAM0,
	GOYA_ASYNC_EVENT_ID_SRAM1,
	GOYA_ASYNC_EVENT_ID_SRAM2,
	GOYA_ASYNC_EVENT_ID_SRAM3,
	GOYA_ASYNC_EVENT_ID_SRAM4,
	GOYA_ASYNC_EVENT_ID_SRAM5,
	GOYA_ASYNC_EVENT_ID_SRAM6,
	GOYA_ASYNC_EVENT_ID_SRAM7,
	GOYA_ASYNC_EVENT_ID_SRAM8,
	GOYA_ASYNC_EVENT_ID_SRAM9,
	GOYA_ASYNC_EVENT_ID_SRAM10,
	GOYA_ASYNC_EVENT_ID_SRAM11,
	GOYA_ASYNC_EVENT_ID_SRAM12,
	GOYA_ASYNC_EVENT_ID_SRAM13,
	GOYA_ASYNC_EVENT_ID_SRAM14,
	GOYA_ASYNC_EVENT_ID_SRAM15,
	GOYA_ASYNC_EVENT_ID_SRAM16,
	GOYA_ASYNC_EVENT_ID_SRAM17,
	GOYA_ASYNC_EVENT_ID_SRAM18,
	GOYA_ASYNC_EVENT_ID_SRAM19,
	GOYA_ASYNC_EVENT_ID_SRAM20,
	GOYA_ASYNC_EVENT_ID_SRAM21,
	GOYA_ASYNC_EVENT_ID_SRAM22,
	GOYA_ASYNC_EVENT_ID_SRAM23,
	GOYA_ASYNC_EVENT_ID_SRAM24,
	GOYA_ASYNC_EVENT_ID_SRAM25,
	GOYA_ASYNC_EVENT_ID_SRAM26,
	GOYA_ASYNC_EVENT_ID_SRAM27,
	GOYA_ASYNC_EVENT_ID_SRAM28,
	GOYA_ASYNC_EVENT_ID_SRAM29,
	GOYA_ASYNC_EVENT_ID_GIC500,
	GOYA_ASYNC_EVENT_ID_PLL0,
	GOYA_ASYNC_EVENT_ID_PLL1,
	GOYA_ASYNC_EVENT_ID_PLL3,
	GOYA_ASYNC_EVENT_ID_PLL4,
	GOYA_ASYNC_EVENT_ID_PLL5,
	GOYA_ASYNC_EVENT_ID_PLL6,
	GOYA_ASYNC_EVENT_ID_AXI_ECC,
	GOYA_ASYNC_EVENT_ID_L2_RAM_ECC,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT,
	GOYA_ASYNC_EVENT_ID_PCIE_DEC,
	GOYA_ASYNC_EVENT_ID_TPC0_DEC,
	GOYA_ASYNC_EVENT_ID_TPC1_DEC,
	GOYA_ASYNC_EVENT_ID_TPC2_DEC,
	GOYA_ASYNC_EVENT_ID_TPC3_DEC,
	GOYA_ASYNC_EVENT_ID_TPC4_DEC,
	GOYA_ASYNC_EVENT_ID_TPC5_DEC,
	GOYA_ASYNC_EVENT_ID_TPC6_DEC,
	GOYA_ASYNC_EVENT_ID_TPC7_DEC,
	GOYA_ASYNC_EVENT_ID_MME_WACS,
	GOYA_ASYNC_EVENT_ID_MME_WACSD,
	GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER,
	GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC,
	GOYA_ASYNC_EVENT_ID_PSOC,
	GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC0_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC1_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC2_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC3_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC4_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC5_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC6_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC7_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC0_QM,
	GOYA_ASYNC_EVENT_ID_TPC1_QM,
	GOYA_ASYNC_EVENT_ID_TPC2_QM,
	GOYA_ASYNC_EVENT_ID_TPC3_QM,
	GOYA_ASYNC_EVENT_ID_TPC4_QM,
	GOYA_ASYNC_EVENT_ID_TPC5_QM,
	GOYA_ASYNC_EVENT_ID_TPC6_QM,
	GOYA_ASYNC_EVENT_ID_TPC7_QM,
	GOYA_ASYNC_EVENT_ID_MME_QM,
	GOYA_ASYNC_EVENT_ID_MME_CMDQ,
	GOYA_ASYNC_EVENT_ID_DMA0_QM,
	GOYA_ASYNC_EVENT_ID_DMA1_QM,
	GOYA_ASYNC_EVENT_ID_DMA2_QM,
	GOYA_ASYNC_EVENT_ID_DMA3_QM,
	GOYA_ASYNC_EVENT_ID_DMA4_QM,
	GOYA_ASYNC_EVENT_ID_DMA0_CH,
	GOYA_ASYNC_EVENT_ID_DMA1_CH,
	GOYA_ASYNC_EVENT_ID_DMA2_CH,
	GOYA_ASYNC_EVENT_ID_DMA3_CH,
	GOYA_ASYNC_EVENT_ID_DMA4_CH,
	GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH0,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH1,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH2,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH3,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH4
};

static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);

void goya_get_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int i;

	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
		prop->hw_queues_props[i].driver_only = 0;
	}

	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
		prop->hw_queues_props[i].driver_only = 1;
	}

	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
			NUMBER_OF_INT_HW_QUEUES; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
		prop->hw_queues_props[i].driver_only = 0;
	}

	for (; i < HL_MAX_QUEUES; i++)
		prop->hw_queues_props[i].type = QUEUE_TYPE_NA;

	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = DRAM_PHYS_DEFAULT_SIZE;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address +
						SRAM_USER_BASE_OFFSET;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	prop->mmu_dram_default_page_addr = MMU_DRAM_DEFAULT_PAGE_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000;
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;

	prop->va_space_host_start_address = VA_HOST_SPACE_START;
	prop->va_space_host_end_address = VA_HOST_SPACE_END;
	prop->va_space_dram_start_address = VA_DDR_SPACE_START;
	prop->va_space_dram_end_address = VA_DDR_SPACE_END;
	prop->dram_size_for_default_page_mapping =
			prop->va_space_dram_end_address;
	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
	prop->high_pll = PLL_HIGH_DEFAULT;
	prop->cb_pool_cb_cnt = GOYA_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GOYA_CB_POOL_CB_SIZE;
	prop->max_power_default = MAX_POWER_DEFAULT;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
}

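/*
 * goya_pci_bars_map - Map PCI BARs of the Goya device
 *
 * @hdev: pointer to hl_device structure
 *
 * Map the SRAM_CFG, MSIX and DDR BARs (the DDR BAR is write-combined) and
 * set hdev->rmmio so that register accesses go through the SRAM/CFG BAR.
 *
 * Return: 0 on success, non-zero otherwise.
 */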
static int goya_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM_CFG", "MSIX", "DDR"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] +
			(CFG_BASE - SRAM_BASE_ADDR);

	return 0;
}

static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
{
	struct goya_device *goya = hdev->asic_specific;
	u64 old_addr = addr;
	int rc;

	if ((goya) && (goya->ddr_bar_cur_addr == addr))
		return old_addr;

	rc = hl_pci_set_dram_bar_base(hdev, 1, 4, addr);
	if (rc)
		return U64_MAX;

	if (goya) {
		old_addr = goya->ddr_bar_cur_addr;
		goya->ddr_bar_cur_addr = addr;
	}

	return old_addr;
}

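/*
 * goya_init_iatu - Initialize the iATU unit inside the PCI controller
 *
 * @hdev: pointer to hl_device structure
 *
 * Configure the iATU address translation with the SRAM, DRAM and host
 * physical base addresses and the host physical size.
 *
 * Return: 0 on success, non-zero otherwise.
 */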
static int goya_init_iatu(struct hl_device *hdev)
{
	return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE,
				HOST_PHYS_BASE, HOST_PHYS_SIZE);
}

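/*
 * goya_early_init - Early initialization code for Goya
 *
 * @hdev: pointer to hl_device structure
 *
 * Fetch the fixed properties, verify the PCI BAR sizes, initialize the PCI
 * controller with a 48-bit DMA mask and warn if the PCI strap is not
 * configured correctly.
 *
 * Return: 0 on success, negative errno otherwise.
 */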
static int goya_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	u32 val;
	int rc;

	goya_get_fixed_properties(hdev);

	/* Check BAR sizes */
	if (pci_resource_len(pdev, SRAM_CFG_BAR_ID) != CFG_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			SRAM_CFG_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							SRAM_CFG_BAR_ID),
			CFG_BAR_SIZE);
		return -ENODEV;
	}

	if (pci_resource_len(pdev, MSIX_BAR_ID) != MSIX_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			MSIX_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							MSIX_BAR_ID),
			MSIX_BAR_SIZE);
		return -ENODEV;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);

	rc = hl_pci_init(hdev, 48);
	if (rc)
		return rc;

	if (!hdev->pldm) {
		val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
		if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
			dev_warn(hdev->dev,
				"PCI strap is not configured correctly, PCI bus errors may occur\n");
	}

	return 0;
}

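/*
 * goya_early_fini - Early finalization code for Goya
 *
 * @hdev: pointer to hl_device structure
 *
 * Unmap the PCI BARs and release the PCI controller.
 *
 * Return: 0 always.
 */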
static int goya_early_fini(struct hl_device *hdev)
{
	hl_pci_fini(hdev);

	return 0;
}

static void goya_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
	/* clear the previous ASID value before programming the new one */
	WREG32_AND(reg, ~0x7FF);
	WREG32_OR(reg, asid);
}

static void goya_qman0_set_security(struct hl_device *hdev, bool secure)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (secure)
		WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_FULLY_TRUSTED);
	else
		WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_PARTLY_TRUSTED);

	RREG32(mmDMA_QM_0_GLBL_PROT);
}

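/*
 * goya_fetch_psoc_frequency - Fetch the PSOC PCI PLL frequency parameters
 *
 * @hdev: pointer to hl_device structure
 *
 * Read the NR, NF, OD and DIV_FACTOR values of the PSOC PCI PLL into the
 * ASIC fixed properties.
 */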
static void goya_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	prop->psoc_pci_pll_nr = RREG32(mmPSOC_PCI_PLL_NR);
	prop->psoc_pci_pll_nf = RREG32(mmPSOC_PCI_PLL_NF);
	prop->psoc_pci_pll_od = RREG32(mmPSOC_PCI_PLL_OD);
	prop->psoc_pci_pll_div_factor = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
}

int goya_late_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	goya_fetch_psoc_frequency(hdev);

	rc = goya_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to clear MMU page tables range %d\n", rc);
		return rc;
	}

	rc = goya_mmu_set_dram_default_page(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to set DRAM default page %d\n", rc);
		return rc;
	}

	rc = goya_mmu_add_mappings_for_device_cpu(hdev);
	if (rc)
		return rc;

	rc = goya_init_cpu_queues(hdev);
	if (rc)
		return rc;

	rc = goya_test_cpu_queue(hdev);
	if (rc)
		return rc;

	rc = goya_armcp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get armcp info %d\n", rc);
		return rc;
	}

	/* Tell the MMU the log2 of the DRAM size */
	WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size));

	rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to enable PCI access from CPU %d\n", rc);
		return rc;
	}

	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_INTS_REGISTER);

	return 0;
}

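/*
 * goya_late_fini - Late teardown code for Goya
 *
 * @hdev: pointer to hl_device structure
 *
 * Free the hwmon channel info array that was allocated for sensor
 * registration.
 */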
void goya_late_fini(struct hl_device *hdev)
{
	const struct hwmon_channel_info **channel_info_arr;
	int i = 0;

	if (!hdev->hl_chip_info->info)
		return;

	channel_info_arr = hdev->hl_chip_info->info;

	while (channel_info_arr[i]) {
		kfree(channel_info_arr[i]->config);
		kfree(channel_info_arr[i]);
		i++;
	}

	kfree(channel_info_arr);

	hdev->hl_chip_info->info = NULL;
}

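/*
 * goya_sw_init - Goya software initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Allocate the ASIC-specific structure, the DMA pool, the CPU-accessible
 * DMA memory and its gen_pool allocator, and initialize the HW queues lock.
 *
 * Return: 0 on success, negative errno otherwise.
 */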
static int goya_sw_init(struct hl_device *hdev)
{
	struct goya_device *goya;
	int rc;

	goya = kzalloc(sizeof(*goya), GFP_KERNEL);
	if (!goya)
		return -ENOMEM;

	goya->ddr_bar_cur_addr = DRAM_PHYS_BASE;

	goya->mme_clk = GOYA_PLL_FREQ_LOW;
	goya->tpc_clk = GOYA_PLL_FREQ_LOW;
	goya->ic_clk = GOYA_PLL_FREQ_LOW;

	hdev->asic_specific = goya;

	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_goya_device;
	}

	hdev->cpu_accessible_dma_mem =
			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
					HL_CPU_ACCESSIBLE_MEM_SIZE,
					&hdev->cpu_accessible_dma_address,
					GFP_KERNEL | __GFP_ZERO);

	if (!hdev->cpu_accessible_dma_mem) {
		rc = -ENOMEM;
		goto free_dma_pool;
	}

	dev_dbg(hdev->dev, "cpu accessible memory at bus address %pad\n",
		&hdev->cpu_accessible_dma_address);

	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	spin_lock_init(&goya->hw_queues_lock);

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_goya_device:
	kfree(goya);

	return rc;
}

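/*
 * goya_sw_fini - Goya software tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 * Release everything allocated in goya_sw_init, in reverse order.
 *
 * Return: 0 always.
 */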
static int goya_sw_fini(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	kfree(goya);

	return 0;
}

static void goya_init_dma_qman(struct hl_device *hdev, int dma_id,
		dma_addr_t bus_address)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	u32 reg_off = dma_id * (mmDMA_QM_1_PQ_PI - mmDMA_QM_0_PQ_PI);

	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmDMA_QM_0_PQ_BASE_LO + reg_off, lower_32_bits(bus_address));
	WREG32(mmDMA_QM_0_PQ_BASE_HI + reg_off, upper_32_bits(bus_address));

	WREG32(mmDMA_QM_0_PQ_SIZE + reg_off, ilog2(HL_QUEUE_LENGTH));
	WREG32(mmDMA_QM_0_PQ_PI + reg_off, 0);
	WREG32(mmDMA_QM_0_PQ_CI + reg_off, 0);

	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
	WREG32(mmDMA_QM_0_GLBL_ERR_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_DMA0_QM + dma_id);

	WREG32(mmDMA_QM_0_PQ_CFG1 + reg_off, 0x00020002);
	WREG32(mmDMA_QM_0_CQ_CFG1 + reg_off, 0x00080008);

	if (goya->hw_cap_initialized & HW_CAP_MMU)
		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_PARTLY_TRUSTED);
	else
		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_FULLY_TRUSTED);

	WREG32(mmDMA_QM_0_GLBL_ERR_CFG + reg_off, QMAN_DMA_ERR_MSG_EN);
	WREG32(mmDMA_QM_0_GLBL_CFG0 + reg_off, QMAN_DMA_ENABLE);
}

static void goya_init_dma_ch(struct hl_device *hdev, int dma_id)
{
	u32 gic_base_lo, gic_base_hi;
	u64 sob_addr;
	u32 reg_off = dma_id * (mmDMA_CH_1_CFG1 - mmDMA_CH_0_CFG1);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmDMA_CH_0_ERRMSG_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmDMA_CH_0_ERRMSG_ADDR_HI + reg_off, gic_base_hi);
	WREG32(mmDMA_CH_0_ERRMSG_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_DMA0_CH + dma_id);

	if (dma_id)
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
				(dma_id - 1) * 4;
	else
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;

	WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off, upper_32_bits(sob_addr));
	WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001);
}

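/*
 * goya_init_dma_qmans - Initialize QMAN DMA registers
 *
 * @hdev: pointer to hl_device structure
 *
 * Initialize the H/W registers of the QMAN DMA channels of all external
 * queues.
 */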
void goya_init_dma_qmans(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	struct hl_hw_queue *q;
	int i;

	if (goya->hw_cap_initialized & HW_CAP_DMA)
		return;

	q = &hdev->kernel_queues[0];

	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++, q++) {
		goya_init_dma_qman(hdev, i, q->bus_address);
		goya_init_dma_ch(hdev, i);
	}

	goya->hw_cap_initialized |= HW_CAP_DMA;
}

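/*
 * goya_disable_external_queues - Disable all the external DMA queues
 *
 * @hdev: pointer to hl_device structure
 */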
static void goya_disable_external_queues(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
		return;

	WREG32(mmDMA_QM_0_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_1_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_2_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_3_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_4_GLBL_CFG0, 0);
}

static int goya_stop_queue(struct hl_device *hdev, u32 cfg_reg,
				u32 cp_sts_reg, u32 glbl_sts0_reg)
{
	int rc;
	u32 status;

	/* Stop the CP of the queue */
	WREG32(cfg_reg, 1 << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);

	status = RREG32(cp_sts_reg);
	if (status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK) {
		rc = hl_poll_timeout(
			hdev,
			cp_sts_reg,
			status,
			!(status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK),
			1000,
			QMAN_FENCE_TIMEOUT_USEC);

		/* a CP stuck on a fence will never report stop; do not
		 * treat this as a failure of the stop flow
		 */
		if (rc)
			return 0;
	}

	rc = hl_poll_timeout(
		hdev,
		glbl_sts0_reg,
		status,
		(status & TPC0_QM_GLBL_STS0_CP_IS_STOP_MASK),
		1000,
		QMAN_STOP_TIMEOUT_USEC);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for QMAN to stop\n");
		return -EINVAL;
	}

	return 0;
}

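/*
 * goya_stop_external_queues - Stop external queues
 *
 * @hdev: pointer to hl_device structure
 *
 * Stop the CPs of all five external DMA QMANs.
 *
 * Return: 0 on success, -EIO if any queue failed to stop.
 */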
static int goya_stop_external_queues(struct hl_device *hdev)
{
	int rc, retval = 0;

	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
		return retval;

	rc = goya_stop_queue(hdev,
			mmDMA_QM_0_GLBL_CFG1,
			mmDMA_QM_0_CP_STS,
			mmDMA_QM_0_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 0\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_1_GLBL_CFG1,
			mmDMA_QM_1_CP_STS,
			mmDMA_QM_1_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 1\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_2_GLBL_CFG1,
			mmDMA_QM_2_CP_STS,
			mmDMA_QM_2_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 2\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_3_GLBL_CFG1,
			mmDMA_QM_3_CP_STS,
			mmDMA_QM_3_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 3\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_4_GLBL_CFG1,
			mmDMA_QM_4_CP_STS,
			mmDMA_QM_4_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 4\n");
		retval = -EIO;
	}

	return retval;
}

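/*
 * goya_init_cpu_queues - Initialize the H/W registers of the CPU queues
 *
 * @hdev: pointer to hl_device structure
 *
 * Configure the PQ, EQ and CQ base addresses and lengths, then signal the
 * device CPU and wait for it to report it is ready for host communication.
 *
 * Return: 0 on success, -EIO if the device CPU did not respond in time.
 */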
int goya_init_cpu_queues(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	struct hl_eq *eq;
	u32 status;
	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
	int err;

	if (!hdev->cpu_queues_enable)
		return 0;

	if (goya->hw_cap_initialized & HW_CAP_CPU_Q)
		return 0;

	eq = &hdev->event_queue;

	WREG32(mmCPU_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
	WREG32(mmCPU_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

	WREG32(mmCPU_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
	WREG32(mmCPU_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

	WREG32(mmCPU_CQ_BASE_ADDR_LOW,
			lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
	WREG32(mmCPU_CQ_BASE_ADDR_HIGH,
			upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));

	WREG32(mmCPU_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
	WREG32(mmCPU_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
	WREG32(mmCPU_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

	/* Reset the event queue consumer index */
	WREG32(mmCPU_EQ_CI, 0);

	WREG32(mmCPU_IF_PF_PQ_PI, 0);

	WREG32(mmCPU_PQ_INIT_STATUS, PQ_INIT_STATUS_READY_FOR_CP);

	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_PI_UPDATE);

	err = hl_poll_timeout(
		hdev,
		mmCPU_PQ_INIT_STATUS,
		status,
		(status == PQ_INIT_STATUS_READY_FOR_HOST),
		1000,
		GOYA_CPU_TIMEOUT_USEC);

	if (err) {
		dev_err(hdev->dev,
			"Failed to setup communication with device CPU\n");
		return -EIO;
	}

	goya->hw_cap_initialized |= HW_CAP_CPU_Q;
	return 0;
}

static void goya_set_pll_refclk(struct hl_device *hdev)
{
	WREG32(mmCPU_PLL_DIV_SEL_0, 0x0);
	WREG32(mmCPU_PLL_DIV_SEL_1, 0x0);
	WREG32(mmCPU_PLL_DIV_SEL_2, 0x0);
	WREG32(mmCPU_PLL_DIV_SEL_3, 0x0);

	WREG32(mmIC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmIC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmIC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmIC_PLL_DIV_SEL_3, 0x0);

	WREG32(mmMC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmMC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmMC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmMC_PLL_DIV_SEL_3, 0x0);

	WREG32(mmPSOC_MME_PLL_DIV_SEL_0, 0x0);
	WREG32(mmPSOC_MME_PLL_DIV_SEL_1, 0x0);
	WREG32(mmPSOC_MME_PLL_DIV_SEL_2, 0x0);
	WREG32(mmPSOC_MME_PLL_DIV_SEL_3, 0x0);

	WREG32(mmPSOC_PCI_PLL_DIV_SEL_0, 0x0);
	WREG32(mmPSOC_PCI_PLL_DIV_SEL_1, 0x0);
	WREG32(mmPSOC_PCI_PLL_DIV_SEL_2, 0x0);
	WREG32(mmPSOC_PCI_PLL_DIV_SEL_3, 0x0);

	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_3, 0x0);

	WREG32(mmTPC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmTPC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmTPC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmTPC_PLL_DIV_SEL_3, 0x0);
}

static void goya_disable_clk_rlx(struct hl_device *hdev)
{
	WREG32(mmPSOC_MME_PLL_CLK_RLX_0, 0x100010);
	WREG32(mmIC_PLL_CLK_RLX_0, 0x100010);
}

static void _goya_tpc_mbist_workaround(struct hl_device *hdev, u8 tpc_id)
{
	u64 tpc_eml_address;
	u32 val, tpc_offset, tpc_eml_offset, tpc_slm_offset;
	int err, slm_index;

	tpc_offset = tpc_id * 0x40000;
	tpc_eml_offset = tpc_id * 0x200000;
	tpc_eml_address = (mmTPC0_EML_CFG_BASE + tpc_eml_offset - CFG_BASE);
	tpc_slm_offset = tpc_eml_address + 0x100000;

	val = RREG32(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset);
	if (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_ACTIVE_MASK)
		dev_warn(hdev->dev, "TPC%d MBIST ACTIVE is not cleared\n",
				tpc_id);

	WREG32(mmTPC0_CFG_FUNC_MBIST_PAT + tpc_offset, val & 0xFFFFF000);

	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_0 + tpc_offset, 0x37FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_1 + tpc_offset, 0x303F);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_2 + tpc_offset, 0x71FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_3 + tpc_offset, 0x71FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_4 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_5 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_6 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_7 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_8 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_9 + tpc_offset, 0x70FF);

	WREG32_OR(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
			1 << TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_START_SHIFT);

	err = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
		val,
		(val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_DONE_MASK),
		1000,
		HL_DEVICE_TIMEOUT_USEC);

	if (err)
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d MBIST DONE\n", tpc_id);

	WREG32_OR(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
			1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT);

	msleep(GOYA_RESET_WAIT_MSEC);

	WREG32_AND(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
			~(1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT));

	msleep(GOYA_RESET_WAIT_MSEC);

	for (slm_index = 0 ; slm_index < 256 ; slm_index++)
		WREG32(tpc_slm_offset + (slm_index << 2), 0);

	val = RREG32(tpc_slm_offset);
}

static void goya_tpc_mbist_workaround(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int i;

	if (hdev->pldm)
		return;

	if (goya->hw_cap_initialized & HW_CAP_TPC_MBIST)
		return;

	for (i = 0 ; i < TPC_MAX_NUM ; i++)
		_goya_tpc_mbist_workaround(hdev, i);

	goya->hw_cap_initialized |= HW_CAP_TPC_MBIST;
}

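/*
 * goya_init_golden_registers - Initialize "golden" registers
 *
 * @hdev: pointer to hl_device structure
 *
 * Program the recommended values into the SRAM, MME and TPC router
 * arbitration registers, the routers' split coefficients and scrambling,
 * and the TPC interrupt mask.
 */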
static void goya_init_golden_registers(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 polynom[10], tpc_intr_mask, offset;
	int i;

	if (goya->hw_cap_initialized & HW_CAP_GOLDEN)
		return;

	polynom[0] = 0x00020080;
	polynom[1] = 0x00401000;
	polynom[2] = 0x00200800;
	polynom[3] = 0x00002000;
	polynom[4] = 0x00080200;
	polynom[5] = 0x00040100;
	polynom[6] = 0x00100400;
	polynom[7] = 0x00004000;
	polynom[8] = 0x00010000;
	polynom[9] = 0x00008000;

	tpc_intr_mask = 0x7FFF;

	for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x20000) {
		WREG32(mmSRAM_Y0_X0_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_L_ARB + offset, 0x204);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_E_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_E_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_E_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_E_ARB + offset, 0x207);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_E_ARB + offset, 0x207);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_W_ARB + offset, 0x207);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_W_ARB + offset, 0x207);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_W_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_W_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_W_ARB + offset, 0x206);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_E_ARB + offset, 0x101);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_E_ARB + offset, 0x102);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_E_ARB + offset, 0x103);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_E_ARB + offset, 0x104);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_E_ARB + offset, 0x105);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_W_ARB + offset, 0x105);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_W_ARB + offset, 0x104);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_W_ARB + offset, 0x103);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_W_ARB + offset, 0x102);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_W_ARB + offset, 0x101);
	}

	WREG32(mmMME_STORE_MAX_CREDIT, 0x21);
	WREG32(mmMME_AGU, 0x0f0f0f10);
	WREG32(mmMME_SEI_MASK, ~0x0);

	WREG32(mmMME6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmMME5_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
	WREG32(mmMME4_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
	WREG32(mmMME3_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
	WREG32(mmMME2_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmMME1_RTR_HBW_RD_RQ_N_ARB, 0x07010701);
	WREG32(mmMME6_RTR_HBW_RD_RQ_S_ARB, 0x04010401);
	WREG32(mmMME5_RTR_HBW_RD_RQ_S_ARB, 0x04050401);
	WREG32(mmMME4_RTR_HBW_RD_RQ_S_ARB, 0x03070301);
	WREG32(mmMME3_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
	WREG32(mmMME2_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_RD_RQ_S_ARB, 0x01050105);
	WREG32(mmMME6_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
	WREG32(mmMME5_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
	WREG32(mmMME4_RTR_HBW_RD_RQ_W_ARB, 0x01040301);
	WREG32(mmMME3_RTR_HBW_RD_RQ_W_ARB, 0x01030401);
	WREG32(mmMME2_RTR_HBW_RD_RQ_W_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_RD_RQ_W_ARB, 0x01050101);
	WREG32(mmMME6_RTR_HBW_WR_RQ_N_ARB, 0x02020202);
	WREG32(mmMME5_RTR_HBW_WR_RQ_N_ARB, 0x01070101);
	WREG32(mmMME4_RTR_HBW_WR_RQ_N_ARB, 0x02020201);
	WREG32(mmMME3_RTR_HBW_WR_RQ_N_ARB, 0x07020701);
	WREG32(mmMME2_RTR_HBW_WR_RQ_N_ARB, 0x01020101);
	WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
	WREG32(mmMME6_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
	WREG32(mmMME5_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
	WREG32(mmMME4_RTR_HBW_WR_RQ_S_ARB, 0x07020701);
	WREG32(mmMME3_RTR_HBW_WR_RQ_S_ARB, 0x02020201);
	WREG32(mmMME2_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
	WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01020102);
	WREG32(mmMME6_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
	WREG32(mmMME5_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
	WREG32(mmMME4_RTR_HBW_WR_RQ_W_ARB, 0x07020707);
	WREG32(mmMME3_RTR_HBW_WR_RQ_W_ARB, 0x01020201);
	WREG32(mmMME2_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
	WREG32(mmMME1_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
	WREG32(mmMME6_RTR_HBW_RD_RS_N_ARB, 0x01070102);
	WREG32(mmMME5_RTR_HBW_RD_RS_N_ARB, 0x01070102);
	WREG32(mmMME4_RTR_HBW_RD_RS_N_ARB, 0x01060102);
	WREG32(mmMME3_RTR_HBW_RD_RS_N_ARB, 0x01040102);
	WREG32(mmMME2_RTR_HBW_RD_RS_N_ARB, 0x01020102);
	WREG32(mmMME1_RTR_HBW_RD_RS_N_ARB, 0x01020107);
	WREG32(mmMME6_RTR_HBW_RD_RS_S_ARB, 0x01020106);
	WREG32(mmMME5_RTR_HBW_RD_RS_S_ARB, 0x01020102);
	WREG32(mmMME4_RTR_HBW_RD_RS_S_ARB, 0x01040102);
	WREG32(mmMME3_RTR_HBW_RD_RS_S_ARB, 0x01060102);
	WREG32(mmMME2_RTR_HBW_RD_RS_S_ARB, 0x01070102);
	WREG32(mmMME1_RTR_HBW_RD_RS_S_ARB, 0x01070102);
	WREG32(mmMME6_RTR_HBW_RD_RS_E_ARB, 0x01020702);
	WREG32(mmMME5_RTR_HBW_RD_RS_E_ARB, 0x01020702);
	WREG32(mmMME4_RTR_HBW_RD_RS_E_ARB, 0x01040602);
	WREG32(mmMME3_RTR_HBW_RD_RS_E_ARB, 0x01060402);
	WREG32(mmMME2_RTR_HBW_RD_RS_E_ARB, 0x01070202);
	WREG32(mmMME1_RTR_HBW_RD_RS_E_ARB, 0x01070102);
	WREG32(mmMME6_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME5_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME4_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME3_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME2_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME1_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
	WREG32(mmMME5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
	WREG32(mmMME4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
	WREG32(mmMME3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
	WREG32(mmMME2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
	WREG32(mmMME1_RTR_HBW_WR_RS_N_ARB, 0x01010107);
	WREG32(mmMME6_RTR_HBW_WR_RS_S_ARB, 0x01010107);
	WREG32(mmMME5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
	WREG32(mmMME4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
	WREG32(mmMME3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
	WREG32(mmMME2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
	WREG32(mmMME6_RTR_HBW_WR_RS_E_ARB, 0x01010501);
	WREG32(mmMME5_RTR_HBW_WR_RS_E_ARB, 0x01010501);
	WREG32(mmMME4_RTR_HBW_WR_RS_E_ARB, 0x01040301);
	WREG32(mmMME3_RTR_HBW_WR_RS_E_ARB, 0x01030401);
	WREG32(mmMME2_RTR_HBW_WR_RS_E_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_WR_RS_E_ARB, 0x01050101);
	WREG32(mmMME6_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME5_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME4_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME3_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME2_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME1_RTR_HBW_WR_RS_W_ARB, 0x01010101);

	WREG32(mmTPC1_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_RD_RQ_E_ARB, 0x01060101);
	WREG32(mmTPC1_RTR_HBW_WR_RQ_N_ARB, 0x02020102);
	WREG32(mmTPC1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_WR_RQ_E_ARB, 0x02070202);
	WREG32(mmTPC1_RTR_HBW_RD_RS_N_ARB, 0x01020201);
	WREG32(mmTPC1_RTR_HBW_RD_RS_S_ARB, 0x01070201);
	WREG32(mmTPC1_RTR_HBW_RD_RS_W_ARB, 0x01070202);
	WREG32(mmTPC1_RTR_HBW_WR_RS_N_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
	WREG32(mmTPC1_RTR_HBW_WR_RS_W_ARB, 0x01050101);

	WREG32(mmTPC2_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
	WREG32(mmTPC2_RTR_HBW_RD_RQ_S_ARB, 0x01050101);
	WREG32(mmTPC2_RTR_HBW_RD_RQ_E_ARB, 0x01010201);
	WREG32(mmTPC2_RTR_HBW_WR_RQ_N_ARB, 0x02040102);
	WREG32(mmTPC2_RTR_HBW_WR_RQ_S_ARB, 0x01050101);
	WREG32(mmTPC2_RTR_HBW_WR_RQ_E_ARB, 0x02060202);
	WREG32(mmTPC2_RTR_HBW_RD_RS_N_ARB, 0x01020201);
	WREG32(mmTPC2_RTR_HBW_RD_RS_S_ARB, 0x01070201);
	WREG32(mmTPC2_RTR_HBW_RD_RS_W_ARB, 0x01070202);
	WREG32(mmTPC2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
	WREG32(mmTPC2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
	WREG32(mmTPC2_RTR_HBW_WR_RS_W_ARB, 0x01040101);

	WREG32(mmTPC3_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
	WREG32(mmTPC3_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
	WREG32(mmTPC3_RTR_HBW_RD_RQ_E_ARB, 0x01040301);
	WREG32(mmTPC3_RTR_HBW_WR_RQ_N_ARB, 0x02060102);
	WREG32(mmTPC3_RTR_HBW_WR_RQ_S_ARB, 0x01040101);
	WREG32(mmTPC3_RTR_HBW_WR_RQ_E_ARB, 0x01040301);
	WREG32(mmTPC3_RTR_HBW_RD_RS_N_ARB, 0x01040201);
	WREG32(mmTPC3_RTR_HBW_RD_RS_S_ARB, 0x01060201);
	WREG32(mmTPC3_RTR_HBW_RD_RS_W_ARB, 0x01060402);
	WREG32(mmTPC3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
	WREG32(mmTPC3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
	WREG32(mmTPC3_RTR_HBW_WR_RS_W_ARB, 0x01030401);

	WREG32(mmTPC4_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
	WREG32(mmTPC4_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
	WREG32(mmTPC4_RTR_HBW_RD_RQ_E_ARB, 0x01030401);
	WREG32(mmTPC4_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
	WREG32(mmTPC4_RTR_HBW_WR_RQ_S_ARB, 0x01030101);
	WREG32(mmTPC4_RTR_HBW_WR_RQ_E_ARB, 0x02060702);
	WREG32(mmTPC4_RTR_HBW_RD_RS_N_ARB, 0x01060201);
	WREG32(mmTPC4_RTR_HBW_RD_RS_S_ARB, 0x01040201);
	WREG32(mmTPC4_RTR_HBW_RD_RS_W_ARB, 0x01040602);
	WREG32(mmTPC4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
	WREG32(mmTPC4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
	WREG32(mmTPC4_RTR_HBW_WR_RS_W_ARB, 0x01040301);

	WREG32(mmTPC5_RTR_HBW_RD_RQ_N_ARB, 0x01050101);
	WREG32(mmTPC5_RTR_HBW_RD_RQ_S_ARB, 0x01020101);
	WREG32(mmTPC5_RTR_HBW_RD_RQ_E_ARB, 0x01200501);
	WREG32(mmTPC5_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
	WREG32(mmTPC5_RTR_HBW_WR_RQ_S_ARB, 0x01020101);
	WREG32(mmTPC5_RTR_HBW_WR_RQ_E_ARB, 0x02020602);
	WREG32(mmTPC5_RTR_HBW_RD_RS_N_ARB, 0x01070201);
	WREG32(mmTPC5_RTR_HBW_RD_RS_S_ARB, 0x01020201);
	WREG32(mmTPC5_RTR_HBW_RD_RS_W_ARB, 0x01020702);
	WREG32(mmTPC5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
	WREG32(mmTPC5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
	WREG32(mmTPC5_RTR_HBW_WR_RS_W_ARB, 0x01010501);

	WREG32(mmTPC6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RQ_E_ARB, 0x01010601);
	WREG32(mmTPC6_RTR_HBW_WR_RQ_N_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_WR_RQ_E_ARB, 0x02020702);
	WREG32(mmTPC6_RTR_HBW_RD_RS_N_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RS_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RS_W_ARB, 0x01020702);
	WREG32(mmTPC6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
	WREG32(mmTPC6_RTR_HBW_WR_RS_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_WR_RS_W_ARB, 0x01010501);

	for (i = 0, offset = 0 ; i < 10 ; i++, offset += 4) {
		WREG32(mmMME1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);

		WREG32(mmTPC0_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC7_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);

		WREG32(mmPCI_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmDMA_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
	}

	for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x40000) {
		WREG32(mmMME1_RTR_SCRAMB_EN + offset,
				1 << MME1_RTR_SCRAMB_EN_VAL_SHIFT);
		WREG32(mmMME1_RTR_NON_LIN_SCRAMB + offset,
				1 << MME1_RTR_NON_LIN_SCRAMB_EN_SHIFT);
	}

	for (i = 0, offset = 0 ; i < 8 ; i++, offset += 0x40000) {
		WREG32(mmTPC0_CFG_TPC_INTR_MASK + offset, tpc_intr_mask);

		WREG32(mmTPC0_NRTR_SCRAMB_EN + offset,
				1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT);
		WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset,
				1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
	}

	WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT);
	WREG32(mmDMA_NRTR_NON_LIN_SCRAMB,
			1 << DMA_NRTR_NON_LIN_SCRAMB_EN_SHIFT);

	WREG32(mmPCI_NRTR_SCRAMB_EN, 1 << PCI_NRTR_SCRAMB_EN_VAL_SHIFT);
	WREG32(mmPCI_NRTR_NON_LIN_SCRAMB,
			1 << PCI_NRTR_NON_LIN_SCRAMB_EN_SHIFT);

	WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0);

	WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);

	goya->hw_cap_initialized |= HW_CAP_GOLDEN;
}

static void goya_init_mme_qman(struct hl_device *hdev)
{
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	u64 qman_base_addr;

	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	qman_base_addr = hdev->asic_prop.sram_base_address +
			MME_QMAN_BASE_OFFSET;

	WREG32(mmMME_QM_PQ_BASE_LO, lower_32_bits(qman_base_addr));
	WREG32(mmMME_QM_PQ_BASE_HI, upper_32_bits(qman_base_addr));
	WREG32(mmMME_QM_PQ_SIZE, ilog2(MME_QMAN_LENGTH));
	WREG32(mmMME_QM_PQ_PI, 0);
	WREG32(mmMME_QM_PQ_CI, 0);
	WREG32(mmMME_QM_CP_LDMA_SRC_BASE_LO_OFFSET, 0x10C0);
	WREG32(mmMME_QM_CP_LDMA_SRC_BASE_HI_OFFSET, 0x10C4);
	WREG32(mmMME_QM_CP_LDMA_TSIZE_OFFSET, 0x10C8);
	WREG32(mmMME_QM_CP_LDMA_COMMIT_OFFSET, 0x10CC);

	WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
	WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
	WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_LO, so_base_lo);
	WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_HI, so_base_hi);

	WREG32(mmMME_QM_CQ_CFG1, 0x00080008);

	WREG32(mmMME_QM_GLBL_ERR_ADDR_LO, gic_base_lo);
	WREG32(mmMME_QM_GLBL_ERR_ADDR_HI, gic_base_hi);

	WREG32(mmMME_QM_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_QM);

	WREG32(mmMME_QM_GLBL_ERR_CFG, QMAN_MME_ERR_MSG_EN);

	WREG32(mmMME_QM_GLBL_PROT, QMAN_MME_ERR_PROT);

	WREG32(mmMME_QM_GLBL_CFG0, QMAN_MME_ENABLE);
}

static void goya_init_mme_cmdq(struct hl_device *hdev)
{
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	u64 qman_base_addr;

	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	qman_base_addr = hdev->asic_prop.sram_base_address +
			MME_QMAN_BASE_OFFSET;

	WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
	WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
	WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO, so_base_lo);
	WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_HI, so_base_hi);

	WREG32(mmMME_CMDQ_CQ_CFG1, 0x00140014);

	WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_LO, gic_base_lo);
	WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_HI, gic_base_hi);

	WREG32(mmMME_CMDQ_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_CMDQ);

	WREG32(mmMME_CMDQ_GLBL_ERR_CFG, CMDQ_MME_ERR_MSG_EN);

	WREG32(mmMME_CMDQ_GLBL_PROT, CMDQ_MME_ERR_PROT);

	WREG32(mmMME_CMDQ_GLBL_CFG0, CMDQ_MME_ENABLE);
}

void goya_init_mme_qmans(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 so_base_lo, so_base_hi;

	if (goya->hw_cap_initialized & HW_CAP_MME)
		return;

	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	WREG32(mmMME_SM_BASE_ADDRESS_LOW, so_base_lo);
	WREG32(mmMME_SM_BASE_ADDRESS_HIGH, so_base_hi);

	goya_init_mme_qman(hdev);
	goya_init_mme_cmdq(hdev);

	goya->hw_cap_initialized |= HW_CAP_MME;
}

static void goya_init_tpc_qman(struct hl_device *hdev, u32 base_off, int tpc_id)
{
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	u64 qman_base_addr;
	u32 reg_off = tpc_id * (mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI);

	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	qman_base_addr = hdev->asic_prop.sram_base_address + base_off;

	WREG32(mmTPC0_QM_PQ_BASE_LO + reg_off, lower_32_bits(qman_base_addr));
	WREG32(mmTPC0_QM_PQ_BASE_HI + reg_off, upper_32_bits(qman_base_addr));
	WREG32(mmTPC0_QM_PQ_SIZE + reg_off, ilog2(TPC_QMAN_LENGTH));
	WREG32(mmTPC0_QM_PQ_PI + reg_off, 0);
	WREG32(mmTPC0_QM_PQ_CI + reg_off, 0);
	WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET + reg_off, 0x10C0);
	WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_HI_OFFSET + reg_off, 0x10C4);
	WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET + reg_off, 0x10C8);
	WREG32(mmTPC0_QM_CP_LDMA_COMMIT_OFFSET + reg_off, 0x10CC);

	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);

	WREG32(mmTPC0_QM_CQ_CFG1 + reg_off, 0x00080008);

	WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);

	WREG32(mmTPC0_QM_GLBL_ERR_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_TPC0_QM + tpc_id);

	WREG32(mmTPC0_QM_GLBL_ERR_CFG + reg_off, QMAN_TPC_ERR_MSG_EN);

	WREG32(mmTPC0_QM_GLBL_PROT + reg_off, QMAN_TPC_ERR_PROT);

	WREG32(mmTPC0_QM_GLBL_CFG0 + reg_off, QMAN_TPC_ENABLE);
}

static void goya_init_tpc_cmdq(struct hl_device *hdev, int tpc_id)
{
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	u32 reg_off = tpc_id * (mmTPC1_CMDQ_CQ_CFG1 - mmTPC0_CMDQ_CQ_CFG1);

	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
	WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
	WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
	WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);

	WREG32(mmTPC0_CMDQ_CQ_CFG1 + reg_off, 0x00140014);

	WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);

	WREG32(mmTPC0_CMDQ_GLBL_ERR_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_TPC0_CMDQ + tpc_id);

	WREG32(mmTPC0_CMDQ_GLBL_ERR_CFG + reg_off, CMDQ_TPC_ERR_MSG_EN);

	WREG32(mmTPC0_CMDQ_GLBL_PROT + reg_off, CMDQ_TPC_ERR_PROT);

	WREG32(mmTPC0_CMDQ_GLBL_CFG0 + reg_off, CMDQ_TPC_ENABLE);
}

void goya_init_tpc_qmans(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 so_base_lo, so_base_hi;
	u32 cfg_off = mmTPC1_CFG_SM_BASE_ADDRESS_LOW -
			mmTPC0_CFG_SM_BASE_ADDRESS_LOW;
	int i;

	if (goya->hw_cap_initialized & HW_CAP_TPC)
		return;

	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	for (i = 0 ; i < TPC_MAX_NUM ; i++) {
		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_LOW + i * cfg_off,
				so_base_lo);
		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + i * cfg_off,
				so_base_hi);
	}

	goya_init_tpc_qman(hdev, TPC0_QMAN_BASE_OFFSET, 0);
	goya_init_tpc_qman(hdev, TPC1_QMAN_BASE_OFFSET, 1);
	goya_init_tpc_qman(hdev, TPC2_QMAN_BASE_OFFSET, 2);
	goya_init_tpc_qman(hdev, TPC3_QMAN_BASE_OFFSET, 3);
	goya_init_tpc_qman(hdev, TPC4_QMAN_BASE_OFFSET, 4);
	goya_init_tpc_qman(hdev, TPC5_QMAN_BASE_OFFSET, 5);
	goya_init_tpc_qman(hdev, TPC6_QMAN_BASE_OFFSET, 6);
	goya_init_tpc_qman(hdev, TPC7_QMAN_BASE_OFFSET, 7);

	for (i = 0 ; i < TPC_MAX_NUM ; i++)
		goya_init_tpc_cmdq(hdev, i);

	goya->hw_cap_initialized |= HW_CAP_TPC;
}

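/*
 * goya_disable_internal_queues - Disable all internal queues
 *
 * @hdev: pointer to hl_device structure
 *
 * Disable the MME and TPC QMANs and CMDQs, according to the initialized
 * capabilities.
 */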
static void goya_disable_internal_queues(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_MME))
		goto disable_tpc;

	WREG32(mmMME_QM_GLBL_CFG0, 0);
	WREG32(mmMME_CMDQ_GLBL_CFG0, 0);

disable_tpc:
	if (!(goya->hw_cap_initialized & HW_CAP_TPC))
		return;

	WREG32(mmTPC0_QM_GLBL_CFG0, 0);
	WREG32(mmTPC0_CMDQ_GLBL_CFG0, 0);

	WREG32(mmTPC1_QM_GLBL_CFG0, 0);
	WREG32(mmTPC1_CMDQ_GLBL_CFG0, 0);

	WREG32(mmTPC2_QM_GLBL_CFG0, 0);
	WREG32(mmTPC2_CMDQ_GLBL_CFG0, 0);

	WREG32(mmTPC3_QM_GLBL_CFG0, 0);
	WREG32(mmTPC3_CMDQ_GLBL_CFG0, 0);

	WREG32(mmTPC4_QM_GLBL_CFG0, 0);
	WREG32(mmTPC4_CMDQ_GLBL_CFG0, 0);

	WREG32(mmTPC5_QM_GLBL_CFG0, 0);
	WREG32(mmTPC5_CMDQ_GLBL_CFG0, 0);

	WREG32(mmTPC6_QM_GLBL_CFG0, 0);
	WREG32(mmTPC6_CMDQ_GLBL_CFG0, 0);

	WREG32(mmTPC7_QM_GLBL_CFG0, 0);
	WREG32(mmTPC7_CMDQ_GLBL_CFG0, 0);
}

1769 /*
1770  * goya_stop_internal_queues - Stop internal queues
1771  *
1772  * @hdev: pointer to hl_device structure
1773  *
1774  * Returns 0 on success
1775  *
1776  */
1777 static int goya_stop_internal_queues(struct hl_device *hdev)
1778 {
1779 struct goya_device *goya = hdev->asic_specific;
1780 int rc, retval = 0;
1781
1782 if (!(goya->hw_cap_initialized & HW_CAP_MME))
1783 goto stop_tpc;
1784
1785
1786 /*
1787  * Each queue (QMAN) is a separate H/W logic. That means that each
1788  * QMAN can be stopped independently, and we wait for each one to
1789  * actually stop before moving to the next
1790  */
1791 rc = goya_stop_queue(hdev,
1792 mmMME_QM_GLBL_CFG1,
1793 mmMME_QM_CP_STS,
1794 mmMME_QM_GLBL_STS0);
1795
1796 if (rc) {
1797 dev_err(hdev->dev, "failed to stop MME QMAN\n");
1798 retval = -EIO;
1799 }
1800
1801 rc = goya_stop_queue(hdev,
1802 mmMME_CMDQ_GLBL_CFG1,
1803 mmMME_CMDQ_CP_STS,
1804 mmMME_CMDQ_GLBL_STS0);
1805
1806 if (rc) {
1807 dev_err(hdev->dev, "failed to stop MME CMDQ\n");
1808 retval = -EIO;
1809 }
1810
1811 stop_tpc:
1812 if (!(goya->hw_cap_initialized & HW_CAP_TPC))
1813 return retval;
1814
1815 rc = goya_stop_queue(hdev,
1816 mmTPC0_QM_GLBL_CFG1,
1817 mmTPC0_QM_CP_STS,
1818 mmTPC0_QM_GLBL_STS0);
1819
1820 if (rc) {
1821 dev_err(hdev->dev, "failed to stop TPC 0 QMAN\n");
1822 retval = -EIO;
1823 }
1824
1825 rc = goya_stop_queue(hdev,
1826 mmTPC0_CMDQ_GLBL_CFG1,
1827 mmTPC0_CMDQ_CP_STS,
1828 mmTPC0_CMDQ_GLBL_STS0);
1829
1830 if (rc) {
1831 dev_err(hdev->dev, "failed to stop TPC 0 CMDQ\n");
1832 retval = -EIO;
1833 }
1834
1835 rc = goya_stop_queue(hdev,
1836 mmTPC1_QM_GLBL_CFG1,
1837 mmTPC1_QM_CP_STS,
1838 mmTPC1_QM_GLBL_STS0);
1839
1840 if (rc) {
1841 dev_err(hdev->dev, "failed to stop TPC 1 QMAN\n");
1842 retval = -EIO;
1843 }
1844
1845 rc = goya_stop_queue(hdev,
1846 mmTPC1_CMDQ_GLBL_CFG1,
1847 mmTPC1_CMDQ_CP_STS,
1848 mmTPC1_CMDQ_GLBL_STS0);
1849
1850 if (rc) {
1851 dev_err(hdev->dev, "failed to stop TPC 1 CMDQ\n");
1852 retval = -EIO;
1853 }
1854
1855 rc = goya_stop_queue(hdev,
1856 mmTPC2_QM_GLBL_CFG1,
1857 mmTPC2_QM_CP_STS,
1858 mmTPC2_QM_GLBL_STS0);
1859
1860 if (rc) {
1861 dev_err(hdev->dev, "failed to stop TPC 2 QMAN\n");
1862 retval = -EIO;
1863 }
1864
1865 rc = goya_stop_queue(hdev,
1866 mmTPC2_CMDQ_GLBL_CFG1,
1867 mmTPC2_CMDQ_CP_STS,
1868 mmTPC2_CMDQ_GLBL_STS0);
1869
1870 if (rc) {
1871 dev_err(hdev->dev, "failed to stop TPC 2 CMDQ\n");
1872 retval = -EIO;
1873 }
1874
1875 rc = goya_stop_queue(hdev,
1876 mmTPC3_QM_GLBL_CFG1,
1877 mmTPC3_QM_CP_STS,
1878 mmTPC3_QM_GLBL_STS0);
1879
1880 if (rc) {
1881 dev_err(hdev->dev, "failed to stop TPC 3 QMAN\n");
1882 retval = -EIO;
1883 }
1884
1885 rc = goya_stop_queue(hdev,
1886 mmTPC3_CMDQ_GLBL_CFG1,
1887 mmTPC3_CMDQ_CP_STS,
1888 mmTPC3_CMDQ_GLBL_STS0);
1889
1890 if (rc) {
1891 dev_err(hdev->dev, "failed to stop TPC 3 CMDQ\n");
1892 retval = -EIO;
1893 }
1894
1895 rc = goya_stop_queue(hdev,
1896 mmTPC4_QM_GLBL_CFG1,
1897 mmTPC4_QM_CP_STS,
1898 mmTPC4_QM_GLBL_STS0);
1899
1900 if (rc) {
1901 dev_err(hdev->dev, "failed to stop TPC 4 QMAN\n");
1902 retval = -EIO;
1903 }
1904
1905 rc = goya_stop_queue(hdev,
1906 mmTPC4_CMDQ_GLBL_CFG1,
1907 mmTPC4_CMDQ_CP_STS,
1908 mmTPC4_CMDQ_GLBL_STS0);
1909
1910 if (rc) {
1911 dev_err(hdev->dev, "failed to stop TPC 4 CMDQ\n");
1912 retval = -EIO;
1913 }
1914
1915 rc = goya_stop_queue(hdev,
1916 mmTPC5_QM_GLBL_CFG1,
1917 mmTPC5_QM_CP_STS,
1918 mmTPC5_QM_GLBL_STS0);
1919
1920 if (rc) {
1921 dev_err(hdev->dev, "failed to stop TPC 5 QMAN\n");
1922 retval = -EIO;
1923 }
1924
1925 rc = goya_stop_queue(hdev,
1926 mmTPC5_CMDQ_GLBL_CFG1,
1927 mmTPC5_CMDQ_CP_STS,
1928 mmTPC5_CMDQ_GLBL_STS0);
1929
1930 if (rc) {
1931 dev_err(hdev->dev, "failed to stop TPC 5 CMDQ\n");
1932 retval = -EIO;
1933 }
1934
1935 rc = goya_stop_queue(hdev,
1936 mmTPC6_QM_GLBL_CFG1,
1937 mmTPC6_QM_CP_STS,
1938 mmTPC6_QM_GLBL_STS0);
1939
1940 if (rc) {
1941 dev_err(hdev->dev, "failed to stop TPC 6 QMAN\n");
1942 retval = -EIO;
1943 }
1944
1945 rc = goya_stop_queue(hdev,
1946 mmTPC6_CMDQ_GLBL_CFG1,
1947 mmTPC6_CMDQ_CP_STS,
1948 mmTPC6_CMDQ_GLBL_STS0);
1949
1950 if (rc) {
1951 dev_err(hdev->dev, "failed to stop TPC 6 CMDQ\n");
1952 retval = -EIO;
1953 }
1954
1955 rc = goya_stop_queue(hdev,
1956 mmTPC7_QM_GLBL_CFG1,
1957 mmTPC7_QM_CP_STS,
1958 mmTPC7_QM_GLBL_STS0);
1959
1960 if (rc) {
1961 dev_err(hdev->dev, "failed to stop TPC 7 QMAN\n");
1962 retval = -EIO;
1963 }
1964
1965 rc = goya_stop_queue(hdev,
1966 mmTPC7_CMDQ_GLBL_CFG1,
1967 mmTPC7_CMDQ_CP_STS,
1968 mmTPC7_CMDQ_GLBL_STS0);
1969
1970 if (rc) {
1971 dev_err(hdev->dev, "failed to stop TPC 7 CMDQ\n");
1972 retval = -EIO;
1973 }
1974
1975 return retval;
1976 }
1977
1978 static void goya_dma_stall(struct hl_device *hdev)
1979 {
1980 struct goya_device *goya = hdev->asic_specific;
1981
1982 if (!(goya->hw_cap_initialized & HW_CAP_DMA))
1983 return;
1984
1985 WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
1986 WREG32(mmDMA_QM_1_GLBL_CFG1, 1 << DMA_QM_1_GLBL_CFG1_DMA_STOP_SHIFT);
1987 WREG32(mmDMA_QM_2_GLBL_CFG1, 1 << DMA_QM_2_GLBL_CFG1_DMA_STOP_SHIFT);
1988 WREG32(mmDMA_QM_3_GLBL_CFG1, 1 << DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT);
1989 WREG32(mmDMA_QM_4_GLBL_CFG1, 1 << DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT);
1990 }
1991
1992 static void goya_tpc_stall(struct hl_device *hdev)
1993 {
1994 struct goya_device *goya = hdev->asic_specific;
1995
1996 if (!(goya->hw_cap_initialized & HW_CAP_TPC))
1997 return;
1998
1999 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2000 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC1_CFG_TPC_STALL_V_SHIFT);
2001 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC2_CFG_TPC_STALL_V_SHIFT);
2002 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC3_CFG_TPC_STALL_V_SHIFT);
2003 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC4_CFG_TPC_STALL_V_SHIFT);
2004 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC5_CFG_TPC_STALL_V_SHIFT);
2005 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC6_CFG_TPC_STALL_V_SHIFT);
2006 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC7_CFG_TPC_STALL_V_SHIFT);
2007 }
2008
2009 static void goya_mme_stall(struct hl_device *hdev)
2010 {
2011 struct goya_device *goya = hdev->asic_specific;
2012
2013 if (!(goya->hw_cap_initialized & HW_CAP_MME))
2014 return;
2015
2016 WREG32(mmMME_STALL, 0xFFFFFFFF);
2017 }
2018
2019 static int goya_enable_msix(struct hl_device *hdev)
2020 {
2021 struct goya_device *goya = hdev->asic_specific;
2022 int cq_cnt = hdev->asic_prop.completion_queues_count;
2023 int rc, i, irq_cnt_init, irq;
2024
2025 if (goya->hw_cap_initialized & HW_CAP_MSIX)
2026 return 0;
2027
2028 rc = pci_alloc_irq_vectors(hdev->pdev, GOYA_MSIX_ENTRIES,
2029 GOYA_MSIX_ENTRIES, PCI_IRQ_MSIX);
2030 if (rc < 0) {
2031 dev_err(hdev->dev,
2032 "MSI-X: Failed to enable support -- %d/%d\n",
2033 GOYA_MSIX_ENTRIES, rc);
2034 return rc;
2035 }
2036
2037 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2038 irq = pci_irq_vector(hdev->pdev, i);
2039 rc = request_irq(irq, hl_irq_handler_cq, 0, goya_irq_name[i],
2040 &hdev->completion_queue[i]);
2041 if (rc) {
2042 dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2043 goto free_irqs;
2044 }
2045 }
2046
2047 irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
2048
2049 rc = request_irq(irq, hl_irq_handler_eq, 0,
2050 goya_irq_name[GOYA_EVENT_QUEUE_MSIX_IDX],
2051 &hdev->event_queue);
2052 if (rc) {
2053 dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2054 goto free_irqs;
2055 }
2056
2057 goya->hw_cap_initialized |= HW_CAP_MSIX;
2058 return 0;
2059
2060 free_irqs:
2061 for (i = 0 ; i < irq_cnt_init ; i++)
2062 free_irq(pci_irq_vector(hdev->pdev, i),
2063 &hdev->completion_queue[i]);
2064
2065 pci_free_irq_vectors(hdev->pdev);
2066 return rc;
2067 }
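
/*
 * Vector layout assumed by goya_enable_msix() above: entries
 * [0, completion_queues_count) are wired to hl_irq_handler_cq, one per
 * completion queue, and the dedicated entry GOYA_EVENT_QUEUE_MSIX_IDX is
 * wired to hl_irq_handler_eq for the event queue. A sketch of resolving
 * the Linux IRQ number of completion queue i:
 *
 *	int irq = pci_irq_vector(hdev->pdev, i);
 */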
2068
2069 static void goya_sync_irqs(struct hl_device *hdev)
2070 {
2071 struct goya_device *goya = hdev->asic_specific;
2072 int i;
2073
2074 if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2075 return;
2076
2077 /* Wait for all pending IRQs to be finished */
2078 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
2079 synchronize_irq(pci_irq_vector(hdev->pdev, i));
2080
2081 synchronize_irq(pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX));
2082 }
2083
2084 static void goya_disable_msix(struct hl_device *hdev)
2085 {
2086 struct goya_device *goya = hdev->asic_specific;
2087 int i, irq;
2088
2089 if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2090 return;
2091
2092 goya_sync_irqs(hdev);
2093
2094 irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
2095 free_irq(irq, &hdev->event_queue);
2096
2097 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
2098 irq = pci_irq_vector(hdev->pdev, i);
2099 free_irq(irq, &hdev->completion_queue[i]);
2100 }
2101
2102 pci_free_irq_vectors(hdev->pdev);
2103
2104 goya->hw_cap_initialized &= ~HW_CAP_MSIX;
2105 }
2106
2107 static void goya_enable_timestamp(struct hl_device *hdev)
2108 {
2109 /* Disable the timestamp counter */
2110 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2111
2112 /* Zero the two parts of the 64-bit counter */
2113 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2114 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2115
2116 /* Enable the timestamp counter */
2117 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2118 }
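
/*
 * Register layout inferred from the writes above (an inference from the
 * code, not a documented register map): offset 0x0 of the timestamp block
 * enables/disables the counter, while offsets 0x8 and 0xC hold the two
 * 32-bit halves that are zeroed while the counter is disabled, so the
 * counter restarts from zero on every enable.
 */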
2119
2120 static void goya_disable_timestamp(struct hl_device *hdev)
2121 {
2122 /* Disable the timestamp counter */
2123 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2124 }
2125
2126 static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
2127 {
2128 u32 wait_timeout_ms, cpu_timeout_ms;
2129
2130 dev_info(hdev->dev,
2131 "Halting compute engines and disabling interrupts\n");
2132
2133 if (hdev->pldm) {
2134 wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
2135 cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
2136 } else {
2137 wait_timeout_ms = GOYA_RESET_WAIT_MSEC;
2138 cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
2139 }
2140
2141 if (hard_reset) {
2142 /*
2143  * I don't know what is the state of the CPU so make sure it is
2144  * stopped in any means necessary
2145  */
2146 WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
2147 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2148 GOYA_ASYNC_EVENT_ID_HALT_MACHINE);
2149 msleep(cpu_timeout_ms);
2150 }
2151
2152 goya_stop_external_queues(hdev);
2153 goya_stop_internal_queues(hdev);
2154
2155 msleep(wait_timeout_ms);
2156
2157 goya_dma_stall(hdev);
2158 goya_tpc_stall(hdev);
2159 goya_mme_stall(hdev);
2160
2161 msleep(wait_timeout_ms);
2162
2163 goya_disable_external_queues(hdev);
2164 goya_disable_internal_queues(hdev);
2165
2166 goya_disable_timestamp(hdev);
2167
2168 if (hard_reset) {
2169 goya_disable_msix(hdev);
2170 goya_mmu_remove_device_cpu_mappings(hdev);
2171 } else {
2172 goya_sync_irqs(hdev);
2173 }
2174 }
2175
2176 /*
2177  * goya_push_uboot_to_device() - Push u-boot FW code to device.
2178  * @hdev: Pointer to hl_device structure.
2179  *
2180  * Copy u-boot fw code from firmware file to SRAM BAR.
2181  *
2182  * Return: 0 on success, non-zero for failure.
2183  */
2184 static int goya_push_uboot_to_device(struct hl_device *hdev)
2185 {
2186 char fw_name[200];
2187 void __iomem *dst;
2188
2189 snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-u-boot.bin");
2190 dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + UBOOT_FW_OFFSET;
2191
2192 return hl_fw_push_fw_to_device(hdev, fw_name, dst);
2193 }
2194
2195 /*
2196  * goya_push_linux_to_device() - Push LINUX FW code to device.
2197  * @hdev: Pointer to hl_device structure.
2198  *
2199  * Copy LINUX fw code from firmware file to DDR BAR.
2200  *
2201  * Return: 0 on success, non-zero for failure.
2202  */
2203 static int goya_push_linux_to_device(struct hl_device *hdev)
2204 {
2205 char fw_name[200];
2206 void __iomem *dst;
2207
2208 snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-fit.itb");
2209 dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;
2210
2211 return hl_fw_push_fw_to_device(hdev, fw_name, dst);
2212 }
2213
2214 static int goya_pldm_init_cpu(struct hl_device *hdev)
2215 {
2216 u32 unit_rst_val;
2217 int rc;
2218
2219 /* Must initialize SRAM scrambler before pushing u-boot to SRAM */
2220 goya_init_golden_registers(hdev);
2221
2222 /* Put ARM cores into reset */
2223 WREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL, CPU_RESET_ASSERT);
2224 RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL);
2225
2226 /* Reset the CA53 MACRO */
2227 unit_rst_val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
2228 WREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N, CA53_RESET);
2229 RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
2230 WREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N, unit_rst_val);
2231 RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
2232
2233 rc = goya_push_uboot_to_device(hdev);
2234 if (rc)
2235 return rc;
2236
2237 rc = goya_push_linux_to_device(hdev);
2238 if (rc)
2239 return rc;
2240
2241 WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_FIT_RDY);
2242 WREG32(mmPSOC_GLOBAL_CONF_WARM_REBOOT, CPU_BOOT_STATUS_NA);
2243
2244 WREG32(mmCPU_CA53_CFG_RST_ADDR_LSB_0,
2245 lower_32_bits(SRAM_BASE_ADDR + UBOOT_FW_OFFSET));
2246 WREG32(mmCPU_CA53_CFG_RST_ADDR_MSB_0,
2247 upper_32_bits(SRAM_BASE_ADDR + UBOOT_FW_OFFSET));
2248
2249 /* Release ARM core 0 from reset */
2250 WREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL,
2251 CPU_RESET_CORE0_DEASSERT);
2252 RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL);
2253
2254 return 0;
2255 }
2256
2257 /*
2258  * FW component passes an offset from SRAM_BASE_ADDR in SCRATCHPAD_xx.
2259  * The version string should be located by that offset.
2260  */
2261 static void goya_read_device_fw_version(struct hl_device *hdev,
2262 enum goya_fw_component fwc)
2263 {
2264 const char *name;
2265 u32 ver_off;
2266 char *dest;
2267
2268 switch (fwc) {
2269 case FW_COMP_UBOOT:
2270 ver_off = RREG32(mmUBOOT_VER_OFFSET);
2271 dest = hdev->asic_prop.uboot_ver;
2272 name = "U-Boot";
2273 break;
2274 case FW_COMP_PREBOOT:
2275 ver_off = RREG32(mmPREBOOT_VER_OFFSET);
2276 dest = hdev->asic_prop.preboot_ver;
2277 name = "Preboot";
2278 break;
2279 default:
2280 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2281 return;
2282 }
2283
2284 ver_off &= ~((u32)SRAM_BASE_ADDR);
2285
2286 if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2287 memcpy_fromio(dest, hdev->pcie_bar[SRAM_CFG_BAR_ID] + ver_off,
2288 VERSION_MAX_LEN);
2289 } else {
2290 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2291 name, ver_off);
2292 strcpy(dest, "unavailable");
2293 }
2294 }
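
/*
 * Worked example of the offset handling above, with hypothetical values:
 * if the FW stored the absolute SRAM address SRAM_BASE_ADDR + 0x1000 in
 * its version scratchpad, masking off SRAM_BASE_ADDR leaves
 * ver_off = 0x1000, and the version string is then copied from
 * hdev->pcie_bar[SRAM_CFG_BAR_ID] + 0x1000, bounded by VERSION_MAX_LEN.
 */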
2295
2296 static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout)
2297 {
2298 struct goya_device *goya = hdev->asic_specific;
2299 u32 status;
2300 int rc;
2301
2302 if (!hdev->cpu_enable)
2303 return 0;
2304
2305 if (goya->hw_cap_initialized & HW_CAP_CPU)
2306 return 0;
2307
2308 /*
2309  * Before pushing u-boot/linux to device, need to set the ddr bar to
2310  * base address of dram
2311  */
2312 if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
2313 dev_err(hdev->dev,
2314 "failed to map DDR bar to DRAM base address\n");
2315 return -EIO;
2316 }
2317
2318 if (hdev->pldm) {
2319 rc = goya_pldm_init_cpu(hdev);
2320 if (rc)
2321 return rc;
2322
2323 goto out;
2324 }
2325
2326 /* Make sure CPU boot-loader is running */
2327 rc = hl_poll_timeout(
2328 hdev,
2329 mmPSOC_GLOBAL_CONF_WARM_REBOOT,
2330 status,
2331 (status == CPU_BOOT_STATUS_DRAM_RDY) ||
2332 (status == CPU_BOOT_STATUS_SRAM_AVAIL),
2333 10000,
2334 cpu_timeout);
2335
2336 if (rc) {
2337 dev_err(hdev->dev, "Error in ARM u-boot!\n");
2338 switch (status) {
2339 case CPU_BOOT_STATUS_NA:
2340 dev_err(hdev->dev,
2341 "ARM status %d - BTL did NOT run\n", status);
2342 break;
2343 case CPU_BOOT_STATUS_IN_WFE:
2344 dev_err(hdev->dev,
2345 "ARM status %d - Inside WFE loop\n", status);
2346 break;
2347 case CPU_BOOT_STATUS_IN_BTL:
2348 dev_err(hdev->dev,
2349 "ARM status %d - Stuck in BTL\n", status);
2350 break;
2351 case CPU_BOOT_STATUS_IN_PREBOOT:
2352 dev_err(hdev->dev,
2353 "ARM status %d - Stuck in Preboot\n", status);
2354 break;
2355 case CPU_BOOT_STATUS_IN_SPL:
2356 dev_err(hdev->dev,
2357 "ARM status %d - Stuck in SPL\n", status);
2358 break;
2359 case CPU_BOOT_STATUS_IN_UBOOT:
2360 dev_err(hdev->dev,
2361 "ARM status %d - Stuck in u-boot\n", status);
2362 break;
2363 case CPU_BOOT_STATUS_DRAM_INIT_FAIL:
2364 dev_err(hdev->dev,
2365 "ARM status %d - DDR initialization failed\n",
2366 status);
2367 break;
2368 case CPU_BOOT_STATUS_UBOOT_NOT_READY:
2369 dev_err(hdev->dev,
2370 "ARM status %d - u-boot stopped by user\n",
2371 status);
2372 break;
2373 default:
2374 dev_err(hdev->dev,
2375 "ARM status %d - Invalid status code\n",
2376 status);
2377 break;
2378 }
2379 return -EIO;
2380 }
2381
2382 /* Read U-Boot and Preboot versions now in case we will later fail */
2383 goya_read_device_fw_version(hdev, FW_COMP_UBOOT);
2384 goya_read_device_fw_version(hdev, FW_COMP_PREBOOT);
2385
2386 if (!hdev->fw_loading) {
2387 dev_info(hdev->dev, "Skip loading FW\n");
2388 goto out;
2389 }
2390
2391 if (status == CPU_BOOT_STATUS_SRAM_AVAIL)
2392 goto out;
2393
2394 rc = goya_push_linux_to_device(hdev);
2395 if (rc)
2396 return rc;
2397
2398 WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_FIT_RDY);
2399
2400 rc = hl_poll_timeout(
2401 hdev,
2402 mmPSOC_GLOBAL_CONF_WARM_REBOOT,
2403 status,
2404 (status == CPU_BOOT_STATUS_SRAM_AVAIL),
2405 10000,
2406 cpu_timeout);
2407
2408 if (rc) {
2409 if (status == CPU_BOOT_STATUS_FIT_CORRUPTED)
2410 dev_err(hdev->dev,
2411 "ARM u-boot reports FIT image is corrupted\n");
2412 else
2413 dev_err(hdev->dev,
2414 "ARM Linux failed to load, %d\n", status);
2415 WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_NA);
2416 return -EIO;
2417 }
2418
2419 dev_info(hdev->dev, "Successfully loaded firmware to device\n");
2420
2421 out:
2422 goya->hw_cap_initialized |= HW_CAP_CPU;
2423
2424 return 0;
2425 }
2426
2427 static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
2428 u64 phys_addr)
2429 {
2430 u32 status, timeout_usec;
2431 int rc;
2432
2433 if (hdev->pldm)
2434 timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
2435 else
2436 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
2437
2438 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
2439 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
2440 WREG32(MMU_ASID_BUSY, 0x80000000 | asid);
2441
2442 rc = hl_poll_timeout(
2443 hdev,
2444 MMU_ASID_BUSY,
2445 status,
2446 !(status & 0x80000000),
2447 1000,
2448 timeout_usec);
2449
2450 if (rc) {
2451 dev_err(hdev->dev,
2452 "Timeout during MMU hop0 config of asid %d\n", asid);
2453 return rc;
2454 }
2455
2456 return 0;
2457 }
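
/*
 * Handshake used above, sketched for a hypothetical hop0 address
 * 0x20000000 and ASID 3: the physical address is split across two
 * registers (bits 43:12 and 49:44), and writing bit 31 together with the
 * ASID to MMU_ASID_BUSY kicks the H/W, which clears that bit when done:
 *
 *	WREG32(MMU_HOP0_PA43_12, 0x20000000ull >> MMU_HOP0_PA43_12_SHIFT);
 *	WREG32(MMU_HOP0_PA49_44, 0x20000000ull >> MMU_HOP0_PA49_44_SHIFT);
 *	WREG32(MMU_ASID_BUSY, 0x80000000 | 3);
 */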
2458
2459 int goya_mmu_init(struct hl_device *hdev)
2460 {
2461 struct asic_fixed_properties *prop = &hdev->asic_prop;
2462 struct goya_device *goya = hdev->asic_specific;
2463 u64 hop0_addr;
2464 int rc, i;
2465
2466 if (!hdev->mmu_enable)
2467 return 0;
2468
2469 if (goya->hw_cap_initialized & HW_CAP_MMU)
2470 return 0;
2471
2472 hdev->dram_supports_virtual_memory = true;
2473 hdev->dram_default_page_mapping = true;
2474
2475 for (i = 0 ; i < prop->max_asid ; i++) {
2476 hop0_addr = prop->mmu_pgt_addr +
2477 (i * prop->mmu_hop_table_size);
2478
2479 rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2480 if (rc) {
2481 dev_err(hdev->dev,
2482 "failed to set hop0 addr for asid %d\n", i);
2483 goto err;
2484 }
2485 }
2486
2487 goya->hw_cap_initialized |= HW_CAP_MMU;
2488
2489 /* init MMU cache manage page */
2490 WREG32(mmSTLB_CACHE_INV_BASE_39_8,
2491 lower_32_bits(MMU_CACHE_MNG_ADDR >> 8));
2492 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2493
2494 /* Remove follower feature due to performance bug */
2495 WREG32_AND(mmSTLB_STLB_FEATURE_EN,
2496 (~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));
2497
2498 hdev->asic_funcs->mmu_invalidate_cache(hdev, true);
2499
2500 WREG32(mmMMU_MMU_ENABLE, 1);
2501 WREG32(mmMMU_SPI_MASK, 0xF);
2502
2503 return 0;
2504
2505 err:
2506 return rc;
2507 }
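
/*
 * Page-table placement used by goya_mmu_init() above: hop0 tables are
 * laid out back-to-back starting at mmu_pgt_addr, one per ASID. With a
 * hypothetical mmu_hop_table_size of 0x1000, ASID 5's hop0 table would
 * start at mmu_pgt_addr + 5 * 0x1000.
 */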
2508
2509 /*
2510  * goya_hw_init - Goya hardware initialization code
2511  *
2512  * @hdev: pointer to hl_device structure
2513  *
2514  * Returns 0 on success
2515  *
2516  */
2517 static int goya_hw_init(struct hl_device *hdev)
2518 {
2519 struct asic_fixed_properties *prop = &hdev->asic_prop;
2520 int rc;
2521
2522 dev_info(hdev->dev, "Starting initialization of H/W\n");
2523
2524 /* Perform read from the device to make sure device is up */
2525 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2526
2527 /*
2528  * Let's mark in the H/W that we have reached this point. We check
2529  * this value in the reset_before_init function to understand whether
2530  * we need to reset the chip before doing H/W init. This register is
2531  * cleared by the H/W upon H/W reset
2532  */
2533 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
2534
2535 rc = goya_init_cpu(hdev, GOYA_CPU_TIMEOUT_USEC);
2536 if (rc) {
2537 dev_err(hdev->dev, "failed to initialize CPU\n");
2538 return rc;
2539 }
2540
2541 goya_tpc_mbist_workaround(hdev);
2542
2543 goya_init_golden_registers(hdev);
2544
2545 /*
2546  * After CPU initialization is finished, change DDR bar mapping inside
2547  * iATU to point to the start address of the MMU page tables
2548  */
2549 if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE +
2550 (MMU_PAGE_TABLES_ADDR &
2551 ~(prop->dram_pci_bar_size - 0x1ull))) == U64_MAX) {
2552 dev_err(hdev->dev,
2553 "failed to map DDR bar to MMU page tables\n");
2554 return -EIO;
2555 }
2556
2557 rc = goya_mmu_init(hdev);
2558 if (rc)
2559 return rc;
2560
2561 goya_init_security(hdev);
2562
2563 goya_init_dma_qmans(hdev);
2564
2565 goya_init_mme_qmans(hdev);
2566
2567 goya_init_tpc_qmans(hdev);
2568
2569 goya_enable_timestamp(hdev);
2570
2571 /* MSI-X must be enabled before CPU queues are initialized */
2572 rc = goya_enable_msix(hdev);
2573 if (rc)
2574 goto disable_queues;
2575
2576 /* Perform read from the device to flush all MSI-X configuration */
2577 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2578
2579 return 0;
2580
2581 disable_queues:
2582 goya_disable_internal_queues(hdev);
2583 goya_disable_external_queues(hdev);
2584
2585 return rc;
2586 }
2587
2588 /*
2589  * goya_hw_fini - Goya hardware tear-down code
2590  *
2591  * @hdev: pointer to hl_device structure
2592  * @hard_reset: should we do hard reset to all engines or just reset the
2593  *              compute/dma engines
2594  */
2595 static void goya_hw_fini(struct hl_device *hdev, bool hard_reset)
2596 {
2597 struct goya_device *goya = hdev->asic_specific;
2598 u32 reset_timeout_ms, status;
2599
2600 if (hdev->pldm)
2601 reset_timeout_ms = GOYA_PLDM_RESET_TIMEOUT_MSEC;
2602 else
2603 reset_timeout_ms = GOYA_RESET_TIMEOUT_MSEC;
2604
2605 if (hard_reset) {
2606 goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
2607 goya_disable_clk_rlx(hdev);
2608 goya_set_pll_refclk(hdev);
2609
2610 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, RESET_ALL);
2611 dev_info(hdev->dev,
2612 "Issued HARD reset command, going to wait %dms\n",
2613 reset_timeout_ms);
2614 } else {
2615 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, DMA_MME_TPC_RESET);
2616 dev_info(hdev->dev,
2617 "Issued SOFT reset command, going to wait %dms\n",
2618 reset_timeout_ms);
2619 }
2620
2621 /*
2622  * We can't poll the BTM_FSM register while the reset is in progress,
2623  * so sleep for the entire timeout and only then sample the FSM
2624  * status register to see whether the reset has completed
2625  */
2626 msleep(reset_timeout_ms);
2627
2628 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
2629 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
2630 dev_err(hdev->dev,
2631 "Timeout while waiting for device to reset 0x%x\n",
2632 status);
2633
2634 if (!hard_reset) {
2635 goya->hw_cap_initialized &= ~(HW_CAP_DMA | HW_CAP_MME |
2636 HW_CAP_GOLDEN | HW_CAP_TPC);
2637 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2638 GOYA_ASYNC_EVENT_ID_SOFT_RESET);
2639 return;
2640 }
2641
2642 /* Chicken bit to re-initiate boot sequencer flow */
2643 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START,
2644 1 << PSOC_GLOBAL_CONF_BOOT_SEQ_RE_START_IND_SHIFT);
2645
2646 WREG32(mmPSOC_GLOBAL_CONF_SW_BTM_FSM,
2647 0xA << PSOC_GLOBAL_CONF_SW_BTM_FSM_CTRL_SHIFT);
2648
2649 goya->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
2650 HW_CAP_DDR_0 | HW_CAP_DDR_1 |
2651 HW_CAP_DMA | HW_CAP_MME |
2652 HW_CAP_MMU | HW_CAP_TPC_MBIST |
2653 HW_CAP_GOLDEN | HW_CAP_TPC);
2654 memset(goya->events_stat, 0, sizeof(goya->events_stat));
2655
2656 if (!hdev->pldm) {
2657 int rc;
2658
2659 /*
2660  * Wait for the CPU boot loader to come back up after the hard
2661  * reset, so the device is left in a known state (skipped on PLDM,
2662  * where the CPU is brought up by goya_pldm_init_cpu() instead)
2663  */
2664 dev_info(hdev->dev,
2665 "Going to wait up to %ds for CPU boot loader\n",
2666 GOYA_CPU_TIMEOUT_USEC / 1000 / 1000);
2667
2668 rc = hl_poll_timeout(
2669 hdev,
2670 mmPSOC_GLOBAL_CONF_WARM_REBOOT,
2671 status,
2672 (status == CPU_BOOT_STATUS_DRAM_RDY),
2673 10000,
2674 GOYA_CPU_TIMEOUT_USEC);
2675 if (rc)
2676 dev_err(hdev->dev,
2677 "failed to wait for CPU boot loader\n");
2678 }
2679 }
2680
2681 int goya_suspend(struct hl_device *hdev)
2682 {
2683 int rc;
2684
2685 rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
2686 if (rc)
2687 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
2688
2689 return rc;
2690 }
2691
2692 int goya_resume(struct hl_device *hdev)
2693 {
2694 return goya_init_iatu(hdev);
2695 }
2696
2697 static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
2698 u64 kaddress, phys_addr_t paddress, u32 size)
2699 {
2700 int rc;
2701
2702 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
2703 VM_DONTCOPY | VM_NORESERVE;
2704
2705 rc = remap_pfn_range(vma, vma->vm_start, paddress >> PAGE_SHIFT,
2706 size, vma->vm_page_prot);
2707 if (rc)
2708 dev_err(hdev->dev, "remap_pfn_range error %d\n", rc);
2709
2710 return rc;
2711 }
2712
2713 void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
2714 {
2715 u32 db_reg_offset, db_value;
2716
2717 switch (hw_queue_id) {
2718 case GOYA_QUEUE_ID_DMA_0:
2719 db_reg_offset = mmDMA_QM_0_PQ_PI;
2720 break;
2721
2722 case GOYA_QUEUE_ID_DMA_1:
2723 db_reg_offset = mmDMA_QM_1_PQ_PI;
2724 break;
2725
2726 case GOYA_QUEUE_ID_DMA_2:
2727 db_reg_offset = mmDMA_QM_2_PQ_PI;
2728 break;
2729
2730 case GOYA_QUEUE_ID_DMA_3:
2731 db_reg_offset = mmDMA_QM_3_PQ_PI;
2732 break;
2733
2734 case GOYA_QUEUE_ID_DMA_4:
2735 db_reg_offset = mmDMA_QM_4_PQ_PI;
2736 break;
2737
2738 case GOYA_QUEUE_ID_CPU_PQ:
2739 db_reg_offset = mmCPU_IF_PF_PQ_PI;
2740 break;
2741
2742 case GOYA_QUEUE_ID_MME:
2743 db_reg_offset = mmMME_QM_PQ_PI;
2744 break;
2745
2746 case GOYA_QUEUE_ID_TPC0:
2747 db_reg_offset = mmTPC0_QM_PQ_PI;
2748 break;
2749
2750 case GOYA_QUEUE_ID_TPC1:
2751 db_reg_offset = mmTPC1_QM_PQ_PI;
2752 break;
2753
2754 case GOYA_QUEUE_ID_TPC2:
2755 db_reg_offset = mmTPC2_QM_PQ_PI;
2756 break;
2757
2758 case GOYA_QUEUE_ID_TPC3:
2759 db_reg_offset = mmTPC3_QM_PQ_PI;
2760 break;
2761
2762 case GOYA_QUEUE_ID_TPC4:
2763 db_reg_offset = mmTPC4_QM_PQ_PI;
2764 break;
2765
2766 case GOYA_QUEUE_ID_TPC5:
2767 db_reg_offset = mmTPC5_QM_PQ_PI;
2768 break;
2769
2770 case GOYA_QUEUE_ID_TPC6:
2771 db_reg_offset = mmTPC6_QM_PQ_PI;
2772 break;
2773
2774 case GOYA_QUEUE_ID_TPC7:
2775 db_reg_offset = mmTPC7_QM_PQ_PI;
2776 break;
2777
2778 default:
2779 /* Should never get here */
2780 dev_err(hdev->dev, "H/W queue %d is invalid. Can't set pi\n",
2781 hw_queue_id);
2782 return;
2783 }
2784
2785 db_value = pi;
2786
2787 /* ring the doorbell */
2788 WREG32(db_reg_offset, db_value);
2789
2790 if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ)
2791 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2792 GOYA_ASYNC_EVENT_ID_PI_UPDATE);
2793 }
2794
2795 void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
2796 {
2797 /* The QMANs are on the SRAM so need to copy to IO space */
2798 memcpy_toio((void __iomem *) pqe, bd, sizeof(struct hl_bd));
2799 }
2800
2801 static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
2802 dma_addr_t *dma_handle, gfp_t flags)
2803 {
2804 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
2805 dma_handle, flags);
2806
2807 /* Shift to the device's base physical address of host memory */
2808 if (kernel_addr)
2809 *dma_handle += HOST_PHYS_BASE;
2810
2811 return kernel_addr;
2812 }
2813
2814 static void goya_dma_free_coherent(struct hl_device *hdev, size_t size,
2815 void *cpu_addr, dma_addr_t dma_handle)
2816 {
2817 /* Cancel the device's base physical address of host memory */
2818 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
2819
2820 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
2821 }
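
/*
 * HOST_PHYS_BASE convention shared by the DMA helpers in this file: bus
 * addresses handed to the device for host memory are biased by
 * HOST_PHYS_BASE, so allocation/map paths add it and free/unmap paths
 * subtract it before calling back into the kernel DMA API. A minimal
 * sketch of the round trip:
 *
 *	dma_addr_t dev_view = cpu_view + HOST_PHYS_BASE;   // given to H/W
 *	dma_addr_t cpu_view2 = dev_view - HOST_PHYS_BASE;  // given to DMA API
 */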
2822
2823 void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
2824 dma_addr_t *dma_handle, u16 *queue_len)
2825 {
2826 void *base;
2827 u32 offset;
2828
2829 *dma_handle = hdev->asic_prop.sram_base_address;
2830
2831 base = (void *) hdev->pcie_bar[SRAM_CFG_BAR_ID];
2832
2833 switch (queue_id) {
2834 case GOYA_QUEUE_ID_MME:
2835 offset = MME_QMAN_BASE_OFFSET;
2836 *queue_len = MME_QMAN_LENGTH;
2837 break;
2838 case GOYA_QUEUE_ID_TPC0:
2839 offset = TPC0_QMAN_BASE_OFFSET;
2840 *queue_len = TPC_QMAN_LENGTH;
2841 break;
2842 case GOYA_QUEUE_ID_TPC1:
2843 offset = TPC1_QMAN_BASE_OFFSET;
2844 *queue_len = TPC_QMAN_LENGTH;
2845 break;
2846 case GOYA_QUEUE_ID_TPC2:
2847 offset = TPC2_QMAN_BASE_OFFSET;
2848 *queue_len = TPC_QMAN_LENGTH;
2849 break;
2850 case GOYA_QUEUE_ID_TPC3:
2851 offset = TPC3_QMAN_BASE_OFFSET;
2852 *queue_len = TPC_QMAN_LENGTH;
2853 break;
2854 case GOYA_QUEUE_ID_TPC4:
2855 offset = TPC4_QMAN_BASE_OFFSET;
2856 *queue_len = TPC_QMAN_LENGTH;
2857 break;
2858 case GOYA_QUEUE_ID_TPC5:
2859 offset = TPC5_QMAN_BASE_OFFSET;
2860 *queue_len = TPC_QMAN_LENGTH;
2861 break;
2862 case GOYA_QUEUE_ID_TPC6:
2863 offset = TPC6_QMAN_BASE_OFFSET;
2864 *queue_len = TPC_QMAN_LENGTH;
2865 break;
2866 case GOYA_QUEUE_ID_TPC7:
2867 offset = TPC7_QMAN_BASE_OFFSET;
2868 *queue_len = TPC_QMAN_LENGTH;
2869 break;
2870 default:
2871 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
2872 return NULL;
2873 }
2874
2875 base += offset;
2876 *dma_handle += offset;
2877
2878 return base;
2879 }
2880
2881 static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
2882 {
2883 struct packet_msg_prot *fence_pkt;
2884 u32 *fence_ptr;
2885 dma_addr_t fence_dma_addr;
2886 struct hl_cb *cb;
2887 u32 tmp, timeout;
2888 int rc;
2889
2890 if (hdev->pldm)
2891 timeout = GOYA_PLDM_QMAN0_TIMEOUT_USEC;
2892 else
2893 timeout = HL_DEVICE_TIMEOUT_USEC;
2894
2895 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
2896 dev_err_ratelimited(hdev->dev,
2897 "Can't send driver job on QMAN0 because the device is not idle\n");
2898 return -EBUSY;
2899 }
2900
2901 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
2902 &fence_dma_addr);
2903 if (!fence_ptr) {
2904 dev_err(hdev->dev,
2905 "Failed to allocate fence memory for QMAN0\n");
2906 return -ENOMEM;
2907 }
2908
2909 goya_qman0_set_security(hdev, true);
2910
2911 cb = job->patched_cb;
2912
2913 fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address +
2914 job->job_cb_size - sizeof(struct packet_msg_prot));
2915
2916 tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
2917 (1 << GOYA_PKT_CTL_EB_SHIFT) |
2918 (1 << GOYA_PKT_CTL_MB_SHIFT);
2919 fence_pkt->ctl = cpu_to_le32(tmp);
2920 fence_pkt->value = cpu_to_le32(GOYA_QMAN0_FENCE_VAL);
2921 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
2922
2923 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_DMA_0,
2924 job->job_cb_size, cb->bus_address);
2925 if (rc) {
2926 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
2927 goto free_fence_ptr;
2928 }
2929
2930 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
2931 (tmp == GOYA_QMAN0_FENCE_VAL), 1000,
2932 timeout, true);
2933
2934 hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);
2935
2936 if (rc == -ETIMEDOUT) {
2937 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
2938 goto free_fence_ptr;
2939 }
2940
2941 free_fence_ptr:
2942 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
2943 fence_dma_addr);
2944
2945 goya_qman0_set_security(hdev, false);
2946
2947 return rc;
2948 }
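
/*
 * Completion scheme used above: the patched CB ends with a MSG_PROT
 * packet that instructs the engine to write GOYA_QMAN0_FENCE_VAL to a
 * host fence buffer after all preceding packets complete, and the driver
 * polls that buffer instead of waiting for an interrupt.
 * goya_test_queue() below reuses the same fence technique for its
 * sanity check.
 */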
2949
2950 int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
2951 u32 timeout, long *result)
2952 {
2953 struct goya_device *goya = hdev->asic_specific;
2954
2955 if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) {
2956 if (result)
2957 *result = 0;
2958 return 0;
2959 }
2960
2961 return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len,
2962 timeout, result);
2963 }
2964
2965 int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
2966 {
2967 struct packet_msg_prot *fence_pkt;
2968 dma_addr_t pkt_dma_addr;
2969 u32 fence_val, tmp;
2970 dma_addr_t fence_dma_addr;
2971 u32 *fence_ptr;
2972 int rc;
2973
2974 fence_val = GOYA_QMAN0_FENCE_VAL;
2975
2976 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
2977 &fence_dma_addr);
2978 if (!fence_ptr) {
2979 dev_err(hdev->dev,
2980 "Failed to allocate memory for queue testing\n");
2981 return -ENOMEM;
2982 }
2983
2984 *fence_ptr = 0;
2985
2986 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
2987 sizeof(struct packet_msg_prot),
2988 GFP_KERNEL, &pkt_dma_addr);
2989 if (!fence_pkt) {
2990 dev_err(hdev->dev,
2991 "Failed to allocate packet for queue testing\n");
2992 rc = -ENOMEM;
2993 goto free_fence_ptr;
2994 }
2995
2996 tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
2997 (1 << GOYA_PKT_CTL_EB_SHIFT) |
2998 (1 << GOYA_PKT_CTL_MB_SHIFT);
2999 fence_pkt->ctl = cpu_to_le32(tmp);
3000 fence_pkt->value = cpu_to_le32(fence_val);
3001 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3002
3003 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
3004 sizeof(struct packet_msg_prot),
3005 pkt_dma_addr);
3006 if (rc) {
3007 dev_err(hdev->dev,
3008 "Failed to send fence packet\n");
3009 goto free_pkt;
3010 }
3011
3012 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
3013 1000, GOYA_TEST_QUEUE_WAIT_USEC, true);
3014
3015 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3016
3017 if (rc == -ETIMEDOUT) {
3018 dev_err(hdev->dev,
3019 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3020 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3021 rc = -EIO;
3022 } else {
3023 dev_info(hdev->dev, "queue test on H/W queue %d succeeded\n",
3024 hw_queue_id);
3025 }
3026
3027 free_pkt:
3028 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
3029 pkt_dma_addr);
3030 free_fence_ptr:
3031 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
3032 fence_dma_addr);
3033 return rc;
3034 }
3035
3036 int goya_test_cpu_queue(struct hl_device *hdev)
3037 {
3038 struct goya_device *goya = hdev->asic_specific;
3039
3040 /*
3041  * check capability here as send_cpu_message() won't update the result
3042  * value if no capability
3043  */
3044 if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
3045 return 0;
3046
3047 return hl_fw_test_cpu_queue(hdev);
3048 }
3049
3050 int goya_test_queues(struct hl_device *hdev)
3051 {
3052 int i, rc, ret_val = 0;
3053
3054 for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
3055 rc = goya_test_queue(hdev, i);
3056 if (rc)
3057 ret_val = -EINVAL;
3058 }
3059
3060 return ret_val;
3061 }
3062
3063 static void *goya_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3064 gfp_t mem_flags, dma_addr_t *dma_handle)
3065 {
3066 void *kernel_addr;
3067
3068 if (size > GOYA_DMA_POOL_BLK_SIZE)
3069 return NULL;
3070
3071 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3072
3073 /* Shift to the device's base physical address of host memory */
3074 if (kernel_addr)
3075 *dma_handle += HOST_PHYS_BASE;
3076
3077 return kernel_addr;
3078 }
3079
3080 static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr,
3081 dma_addr_t dma_addr)
3082 {
3083 /* Cancel the device's base physical address of host memory */
3084 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3085
3086 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3087 }
3088
3089 void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
3090 dma_addr_t *dma_handle)
3091 {
3092 void *vaddr;
3093
3094 vaddr = hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3095 *dma_handle = (*dma_handle) - hdev->cpu_accessible_dma_address +
3096 VA_CPU_ACCESSIBLE_MEM_ADDR;
3097
3098 return vaddr;
3099 }
3100
3101 void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
3102 void *vaddr)
3103 {
3104 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3105 }
3106
3107 static int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
3108 int nents, enum dma_data_direction dir)
3109 {
3110 struct scatterlist *sg;
3111 int i;
3112
3113 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3114 return -ENOMEM;
3115
3116 /* Shift to the device's base physical address of host memory */
3117 for_each_sg(sgl, sg, nents, i)
3118 sg->dma_address += HOST_PHYS_BASE;
3119
3120 return 0;
3121 }
3122
3123 static void goya_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
3124 int nents, enum dma_data_direction dir)
3125 {
3126 struct scatterlist *sg;
3127 int i;
3128
3129 /* Cancel the device's base physical address of host memory */
3130 for_each_sg(sgl, sg, nents, i)
3131 sg->dma_address -= HOST_PHYS_BASE;
3132
3133 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3134 }
3135
3136 u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
3137 {
3138 struct scatterlist *sg, *sg_next_iter;
3139 u32 count, dma_desc_cnt;
3140 u64 len, len_next;
3141 dma_addr_t addr, addr_next;
3142
3143 dma_desc_cnt = 0;
3144
3145 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3146
3147 len = sg_dma_len(sg);
3148 addr = sg_dma_address(sg);
3149
3150 if (len == 0)
3151 break;
3152
3153 while ((count + 1) < sgt->nents) {
3154 sg_next_iter = sg_next(sg);
3155 len_next = sg_dma_len(sg_next_iter);
3156 addr_next = sg_dma_address(sg_next_iter);
3157
3158 if (len_next == 0)
3159 break;
3160
3161 if ((addr + len == addr_next) &&
3162 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3163 len += len_next;
3164 count++;
3165 sg = sg_next_iter;
3166 } else {
3167 break;
3168 }
3169 }
3170
3171 dma_desc_cnt++;
3172 }
3173
3174 return dma_desc_cnt * sizeof(struct packet_lin_dma);
3175 }
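
/*
 * Worked example of the coalescing above, with hypothetical DMA
 * addresses: SG entries (0x1000, 4K), (0x2000, 4K) and (0x9000, 4K)
 * yield two descriptors - the first two are contiguous
 * (0x1000 + 0x1000 == 0x2000) and merge into one 8K descriptor
 * (assuming 8K fits within DMA_MAX_TRANSFER_SIZE), while the third
 * starts a new one, so the function returns
 * 2 * sizeof(struct packet_lin_dma).
 */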
3176
3177 static int goya_pin_memory_before_cs(struct hl_device *hdev,
3178 struct hl_cs_parser *parser,
3179 struct packet_lin_dma *user_dma_pkt,
3180 u64 addr, enum dma_data_direction dir)
3181 {
3182 struct hl_userptr *userptr;
3183 int rc;
3184
3185 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3186 parser->job_userptr_list, &userptr))
3187 goto already_pinned;
3188
3189 userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3190 if (!userptr)
3191 return -ENOMEM;
3192
3193 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3194 userptr);
3195 if (rc)
3196 goto free_userptr;
3197
3198 list_add_tail(&userptr->job_node, parser->job_userptr_list);
3199
3200 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3201 userptr->sgt->nents, dir);
3202 if (rc) {
3203 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3204 goto unpin_memory;
3205 }
3206
3207 userptr->dma_mapped = true;
3208 userptr->dir = dir;
3209
3210 already_pinned:
3211 parser->patched_cb_size +=
3212 goya_get_dma_desc_list_size(hdev, userptr->sgt);
3213
3214 return 0;
3215
3216 unpin_memory:
3217 hl_unpin_host_memory(hdev, userptr);
3218 free_userptr:
3219 kfree(userptr);
3220 return rc;
3221 }
3222
3223 static int goya_validate_dma_pkt_host(struct hl_device *hdev,
3224 struct hl_cs_parser *parser,
3225 struct packet_lin_dma *user_dma_pkt)
3226 {
3227 u64 device_memory_addr, addr;
3228 enum dma_data_direction dir;
3229 enum goya_dma_direction user_dir;
3230 bool sram_addr = true;
3231 bool skip_host_mem_pin = false;
3232 bool user_memset;
3233 u32 ctl;
3234 int rc = 0;
3235
3236 ctl = le32_to_cpu(user_dma_pkt->ctl);
3237
3238 user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3239 GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3240
3241 user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3242 GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3243
3244 switch (user_dir) {
3245 case DMA_HOST_TO_DRAM:
3246 dev_dbg(hdev->dev, "DMA direction is HOST --> DRAM\n");
3247 dir = DMA_TO_DEVICE;
3248 sram_addr = false;
3249 addr = le64_to_cpu(user_dma_pkt->src_addr);
3250 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3251 if (user_memset)
3252 skip_host_mem_pin = true;
3253 break;
3254
3255 case DMA_DRAM_TO_HOST:
3256 dev_dbg(hdev->dev, "DMA direction is DRAM --> HOST\n");
3257 dir = DMA_FROM_DEVICE;
3258 sram_addr = false;
3259 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3260 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3261 break;
3262
3263 case DMA_HOST_TO_SRAM:
3264 dev_dbg(hdev->dev, "DMA direction is HOST --> SRAM\n");
3265 dir = DMA_TO_DEVICE;
3266 addr = le64_to_cpu(user_dma_pkt->src_addr);
3267 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3268 if (user_memset)
3269 skip_host_mem_pin = true;
3270 break;
3271
3272 case DMA_SRAM_TO_HOST:
3273 dev_dbg(hdev->dev, "DMA direction is SRAM --> HOST\n");
3274 dir = DMA_FROM_DEVICE;
3275 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3276 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3277 break;
3278 default:
3279 dev_err(hdev->dev, "DMA direction is undefined\n");
3280 return -EFAULT;
3281 }
3282
3283 if (sram_addr) {
3284 if (!hl_mem_area_inside_range(device_memory_addr,
3285 le32_to_cpu(user_dma_pkt->tsize),
3286 hdev->asic_prop.sram_user_base_address,
3287 hdev->asic_prop.sram_end_address)) {
3288
3289 dev_err(hdev->dev,
3290 "SRAM address 0x%llx + 0x%x is invalid\n",
3291 device_memory_addr,
3292 le32_to_cpu(user_dma_pkt->tsize));
3293 return -EFAULT;
3294 }
3295 } else {
3296 if (!hl_mem_area_inside_range(device_memory_addr,
3297 le32_to_cpu(user_dma_pkt->tsize),
3298 hdev->asic_prop.dram_user_base_address,
3299 hdev->asic_prop.dram_end_address)) {
3300
3301 dev_err(hdev->dev,
3302 "DRAM address 0x%llx + 0x%x is invalid\n",
3303 device_memory_addr,
3304 le32_to_cpu(user_dma_pkt->tsize));
3305 return -EFAULT;
3306 }
3307 }
3308
3309 if (skip_host_mem_pin)
3310 parser->patched_cb_size += sizeof(*user_dma_pkt);
3311 else {
3312 if ((dir == DMA_TO_DEVICE) &&
3313 (parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1)) {
3314 dev_err(hdev->dev,
3315 "Can't DMA from host on queue other then 1\n");
3316 return -EFAULT;
3317 }
3318
3319 rc = goya_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3320 addr, dir);
3321 }
3322
3323 return rc;
3324 }
3325
3326 static int goya_validate_dma_pkt_no_host(struct hl_device *hdev,
3327 struct hl_cs_parser *parser,
3328 struct packet_lin_dma *user_dma_pkt)
3329 {
3330 u64 sram_memory_addr, dram_memory_addr;
3331 enum goya_dma_direction user_dir;
3332 u32 ctl;
3333
3334 ctl = le32_to_cpu(user_dma_pkt->ctl);
3335 user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3336 GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3337
3338 if (user_dir == DMA_DRAM_TO_SRAM) {
3339 dev_dbg(hdev->dev, "DMA direction is DRAM --> SRAM\n");
3340 dram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3341 sram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3342 } else {
3343 dev_dbg(hdev->dev, "DMA direction is SRAM --> DRAM\n");
3344 sram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3345 dram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3346 }
3347
3348 if (!hl_mem_area_inside_range(sram_memory_addr,
3349 le32_to_cpu(user_dma_pkt->tsize),
3350 hdev->asic_prop.sram_user_base_address,
3351 hdev->asic_prop.sram_end_address)) {
3352 dev_err(hdev->dev, "SRAM address 0x%llx + 0x%x is invalid\n",
3353 sram_memory_addr, le32_to_cpu(user_dma_pkt->tsize));
3354 return -EFAULT;
3355 }
3356
3357 if (!hl_mem_area_inside_range(dram_memory_addr,
3358 le32_to_cpu(user_dma_pkt->tsize),
3359 hdev->asic_prop.dram_user_base_address,
3360 hdev->asic_prop.dram_end_address)) {
3361 dev_err(hdev->dev, "DRAM address 0x%llx + 0x%x is invalid\n",
3362 dram_memory_addr, le32_to_cpu(user_dma_pkt->tsize));
3363 return -EFAULT;
3364 }
3365
3366 parser->patched_cb_size += sizeof(*user_dma_pkt);
3367
3368 return 0;
3369 }
3370
3371 static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3372 struct hl_cs_parser *parser,
3373 struct packet_lin_dma *user_dma_pkt)
3374 {
3375 enum goya_dma_direction user_dir;
3376 u32 ctl;
3377 int rc;
3378
3379 dev_dbg(hdev->dev, "DMA packet details:\n");
3380 dev_dbg(hdev->dev, "source == 0x%llx\n",
3381 le64_to_cpu(user_dma_pkt->src_addr));
3382 dev_dbg(hdev->dev, "destination == 0x%llx\n",
3383 le64_to_cpu(user_dma_pkt->dst_addr));
3384 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3385
3386 ctl = le32_to_cpu(user_dma_pkt->ctl);
3387 user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3388 GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3389
3390 /*
3391  * Special handling for DMA with size 0. The H/W has a bug where
3392  * this can cause the QMAN DMA to get stuck, hence we don't allow it
3393  */
3394 if (user_dma_pkt->tsize == 0) {
3395 dev_err(hdev->dev,
3396 "Got DMA with size 0, might reset the device\n");
3397 return -EINVAL;
3398 }
3399
3400 if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM))
3401 rc = goya_validate_dma_pkt_no_host(hdev, parser, user_dma_pkt);
3402 else
3403 rc = goya_validate_dma_pkt_host(hdev, parser, user_dma_pkt);
3404
3405 return rc;
3406 }
3407
3408 static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
3409 struct hl_cs_parser *parser,
3410 struct packet_lin_dma *user_dma_pkt)
3411 {
3412 dev_dbg(hdev->dev, "DMA packet details:\n");
3413 dev_dbg(hdev->dev, "source == 0x%llx\n",
3414 le64_to_cpu(user_dma_pkt->src_addr));
3415 dev_dbg(hdev->dev, "destination == 0x%llx\n",
3416 le64_to_cpu(user_dma_pkt->dst_addr));
3417 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3418
3419 /*
3420  * WA for HW-23.
3421  * We can't allow user to read from Host using QMANs other than 1.
3422  */
3423 if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
3424 hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
3425 le32_to_cpu(user_dma_pkt->tsize),
3426 hdev->asic_prop.va_space_host_start_address,
3427 hdev->asic_prop.va_space_host_end_address)) {
3428 dev_err(hdev->dev,
3429 "Can't DMA from host on queue other then 1\n");
3430 return -EFAULT;
3431 }
3432
3433 if (user_dma_pkt->tsize == 0) {
3434 dev_err(hdev->dev,
3435 "Got DMA with size 0, might reset the device\n");
3436 return -EINVAL;
3437 }
3438
3439 parser->patched_cb_size += sizeof(*user_dma_pkt);
3440
3441 return 0;
3442 }
3443
3444 static int goya_validate_wreg32(struct hl_device *hdev,
3445 struct hl_cs_parser *parser,
3446 struct packet_wreg32 *wreg_pkt)
3447 {
3448 struct goya_device *goya = hdev->asic_specific;
3449 u32 sob_start_addr, sob_end_addr;
3450 u16 reg_offset;
3451
3452 reg_offset = le32_to_cpu(wreg_pkt->ctl) &
3453 GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK;
3454
3455 dev_dbg(hdev->dev, "WREG32 packet details:\n");
3456 dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset);
3457 dev_dbg(hdev->dev, "value == 0x%x\n",
3458 le32_to_cpu(wreg_pkt->value));
3459
3460 if (reg_offset != (mmDMA_CH_0_WR_COMP_ADDR_LO & 0x1FFF)) {
3461 dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n",
3462 reg_offset);
3463 return -EPERM;
3464 }
3465
3466 /*
3467  * With MMU, DMA channels are not secured, so it doesn't matter where
3468  * the WR COMP will be written to because it will go out with
3469  * non-secured property
3470  */
3471 if (goya->hw_cap_initialized & HW_CAP_MMU)
3472 return 0;
3473
3474 sob_start_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
3475 sob_end_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1023);
3476
3477 if ((le32_to_cpu(wreg_pkt->value) < sob_start_addr) ||
3478 (le32_to_cpu(wreg_pkt->value) > sob_end_addr)) {
3479
3480 dev_err(hdev->dev, "WREG32 packet with illegal value 0x%x\n",
3481 le32_to_cpu(wreg_pkt->value));
3482 return -EPERM;
3483 }
3484
3485 return 0;
3486 }
3487
3488 static int goya_validate_cb(struct hl_device *hdev,
3489 struct hl_cs_parser *parser, bool is_mmu)
3490 {
3491 u32 cb_parsed_length = 0;
3492 int rc = 0;
3493
3494 parser->patched_cb_size = 0;
3495
3496 /* Walk the CB packet-by-packet, validating each packet */
3497 while (cb_parsed_length < parser->user_cb_size) {
3498 enum packet_id pkt_id;
3499 u16 pkt_size;
3500 struct goya_packet *user_pkt;
3501
3502 user_pkt = (struct goya_packet *) (uintptr_t)
3503 (parser->user_cb->kernel_address + cb_parsed_length);
3504
3505 pkt_id = (enum packet_id) (
3506 (le64_to_cpu(user_pkt->header) &
3507 PACKET_HEADER_PACKET_ID_MASK) >>
3508 PACKET_HEADER_PACKET_ID_SHIFT);
3509
3510 pkt_size = goya_packet_sizes[pkt_id];
3511 cb_parsed_length += pkt_size;
3512 if (cb_parsed_length > parser->user_cb_size) {
3513 dev_err(hdev->dev,
3514 "packet 0x%x is out of CB boundary\n", pkt_id);
3515 rc = -EINVAL;
3516 break;
3517 }
3518
3519 switch (pkt_id) {
3520 case PACKET_WREG_32:
3521 /*
3522  * Although it is validated after copy in patch_cb(),
3523  * need to validate here as well because patch_cb() is
3524  * not called in MMU path while this function is called
3525  */
3526 rc = goya_validate_wreg32(hdev,
3527 parser, (struct packet_wreg32 *) user_pkt);
3528 break;
3529
3530 case PACKET_WREG_BULK:
3531 dev_err(hdev->dev,
3532 "User not allowed to use WREG_BULK\n");
3533 rc = -EPERM;
3534 break;
3535
3536 case PACKET_MSG_PROT:
3537 dev_err(hdev->dev,
3538 "User not allowed to use MSG_PROT\n");
3539 rc = -EPERM;
3540 break;
3541
3542 case PACKET_CP_DMA:
3543 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3544 rc = -EPERM;
3545 break;
3546
3547 case PACKET_STOP:
3548 dev_err(hdev->dev, "User not allowed to use STOP\n");
3549 rc = -EPERM;
3550 break;
3551
3552 case PACKET_LIN_DMA:
3553 if (is_mmu)
3554 rc = goya_validate_dma_pkt_mmu(hdev, parser,
3555 (struct packet_lin_dma *) user_pkt);
3556 else
3557 rc = goya_validate_dma_pkt_no_mmu(hdev, parser,
3558 (struct packet_lin_dma *) user_pkt);
3559 break;
3560
3561 case PACKET_MSG_LONG:
3562 case PACKET_MSG_SHORT:
3563 case PACKET_FENCE:
3564 case PACKET_NOP:
3565 parser->patched_cb_size += pkt_size;
3566 break;
3567
3568 default:
3569 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3570 pkt_id);
3571 rc = -EINVAL;
3572 break;
3573 }
3574
3575 if (rc)
3576 break;
3577 }
3578
3579 /*
3580  * The new CB should have space at the end for two MSG_PROT packets:
3581  * 1. A packet that will act as a completion packet
3582  * 2. A packet that will generate MSI-X interrupt
3583  */
3584 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3585
3586 return rc;
3587 }
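
/*
 * CB handling is a two-pass scheme: goya_validate_cb() above only vets
 * the packets and accumulates patched_cb_size (a host-memory LIN_DMA may
 * expand into several descriptors after pinning), while goya_patch_cb()
 * below performs the second pass that writes the final packets into the
 * kernel-owned CB allocated with that precomputed size.
 */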
3588
3589 static int goya_patch_dma_packet(struct hl_device *hdev,
3590 struct hl_cs_parser *parser,
3591 struct packet_lin_dma *user_dma_pkt,
3592 struct packet_lin_dma *new_dma_pkt,
3593 u32 *new_dma_pkt_size)
3594 {
3595 struct hl_userptr *userptr;
3596 struct scatterlist *sg, *sg_next_iter;
3597 u32 count, dma_desc_cnt;
3598 u64 len, len_next;
3599 dma_addr_t dma_addr, dma_addr_next;
3600 enum goya_dma_direction user_dir;
3601 u64 device_memory_addr, addr;
3602 enum dma_data_direction dir;
3603 struct sg_table *sgt;
3604 bool skip_host_mem_pin = false;
3605 bool user_memset;
3606 u32 user_rdcomp_mask, user_wrcomp_mask, ctl;
3607
3608 ctl = le32_to_cpu(user_dma_pkt->ctl);
3609
3610 user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3611 GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3612
3613 user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3614 GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3615
3616 if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM) ||
3617 (user_dma_pkt->tsize == 0)) {
3618 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*new_dma_pkt));
3619 *new_dma_pkt_size = sizeof(*new_dma_pkt);
3620 return 0;
3621 }
3622
3623 if ((user_dir == DMA_HOST_TO_DRAM) || (user_dir == DMA_HOST_TO_SRAM)) {
3624 addr = le64_to_cpu(user_dma_pkt->src_addr);
3625 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3626 dir = DMA_TO_DEVICE;
3627 if (user_memset)
3628 skip_host_mem_pin = true;
3629 } else {
3630 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3631 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3632 dir = DMA_FROM_DEVICE;
3633 }
3634
3635 if ((!skip_host_mem_pin) &&
3636 (hl_userptr_is_pinned(hdev, addr,
3637 le32_to_cpu(user_dma_pkt->tsize),
3638 parser->job_userptr_list, &userptr) == false)) {
3639 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3640 addr, le32_to_cpu(user_dma_pkt->tsize));
3641 return -EFAULT;
3642 }
3643
3644 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3645 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3646 *new_dma_pkt_size = sizeof(*user_dma_pkt);
3647 return 0;
3648 }
3649
3650 user_rdcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK;
3651
3652 user_wrcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK;
3653
3654 sgt = userptr->sgt;
3655 dma_desc_cnt = 0;
3656
3657 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3658 len = sg_dma_len(sg);
3659 dma_addr = sg_dma_address(sg);
3660
3661 if (len == 0)
3662 break;
3663
3664 while ((count + 1) < sgt->nents) {
3665 sg_next_iter = sg_next(sg);
3666 len_next = sg_dma_len(sg_next_iter);
3667 dma_addr_next = sg_dma_address(sg_next_iter);
3668
3669 if (len_next == 0)
3670 break;
3671
3672 if ((dma_addr + len == dma_addr_next) &&
3673 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3674 len += len_next;
3675 count++;
3676 sg = sg_next_iter;
3677 } else {
3678 break;
3679 }
3680 }
3681
3682 ctl = le32_to_cpu(user_dma_pkt->ctl);
3683 if (likely(dma_desc_cnt))
3684 ctl &= ~GOYA_PKT_CTL_EB_MASK;
3685 ctl &= ~(GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK |
3686 GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK);
3687 new_dma_pkt->ctl = cpu_to_le32(ctl);
3688 new_dma_pkt->tsize = cpu_to_le32((u32) len);
3689
3690 if (dir == DMA_TO_DEVICE) {
3691 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
3692 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
3693 } else {
3694 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
3695 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
3696 }
3697
3698 if (!user_memset)
3699 device_memory_addr += len;
3700 dma_desc_cnt++;
3701 new_dma_pkt++;
3702 }
3703
3704 if (!dma_desc_cnt) {
3705 dev_err(hdev->dev,
3706 "Error of 0 SG entries when patching DMA packet\n");
3707 return -EFAULT;
3708 }
3709
3710 /* Fix the last dma packet - rdcomp/wrcomp must be as user set them */
3711 new_dma_pkt--;
3712 new_dma_pkt->ctl |= cpu_to_le32(user_rdcomp_mask | user_wrcomp_mask);
3713
3714 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
3715
3716 return 0;
3717 }
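
/*
 * Patching example (hypothetical pinned memory): a single user LIN_DMA of
 * 12K whose host buffer maps to two non-contiguous chunks of 8K and 4K is
 * rewritten as two packets. Per the loop above, the EB bit survives only
 * on the first packet, RDCOMP/WRCOMP are cleared while copying, and the
 * user's original completion masks are restored on the last packet only,
 * so exactly one completion is signaled for the whole transfer.
 */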
3718
3719 static int goya_patch_cb(struct hl_device *hdev,
3720 struct hl_cs_parser *parser)
3721 {
3722 u32 cb_parsed_length = 0;
3723 u32 cb_patched_cur_length = 0;
3724 int rc = 0;
3725
3726 /* Walk the CB again, this time copying/patching the packets */
3727 while (cb_parsed_length < parser->user_cb_size) {
3728 enum packet_id pkt_id;
3729 u16 pkt_size;
3730 u32 new_pkt_size = 0;
3731 struct goya_packet *user_pkt, *kernel_pkt;
3732
3733 user_pkt = (struct goya_packet *) (uintptr_t)
3734 (parser->user_cb->kernel_address + cb_parsed_length);
3735 kernel_pkt = (struct goya_packet *) (uintptr_t)
3736 (parser->patched_cb->kernel_address +
3737 cb_patched_cur_length);
3738
3739 pkt_id = (enum packet_id) (
3740 (le64_to_cpu(user_pkt->header) &
3741 PACKET_HEADER_PACKET_ID_MASK) >>
3742 PACKET_HEADER_PACKET_ID_SHIFT);
3743
3744 pkt_size = goya_packet_sizes[pkt_id];
3745 cb_parsed_length += pkt_size;
3746 if (cb_parsed_length > parser->user_cb_size) {
3747 dev_err(hdev->dev,
3748 "packet 0x%x is out of CB boundary\n", pkt_id);
3749 rc = -EINVAL;
3750 break;
3751 }
3752
3753 switch (pkt_id) {
3754 case PACKET_LIN_DMA:
3755 rc = goya_patch_dma_packet(hdev, parser,
3756 (struct packet_lin_dma *) user_pkt,
3757 (struct packet_lin_dma *) kernel_pkt,
3758 &new_pkt_size);
3759 cb_patched_cur_length += new_pkt_size;
3760 break;
3761
3762 case PACKET_WREG_32:
3763 memcpy(kernel_pkt, user_pkt, pkt_size);
3764 cb_patched_cur_length += pkt_size;
3765 rc = goya_validate_wreg32(hdev, parser,
3766 (struct packet_wreg32 *) kernel_pkt);
3767 break;
3768
3769 case PACKET_WREG_BULK:
3770 dev_err(hdev->dev,
3771 "User not allowed to use WREG_BULK\n");
3772 rc = -EPERM;
3773 break;
3774
3775 case PACKET_MSG_PROT:
3776 dev_err(hdev->dev,
3777 "User not allowed to use MSG_PROT\n");
3778 rc = -EPERM;
3779 break;
3780
3781 case PACKET_CP_DMA:
3782 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3783 rc = -EPERM;
3784 break;
3785
3786 case PACKET_STOP:
3787 dev_err(hdev->dev, "User not allowed to use STOP\n");
3788 rc = -EPERM;
3789 break;
3790
3791 case PACKET_MSG_LONG:
3792 case PACKET_MSG_SHORT:
3793 case PACKET_FENCE:
3794 case PACKET_NOP:
3795 memcpy(kernel_pkt, user_pkt, pkt_size);
3796 cb_patched_cur_length += pkt_size;
3797 break;
3798
3799 default:
3800 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3801 pkt_id);
3802 rc = -EINVAL;
3803 break;
3804 }
3805
3806 if (rc)
3807 break;
3808 }
3809
3810 return rc;
3811 }
3812
3813 static int goya_parse_cb_mmu(struct hl_device *hdev,
3814 struct hl_cs_parser *parser)
3815 {
3816 u64 patched_cb_handle;
3817 u32 patched_cb_size;
3818 struct hl_cb *user_cb;
3819 int rc;
3820
3821 /*
3822  * The new CB should have space at the end for two MSG_PROT packets:
3823  * 1. A packet that will act as a completion packet
3824  * 2. A packet that will generate MSI-X interrupt
3825  */
3826 parser->patched_cb_size = parser->user_cb_size +
3827 sizeof(struct packet_msg_prot) * 2;
3828
3829 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
3830 parser->patched_cb_size,
3831 &patched_cb_handle, HL_KERNEL_ASID_ID);
3832
3833 if (rc) {
3834 dev_err(hdev->dev,
3835 "Failed to allocate patched CB for DMA CS %d\n",
3836 rc);
3837 return rc;
3838 }
3839
3840 patched_cb_handle >>= PAGE_SHIFT;
3841 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
3842 (u32) patched_cb_handle);
3843
3844 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
3845 (u32) patched_cb_handle);
3846 if (!parser->patched_cb) {
3847 rc = -EFAULT;
3848 goto out;
3849 }
3850
3851 /*
3852  * The check that parser->user_cb_size <= parser->user_cb->size
3853  * was done in validate_queue_index().
3854  */
3855 memcpy((void *) (uintptr_t) parser->patched_cb->kernel_address,
3856 (void *) (uintptr_t) parser->user_cb->kernel_address,
3857 parser->user_cb_size);
3858
3859 patched_cb_size = parser->patched_cb_size;
3860
3861
3862 user_cb = parser->user_cb;
3863 parser->user_cb = parser->patched_cb;
3864 rc = goya_validate_cb(hdev, parser, true);
3865 parser->user_cb = user_cb;
3866
3867 if (rc) {
3868 hl_cb_put(parser->patched_cb);
3869 goto out;
3870 }
3871
3872 if (patched_cb_size != parser->patched_cb_size) {
3873 dev_err(hdev->dev, "user CB size mismatch\n");
3874 hl_cb_put(parser->patched_cb);
3875 rc = -EINVAL;
3876 goto out;
3877 }
3878
3879 out:
3880
3881 /*
3882  * Always call cb destroy here because we still have 1 reference
3883  * to the CB taken by the parser; on success the parser keeps using
3884  * the CB through the reference taken by hl_cb_get() above
3885  */
3886 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
3887 patched_cb_handle << PAGE_SHIFT);
3888
3889 return rc;
3890 }
3891
3892 static int goya_parse_cb_no_mmu(struct hl_device *hdev,
3893 struct hl_cs_parser *parser)
3894 {
3895 u64 patched_cb_handle;
3896 int rc;
3897
3898 rc = goya_validate_cb(hdev, parser, false);
3899
3900 if (rc)
3901 goto free_userptr;
3902
3903 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
3904 parser->patched_cb_size,
3905 &patched_cb_handle, HL_KERNEL_ASID_ID);
3906 if (rc) {
3907 dev_err(hdev->dev,
3908 "Failed to allocate patched CB for DMA CS %d\n", rc);
3909 goto free_userptr;
3910 }
3911
3912 patched_cb_handle >>= PAGE_SHIFT;
3913 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
3914 (u32) patched_cb_handle);
3915
3916 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
3917 (u32) patched_cb_handle);
3918 if (!parser->patched_cb) {
3919 rc = -EFAULT;
3920 goto out;
3921 }
3922
3923 rc = goya_patch_cb(hdev, parser);
3924
3925 if (rc)
3926 hl_cb_put(parser->patched_cb);
3927
3928 out:
3929
3930 /*
3931  * Always call cb destroy here because we still have 1 reference
3932  * to the CB taken by the parser; on success the parser keeps using
3933  * the CB through the reference taken by hl_cb_get() above
3934  */
3935 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
3936 patched_cb_handle << PAGE_SHIFT);
3937
3938 free_userptr:
3939 if (rc)
3940 hl_userptr_delete_list(hdev, parser->job_userptr_list);
3941 return rc;
3942 }
3943
3944 static int goya_parse_cb_no_ext_queue(struct hl_device *hdev,
3945 struct hl_cs_parser *parser)
3946 {
3947 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
3948 struct goya_device *goya = hdev->asic_specific;
3949
3950 if (goya->hw_cap_initialized & HW_CAP_MMU)
3951 return 0;
3952
3953 /* For internal queue jobs, just check if CB address is valid */
3954 if (hl_mem_area_inside_range(
3955 (u64) (uintptr_t) parser->user_cb,
3956 parser->user_cb_size,
3957 asic_prop->sram_user_base_address,
3958 asic_prop->sram_end_address))
3959 return 0;
3960
3961 if (hl_mem_area_inside_range(
3962 (u64) (uintptr_t) parser->user_cb,
3963 parser->user_cb_size,
3964 asic_prop->dram_user_base_address,
3965 asic_prop->dram_end_address))
3966 return 0;
3967
3968 dev_err(hdev->dev,
3969 "Internal CB address %px + 0x%x is not in SRAM nor in DRAM\n",
3970 parser->user_cb, parser->user_cb_size);
3971
3972 return -EFAULT;
3973 }
3974
3975 int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
3976 {
3977 struct goya_device *goya = hdev->asic_specific;
3978
3979 if (!parser->ext_queue)
3980 return goya_parse_cb_no_ext_queue(hdev, parser);
3981
3982 if (goya->hw_cap_initialized & HW_CAP_MMU)
3983 return goya_parse_cb_mmu(hdev, parser);
3984 else
3985 return goya_parse_cb_no_mmu(hdev, parser);
3986 }
3987
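/*
 * Append two MSG_PROT packets at the end of the CB: the first writes the
 * completion value to the completion queue address, the second writes the
 * MSI-X vector number to the doorbell register so the host receives an
 * interrupt.
 */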
3988 void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address,
3989 u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec)
3990 {
3991 struct packet_msg_prot *cq_pkt;
3992 u32 tmp;
3993
3994 cq_pkt = (struct packet_msg_prot *) (uintptr_t)
3995 (kernel_address + len - (sizeof(struct packet_msg_prot) * 2));
3996
3997 tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
3998 (1 << GOYA_PKT_CTL_EB_SHIFT) |
3999 (1 << GOYA_PKT_CTL_MB_SHIFT);
4000 cq_pkt->ctl = cpu_to_le32(tmp);
4001 cq_pkt->value = cpu_to_le32(cq_val);
4002 cq_pkt->addr = cpu_to_le64(cq_addr);
4003
4004 cq_pkt++;
4005
4006 tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
4007 (1 << GOYA_PKT_CTL_MB_SHIFT);
4008 cq_pkt->ctl = cpu_to_le32(tmp);
4009 cq_pkt->value = cpu_to_le32(msix_vec & 0x7FF);
4010 cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF);
4011 }
4012
4013 void goya_update_eq_ci(struct hl_device *hdev, u32 val)
4014 {
4015 WREG32(mmCPU_EQ_CI, val);
4016 }
4017
4018 void goya_restore_phase_topology(struct hl_device *hdev)
4019 {
4020 /* Nothing to restore on Goya; empty stub for the common ASIC interface */
4021 }
4022
4023 static void goya_clear_sm_regs(struct hl_device *hdev)
4024 {
4025 int i, num_of_sob_in_longs, num_of_mon_in_longs;
4026
4027 num_of_sob_in_longs =
4028 ((mmSYNC_MNGR_SOB_OBJ_1023 - mmSYNC_MNGR_SOB_OBJ_0) + 4);
4029
4030 num_of_mon_in_longs =
4031 ((mmSYNC_MNGR_MON_STATUS_255 - mmSYNC_MNGR_MON_STATUS_0) + 4);
4032
4033 for (i = 0 ; i < num_of_sob_in_longs ; i += 4)
4034 WREG32(mmSYNC_MNGR_SOB_OBJ_0 + i, 0);
4035
4036 for (i = 0 ; i < num_of_mon_in_longs ; i += 4)
4037 WREG32(mmSYNC_MNGR_MON_STATUS_0 + i, 0);
4038
4039 /* Flush all WREG to prevent a race */
4040 i = RREG32(mmSYNC_MNGR_SOB_OBJ_0);
4041 }
4042
4043 /*
4044 * goya_debugfs_read32 - read a 32bit value from a given device or a host
4045 * mapped address
4046 *
4047 * @hdev: pointer to hl_device structure
4048 * @addr: device or host mapped address
4049 * @val: returned value
4050 *
4051 * In case of a DDR address that is not mapped into the default aperture
4052 * that the DDR bar exposes, the function configures the iATU so that the
4053 * DDR bar is positioned at a base address that allows reading from the
4054 * required address. Afterwards, the DDR bar is moved back to its original
4055 * position. Host physical addresses are read directly via phys_to_virt
4056 * when no IOMMU is present.
4057 */
4058 static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
4059 {
4060 struct asic_fixed_properties *prop = &hdev->asic_prop;
4061 u64 ddr_bar_addr;
4062 int rc = 0;
4063
4064 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4065 *val = RREG32(addr - CFG_BASE);
4066
4067 } else if ((addr >= SRAM_BASE_ADDR) &&
4068 (addr < SRAM_BASE_ADDR + SRAM_SIZE)) {
4069
4070 *val = readl(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
4071 (addr - SRAM_BASE_ADDR));
4072
4073 } else if ((addr >= DRAM_PHYS_BASE) &&
4074 (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size)) {
4075
4076 u64 bar_base_addr = DRAM_PHYS_BASE +
4077 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4078
4079 ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
4080 if (ddr_bar_addr != U64_MAX) {
4081 *val = readl(hdev->pcie_bar[DDR_BAR_ID] +
4082 (addr - bar_base_addr));
4083
4084 ddr_bar_addr = goya_set_ddr_bar_base(hdev,
4085 ddr_bar_addr);
4086 }
4087 if (ddr_bar_addr == U64_MAX)
4088 rc = -EIO;
4089
4090 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4091 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
4092
4093 } else {
4094 rc = -EFAULT;
4095 }
4096
4097 return rc;
4098 }
4099
4100 /*
4101 * goya_debugfs_write32 - write a 32bit value to a given device or a host
4102 * mapped address
4103 *
4104 * @hdev: pointer to hl_device structure
4105 * @addr: device or host mapped address
4106 * @val: value to write
4107 *
4108 * In case of a DDR address that is not mapped into the default aperture
4109 * that the DDR bar exposes, the function configures the iATU so that the
4110 * DDR bar is positioned at a base address that allows writing to the
4111 * required address. Afterwards, the DDR bar is moved back to its original
4112 * position. Host physical addresses are written directly via phys_to_virt
4113 * when no IOMMU is present.
4114 */
4115 static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
4116 {
4117 struct asic_fixed_properties *prop = &hdev->asic_prop;
4118 u64 ddr_bar_addr;
4119 int rc = 0;
4120
4121 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4122 WREG32(addr - CFG_BASE, val);
4123
4124 } else if ((addr >= SRAM_BASE_ADDR) &&
4125 (addr < SRAM_BASE_ADDR + SRAM_SIZE)) {
4126
4127 writel(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
4128 (addr - SRAM_BASE_ADDR));
4129
4130 } else if ((addr >= DRAM_PHYS_BASE) &&
4131 (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size)) {
4132
4133 u64 bar_base_addr = DRAM_PHYS_BASE +
4134 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4135
4136 ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
4137 if (ddr_bar_addr != U64_MAX) {
4138 writel(val, hdev->pcie_bar[DDR_BAR_ID] +
4139 (addr - bar_base_addr));
4140
4141 ddr_bar_addr = goya_set_ddr_bar_base(hdev,
4142 ddr_bar_addr);
4143 }
4144 if (ddr_bar_addr == U64_MAX)
4145 rc = -EIO;
4146
4147 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4148 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4149
4150 } else {
4151 rc = -EFAULT;
4152 }
4153
4154 return rc;
4155 }
4156
4157 static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
4158 {
4159 struct goya_device *goya = hdev->asic_specific;
4160
4161 if (hdev->hard_reset_pending)
4162 return U64_MAX;
4163
4164 return readq(hdev->pcie_bar[DDR_BAR_ID] +
4165 (addr - goya->ddr_bar_cur_addr));
4166 }
4167
4168 static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4169 {
4170 struct goya_device *goya = hdev->asic_specific;
4171
4172 if (hdev->hard_reset_pending)
4173 return;
4174
4175 writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
4176 (addr - goya->ddr_bar_cur_addr));
4177 }
4178
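/*
 * Return a printf-style template that describes the given async event ID.
 * Templates containing %d are filled in with the engine/instance index by
 * goya_get_event_desc() below.
 */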
4179 static const char *_goya_get_event_desc(u16 event_type)
4180 {
4181 switch (event_type) {
4182 case GOYA_ASYNC_EVENT_ID_PCIE_IF:
4183 return "PCIe_if";
4184 case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4185 case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4186 case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4187 case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4188 case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4189 case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4190 case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4191 case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4192 return "TPC%d_ecc";
4193 case GOYA_ASYNC_EVENT_ID_MME_ECC:
4194 return "MME_ecc";
4195 case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
4196 return "MME_ecc_ext";
4197 case GOYA_ASYNC_EVENT_ID_MMU_ECC:
4198 return "MMU_ecc";
4199 case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
4200 return "DMA_macro";
4201 case GOYA_ASYNC_EVENT_ID_DMA_ECC:
4202 return "DMA_ecc";
4203 case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
4204 return "CPU_if_ecc";
4205 case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
4206 return "PSOC_mem";
4207 case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
4208 return "PSOC_coresight";
4209 case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4210 return "SRAM%d";
4211 case GOYA_ASYNC_EVENT_ID_GIC500:
4212 return "GIC500";
4213 case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4214 return "PLL%d";
4215 case GOYA_ASYNC_EVENT_ID_AXI_ECC:
4216 return "AXI_ecc";
4217 case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
4218 return "L2_ram_ecc";
4219 case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
4220 return "PSOC_gpio_05_sw_reset";
4221 case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
4222 return "PSOC_gpio_10_vrhot_icrit";
4223 case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
4224 return "PCIe_dec";
4225 case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4226 case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4227 case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4228 case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4229 case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4230 case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4231 case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4232 case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4233 return "TPC%d_dec";
4234 case GOYA_ASYNC_EVENT_ID_MME_WACS:
4235 return "MME_wacs";
4236 case GOYA_ASYNC_EVENT_ID_MME_WACSD:
4237 return "MME_wacsd";
4238 case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
4239 return "CPU_axi_splitter";
4240 case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
4241 return "PSOC_axi_dec";
4242 case GOYA_ASYNC_EVENT_ID_PSOC:
4243 return "PSOC";
4244 case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4245 case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4246 case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4247 case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4248 case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4249 case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4250 case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4251 case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4252 return "TPC%d_krn_err";
4253 case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
4254 return "TPC%d_cq";
4255 case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4256 return "TPC%d_qm";
4257 case GOYA_ASYNC_EVENT_ID_MME_QM:
4258 return "MME_qm";
4259 case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
4260 return "MME_cq";
4261 case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4262 return "DMA%d_qm";
4263 case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4264 return "DMA%d_ch";
4265 case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4266 case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4267 case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4268 case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4269 case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4270 case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4271 case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4272 case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4273 return "TPC%d_bmon_spmu";
4274 case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4275 return "DMA_bm_ch%d";
4276 default:
4277 return "N/A";
4278 }
4279 }
4280
4281 static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
4282 {
4283 u8 index;
4284
4285 switch (event_type) {
4286 case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4287 case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4288 case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4289 case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4290 case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4291 case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4292 case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4293 case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4294 index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3;
4295 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4296 break;
4297 case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4298 index = event_type - GOYA_ASYNC_EVENT_ID_SRAM0;
4299 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4300 break;
4301 case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4302 index = event_type - GOYA_ASYNC_EVENT_ID_PLL0;
4303 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4304 break;
4305 case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4306 case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4307 case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4308 case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4309 case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4310 case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4311 case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4312 case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4313 index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_DEC) / 3;
4314 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4315 break;
4316 case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4317 case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4318 case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4319 case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4320 case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4321 case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4322 case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4323 case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4324 index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR) / 10;
4325 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4326 break;
4327 case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
4328 index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_CMDQ;
4329 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4330 break;
4331 case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4332 index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_QM;
4333 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4334 break;
4335 case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4336 index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_QM;
4337 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4338 break;
4339 case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4340 index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_CH;
4341 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4342 break;
4343 case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4344 case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4345 case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4346 case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4347 case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4348 case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4349 case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4350 case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4351 index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU) / 10;
4352 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4353 break;
4354 case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4355 index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0;
4356 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4357 break;
4358 default:
4359 snprintf(desc, size, "%s", _goya_get_event_desc(event_type));
4360 break;
4361 }
4362 }
4363
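/*
 * Print and clear the DMA RAZWI (Read-As-Zero, Write-Ignore) capture
 * registers, which latch illegal transactions on the LBW and HBW buses.
 */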
4364 static void goya_print_razwi_info(struct hl_device *hdev)
4365 {
4366 if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) {
4367 dev_err(hdev->dev, "Illegal write to LBW\n");
4368 WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0);
4369 }
4370
4371 if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) {
4372 dev_err(hdev->dev, "Illegal read from LBW\n");
4373 WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0);
4374 }
4375
4376 if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) {
4377 dev_err(hdev->dev, "Illegal write to HBW\n");
4378 WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0);
4379 }
4380
4381 if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) {
4382 dev_err(hdev->dev, "Illegal read from HBW\n");
4383 WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0);
4384 }
4385 }
4386
4387 static void goya_print_mmu_error_info(struct hl_device *hdev)
4388 {
4389 struct goya_device *goya = hdev->asic_specific;
4390 u64 addr;
4391 u32 val;
4392
4393 if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4394 return;
4395
4396 val = RREG32(mmMMU_PAGE_ERROR_CAPTURE);
4397 if (val & MMU_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
4398 addr = val & MMU_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
4399 addr <<= 32;
4400 addr |= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA);
4401
4402 dev_err(hdev->dev, "MMU page fault on va 0x%llx\n", addr);
4403
4404 WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0);
4405 }
4406 }
4407
4408 static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
4409 bool razwi)
4410 {
4411 char desc[32] = "";
4412
4413 goya_get_event_desc(event_type, desc, sizeof(desc));
4414 dev_err(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
4415 event_type, desc);
4416
4417 if (razwi) {
4418 goya_print_razwi_info(hdev);
4419 goya_print_mmu_error_info(hdev);
4420 }
4421 }
4422
4423 static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
4424 size_t irq_arr_size)
4425 {
4426 struct armcp_unmask_irq_arr_packet *pkt;
4427 size_t total_pkt_size;
4428 long result;
4429 int rc;
4430 int irq_num_entries, irq_arr_index;
4431 __le32 *goya_irq_arr;
4432
4433 total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) +
4434 irq_arr_size;
4435
4436 /* Data should be aligned to 8 bytes in order for ArmCP to copy it */
4437 total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
4438
4439 /* total_pkt_size is cast to u16 later on */
4440 if (total_pkt_size > USHRT_MAX) {
4441 dev_err(hdev->dev, "too many elements in IRQ array\n");
4442 return -EINVAL;
4443 }
4444
4445 pkt = kzalloc(total_pkt_size, GFP_KERNEL);
4446 if (!pkt)
4447 return -ENOMEM;
4448
4449 irq_num_entries = irq_arr_size / sizeof(irq_arr[0]);
4450 pkt->length = cpu_to_le32(irq_num_entries);
4451
4452 /* We must perform any necessary endianness conversion on the irq
4453 * array in the host memory
4454 */
4455 for (irq_arr_index = 0, goya_irq_arr = (__le32 *) &pkt->irqs;
4456 irq_arr_index < irq_num_entries ; irq_arr_index++)
4457 goya_irq_arr[irq_arr_index] =
4458 cpu_to_le32(irq_arr[irq_arr_index]);
4459
4460 pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
4461 ARMCP_PKT_CTL_OPCODE_SHIFT);
4462
4463 rc = goya_send_cpu_message(hdev, (u32 *) pkt, total_pkt_size,
4464 HL_DEVICE_TIMEOUT_USEC, &result);
4465
4466 if (rc)
4467 dev_err(hdev->dev, "failed to unmask IRQ array\n");
4468
4469 kfree(pkt);
4470
4471 return rc;
4472 }
4473
4474 static int goya_soft_reset_late_init(struct hl_device *hdev)
4475 {
4476 /*
4477 * Unmask all IRQs since some could have been received
4478 * during the soft reset
4479 */
4480 return goya_unmask_irq_arr(hdev, goya_all_events,
4481 sizeof(goya_all_events));
4482 }
4483
4484 static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
4485 {
4486 struct armcp_packet pkt;
4487 long result;
4488 int rc;
4489
4490 memset(&pkt, 0, sizeof(pkt));
4491
4492 pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ <<
4493 ARMCP_PKT_CTL_OPCODE_SHIFT);
4494 pkt.value = cpu_to_le64(event_type);
4495
4496 rc = goya_send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
4497 HL_DEVICE_TIMEOUT_USEC, &result);
4498
4499 if (rc)
4500 dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d\n", event_type);
4501
4502 return rc;
4503 }
4504
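/*
 * Event queue entry handler. Fatal events (ECC errors and the like)
 * trigger a hard reset, while recoverable RAZWI/engine events are logged
 * and then re-armed by unmasking the IRQ in the ArmCP firmware.
 */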
4505 void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
4506 {
4507 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
4508 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
4509 >> EQ_CTL_EVENT_TYPE_SHIFT);
4510 struct goya_device *goya = hdev->asic_specific;
4511
4512 goya->events_stat[event_type]++;
4513 goya->events_stat_aggregate[event_type]++;
4514
4515 switch (event_type) {
4516 case GOYA_ASYNC_EVENT_ID_PCIE_IF:
4517 case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4518 case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4519 case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4520 case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4521 case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4522 case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4523 case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4524 case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4525 case GOYA_ASYNC_EVENT_ID_MME_ECC:
4526 case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
4527 case GOYA_ASYNC_EVENT_ID_MMU_ECC:
4528 case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
4529 case GOYA_ASYNC_EVENT_ID_DMA_ECC:
4530 case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
4531 case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
4532 case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
4533 case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4534 case GOYA_ASYNC_EVENT_ID_GIC500:
4535 case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4536 case GOYA_ASYNC_EVENT_ID_AXI_ECC:
4537 case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
4538 case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
4539 goya_print_irq_info(hdev, event_type, false);
4540 hl_device_reset(hdev, true, false);
4541 break;
4542
4543 case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
4544 case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4545 case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4546 case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4547 case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4548 case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4549 case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4550 case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4551 case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4552 case GOYA_ASYNC_EVENT_ID_MME_WACS:
4553 case GOYA_ASYNC_EVENT_ID_MME_WACSD:
4554 case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
4555 case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
4556 case GOYA_ASYNC_EVENT_ID_PSOC:
4557 case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4558 case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4559 case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4560 case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4561 case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4562 case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4563 case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4564 case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4565 case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4566 case GOYA_ASYNC_EVENT_ID_MME_QM:
4567 case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
4568 case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4569 case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4570 goya_print_irq_info(hdev, event_type, true);
4571 goya_unmask_irq(hdev, event_type);
4572 break;
4573
4574 case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
4575 case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4576 case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4577 case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4578 case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4579 case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4580 case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4581 case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4582 case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4583 case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4584 goya_print_irq_info(hdev, event_type, false);
4585 goya_unmask_irq(hdev, event_type);
4586 break;
4587
4588 default:
4589 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
4590 event_type);
4591 break;
4592 }
4593 }
4594
4595 void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
4596 {
4597 struct goya_device *goya = hdev->asic_specific;
4598
4599 if (aggregate) {
4600 *size = (u32) sizeof(goya->events_stat_aggregate);
4601 return goya->events_stat_aggregate;
4602 }
4603
4604 *size = (u32) sizeof(goya->events_stat);
4605 return goya->events_stat;
4606 }
4607
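/*
 * Fill a device memory range with a 64-bit pattern by building memset
 * LIN_DMA packets, one per 2GB chunk (the packet size field is 32 bits),
 * in a kernel CB and submitting it as a job on QMAN0.
 */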
4608 static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
4609 u64 val, bool is_dram)
4610 {
4611 struct packet_lin_dma *lin_dma_pkt;
4612 struct hl_cs_job *job;
4613 u32 cb_size, ctl;
4614 struct hl_cb *cb;
4615 int rc, lin_dma_pkts_cnt;
4616
4617 lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G);
4618 cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) +
4619 sizeof(struct packet_msg_prot);
4620 cb = hl_cb_kernel_create(hdev, cb_size);
4621 if (!cb)
4622 return -ENOMEM;
4623
4624 lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address;
4625
4626 do {
4627 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4628
4629 ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
4630 (1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
4631 (1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
4632 (1 << GOYA_PKT_CTL_RB_SHIFT) |
4633 (1 << GOYA_PKT_CTL_MB_SHIFT));
4634 ctl |= (is_dram ? DMA_HOST_TO_DRAM : DMA_HOST_TO_SRAM) <<
4635 GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
4636 lin_dma_pkt->ctl = cpu_to_le32(ctl);
4637
4638 lin_dma_pkt->src_addr = cpu_to_le64(val);
4639 lin_dma_pkt->dst_addr = cpu_to_le64(addr);
4640 if (lin_dma_pkts_cnt > 1)
4641 lin_dma_pkt->tsize = cpu_to_le32(SZ_2G);
4642 else
4643 lin_dma_pkt->tsize = cpu_to_le32(size);
4644
4645 size -= SZ_2G;
4646 addr += SZ_2G;
4647 lin_dma_pkt++;
4648 } while (--lin_dma_pkts_cnt);
4649
4650 job = hl_cs_allocate_job(hdev, true);
4651 if (!job) {
4652 dev_err(hdev->dev, "Failed to allocate a new job\n");
4653 rc = -ENOMEM;
4654 goto release_cb;
4655 }
4656
4657 job->id = 0;
4658 job->user_cb = cb;
4659 job->user_cb->cs_cnt++;
4660 job->user_cb_size = cb_size;
4661 job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
4662 job->patched_cb = job->user_cb;
4663 job->job_cb_size = job->user_cb_size;
4664
4665 hl_debugfs_add_job(hdev, job);
4666
4667 rc = goya_send_job_on_qman0(hdev, job);
4668
4669 hl_debugfs_remove_job(hdev, job);
4670 kfree(job);
4671 cb->cs_cnt--;
4672
4673 release_cb:
4674 hl_cb_put(cb);
4675 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
4676
4677 return rc;
4678 }
4679
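/*
 * Per-context initialization: scrub the SRAM with a known pattern,
 * restore the DMA completion addresses and other registers that a user is
 * allowed to change, prepare the MMU for the new ASID and clear the sync
 * manager objects and monitors.
 */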
4680 int goya_context_switch(struct hl_device *hdev, u32 asid)
4681 {
4682 struct asic_fixed_properties *prop = &hdev->asic_prop;
4683 u64 addr = prop->sram_base_address, sob_addr;
4684 u32 size = hdev->pldm ? 0x10000 : prop->sram_size;
4685 u64 val = 0x7777777777777777ull;
4686 int rc, dma_id;
4687 u32 channel_off = mmDMA_CH_1_WR_COMP_ADDR_LO -
4688 mmDMA_CH_0_WR_COMP_ADDR_LO;
4689
4690 rc = goya_memset_device_memory(hdev, addr, size, val, false);
4691 if (rc) {
4692 dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
4693 return rc;
4694 }
4695
4696 /* We need to reset registers that the user is allowed to change */
4697 sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;
4698 WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO, lower_32_bits(sob_addr));
4699
4700 for (dma_id = 1 ; dma_id < NUMBER_OF_EXT_HW_QUEUES ; dma_id++) {
4701 sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
4702 (dma_id - 1) * 4;
4703 WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + channel_off * dma_id,
4704 lower_32_bits(sob_addr));
4705 }
4706
4707 WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
4708
4709 goya_mmu_prepare(hdev, asid);
4710
4711 goya_clear_sm_regs(hdev);
4712
4713 return 0;
4714 }
4715
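/*
 * Zero the DRAM region that holds the MMU page tables, the DRAM default
 * page and the MMU cache management area.
 */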
4716 static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
4717 {
4718 struct asic_fixed_properties *prop = &hdev->asic_prop;
4719 struct goya_device *goya = hdev->asic_specific;
4720 u64 addr = prop->mmu_pgt_addr;
4721 u32 size = prop->mmu_pgt_size + MMU_DRAM_DEFAULT_PAGE_SIZE +
4722 MMU_CACHE_MNG_SIZE;
4723
4724 if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4725 return 0;
4726
4727 return goya_memset_device_memory(hdev, addr, size, 0, true);
4728 }
4729
4730 static int goya_mmu_set_dram_default_page(struct hl_device *hdev)
4731 {
4732 struct goya_device *goya = hdev->asic_specific;
4733 u64 addr = hdev->asic_prop.mmu_dram_default_page_addr;
4734 u32 size = MMU_DRAM_DEFAULT_PAGE_SIZE;
4735 u64 val = 0x9999999999999999ull;
4736
4737 if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4738 return 0;
4739
4740 return goya_memset_device_memory(hdev, addr, size, val, true);
4741 }
4742
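/*
 * Map the CPU F/W image region 1:1 and map the CPU-accessible DMA pool to
 * its dedicated virtual address, using a single 2MB page when the pool is
 * suitably aligned and 4KB pages otherwise. Finally, override the device
 * CPU transactions to go through the MMU with the kernel ASID.
 */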
4743 static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
4744 {
4745 struct asic_fixed_properties *prop = &hdev->asic_prop;
4746 struct goya_device *goya = hdev->asic_specific;
4747 s64 off, cpu_off;
4748 int rc;
4749
4750 if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4751 return 0;
4752
4753 for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
4754 rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off,
4755 prop->dram_base_address + off, PAGE_SIZE_2MB);
4756 if (rc) {
4757 dev_err(hdev->dev, "Map failed for address 0x%llx\n",
4758 prop->dram_base_address + off);
4759 goto unmap;
4760 }
4761 }
4762
4763 if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
4764 rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
4765 hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB);
4766
4767 if (rc) {
4768 dev_err(hdev->dev,
4769 "Map failed for CPU accessible memory\n");
4770 off -= PAGE_SIZE_2MB;
4771 goto unmap;
4772 }
4773 } else {
4774 for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) {
4775 rc = hl_mmu_map(hdev->kernel_ctx,
4776 VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
4777 hdev->cpu_accessible_dma_address + cpu_off,
4778 PAGE_SIZE_4KB);
4779 if (rc) {
4780 dev_err(hdev->dev,
4781 "Map failed for CPU accessible memory\n");
4782 cpu_off -= PAGE_SIZE_4KB;
4783 goto unmap_cpu;
4784 }
4785 }
4786 }
4787
4788 goya_mmu_prepare_reg(hdev, mmCPU_IF_ARUSER_OVR, HL_KERNEL_ASID_ID);
4789 goya_mmu_prepare_reg(hdev, mmCPU_IF_AWUSER_OVR, HL_KERNEL_ASID_ID);
4790 WREG32(mmCPU_IF_ARUSER_OVR_EN, 0x7FF);
4791 WREG32(mmCPU_IF_AWUSER_OVR_EN, 0x7FF);
4792
4793 /* Make sure the configuration is flushed to the device */
4794 RREG32(mmCPU_IF_AWUSER_OVR_EN);
4795
4796 goya->device_cpu_mmu_mappings_done = true;
4797
4798 return 0;
4799
4800 unmap_cpu:
4801 for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
4802 if (hl_mmu_unmap(hdev->kernel_ctx,
4803 VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
4804 PAGE_SIZE_4KB))
4805 dev_warn_ratelimited(hdev->dev,
4806 "failed to unmap address 0x%llx\n",
4807 VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
4808 unmap:
4809 for (; off >= 0 ; off -= PAGE_SIZE_2MB)
4810 if (hl_mmu_unmap(hdev->kernel_ctx,
4811 prop->dram_base_address + off, PAGE_SIZE_2MB))
4812 dev_warn_ratelimited(hdev->dev,
4813 "failed to unmap address 0x%llx\n",
4814 prop->dram_base_address + off);
4815
4816 return rc;
4817 }
4818
4819 void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
4820 {
4821 struct asic_fixed_properties *prop = &hdev->asic_prop;
4822 struct goya_device *goya = hdev->asic_specific;
4823 u32 off, cpu_off;
4824
4825 if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4826 return;
4827
4828 if (!goya->device_cpu_mmu_mappings_done)
4829 return;
4830
4831 WREG32(mmCPU_IF_ARUSER_OVR_EN, 0);
4832 WREG32(mmCPU_IF_AWUSER_OVR_EN, 0);
4833
4834 if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
4835 if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
4836 PAGE_SIZE_2MB))
4837 dev_warn(hdev->dev,
4838 "Failed to unmap CPU accessible memory\n");
4839 } else {
4840 for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
4841 if (hl_mmu_unmap(hdev->kernel_ctx,
4842 VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
4843 PAGE_SIZE_4KB))
4844 dev_warn_ratelimited(hdev->dev,
4845 "failed to unmap address 0x%llx\n",
4846 VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
4847 }
4848
4849 for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
4850 if (hl_mmu_unmap(hdev->kernel_ctx,
4851 prop->dram_base_address + off, PAGE_SIZE_2MB))
4852 dev_warn_ratelimited(hdev->dev,
4853 "Failed to unmap address 0x%llx\n",
4854 prop->dram_base_address + off);
4855
4856 goya->device_cpu_mmu_mappings_done = false;
4857 }
4858
4859 static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
4860 {
4861 struct goya_device *goya = hdev->asic_specific;
4862 int i;
4863
4864 if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4865 return;
4866
4867 if (asid & ~MME_QM_GLBL_SECURE_PROPS_ASID_MASK) {
4868 WARN(1, "asid %u is too big\n", asid);
4869 return;
4870 }
4871
4872 /* Zero the MMBP and ASID bits and then set the ASID */
4873 for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++)
4874 goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid);
4875 }
4876
4877 static void goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard)
4878 {
4879 struct goya_device *goya = hdev->asic_specific;
4880 u32 status, timeout_usec;
4881 int rc;
4882
4883 if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4884 return;
4885
4886 /* There is no need for L1-only invalidation on Goya */
4887 if (!is_hard)
4888 return;
4889
4890 if (hdev->pldm)
4891 timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
4892 else
4893 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
4894
4895 mutex_lock(&hdev->mmu_cache_lock);
4896
4897 /* L0 & L1 invalidation */
4898 WREG32(mmSTLB_INV_ALL_START, 1);
4899
4900 rc = hl_poll_timeout(
4901 hdev,
4902 mmSTLB_INV_ALL_START,
4903 status,
4904 !status,
4905 1000,
4906 timeout_usec);
4907
4908 mutex_unlock(&hdev->mmu_cache_lock);
4909
4910 if (rc)
4911 dev_notice_ratelimited(hdev->dev,
4912 "Timeout when waiting for MMU cache invalidation\n");
4913 }
4914
4915 static void goya_mmu_invalidate_cache_range(struct hl_device *hdev,
4916 bool is_hard, u32 asid, u64 va, u64 size)
4917 {
4918 struct goya_device *goya = hdev->asic_specific;
4919 u32 status, timeout_usec, inv_data, pi;
4920 int rc;
4921
4922 if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4923 return;
4924
4925 /* There is no need for L1-only invalidation on Goya */
4926 if (!is_hard)
4927 return;
4928
4929 if (hdev->pldm)
4930 timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
4931 else
4932 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
4933
4934 mutex_lock(&hdev->mmu_cache_lock);
4935
4936 /*
4937 * TODO: currently the entire L0 & L1 are invalidated, as in a regular
4938 * hard invalidation. Invalidation of specific cache lines with a mask of
4939 * ASID & VA & size should be applied instead.
4940 * Note that the L1 is flushed entirely in any case.
4941 */
4942
4943 /* L0 & L1 invalidation */
4944 inv_data = RREG32(mmSTLB_CACHE_INV);
4945
4946 pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
4947 WREG32(mmSTLB_CACHE_INV,
4948 (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
4949
4950 rc = hl_poll_timeout(
4951 hdev,
4952 mmSTLB_INV_CONSUMER_INDEX,
4953 status,
4954 status == pi,
4955 1000,
4956 timeout_usec);
4957
4958 mutex_unlock(&hdev->mmu_cache_lock);
4959
4960 if (rc)
4961 dev_notice_ratelimited(hdev->dev,
4962 "Timeout when waiting for MMU cache invalidation\n");
4963 }
4964
4965 int goya_send_heartbeat(struct hl_device *hdev)
4966 {
4967 struct goya_device *goya = hdev->asic_specific;
4968
4969 if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
4970 return 0;
4971
4972 return hl_fw_send_heartbeat(hdev);
4973 }
4974
4975 int goya_armcp_info_get(struct hl_device *hdev)
4976 {
4977 struct goya_device *goya = hdev->asic_specific;
4978 struct asic_fixed_properties *prop = &hdev->asic_prop;
4979 u64 dram_size;
4980 int rc;
4981
4982 if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
4983 return 0;
4984
4985 rc = hl_fw_armcp_info_get(hdev);
4986 if (rc)
4987 return rc;
4988
4989 dram_size = le64_to_cpu(prop->armcp_info.dram_size);
4990 if (dram_size) {
4991 if ((!is_power_of_2(dram_size)) ||
4992 (dram_size < DRAM_PHYS_DEFAULT_SIZE)) {
4993 dev_err(hdev->dev,
4994 "F/W reported invalid DRAM size %llu. Trying to use default size\n",
4995 dram_size);
4996 dram_size = DRAM_PHYS_DEFAULT_SIZE;
4997 }
4998
4999 prop->dram_size = dram_size;
5000 prop->dram_end_address = prop->dram_base_address + dram_size;
5001 }
5002
5003 if (!strlen(prop->armcp_info.card_name))
5004 strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME,
5005 CARD_NAME_MAX_LEN);
5006
5007 return 0;
5008 }
5009
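/*
 * Check the QM/CMDQ/core status registers of every DMA, TPC and MME
 * engine. A busy engine sets its bit in @mask (when provided), and when a
 * seq_file is given a human-readable status table is printed into it.
 */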
5010 static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask,
5011 struct seq_file *s)
5012 {
5013 const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
5014 const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
5015 u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
5016 mme_arch_sts;
5017 bool is_idle = true, is_eng_idle;
5018 u64 offset;
5019 int i;
5020
5021 if (s)
5022 seq_puts(s, "\nDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0\n"
5023 "--- ------- ------------ -------------\n");
5024
5025 offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;
5026
5027 for (i = 0 ; i < DMA_MAX_NUM ; i++) {
5028 qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + i * offset);
5029 dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + i * offset);
5030 is_eng_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
5031 IS_DMA_IDLE(dma_core_sts0);
5032 is_idle &= is_eng_idle;
5033
5034 if (mask)
5035 *mask |= !is_eng_idle << (GOYA_ENGINE_ID_DMA_0 + i);
5036 if (s)
5037 seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
5038 qm_glbl_sts0, dma_core_sts0);
5039 }
5040
5041 if (s)
5042 seq_puts(s,
5043 "\nTPC is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 CFG_STATUS\n"
5044 "--- ------- ------------ -------------- ----------\n");
5045
5046 offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;
5047
5048 for (i = 0 ; i < TPC_MAX_NUM ; i++) {
5049 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + i * offset);
5050 cmdq_glbl_sts0 = RREG32(mmTPC0_CMDQ_GLBL_STS0 + i * offset);
5051 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + i * offset);
5052 is_eng_idle = IS_TPC_QM_IDLE(qm_glbl_sts0) &&
5053 IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) &&
5054 IS_TPC_IDLE(tpc_cfg_sts);
5055 is_idle &= is_eng_idle;
5056
5057 if (mask)
5058 *mask |= !is_eng_idle << (GOYA_ENGINE_ID_TPC_0 + i);
5059 if (s)
5060 seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
5061 qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
5062 }
5063
5064 if (s)
5065 seq_puts(s,
5066 "\nMME is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 ARCH_STATUS\n"
5067 "--- ------- ------------ -------------- -----------\n");
5068
5069 qm_glbl_sts0 = RREG32(mmMME_QM_GLBL_STS0);
5070 cmdq_glbl_sts0 = RREG32(mmMME_CMDQ_GLBL_STS0);
5071 mme_arch_sts = RREG32(mmMME_ARCH_STATUS);
5072 is_eng_idle = IS_MME_QM_IDLE(qm_glbl_sts0) &&
5073 IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) &&
5074 IS_MME_IDLE(mme_arch_sts);
5075 is_idle &= is_eng_idle;
5076
5077 if (mask)
5078 *mask |= !is_eng_idle << GOYA_ENGINE_ID_MME_0;
5079 if (s) {
5080 seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
5081 cmdq_glbl_sts0, mme_arch_sts);
5082 seq_puts(s, "\n");
5083 }
5084
5085 return is_idle;
5086 }
5087
5088 static void goya_hw_queues_lock(struct hl_device *hdev)
5089 {
5090 struct goya_device *goya = hdev->asic_specific;
5091
5092 spin_lock(&goya->hw_queues_lock);
5093 }
5094
5095 static void goya_hw_queues_unlock(struct hl_device *hdev)
5096 {
5097 struct goya_device *goya = hdev->asic_specific;
5098
5099 spin_unlock(&goya->hw_queues_lock);
5100 }
5101
5102 static u32 goya_get_pci_id(struct hl_device *hdev)
5103 {
5104 return hdev->pdev->device;
5105 }
5106
5107 static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
5108 size_t max_size)
5109 {
5110 struct goya_device *goya = hdev->asic_specific;
5111
5112 if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5113 return 0;
5114
5115 return hl_fw_get_eeprom_data(hdev, data, max_size);
5116 }
5117
5118 static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
5119 {
5120 return RREG32(mmHW_STATE);
5121 }
5122
5123 static const struct hl_asic_funcs goya_funcs = {
5124 .early_init = goya_early_init,
5125 .early_fini = goya_early_fini,
5126 .late_init = goya_late_init,
5127 .late_fini = goya_late_fini,
5128 .sw_init = goya_sw_init,
5129 .sw_fini = goya_sw_fini,
5130 .hw_init = goya_hw_init,
5131 .hw_fini = goya_hw_fini,
5132 .halt_engines = goya_halt_engines,
5133 .suspend = goya_suspend,
5134 .resume = goya_resume,
5135 .cb_mmap = goya_cb_mmap,
5136 .ring_doorbell = goya_ring_doorbell,
5137 .pqe_write = goya_pqe_write,
5138 .asic_dma_alloc_coherent = goya_dma_alloc_coherent,
5139 .asic_dma_free_coherent = goya_dma_free_coherent,
5140 .get_int_queue_base = goya_get_int_queue_base,
5141 .test_queues = goya_test_queues,
5142 .asic_dma_pool_zalloc = goya_dma_pool_zalloc,
5143 .asic_dma_pool_free = goya_dma_pool_free,
5144 .cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
5145 .cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
5146 .hl_dma_unmap_sg = goya_dma_unmap_sg,
5147 .cs_parser = goya_cs_parser,
5148 .asic_dma_map_sg = goya_dma_map_sg,
5149 .get_dma_desc_list_size = goya_get_dma_desc_list_size,
5150 .add_end_of_cb_packets = goya_add_end_of_cb_packets,
5151 .update_eq_ci = goya_update_eq_ci,
5152 .context_switch = goya_context_switch,
5153 .restore_phase_topology = goya_restore_phase_topology,
5154 .debugfs_read32 = goya_debugfs_read32,
5155 .debugfs_write32 = goya_debugfs_write32,
5156 .add_device_attr = goya_add_device_attr,
5157 .handle_eqe = goya_handle_eqe,
5158 .set_pll_profile = goya_set_pll_profile,
5159 .get_events_stat = goya_get_events_stat,
5160 .read_pte = goya_read_pte,
5161 .write_pte = goya_write_pte,
5162 .mmu_invalidate_cache = goya_mmu_invalidate_cache,
5163 .mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
5164 .send_heartbeat = goya_send_heartbeat,
5165 .debug_coresight = goya_debug_coresight,
5166 .is_device_idle = goya_is_device_idle,
5167 .soft_reset_late_init = goya_soft_reset_late_init,
5168 .hw_queues_lock = goya_hw_queues_lock,
5169 .hw_queues_unlock = goya_hw_queues_unlock,
5170 .get_pci_id = goya_get_pci_id,
5171 .get_eeprom_data = goya_get_eeprom_data,
5172 .send_cpu_message = goya_send_cpu_message,
5173 .get_hw_state = goya_get_hw_state,
5174 .pci_bars_map = goya_pci_bars_map,
5175 .set_dram_bar_base = goya_set_ddr_bar_base,
5176 .init_iatu = goya_init_iatu,
5177 .rreg = hl_rreg,
5178 .wreg = hl_wreg,
5179 .halt_coresight = goya_halt_coresight
5180 };
5181
5182 /*
5183 * goya_set_asic_funcs - set the Goya function pointers in the device
5184 * structure
5185 *
5186 * @hdev: pointer to hl_device structure
5187 */
5188 void goya_set_asic_funcs(struct hl_device *hdev)
5189 {
5190 hdev->asic_funcs = &goya_funcs;
5191 }