1/* 2 * Instruction SRAM accessor functions for the Blackfin 3 * 4 * Copyright 2008 Analog Devices Inc. 5 * 6 * Licensed under the GPL-2 or later 7 */ 8 9#define pr_fmt(fmt) "isram: " fmt 10 11#include <linux/module.h> 12#include <linux/kernel.h> 13#include <linux/types.h> 14#include <linux/slab.h> 15#include <linux/spinlock.h> 16#include <linux/sched.h> 17 18#include <asm/blackfin.h> 19#include <asm/dma.h> 20 21/* 22 * IMPORTANT WARNING ABOUT THESE FUNCTIONS 23 * 24 * The emulator will not function correctly if a write command is left in 25 * ITEST_COMMAND or DTEST_COMMAND AND access to cache memory is needed by 26 * the emulator. To avoid such problems, ensure that both ITEST_COMMAND 27 * and DTEST_COMMAND are zero when exiting these functions. 28 */ 29 30 31/* 32 * On the Blackfin, L1 instruction sram (which operates at core speeds) can not 33 * be accessed by a normal core load, so we need to go through a few hoops to 34 * read/write it. 35 * To try to make it easier - we export a memcpy interface, where either src or 36 * dest can be in this special L1 memory area. 37 * The low level read/write functions should not be exposed to the rest of the 38 * kernel, since they operate on 64-bit data, and need specific address alignment 39 */ 40 41static DEFINE_SPINLOCK(dtest_lock); 42 43/* Takes a void pointer */ 44#define IADDR2DTEST(x) \ 45 ({ unsigned long __addr = (unsigned long)(x); \ 46 ((__addr & (1 << 11)) << (26 - 11)) | /* addr bit 11 (Way0/Way1) */ \ 47 (1 << 24) | /* instruction access = 1 */ \ 48 ((__addr & (1 << 15)) << (23 - 15)) | /* addr bit 15 (Data Bank) */ \ 49 ((__addr & (3 << 12)) << (16 - 12)) | /* addr bits 13:12 (Subbank) */ \ 50 (__addr & 0x47F8) | /* addr bits 14 & 10:3 */ \ 51 (1 << 2); /* data array = 1 */ \ 52 }) 53 54/* Takes a pointer, and returns the offset (in bits) which things should be shifted */ 55#define ADDR2OFFSET(x) ((((unsigned long)(x)) & 0x7) * 8) 56 57/* Takes a pointer, determines if it is the last byte in the isram 64-bit data type */ 58#define ADDR2LAST(x) ((((unsigned long)x) & 0x7) == 0x7) 59 60static void isram_write(const void *addr, uint64_t data) 61{ 62 uint32_t cmd; 63 unsigned long flags; 64 65 if (unlikely(addr >= (void *)(L1_CODE_START + L1_CODE_LENGTH))) 66 return; 67 68 cmd = IADDR2DTEST(addr) | 2; /* write */ 69 70 /* 71 * Writes to DTEST_DATA[0:1] need to be atomic with write to DTEST_COMMAND 72 * While in exception context - atomicity is guaranteed or double fault 73 */ 74 spin_lock_irqsave(&dtest_lock, flags); 75 76 bfin_write_DTEST_DATA0(data & 0xFFFFFFFF); 77 bfin_write_DTEST_DATA1(data >> 32); 78 79 /* use the builtin, since interrupts are already turned off */ 80 __builtin_bfin_csync(); 81 bfin_write_DTEST_COMMAND(cmd); 82 __builtin_bfin_csync(); 83 84 bfin_write_DTEST_COMMAND(0); 85 __builtin_bfin_csync(); 86 87 spin_unlock_irqrestore(&dtest_lock, flags); 88} 89 90static uint64_t isram_read(const void *addr) 91{ 92 uint32_t cmd; 93 unsigned long flags; 94 uint64_t ret; 95 96 if (unlikely(addr > (void *)(L1_CODE_START + L1_CODE_LENGTH))) 97 return 0; 98 99 cmd = IADDR2DTEST(addr) | 0; /* read */ 100 101 /* 102 * Reads of DTEST_DATA[0:1] need to be atomic with write to DTEST_COMMAND 103 * While in exception context - atomicity is guaranteed or double fault 104 */ 105 spin_lock_irqsave(&dtest_lock, flags); 106 /* use the builtin, since interrupts are already turned off */ 107 __builtin_bfin_csync(); 108 bfin_write_DTEST_COMMAND(cmd); 109 __builtin_bfin_csync(); 110 ret = bfin_read_DTEST_DATA0() | ((uint64_t)bfin_read_DTEST_DATA1() << 32); 111 112 bfin_write_DTEST_COMMAND(0); 113 __builtin_bfin_csync(); 114 spin_unlock_irqrestore(&dtest_lock, flags); 115 116 return ret; 117} 118 119static bool isram_check_addr(const void *addr, size_t n) 120{ 121 if ((addr >= (void *)L1_CODE_START) && 122 (addr < (void *)(L1_CODE_START + L1_CODE_LENGTH))) { 123 if (unlikely((addr + n) > (void *)(L1_CODE_START + L1_CODE_LENGTH))) { 124 show_stack(NULL, NULL); 125 pr_err("copy involving %p length (%zu) too long\n", addr, n); 126 } 127 return true; 128 } 129 return false; 130} 131 132/* 133 * The isram_memcpy() function copies n bytes from memory area src to memory area dest. 134 * The isram_memcpy() function returns a pointer to dest. 135 * Either dest or src can be in L1 instruction sram. 136 */ 137void *isram_memcpy(void *dest, const void *src, size_t n) 138{ 139 uint64_t data_in = 0, data_out = 0; 140 size_t count; 141 bool dest_in_l1, src_in_l1, need_data, put_data; 142 unsigned char byte, *src_byte, *dest_byte; 143 144 src_byte = (unsigned char *)src; 145 dest_byte = (unsigned char *)dest; 146 147 dest_in_l1 = isram_check_addr(dest, n); 148 src_in_l1 = isram_check_addr(src, n); 149 150 need_data = true; 151 put_data = true; 152 for (count = 0; count < n; count++) { 153 if (src_in_l1) { 154 if (need_data) { 155 data_in = isram_read(src + count); 156 need_data = false; 157 } 158 159 if (ADDR2LAST(src + count)) 160 need_data = true; 161 162 byte = (unsigned char)((data_in >> ADDR2OFFSET(src + count)) & 0xff); 163 164 } else { 165 /* src is in L2 or L3 - so just dereference*/ 166 byte = src_byte[count]; 167 } 168 169 if (dest_in_l1) { 170 if (put_data) { 171 data_out = isram_read(dest + count); 172 put_data = false; 173 } 174 175 data_out &= ~((uint64_t)0xff << ADDR2OFFSET(dest + count)); 176 data_out |= ((uint64_t)byte << ADDR2OFFSET(dest + count)); 177 178 if (ADDR2LAST(dest + count)) { 179 put_data = true; 180 isram_write(dest + count, data_out); 181 } 182 } else { 183 /* dest in L2 or L3 - so just dereference */ 184 dest_byte[count] = byte; 185 } 186 } 187 188 /* make sure we dump the last byte if necessary */ 189 if (dest_in_l1 && !put_data) 190 isram_write(dest + count, data_out); 191 192 return dest; 193} 194EXPORT_SYMBOL(isram_memcpy); 195 196#ifdef CONFIG_BFIN_ISRAM_SELF_TEST 197 198static int test_len = 0x20000; 199 200static __init void hex_dump(unsigned char *buf, int len) 201{ 202 while (len--) 203 pr_cont("%02x", *buf++); 204} 205 206static __init int isram_read_test(char *sdram, void *l1inst) 207{ 208 int i, ret = 0; 209 uint64_t data1, data2; 210 211 pr_info("INFO: running isram_read tests\n"); 212 213 /* setup some different data to play with */ 214 for (i = 0; i < test_len; ++i) 215 sdram[i] = i % 255; 216 dma_memcpy(l1inst, sdram, test_len); 217 218 /* make sure we can read the L1 inst */ 219 for (i = 0; i < test_len; i += sizeof(uint64_t)) { 220 data1 = isram_read(l1inst + i); 221 memcpy(&data2, sdram + i, sizeof(data2)); 222 if (data1 != data2) { 223 pr_err("FAIL: isram_read(%p) returned %#llx but wanted %#llx\n", 224 l1inst + i, data1, data2); 225 ++ret; 226 } 227 } 228 229 return ret; 230} 231 232static __init int isram_write_test(char *sdram, void *l1inst) 233{ 234 int i, ret = 0; 235 uint64_t data1, data2; 236 237 pr_info("INFO: running isram_write tests\n"); 238 239 /* setup some different data to play with */ 240 memset(sdram, 0, test_len * 2); 241 dma_memcpy(l1inst, sdram, test_len); 242 for (i = 0; i < test_len; ++i) 243 sdram[i] = i % 255; 244 245 /* make sure we can write the L1 inst */ 246 for (i = 0; i < test_len; i += sizeof(uint64_t)) { 247 memcpy(&data1, sdram + i, sizeof(data1)); 248 isram_write(l1inst + i, data1); 249 data2 = isram_read(l1inst + i); 250 if (data1 != data2) { 251 pr_err("FAIL: isram_write(%p, %#llx) != %#llx\n", 252 l1inst + i, data1, data2); 253 ++ret; 254 } 255 } 256 257 dma_memcpy(sdram + test_len, l1inst, test_len); 258 if (memcmp(sdram, sdram + test_len, test_len)) { 259 pr_err("FAIL: isram_write() did not work properly\n"); 260 ++ret; 261 } 262 263 return ret; 264} 265 266static __init int 267_isram_memcpy_test(char pattern, void *sdram, void *l1inst, const char *smemcpy, 268 void *(*fmemcpy)(void *, const void *, size_t)) 269{ 270 memset(sdram, pattern, test_len); 271 fmemcpy(l1inst, sdram, test_len); 272 fmemcpy(sdram + test_len, l1inst, test_len); 273 if (memcmp(sdram, sdram + test_len, test_len)) { 274 pr_err("FAIL: %s(%p <=> %p, %#x) failed (data is %#x)\n", 275 smemcpy, l1inst, sdram, test_len, pattern); 276 return 1; 277 } 278 return 0; 279} 280#define _isram_memcpy_test(a, b, c, d) _isram_memcpy_test(a, b, c, #d, d) 281 282static __init int isram_memcpy_test(char *sdram, void *l1inst) 283{ 284 int i, j, thisret, ret = 0; 285 286 /* check broad isram_memcpy() */ 287 pr_info("INFO: running broad isram_memcpy tests\n"); 288 for (i = 0xf; i >= 0; --i) 289 ret += _isram_memcpy_test(i, sdram, l1inst, isram_memcpy); 290 291 /* check read of small, unaligned, and hardware 64bit limits */ 292 pr_info("INFO: running isram_memcpy (read) tests\n"); 293 294 /* setup some different data to play with */ 295 for (i = 0; i < test_len; ++i) 296 sdram[i] = i % 255; 297 dma_memcpy(l1inst, sdram, test_len); 298 299 thisret = 0; 300 for (i = 0; i < test_len - 32; ++i) { 301 unsigned char cmp[32]; 302 for (j = 1; j <= 32; ++j) { 303 memset(cmp, 0, sizeof(cmp)); 304 isram_memcpy(cmp, l1inst + i, j); 305 if (memcmp(cmp, sdram + i, j)) { 306 pr_err("FAIL: %p:", l1inst + 1); 307 hex_dump(cmp, j); 308 pr_cont(" SDRAM:"); 309 hex_dump(sdram + i, j); 310 pr_cont("\n"); 311 if (++thisret > 20) { 312 pr_err("FAIL: skipping remaining series\n"); 313 i = test_len; 314 break; 315 } 316 } 317 } 318 } 319 ret += thisret; 320 321 /* check write of small, unaligned, and hardware 64bit limits */ 322 pr_info("INFO: running isram_memcpy (write) tests\n"); 323 324 memset(sdram + test_len, 0, test_len); 325 dma_memcpy(l1inst, sdram + test_len, test_len); 326 327 thisret = 0; 328 for (i = 0; i < test_len - 32; ++i) { 329 unsigned char cmp[32]; 330 for (j = 1; j <= 32; ++j) { 331 isram_memcpy(l1inst + i, sdram + i, j); 332 dma_memcpy(cmp, l1inst + i, j); 333 if (memcmp(cmp, sdram + i, j)) { 334 pr_err("FAIL: %p:", l1inst + i); 335 hex_dump(cmp, j); 336 pr_cont(" SDRAM:"); 337 hex_dump(sdram + i, j); 338 pr_cont("\n"); 339 if (++thisret > 20) { 340 pr_err("FAIL: skipping remaining series\n"); 341 i = test_len; 342 break; 343 } 344 } 345 } 346 } 347 ret += thisret; 348 349 return ret; 350} 351 352static __init int isram_test_init(void) 353{ 354 int ret; 355 char *sdram; 356 void *l1inst; 357 358 /* Try to test as much of L1SRAM as possible */ 359 while (test_len) { 360 test_len >>= 1; 361 l1inst = l1_inst_sram_alloc(test_len); 362 if (l1inst) 363 break; 364 } 365 if (!l1inst) { 366 pr_warning("SKIP: could not allocate L1 inst\n"); 367 return 0; 368 } 369 pr_info("INFO: testing %#x bytes (%p - %p)\n", 370 test_len, l1inst, l1inst + test_len); 371 372 sdram = kmalloc(test_len * 2, GFP_KERNEL); 373 if (!sdram) { 374 sram_free(l1inst); 375 pr_warning("SKIP: could not allocate sdram\n"); 376 return 0; 377 } 378 379 /* sanity check initial L1 inst state */ 380 ret = 1; 381 pr_info("INFO: running initial dma_memcpy checks %p\n", sdram); 382 if (_isram_memcpy_test(0xa, sdram, l1inst, dma_memcpy)) 383 goto abort; 384 if (_isram_memcpy_test(0x5, sdram, l1inst, dma_memcpy)) 385 goto abort; 386 387 ret = 0; 388 ret += isram_read_test(sdram, l1inst); 389 ret += isram_write_test(sdram, l1inst); 390 ret += isram_memcpy_test(sdram, l1inst); 391 392 abort: 393 sram_free(l1inst); 394 kfree(sdram); 395 396 if (ret) 397 return -EIO; 398 399 pr_info("PASS: all tests worked !\n"); 400 return 0; 401} 402late_initcall(isram_test_init); 403 404static __exit void isram_test_exit(void) 405{ 406 /* stub to allow unloading */ 407} 408module_exit(isram_test_exit); 409 410#endif 411