/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (c) 2000-2007 Silicon Graphics, Inc.  All Rights Reserved.
 */

#include <linux/module.h>
#include <asm/sn/nodepda.h>
#include <asm/sn/addrs.h>
#include <asm/sn/arch.h>
#include <asm/sn/sn_cpuid.h>
#include <asm/sn/pda.h>
#include <asm/sn/shubio.h>
#include <asm/nodedata.h>
#include <asm/delay.h>

#include <linux/bootmem.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/slab.h>

#include <asm/sn/bte.h>

#ifndef L1_CACHE_MASK
#define L1_CACHE_MASK (L1_CACHE_BYTES - 1)
#endif

/* two interfaces on two btes */
#define MAX_INTERFACES_TO_TRY		4
#define MAX_NODES_TO_TRY		2

static struct bteinfo_s *bte_if_on_node(nasid_t nasid, int interface)
{
	nodepda_t *tmp_nodepda;

	if (nasid_to_cnodeid(nasid) == -1)
		return (struct bteinfo_s *)NULL;

	tmp_nodepda = NODEPDA(nasid_to_cnodeid(nasid));
	return &tmp_nodepda->bte_if[interface];
}

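/*
 * bte_start_transfer() kicks off a transfer whose source, destination,
 * and notification addresses have already been programmed.  On shub2 the
 * busy bit, the transfer length (in cache lines), and the mode (shifted
 * into bits 24 and up) are packed into a single control/status store; on
 * shub1 the length is written first and the subsequent store of the mode
 * to the control register starts the transfer.
 */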
static inline void bte_start_transfer(struct bteinfo_s *bte, u64 len, u64 mode)
{
	if (is_shub2()) {
		BTE_CTRL_STORE(bte, (IBLS_BUSY | ((len) | (mode) << 24)));
	} else {
		BTE_LNSTAT_STORE(bte, len);
		BTE_CTRL_STORE(bte, mode);
	}
}

/************************************************************************
 * Block Transfer Engine copy related functions.
 *
 ***********************************************************************/

/*
 * bte_copy(src, dest, len, mode, notification)
 *
 * Use the block transfer engine to move kernel memory from src to dest
 * using the assigned mode.
 *
 * Parameters:
 *   src - physical address of the transfer source.
 *   dest - physical address of the transfer destination.
 *   len - number of bytes to transfer from source to dest.
 *   mode - hardware defined.  See reference information
 *          for IBCT0/1 in the SHUB Programmers Reference.
 *   notification - kernel virtual address of the notification cache
 *                  line.  If NULL, the default is used and
 *                  the bte_copy is synchronous.
 *
 * NOTE:  This function requires src, dest, and len to
 * be cacheline aligned.
 */
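/*
 * Illustrative usage sketch (not called anywhere in this file): a
 * synchronous copy of one cache-aligned page, spinning until a BTE
 * interface becomes available.  The buffer names here are assumptions;
 * the mode flags come from <asm/sn/bte.h>.
 *
 *	bte_result_t rv;
 *
 *	rv = bte_copy(ia64_tpa((unsigned long)src_va),
 *		      ia64_tpa((unsigned long)dst_va),
 *		      PAGE_SIZE, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
 *	if (rv != BTE_SUCCESS)
 *		printk("page copy failed: %d\n", (int)rv);
 */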
bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
{
	u64 transfer_size;
	u64 transfer_stat;
	u64 notif_phys_addr;
	struct bteinfo_s *bte;
	bte_result_t bte_status;
	unsigned long irq_flags;
	unsigned long itc_end = 0;
	int nasid_to_try[MAX_NODES_TO_TRY];
	int my_nasid = cpuid_to_nasid(raw_smp_processor_id());
	int bte_if_index, nasid_index;
	int bte_first, btes_per_node = BTES_PER_NODE;

	BTE_PRINTK(("bte_copy(0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%p)\n",
		    src, dest, len, mode, notification));

	if (len == 0) {
		return BTE_SUCCESS;
	}

	BUG_ON(len & L1_CACHE_MASK);
	BUG_ON(src & L1_CACHE_MASK);
	BUG_ON(dest & L1_CACHE_MASK);
	BUG_ON(len > BTE_MAX_XFER);

	/*
	 * Start with interface corresponding to cpu number
	 */
	bte_first = raw_smp_processor_id() % btes_per_node;

	if (mode & BTE_USE_DEST) {
		/* try remote then local */
		nasid_to_try[0] = NASID_GET(dest);
		if (mode & BTE_USE_ANY) {
			nasid_to_try[1] = my_nasid;
		} else {
			nasid_to_try[1] = 0;
		}
	} else {
		/* try local then remote */
		nasid_to_try[0] = my_nasid;
		if (mode & BTE_USE_ANY) {
			nasid_to_try[1] = NASID_GET(dest);
		} else {
			nasid_to_try[1] = 0;
		}
	}

retry_bteop:
	do {
		local_irq_save(irq_flags);

		bte_if_index = bte_first;
		nasid_index = 0;

		/* Attempt to lock one of the BTE interfaces. */
		while (nasid_index < MAX_NODES_TO_TRY) {
			bte = bte_if_on_node(nasid_to_try[nasid_index],
					     bte_if_index);

			if (bte == NULL) {
				nasid_index++;
				continue;
			}

			if (spin_trylock(&bte->spinlock)) {
				if (!(*bte->most_rcnt_na & BTE_WORD_AVAILABLE) ||
				    (BTE_LNSTAT_LOAD(bte) & BTE_ACTIVE)) {
					/* Got the lock but BTE still busy */
					spin_unlock(&bte->spinlock);
				} else {
					/* we got the lock and it's not busy */
					break;
				}
			}

			bte_if_index = (bte_if_index + 1) % btes_per_node; /* Next interface */
			if (bte_if_index == bte_first) {
				/*
				 * We've tried all interfaces on this node
				 */
				nasid_index++;
			}

			bte = NULL;
		}

		if (bte != NULL) {
			break;
		}

		local_irq_restore(irq_flags);

		if (!(mode & BTE_WACQUIRE)) {
			return BTEFAIL_NOTAVAIL;
		}
	} while (1);

	if (notification == NULL) {
		/* User does not want to be notified. */
		bte->most_rcnt_na = &bte->notify;
	} else {
		bte->most_rcnt_na = notification;
	}

	/* Calculate the number of cache lines to transfer. */
	transfer_size = ((len >> L1_CACHE_SHIFT) & BTE_LEN_MASK);

	/* Initialize the notification to a known value. */
	*bte->most_rcnt_na = BTE_WORD_BUSY;
	notif_phys_addr = (u64)bte->most_rcnt_na;

	/* Set the source and destination registers */
	BTE_PRINTKV(("IBSA = 0x%lx)\n", src));
	BTE_SRC_STORE(bte, src);
	BTE_PRINTKV(("IBDA = 0x%lx)\n", dest));
	BTE_DEST_STORE(bte, dest);

	/* Set the notification register */
	BTE_PRINTKV(("IBNA = 0x%lx)\n", notif_phys_addr));
	BTE_NOTIF_STORE(bte, notif_phys_addr);

	/* Initiate the transfer */
	BTE_PRINTK(("IBCT = 0x%lx)\n", BTE_VALID_MODE(mode)));
	bte_start_transfer(bte, transfer_size, BTE_VALID_MODE(mode));

	/* Allow the transfer roughly 40,000,000 usec (40 seconds) to finish. */
	itc_end = ia64_get_itc() + (40000000 * local_cpu_data->cyc_per_usec);

	spin_unlock_irqrestore(&bte->spinlock, irq_flags);

	if (notification != NULL) {
		return BTE_SUCCESS;
	}

	while ((transfer_stat = *bte->most_rcnt_na) == BTE_WORD_BUSY) {
		cpu_relax();
		if (ia64_get_itc() > itc_end) {
			BTE_PRINTK(("BTE timeout nasid 0x%x bte%d IBLS = 0x%lx na 0x%lx\n",
				    NASID_GET(bte->bte_base_addr), bte->bte_num,
				    BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na));
			bte->bte_error_count++;
			bte->bh_error = IBLS_ERROR;
			bte_error_handler((unsigned long)NODEPDA(bte->bte_cnode));
			*bte->most_rcnt_na = BTE_WORD_AVAILABLE;
			goto retry_bteop;
		}
	}

	BTE_PRINTKV((" Delay Done.  IBLS = 0x%lx, most_rcnt_na = 0x%lx\n",
		     BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na));

	if (transfer_stat & IBLS_ERROR) {
		bte_status = BTE_GET_ERROR_STATUS(transfer_stat);
	} else {
		bte_status = BTE_SUCCESS;
	}
	*bte->most_rcnt_na = BTE_WORD_AVAILABLE;

	BTE_PRINTK(("Returning status is 0x%lx and most_rcnt_na is 0x%lx\n",
		    BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na));

	return bte_status;
}

EXPORT_SYMBOL(bte_copy);

/*
 * bte_unaligned_copy(src, dest, len, mode)
 *
 * Use the block transfer engine to move kernel
 * memory from src to dest using the assigned mode.
 *
 * Parameters:
 *   src - physical address of the transfer source.
 *   dest - physical address of the transfer destination.
 *   len - number of bytes to transfer from source to dest.
 *   mode - hardware defined.  See reference information
 *          for IBCT0/1 in the SGI documentation.
 *
 * NOTE: If the source, dest, and len are all cache line aligned,
 * then it would be _FAR_ preferable to use bte_copy instead.
 */
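/*
 * Note: this routine stages the unaligned head and tail of the transfer
 * through a bounce buffer allocated with kmalloc(GFP_KERNEL), so unlike
 * bte_copy() it may sleep and must not be called from atomic context.
 * Only the cache-aligned middle section (if any) is transferred directly
 * to the destination.
 */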
bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode)
{
	int destFirstCacheOffset;
	u64 headBteSource;
	u64 headBteLen;
	u64 headBcopySrcOffset;
	u64 headBcopyDest;
	u64 headBcopyLen;
	u64 footBteSource;
	u64 footBteLen;
	u64 footBcopyDest;
	u64 footBcopyLen;
	bte_result_t rv;
	char *bteBlock, *bteBlock_unaligned;

	if (len == 0) {
		return BTE_SUCCESS;
	}

	/* temporary buffer used during unaligned transfers */
	bteBlock_unaligned = kmalloc(len + 3 * L1_CACHE_BYTES, GFP_KERNEL);
	if (bteBlock_unaligned == NULL) {
		return BTEFAIL_NOTAVAIL;
	}
	bteBlock = (char *)L1_CACHE_ALIGN((u64) bteBlock_unaligned);

	headBcopySrcOffset = src & L1_CACHE_MASK;
	destFirstCacheOffset = dest & L1_CACHE_MASK;

	/*
	 * At this point, the transfer is broken into
	 * (up to) three sections.  The first section is
	 * from the start address to the first physical
	 * cache line, the second is from the first physical
	 * cache line to the last complete cache line,
	 * and the third is from the last cache line to the
	 * end of the buffer.  The first and third sections
	 * are handled by bte copying into a temporary buffer
	 * and then bcopy'ing the necessary section into the
	 * final location.  The middle section is handled with
	 * a standard bte copy.
	 *
	 * One nasty exception to the above rule is when the
	 * source and destination are not symmetrically
	 * mis-aligned.  If the source offset from the first
	 * cache line is different from the destination offset,
	 * we make the first section be the entire transfer
	 * and then bcopy the entire block into place.
	 */
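	/*
	 * Illustrative walk-through (assuming 128-byte cache lines, with
	 * src and dest both 0x30 bytes past a line boundary and
	 * len = 0x200): the head BTE-copies the first full line into the
	 * bounce buffer and memcpy's 0x50 bytes into place, the foot does
	 * the same for the final 0x30 bytes, and the remaining 0x180 bytes
	 * in the middle are moved with a single direct bte_copy().
	 */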
	if (headBcopySrcOffset == destFirstCacheOffset) {

		/*
		 * Both the source and destination are the same
		 * distance from a cache line boundary so we can
		 * use the bte to transfer the bulk of the
		 * data.
		 */
		headBteSource = src & ~L1_CACHE_MASK;
		headBcopyDest = dest;
		if (headBcopySrcOffset) {
			headBcopyLen = (len > (L1_CACHE_BYTES - headBcopySrcOffset))
			    ? (L1_CACHE_BYTES - headBcopySrcOffset) : len;
			headBteLen = L1_CACHE_BYTES;
		} else {
			headBcopyLen = 0;
			headBteLen = 0;
		}

		if (len > headBcopyLen) {
			footBcopyLen = (len - headBcopyLen) & L1_CACHE_MASK;
			footBteLen = L1_CACHE_BYTES;

			footBteSource = src + len - footBcopyLen;
			footBcopyDest = dest + len - footBcopyLen;

			if (footBcopyDest == (headBcopyDest + headBcopyLen)) {
				/*
				 * We have two contiguous bcopy
				 * blocks.  Merge them.
				 */
				headBcopyLen += footBcopyLen;
				headBteLen += footBteLen;
			} else if (footBcopyLen > 0) {
				rv = bte_copy(footBteSource,
					      ia64_tpa((unsigned long)bteBlock),
					      footBteLen, mode, NULL);
				if (rv != BTE_SUCCESS) {
					kfree(bteBlock_unaligned);
					return rv;
				}

				memcpy(__va(footBcopyDest),
				       (char *)bteBlock, footBcopyLen);
			}
		} else {
			footBcopyLen = 0;
			footBteLen = 0;
		}

		if (len > (headBcopyLen + footBcopyLen)) {
			/* now transfer the middle. */
			rv = bte_copy((src + headBcopyLen),
				      (dest + headBcopyLen),
				      (len - headBcopyLen - footBcopyLen),
				      mode, NULL);
			if (rv != BTE_SUCCESS) {
				kfree(bteBlock_unaligned);
				return rv;
			}

		}
	} else {

		/*
		 * The transfer is not symmetric, so we will
		 * allocate a buffer large enough for all the
		 * data, bte_copy into that buffer and then
		 * bcopy to the destination.
		 */

		headBcopySrcOffset = src & L1_CACHE_MASK;
		headBcopyDest = dest;
		headBcopyLen = len;

		headBteSource = src - headBcopySrcOffset;
		/* Add the leading and trailing bytes from source */
		headBteLen = L1_CACHE_ALIGN(len + headBcopySrcOffset);
	}

	if (headBcopyLen > 0) {
		rv = bte_copy(headBteSource,
			      ia64_tpa((unsigned long)bteBlock), headBteLen,
			      mode, NULL);
		if (rv != BTE_SUCCESS) {
			kfree(bteBlock_unaligned);
			return rv;
		}

		memcpy(__va(headBcopyDest), ((char *)bteBlock +
					     headBcopySrcOffset), headBcopyLen);
	}
	kfree(bteBlock_unaligned);
	return BTE_SUCCESS;
}

EXPORT_SYMBOL(bte_unaligned_copy);

/************************************************************************
 * Block Transfer Engine initialization functions.
 *
 ***********************************************************************/

/*
 * bte_init_node(nodepda, cnode)
 *
 * Initialize the nodepda structure with BTE base addresses and
 * spinlocks.
 */
void bte_init_node(nodepda_t * mynodepda, cnodeid_t cnode)
{
	int i;

	/*
	 * Indicate that all the block transfer engines on this node
	 * are available.
	 */

	/*
	 * Allocate one bte_recover_t structure per node.  It holds
	 * the recovery lock for the node.  All the bte interface structures
	 * will point at this one bte_recover structure to get the lock.
	 */
	spin_lock_init(&mynodepda->bte_recovery_lock);
	init_timer(&mynodepda->bte_recovery_timer);
	mynodepda->bte_recovery_timer.function = bte_error_handler;
	mynodepda->bte_recovery_timer.data = (unsigned long)mynodepda;

	for (i = 0; i < BTES_PER_NODE; i++) {
		u64 *base_addr;

		/* Which link status register should we use? */
		base_addr = (u64 *)
		    REMOTE_HUB_ADDR(cnodeid_to_nasid(cnode), BTE_BASE_ADDR(i));
		mynodepda->bte_if[i].bte_base_addr = base_addr;
		mynodepda->bte_if[i].bte_source_addr = BTE_SOURCE_ADDR(base_addr);
		mynodepda->bte_if[i].bte_destination_addr = BTE_DEST_ADDR(base_addr);
		mynodepda->bte_if[i].bte_control_addr = BTE_CTRL_ADDR(base_addr);
		mynodepda->bte_if[i].bte_notify_addr = BTE_NOTIF_ADDR(base_addr);

		/*
		 * Initialize the notification and spinlock
		 * so the first transfer can occur.
		 */
		mynodepda->bte_if[i].most_rcnt_na =
		    &(mynodepda->bte_if[i].notify);
		mynodepda->bte_if[i].notify = BTE_WORD_AVAILABLE;
		spin_lock_init(&mynodepda->bte_if[i].spinlock);

		mynodepda->bte_if[i].bte_cnode = cnode;
		mynodepda->bte_if[i].bte_error_count = 0;
		mynodepda->bte_if[i].bte_num = i;
		mynodepda->bte_if[i].cleanup_active = 0;
		mynodepda->bte_if[i].bh_error = 0;
	}
}