
IPF Machine Check (MC) error inject tool
========================================

The IPF Machine Check (MC) error inject tool is used to inject MC
errors from Linux. The tool is a test bed for the IPF MC work flow, including
hardware correctable error handling, OS recoverable error handling, MC
event logging, etc.

The tool consists of two parts: a kernel driver and a sample user
application. The driver provides an interface to PAL to inject errors
and to query error injection capabilities. The driver code is in
arch/ia64/kernel/err_inject.c. The sample application (shown below)
provides a combination of various errors and calls the driver's interface
(sysfs interface) to inject errors or query error injection capabilities.

The tool can be used to test the MC handling capabilities of Intel IPF
machines. It is especially useful for people who cannot access hardware MC
injection tools to inject errors. It is also very useful to integrate with
other software test suites to do stress testing on IPF.

Below is a sample application that is part of the whole tool. The sample
can be used as a working test tool, or it can be expanded to include
more features. It can also be integrated into a library or another user
application for more thorough testing.

The sample application takes err.conf as its error configuration input. The
code compiles with GCC. After you install the err_inject driver, you can run
this sample application to inject errors.

Errata: The Itanium 2 Processors Specification Update lists some errata against
the pal_mc_error_inject PAL procedure. The following err.conf has been tested
on the latest Montecito PAL.

err.conf:

#This is the configuration file for err_inject_tool.
#The format of each line is:
#cpu, loop, interval, err_type_info, err_struct_info, err_data_buffer
#where
#	cpu: logical cpu number the error will be injected on.
#	loop: number of times the error will be injected.
#	interval: In seconds. One error is injected every interval seconds
#		  (the tool enforces a minimum of 5 seconds).
#	err_type_info, err_struct_info: PAL parameters.
#	err_data_buffer: PAL parameter (three values). Optional. If omitted,
#		  the tool constructs it automatically.
#
#Note: All values are hex w/o or w/ 0x prefix.


#On cpu2, inject only total 0x10 errors, interval 5 seconds
#corrected, data cache, hier-2, physical addr(assigned by tool code).
#working on Montecito latest PAL.
2, 10, 5, 4101, 95

#On cpu4, inject and consume total 0x10 errors, interval 5 seconds
#corrected, data cache, hier-2, physical addr(assigned by tool code).
#working on Montecito latest PAL.
4, 10, 5, 4109, 95

#On cpu15, inject and consume total 0x10 errors, interval 5 seconds
#recoverable, DTR0, hier-2.
#working on Montecito latest PAL.
0xf, 0x10, 5, 4249, 15

The sample application source code:

err_injection_tool.c:

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
83 * 84 * Copyright (C) 2006 Intel Co 85 * Fenghua Yu <fenghua.yu@intel.com> 86 * 87 */ 88#include <sys/types.h> 89#include <sys/stat.h> 90#include <fcntl.h> 91#include <stdio.h> 92#include <sched.h> 93#include <unistd.h> 94#include <stdlib.h> 95#include <stdarg.h> 96#include <string.h> 97#include <errno.h> 98#include <time.h> 99#include <sys/ipc.h> 100#include <sys/sem.h> 101#include <sys/wait.h> 102#include <sys/mman.h> 103#include <sys/shm.h> 104 105#define MAX_FN_SIZE 256 106#define MAX_BUF_SIZE 256 107#define DATA_BUF_SIZE 256 108#define NR_CPUS 512 109#define MAX_TASK_NUM 2048 110#define MIN_INTERVAL 5 // seconds 111#define ERR_DATA_BUFFER_SIZE 3 // Three 8-byte. 112#define PARA_FIELD_NUM 5 113#define MASK_SIZE (NR_CPUS/64) 114#define PATH_FORMAT "/sys/devices/system/cpu/cpu%d/err_inject/" 115 116int sched_setaffinity(pid_t pid, unsigned int len, unsigned long *mask); 117 118int verbose; 119#define vbprintf if (verbose) printf 120 121int log_info(int cpu, const char *fmt, ...) 122{ 123 FILE *log; 124 char fn[MAX_FN_SIZE]; 125 char buf[MAX_BUF_SIZE]; 126 va_list args; 127 128 sprintf(fn, "%d.log", cpu); 129 log=fopen(fn, "a+"); 130 if (log==NULL) { 131 perror("Error open:"); 132 return -1; 133 } 134 135 va_start(args, fmt); 136 vprintf(fmt, args); 137 memset(buf, 0, MAX_BUF_SIZE); 138 vsprintf(buf, fmt, args); 139 va_end(args); 140 141 fwrite(buf, sizeof(buf), 1, log); 142 fclose(log); 143 144 return 0; 145} 146 147typedef unsigned long u64; 148typedef unsigned int u32; 149 150typedef union err_type_info_u { 151 struct { 152 u64 mode : 3, /* 0-2 */ 153 err_inj : 3, /* 3-5 */ 154 err_sev : 2, /* 6-7 */ 155 err_struct : 5, /* 8-12 */ 156 struct_hier : 3, /* 13-15 */ 157 reserved : 48; /* 16-63 */ 158 } err_type_info_u; 159 u64 err_type_info; 160} err_type_info_t; 161 162typedef union err_struct_info_u { 163 struct { 164 u64 siv : 1, /* 0 */ 165 c_t : 2, /* 1-2 */ 166 cl_p : 3, /* 3-5 */ 167 cl_id : 3, /* 6-8 */ 168 cl_dp : 1, /* 9 */ 169 reserved1 : 22, /* 10-31 
*/ 170 tiv : 1, /* 32 */ 171 trigger : 4, /* 33-36 */ 172 trigger_pl : 3, /* 37-39 */ 173 reserved2 : 24; /* 40-63 */ 174 } err_struct_info_cache; 175 struct { 176 u64 siv : 1, /* 0 */ 177 tt : 2, /* 1-2 */ 178 tc_tr : 2, /* 3-4 */ 179 tr_slot : 8, /* 5-12 */ 180 reserved1 : 19, /* 13-31 */ 181 tiv : 1, /* 32 */ 182 trigger : 4, /* 33-36 */ 183 trigger_pl : 3, /* 37-39 */ 184 reserved2 : 24; /* 40-63 */ 185 } err_struct_info_tlb; 186 struct { 187 u64 siv : 1, /* 0 */ 188 regfile_id : 4, /* 1-4 */ 189 reg_num : 7, /* 5-11 */ 190 reserved1 : 20, /* 12-31 */ 191 tiv : 1, /* 32 */ 192 trigger : 4, /* 33-36 */ 193 trigger_pl : 3, /* 37-39 */ 194 reserved2 : 24; /* 40-63 */ 195 } err_struct_info_register; 196 struct { 197 u64 reserved; 198 } err_struct_info_bus_processor_interconnect; 199 u64 err_struct_info; 200} err_struct_info_t; 201 202typedef union err_data_buffer_u { 203 struct { 204 u64 trigger_addr; /* 0-63 */ 205 u64 inj_addr; /* 64-127 */ 206 u64 way : 5, /* 128-132 */ 207 index : 20, /* 133-152 */ 208 : 39; /* 153-191 */ 209 } err_data_buffer_cache; 210 struct { 211 u64 trigger_addr; /* 0-63 */ 212 u64 inj_addr; /* 64-127 */ 213 u64 way : 5, /* 128-132 */ 214 index : 20, /* 133-152 */ 215 reserved : 39; /* 153-191 */ 216 } err_data_buffer_tlb; 217 struct { 218 u64 trigger_addr; /* 0-63 */ 219 } err_data_buffer_register; 220 struct { 221 u64 reserved; /* 0-63 */ 222 } err_data_buffer_bus_processor_interconnect; 223 u64 err_data_buffer[ERR_DATA_BUFFER_SIZE]; 224} err_data_buffer_t; 225 226typedef union capabilities_u { 227 struct { 228 u64 i : 1, 229 d : 1, 230 rv : 1, 231 tag : 1, 232 data : 1, 233 mesi : 1, 234 dp : 1, 235 reserved1 : 3, 236 pa : 1, 237 va : 1, 238 wi : 1, 239 reserved2 : 20, 240 trigger : 1, 241 trigger_pl : 1, 242 reserved3 : 30; 243 } capabilities_cache; 244 struct { 245 u64 d : 1, 246 i : 1, 247 rv : 1, 248 tc : 1, 249 tr : 1, 250 reserved1 : 27, 251 trigger : 1, 252 trigger_pl : 1, 253 reserved2 : 30; 254 } capabilities_tlb; 255 struct { 
256 u64 gr_b0 : 1, 257 gr_b1 : 1, 258 fr : 1, 259 br : 1, 260 pr : 1, 261 ar : 1, 262 cr : 1, 263 rr : 1, 264 pkr : 1, 265 dbr : 1, 266 ibr : 1, 267 pmc : 1, 268 pmd : 1, 269 reserved1 : 3, 270 regnum : 1, 271 reserved2 : 15, 272 trigger : 1, 273 trigger_pl : 1, 274 reserved3 : 30; 275 } capabilities_register; 276 struct { 277 u64 reserved; 278 } capabilities_bus_processor_interconnect; 279} capabilities_t; 280 281typedef struct resources_s { 282 u64 ibr0 : 1, 283 ibr2 : 1, 284 ibr4 : 1, 285 ibr6 : 1, 286 dbr0 : 1, 287 dbr2 : 1, 288 dbr4 : 1, 289 dbr6 : 1, 290 reserved : 48; 291} resources_t; 292 293 294long get_page_size(void) 295{ 296 long page_size=sysconf(_SC_PAGESIZE); 297 return page_size; 298} 299 300#define PAGE_SIZE (get_page_size()==-1?0x4000:get_page_size()) 301#define SHM_SIZE (2*PAGE_SIZE*NR_CPUS) 302#define SHM_VA 0x2000000100000000 303 304int shmid; 305void *shmaddr; 306 307int create_shm(void) 308{ 309 key_t key; 310 char fn[MAX_FN_SIZE]; 311 312 /* cpu0 is always existing */ 313 sprintf(fn, PATH_FORMAT, 0); 314 if ((key = ftok(fn, 's')) == -1) { 315 perror("ftok"); 316 return -1; 317 } 318 319 shmid = shmget(key, SHM_SIZE, 0644 | IPC_CREAT); 320 if (shmid == -1) { 321 if (errno==EEXIST) { 322 shmid = shmget(key, SHM_SIZE, 0); 323 if (shmid == -1) { 324 perror("shmget"); 325 return -1; 326 } 327 } 328 else { 329 perror("shmget"); 330 return -1; 331 } 332 } 333 vbprintf("shmid=%d", shmid); 334 335 /* connect to the segment: */ 336 shmaddr = shmat(shmid, (void *)SHM_VA, 0); 337 if (shmaddr == (void*)-1) { 338 perror("shmat"); 339 return -1; 340 } 341 342 memset(shmaddr, 0, SHM_SIZE); 343 mlock(shmaddr, SHM_SIZE); 344 345 return 0; 346} 347 348int free_shm() 349{ 350 munlock(shmaddr, SHM_SIZE); 351 shmdt(shmaddr); 352 semctl(shmid, 0, IPC_RMID); 353 354 return 0; 355} 356 357#ifdef _SEM_SEMUN_UNDEFINED 358union semun 359{ 360 int val; 361 struct semid_ds *buf; 362 unsigned short int *array; 363 struct seminfo *__buf; 364}; 365#endif 366 367u32 mode=1; 
/* 1: physical mode; 2: virtual mode. */ 368int one_lock=1; 369key_t key[NR_CPUS]; 370int semid[NR_CPUS]; 371 372int create_sem(int cpu) 373{ 374 union semun arg; 375 char fn[MAX_FN_SIZE]; 376 int sid; 377 378 sprintf(fn, PATH_FORMAT, cpu); 379 sprintf(fn, "%s/%s", fn, "err_type_info"); 380 if ((key[cpu] = ftok(fn, 'e')) == -1) { 381 perror("ftok"); 382 return -1; 383 } 384 385 if (semid[cpu]!=0) 386 return 0; 387 388 /* clear old semaphore */ 389 if ((sid = semget(key[cpu], 1, 0)) != -1) 390 semctl(sid, 0, IPC_RMID); 391 392 /* get one semaphore */ 393 if ((semid[cpu] = semget(key[cpu], 1, IPC_CREAT | IPC_EXCL)) == -1) { 394 perror("semget"); 395 printf("Please remove semaphore with key=0x%lx, then run the tool.\n", 396 (u64)key[cpu]); 397 return -1; 398 } 399 400 vbprintf("semid[%d]=0x%lx, key[%d]=%lx\n",cpu,(u64)semid[cpu],cpu, 401 (u64)key[cpu]); 402 /* initialize the semaphore to 1: */ 403 arg.val = 1; 404 if (semctl(semid[cpu], 0, SETVAL, arg) == -1) { 405 perror("semctl"); 406 return -1; 407 } 408 409 return 0; 410} 411 412static int lock(int cpu) 413{ 414 struct sembuf lock; 415 416 lock.sem_num = cpu; 417 lock.sem_op = 1; 418 semop(semid[cpu], &lock, 1); 419 420 return 0; 421} 422 423static int unlock(int cpu) 424{ 425 struct sembuf unlock; 426 427 unlock.sem_num = cpu; 428 unlock.sem_op = -1; 429 semop(semid[cpu], &unlock, 1); 430 431 return 0; 432} 433 434void free_sem(int cpu) 435{ 436 semctl(semid[cpu], 0, IPC_RMID); 437} 438 439int wr_multi(char *fn, unsigned long *data, int size) 440{ 441 int fd; 442 char buf[MAX_BUF_SIZE]; 443 int ret; 444 445 if (size==1) 446 sprintf(buf, "%lx", *data); 447 else if (size==3) 448 sprintf(buf, "%lx,%lx,%lx", data[0], data[1], data[2]); 449 else { 450 fprintf(stderr,"write to file with wrong size!\n"); 451 return -1; 452 } 453 454 fd=open(fn, O_RDWR); 455 if (!fd) { 456 perror("Error:"); 457 return -1; 458 } 459 ret=write(fd, buf, sizeof(buf)); 460 close(fd); 461 return ret; 462} 463 464int wr(char *fn, unsigned long 
data) 465{ 466 return wr_multi(fn, &data, 1); 467} 468 469int rd(char *fn, unsigned long *data) 470{ 471 int fd; 472 char buf[MAX_BUF_SIZE]; 473 474 fd=open(fn, O_RDONLY); 475 if (fd<0) { 476 perror("Error:"); 477 return -1; 478 } 479 read(fd, buf, MAX_BUF_SIZE); 480 *data=strtoul(buf, NULL, 16); 481 close(fd); 482 return 0; 483} 484 485int rd_status(char *path, int *status) 486{ 487 char fn[MAX_FN_SIZE]; 488 sprintf(fn, "%s/status", path); 489 if (rd(fn, (u64*)status)<0) { 490 perror("status reading error.\n"); 491 return -1; 492 } 493 494 return 0; 495} 496 497int rd_capabilities(char *path, u64 *capabilities) 498{ 499 char fn[MAX_FN_SIZE]; 500 sprintf(fn, "%s/capabilities", path); 501 if (rd(fn, capabilities)<0) { 502 perror("capabilities reading error.\n"); 503 return -1; 504 } 505 506 return 0; 507} 508 509int rd_all(char *path) 510{ 511 unsigned long err_type_info, err_struct_info, err_data_buffer; 512 int status; 513 unsigned long capabilities, resources; 514 char fn[MAX_FN_SIZE]; 515 516 sprintf(fn, "%s/err_type_info", path); 517 if (rd(fn, &err_type_info)<0) { 518 perror("err_type_info reading error.\n"); 519 return -1; 520 } 521 printf("err_type_info=%lx\n", err_type_info); 522 523 sprintf(fn, "%s/err_struct_info", path); 524 if (rd(fn, &err_struct_info)<0) { 525 perror("err_struct_info reading error.\n"); 526 return -1; 527 } 528 printf("err_struct_info=%lx\n", err_struct_info); 529 530 sprintf(fn, "%s/err_data_buffer", path); 531 if (rd(fn, &err_data_buffer)<0) { 532 perror("err_data_buffer reading error.\n"); 533 return -1; 534 } 535 printf("err_data_buffer=%lx\n", err_data_buffer); 536 537 sprintf(fn, "%s/status", path); 538 if (rd("status", (u64*)&status)<0) { 539 perror("status reading error.\n"); 540 return -1; 541 } 542 printf("status=%d\n", status); 543 544 sprintf(fn, "%s/capabilities", path); 545 if (rd(fn,&capabilities)<0) { 546 perror("capabilities reading error.\n"); 547 return -1; 548 } 549 printf("capabilities=%lx\n", capabilities); 550 
551 sprintf(fn, "%s/resources", path); 552 if (rd(fn, &resources)<0) { 553 perror("resources reading error.\n"); 554 return -1; 555 } 556 printf("resources=%lx\n", resources); 557 558 return 0; 559} 560 561int query_capabilities(char *path, err_type_info_t err_type_info, 562 u64 *capabilities) 563{ 564 char fn[MAX_FN_SIZE]; 565 err_struct_info_t err_struct_info; 566 err_data_buffer_t err_data_buffer; 567 568 err_struct_info.err_struct_info=0; 569 memset(err_data_buffer.err_data_buffer, -1, ERR_DATA_BUFFER_SIZE*8); 570 571 sprintf(fn, "%s/err_type_info", path); 572 wr(fn, err_type_info.err_type_info); 573 sprintf(fn, "%s/err_struct_info", path); 574 wr(fn, 0x0); 575 sprintf(fn, "%s/err_data_buffer", path); 576 wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE); 577 578 // Fire pal_mc_error_inject procedure. 579 sprintf(fn, "%s/call_start", path); 580 wr(fn, mode); 581 582 if (rd_capabilities(path, capabilities)<0) 583 return -1; 584 585 return 0; 586} 587 588int query_all_capabilities() 589{ 590 int status; 591 err_type_info_t err_type_info; 592 int err_sev, err_struct, struct_hier; 593 int cap=0; 594 u64 capabilities; 595 char path[MAX_FN_SIZE]; 596 597 err_type_info.err_type_info=0; // Initial 598 err_type_info.err_type_info_u.mode=0; // Query mode; 599 err_type_info.err_type_info_u.err_inj=0; 600 601 printf("All capabilities implemented in pal_mc_error_inject:\n"); 602 sprintf(path, PATH_FORMAT ,0); 603 for (err_sev=0;err_sev<3;err_sev++) 604 for (err_struct=0;err_struct<5;err_struct++) 605 for (struct_hier=0;struct_hier<5;struct_hier++) 606 { 607 status=-1; 608 capabilities=0; 609 err_type_info.err_type_info_u.err_sev=err_sev; 610 err_type_info.err_type_info_u.err_struct=err_struct; 611 err_type_info.err_type_info_u.struct_hier=struct_hier; 612 613 if (query_capabilities(path, err_type_info, &capabilities)<0) 614 continue; 615 616 if (rd_status(path, &status)<0) 617 continue; 618 619 if (status==0) { 620 cap=1; 621 printf("For err_sev=%d, 
err_struct=%d, struct_hier=%d: ", 622 err_sev, err_struct, struct_hier); 623 printf("capabilities 0x%lx\n", capabilities); 624 } 625 } 626 if (!cap) { 627 printf("No capabilities supported.\n"); 628 return 0; 629 } 630 631 return 0; 632} 633 634int err_inject(int cpu, char *path, err_type_info_t err_type_info, 635 err_struct_info_t err_struct_info, 636 err_data_buffer_t err_data_buffer) 637{ 638 int status; 639 char fn[MAX_FN_SIZE]; 640 641 log_info(cpu, "err_type_info=%lx, err_struct_info=%lx, ", 642 err_type_info.err_type_info, 643 err_struct_info.err_struct_info); 644 log_info(cpu,"err_data_buffer=[%lx,%lx,%lx]\n", 645 err_data_buffer.err_data_buffer[0], 646 err_data_buffer.err_data_buffer[1], 647 err_data_buffer.err_data_buffer[2]); 648 sprintf(fn, "%s/err_type_info", path); 649 wr(fn, err_type_info.err_type_info); 650 sprintf(fn, "%s/err_struct_info", path); 651 wr(fn, err_struct_info.err_struct_info); 652 sprintf(fn, "%s/err_data_buffer", path); 653 wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE); 654 655 // Fire pal_mc_error_inject procedure. 
656 sprintf(fn, "%s/call_start", path); 657 wr(fn,mode); 658 659 if (rd_status(path, &status)<0) { 660 vbprintf("fail: read status\n"); 661 return -100; 662 } 663 664 if (status!=0) { 665 log_info(cpu, "fail: status=%d\n", status); 666 return status; 667 } 668 669 return status; 670} 671 672static int construct_data_buf(char *path, err_type_info_t err_type_info, 673 err_struct_info_t err_struct_info, 674 err_data_buffer_t *err_data_buffer, 675 void *va1) 676{ 677 char fn[MAX_FN_SIZE]; 678 u64 virt_addr=0, phys_addr=0; 679 680 vbprintf("va1=%lx\n", (u64)va1); 681 memset(&err_data_buffer->err_data_buffer_cache, 0, ERR_DATA_BUFFER_SIZE*8); 682 683 switch (err_type_info.err_type_info_u.err_struct) { 684 case 1: // Cache 685 switch (err_struct_info.err_struct_info_cache.cl_id) { 686 case 1: //Virtual addr 687 err_data_buffer->err_data_buffer_cache.inj_addr=(u64)va1; 688 break; 689 case 2: //Phys addr 690 sprintf(fn, "%s/virtual_to_phys", path); 691 virt_addr=(u64)va1; 692 if (wr(fn,virt_addr)<0) 693 return -1; 694 rd(fn, &phys_addr); 695 err_data_buffer->err_data_buffer_cache.inj_addr=phys_addr; 696 break; 697 default: 698 printf("Not supported cl_id\n"); 699 break; 700 } 701 break; 702 case 2: // TLB 703 break; 704 case 3: // Register file 705 break; 706 case 4: // Bus/system interconnect 707 default: 708 printf("Not supported err_struct\n"); 709 break; 710 } 711 712 return 0; 713} 714 715typedef struct { 716 u64 cpu; 717 u64 loop; 718 u64 interval; 719 u64 err_type_info; 720 u64 err_struct_info; 721 u64 err_data_buffer[ERR_DATA_BUFFER_SIZE]; 722} parameters_t; 723 724parameters_t line_para; 725int para; 726 727static int empty_data_buffer(u64 *err_data_buffer) 728{ 729 int empty=1; 730 int i; 731 732 for (i=0;i<ERR_DATA_BUFFER_SIZE; i++) 733 if (err_data_buffer[i]!=-1) 734 empty=0; 735 736 return empty; 737} 738 739int err_inj() 740{ 741 err_type_info_t err_type_info; 742 err_struct_info_t err_struct_info; 743 err_data_buffer_t err_data_buffer; 744 int count; 745 FILE 
*fp; 746 unsigned long cpu, loop, interval, err_type_info_conf, err_struct_info_conf; 747 u64 err_data_buffer_conf[ERR_DATA_BUFFER_SIZE]; 748 int num; 749 int i; 750 char path[MAX_FN_SIZE]; 751 parameters_t parameters[MAX_TASK_NUM]={}; 752 pid_t child_pid[MAX_TASK_NUM]; 753 time_t current_time; 754 int status; 755 756 if (!para) { 757 fp=fopen("err.conf", "r"); 758 if (fp==NULL) { 759 perror("Error open err.conf"); 760 return -1; 761 } 762 763 num=0; 764 while (!feof(fp)) { 765 char buf[256]; 766 memset(buf,0,256); 767 fgets(buf, 256, fp); 768 count=sscanf(buf, "%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx\n", 769 &cpu, &loop, &interval,&err_type_info_conf, 770 &err_struct_info_conf, 771 &err_data_buffer_conf[0], 772 &err_data_buffer_conf[1], 773 &err_data_buffer_conf[2]); 774 if (count!=PARA_FIELD_NUM+3) { 775 err_data_buffer_conf[0]=-1; 776 err_data_buffer_conf[1]=-1; 777 err_data_buffer_conf[2]=-1; 778 count=sscanf(buf, "%lx, %lx, %lx, %lx, %lx\n", 779 &cpu, &loop, &interval,&err_type_info_conf, 780 &err_struct_info_conf); 781 if (count!=PARA_FIELD_NUM) 782 continue; 783 } 784 785 parameters[num].cpu=cpu; 786 parameters[num].loop=loop; 787 parameters[num].interval= interval>MIN_INTERVAL 788 ?interval:MIN_INTERVAL; 789 parameters[num].err_type_info=err_type_info_conf; 790 parameters[num].err_struct_info=err_struct_info_conf; 791 memcpy(parameters[num++].err_data_buffer, 792 err_data_buffer_conf,ERR_DATA_BUFFER_SIZE*8) ; 793 794 if (num>=MAX_TASK_NUM) 795 break; 796 } 797 } 798 else { 799 parameters[0].cpu=line_para.cpu; 800 parameters[0].loop=line_para.loop; 801 parameters[0].interval= line_para.interval>MIN_INTERVAL 802 ?line_para.interval:MIN_INTERVAL; 803 parameters[0].err_type_info=line_para.err_type_info; 804 parameters[0].err_struct_info=line_para.err_struct_info; 805 memcpy(parameters[0].err_data_buffer, 806 line_para.err_data_buffer,ERR_DATA_BUFFER_SIZE*8) ; 807 808 num=1; 809 } 810 811 /* Create semaphore: If one_lock, one semaphore for all processors. 
812 Otherwise, one semaphore for each processor. */ 813 if (one_lock) { 814 if (create_sem(0)) { 815 printf("Can not create semaphore...exit\n"); 816 free_sem(0); 817 return -1; 818 } 819 } 820 else { 821 for (i=0;i<num;i++) { 822 if (create_sem(parameters[i].cpu)) { 823 printf("Can not create semaphore for cpu%d...exit\n",i); 824 free_sem(parameters[num].cpu); 825 return -1; 826 } 827 } 828 } 829 830 /* Create a shm segment which will be used to inject/consume errors on.*/ 831 if (create_shm()==-1) { 832 printf("Error to create shm...exit\n"); 833 return -1; 834 } 835 836 for (i=0;i<num;i++) { 837 pid_t pid; 838 839 current_time=time(NULL); 840 log_info(parameters[i].cpu, "\nBegine at %s", ctime(¤t_time)); 841 log_info(parameters[i].cpu, "Configurations:\n"); 842 log_info(parameters[i].cpu,"On cpu%ld: loop=%lx, interval=%lx(s)", 843 parameters[i].cpu, 844 parameters[i].loop, 845 parameters[i].interval); 846 log_info(parameters[i].cpu," err_type_info=%lx,err_struct_info=%lx\n", 847 parameters[i].err_type_info, 848 parameters[i].err_struct_info); 849 850 sprintf(path, PATH_FORMAT, (int)parameters[i].cpu); 851 err_type_info.err_type_info=parameters[i].err_type_info; 852 err_struct_info.err_struct_info=parameters[i].err_struct_info; 853 memcpy(err_data_buffer.err_data_buffer, 854 parameters[i].err_data_buffer, 855 ERR_DATA_BUFFER_SIZE*8); 856 857 pid=fork(); 858 if (pid==0) { 859 unsigned long mask[MASK_SIZE]; 860 int j, k; 861 862 void *va1, *va2; 863 864 /* Allocate two memory areas va1 and va2 in shm */ 865 va1=shmaddr+parameters[i].cpu*PAGE_SIZE; 866 va2=shmaddr+parameters[i].cpu*PAGE_SIZE+PAGE_SIZE; 867 868 vbprintf("va1=%lx, va2=%lx\n", (u64)va1, (u64)va2); 869 memset(va1, 0x1, PAGE_SIZE); 870 memset(va2, 0x2, PAGE_SIZE); 871 872 if (empty_data_buffer(err_data_buffer.err_data_buffer)) 873 /* If not specified yet, construct data buffer 874 * with va1 875 */ 876 construct_data_buf(path, err_type_info, 877 err_struct_info, &err_data_buffer,va1); 878 879 for 
(j=0;j<MASK_SIZE;j++) 880 mask[j]=0; 881 882 cpu=parameters[i].cpu; 883 k = cpu%64; 884 j = cpu/64; 885 mask[j] = 1UL << k; 886 887 if (sched_setaffinity(0, MASK_SIZE*8, mask)==-1) { 888 perror("Error sched_setaffinity:"); 889 return -1; 890 } 891 892 for (j=0; j<parameters[i].loop; j++) { 893 log_info(parameters[i].cpu,"Injection "); 894 log_info(parameters[i].cpu,"on cpu%ld: #%d/%ld ", 895 896 parameters[i].cpu,j+1, parameters[i].loop); 897 898 /* Hold the lock */ 899 if (one_lock) 900 lock(0); 901 else 902 /* Hold lock on this cpu */ 903 lock(parameters[i].cpu); 904 905 if ((status=err_inject(parameters[i].cpu, 906 path, err_type_info, 907 err_struct_info, err_data_buffer)) 908 ==0) { 909 /* consume the error for "inject only"*/ 910 memcpy(va2, va1, PAGE_SIZE); 911 memcpy(va1, va2, PAGE_SIZE); 912 log_info(parameters[i].cpu, 913 "successful\n"); 914 } 915 else { 916 log_info(parameters[i].cpu,"fail:"); 917 log_info(parameters[i].cpu, 918 "status=%d\n", status); 919 unlock(parameters[i].cpu); 920 break; 921 } 922 if (one_lock) 923 /* Release the lock */ 924 unlock(0); 925 /* Release lock on this cpu */ 926 else 927 unlock(parameters[i].cpu); 928 929 if (j < parameters[i].loop-1) 930 sleep(parameters[i].interval); 931 } 932 current_time=time(NULL); 933 log_info(parameters[i].cpu, "Done at %s", ctime(¤t_time)); 934 return 0; 935 } 936 else if (pid<0) { 937 perror("Error fork:"); 938 continue; 939 } 940 child_pid[i]=pid; 941 } 942 for (i=0;i<num;i++) 943 waitpid(child_pid[i], NULL, 0); 944 945 if (one_lock) 946 free_sem(0); 947 else 948 for (i=0;i<num;i++) 949 free_sem(parameters[i].cpu); 950 951 printf("All done.\n"); 952 953 return 0; 954} 955 956void help() 957{ 958 printf("err_inject_tool:\n"); 959 printf("\t-q: query all capabilities. default: off\n"); 960 printf("\t-m: procedure mode. 1: physical 2: virtual. default: 1\n"); 961 printf("\t-i: inject errors. default: off\n"); 962 printf("\t-l: one lock per cpu. 
default: one lock for all\n"); 963 printf("\t-e: error parameters:\n"); 964 printf("\t\tcpu,loop,interval,err_type_info,err_struct_info[,err_data_buffer[0],err_data_buffer[1],err_data_buffer[2]]\n"); 965 printf("\t\t cpu: logical cpu number the error will be inject in.\n"); 966 printf("\t\t loop: times the error will be injected.\n"); 967 printf("\t\t interval: In second. every so often one error is injected.\n"); 968 printf("\t\t err_type_info, err_struct_info: PAL parameters.\n"); 969 printf("\t\t err_data_buffer: PAL parameter. Optional. If not present,\n"); 970 printf("\t\t it's constructed by tool automatically. Be\n"); 971 printf("\t\t careful to provide err_data_buffer and make\n"); 972 printf("\t\t sure it's working with the environment.\n"); 973 printf("\t Note:no space between error parameters.\n"); 974 printf("\t default: Take error parameters from err.conf instead of command line.\n"); 975 printf("\t-v: verbose. default: off\n"); 976 printf("\t-h: help\n\n"); 977 printf("The tool will take err.conf file as "); 978 printf("input to inject single or multiple errors "); 979 printf("on one or multiple cpus in parallel.\n"); 980} 981 982int main(int argc, char **argv) 983{ 984 char c; 985 int do_err_inj=0; 986 int do_query_all=0; 987 int count; 988 u32 m; 989 990 /* Default one lock for all cpu's */ 991 one_lock=1; 992 while ((c = getopt(argc, argv, "m:iqvhle:")) != EOF) 993 switch (c) { 994 case 'm': /* Procedure mode. 
1: phys 2: virt */ 995 count=sscanf(optarg, "%x", &m); 996 if (count!=1 || (m!=1 && m!=2)) { 997 printf("Wrong mode number.\n"); 998 help(); 999 return -1; 1000 } 1001 mode=m; 1002 break; 1003 case 'i': /* Inject errors */ 1004 do_err_inj=1; 1005 break; 1006 case 'q': /* Query */ 1007 do_query_all=1; 1008 break; 1009 case 'v': /* Verbose */ 1010 verbose=1; 1011 break; 1012 case 'l': /* One lock per cpu */ 1013 one_lock=0; 1014 break; 1015 case 'e': /* error arguments */ 1016 /* Take parameters: 1017 * #cpu, loop, interval, err_type_info, err_struct_info[, err_data_buffer] 1018 * err_data_buffer is optional. Recommend not to specify 1019 * err_data_buffer. Better to use tool to generate it. 1020 */ 1021 count=sscanf(optarg, 1022 "%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx\n", 1023 &line_para.cpu, 1024 &line_para.loop, 1025 &line_para.interval, 1026 &line_para.err_type_info, 1027 &line_para.err_struct_info, 1028 &line_para.err_data_buffer[0], 1029 &line_para.err_data_buffer[1], 1030 &line_para.err_data_buffer[2]); 1031 if (count!=PARA_FIELD_NUM+3) { 1032 line_para.err_data_buffer[0]=-1, 1033 line_para.err_data_buffer[1]=-1, 1034 line_para.err_data_buffer[2]=-1; 1035 count=sscanf(optarg, "%lx, %lx, %lx, %lx, %lx\n", 1036 &line_para.cpu, 1037 &line_para.loop, 1038 &line_para.interval, 1039 &line_para.err_type_info, 1040 &line_para.err_struct_info); 1041 if (count!=PARA_FIELD_NUM) { 1042 printf("Wrong error arguments.\n"); 1043 help(); 1044 return -1; 1045 } 1046 } 1047 para=1; 1048 break; 1049 continue; 1050 break; 1051 case 'h': 1052 help(); 1053 return 0; 1054 default: 1055 break; 1056 } 1057 1058 if (do_query_all) 1059 query_all_capabilities(); 1060 if (do_err_inj) 1061 err_inj(); 1062 1063 if (!do_query_all && !do_err_inj) 1064 help(); 1065 1066 return 0; 1067} 1068 1069