This source file includes the following definitions.
- dcbf
- err_msg
- compute_chunk_start_addr
- compute_word_offset
- compute_store_pattern
- extract_tid
- extract_word_offset
- extract_sweep_id
- start_verification_log
- log_anamoly
- end_verification_log
- verify_chunk
- set_pthread_cpu
- set_mycpu
- segv_handler
- set_segv_handler
- rim_fn
- mem_snapshot_fn
- alrm_sighandler
- main
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 #define _GNU_SOURCE
19 #include <stdio.h>
20 #include <sys/mman.h>
21 #include <sys/types.h>
22 #include <sys/wait.h>
23 #include <sys/ipc.h>
24 #include <sys/shm.h>
25 #include <sys/stat.h>
26 #include <sys/time.h>
27 #include <linux/futex.h>
28 #include <unistd.h>
29 #include <asm/unistd.h>
30 #include <string.h>
31 #include <stdlib.h>
32 #include <fcntl.h>
33 #include <sched.h>
34 #include <time.h>
35 #include <stdarg.h>
36 #include <sched.h>
37 #include <pthread.h>
38 #include <signal.h>
39 #include <sys/prctl.h>
40
/*
 * Execute the PowerPC "dcbf" instruction on the location addr points to,
 * followed by a "sync". The "memory" clobber stops the compiler from
 * carrying cached memory values across the instruction pair.
 */
static inline void dcbf(volatile unsigned int *addr)
{
	unsigned char *target = (unsigned char *)addr;

	__asm__ __volatile__ ("dcbf %y0; sync" : : "Z"(*target) : "memory");
}
45
/*
 * Print a banner containing the error message and the current time on
 * stdout, then terminate the process with exit status 1. Never returns.
 */
static void err_msg(char *msg)
{
	static const char banner[] = "=================================\n";
	time_t timestamp = time(NULL);

	printf("%s", banner);
	printf(" Error: %s\n", msg);
	printf(" %s", ctime(&timestamp));
	printf("%s", banner);

	exit(1);
}
57
/* First attachment of the shared memory segment; per-thread chunks live here. */
static char *map1;
/* Second attachment of the same segment; the snapshot thread writes through it. */
static char *map2;
/* PID printed in the header of every verification log. */
static pid_t rim_process_pid;
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
/*
 * Set to 1 by a worker thread that reads back a word differing from the
 * pattern it stored; polled by all threads as a stop condition.
 */
static volatile unsigned int corruption_found;
83
84
85
86
87
88
89
90
91
92
/*
 * Worker thread bookkeeping. The thread id occupies THREAD_ID_BITS bits of
 * every stored pattern. The mask is built from an unsigned constant because
 * it is only ever applied to unsigned pattern words.
 */
#define MAX_THREADS	64
#define THREAD_ID_BITS	8
#define THREAD_ID_MASK	((1U << THREAD_ID_BITS) - 1)
/* Thread ids handed to the workers (one slot per worker). */
static unsigned int rim_thread_ids[MAX_THREADS];
/* pthread handles of the workers, indexed by thread id. */
static pthread_t rim_threads[MAX_THREADS];
98
99
100
101
102
103
104
105
106
107
108
/*
 * Geometry of a per-thread chunk: each thread owns RIM_CHUNK_SIZE bytes,
 * accessed one unsigned int ("word") at a time.
 */
#define RIM_CHUNK_SIZE	1024
#define BITS_PER_BYTE	8
#define WORD_SIZE	(sizeof(unsigned int))
#define WORD_BITS	(WORD_SIZE * BITS_PER_BYTE)
#define WORDS_PER_CHUNK	(RIM_CHUNK_SIZE / WORD_SIZE)
114
115 static inline char *compute_chunk_start_addr(unsigned int thread_id)
116 {
117 char *chunk_start;
118
119 chunk_start = (char *)((unsigned long)map1 +
120 (thread_id * RIM_CHUNK_SIZE));
121
122 return chunk_start;
123 }
124
125
126
127
128
129
130
131
132
/*
 * Number of pattern bits needed to hold a word's index inside its chunk
 * (the count of trailing zero bits of WORDS_PER_CHUNK), and the matching mask.
 * The mask uses an unsigned constant so the shift stays well defined
 * whatever width the chunk geometry produces.
 */
#define WORD_OFFSET_BITS (__builtin_ctz(WORDS_PER_CHUNK))
#define WORD_OFFSET_MASK ((1U << WORD_OFFSET_BITS) - 1)
135
136 static inline unsigned int compute_word_offset(char *start, unsigned int *addr)
137 {
138 unsigned int delta_bytes, ret;
139 delta_bytes = (unsigned long)addr - (unsigned long)start;
140
141 ret = delta_bytes/WORD_SIZE;
142
143 return ret;
144 }
145
146
147
148
149
150
151
152
153
154
155
156
157
158
/*
 * The sweep id takes whatever pattern bits remain after the thread id and
 * the word offset. The mask uses an unsigned constant so the shift stays
 * well defined even if the remaining width grows to the top bit of an int.
 */
#define SWEEP_ID_BITS (WORD_BITS - (THREAD_ID_BITS + WORD_OFFSET_BITS))
#define SWEEP_ID_MASK ((1U << SWEEP_ID_BITS) - 1)
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
/*
 * Bit positions of the three pattern fields, from least to most significant:
 * sweep id, then word offset, then thread id.
 */
#define SWEEP_ID_SHIFT		0
#define WORD_OFFSET_SHIFT	(SWEEP_ID_BITS)
#define THREAD_ID_SHIFT		(SWEEP_ID_BITS + WORD_OFFSET_BITS)
197
198
199
200
201
202 static inline unsigned int compute_store_pattern(unsigned int tid,
203 unsigned int *addr,
204 unsigned int sweep_id)
205 {
206 unsigned int ret = 0;
207 char *start = compute_chunk_start_addr(tid);
208 unsigned int word_offset = compute_word_offset(start, addr);
209
210 ret += (tid & THREAD_ID_MASK) << THREAD_ID_SHIFT;
211 ret += (word_offset & WORD_OFFSET_MASK) << WORD_OFFSET_SHIFT;
212 ret += (sweep_id & SWEEP_ID_MASK) << SWEEP_ID_SHIFT;
213 return ret;
214 }
215
216
217 static inline unsigned int extract_tid(unsigned int pattern)
218 {
219 unsigned int ret;
220
221 ret = (pattern >> THREAD_ID_SHIFT) & THREAD_ID_MASK;
222 return ret;
223 }
224
225
226 static inline unsigned int extract_word_offset(unsigned int pattern)
227 {
228 unsigned int ret;
229
230 ret = (pattern >> WORD_OFFSET_SHIFT) & WORD_OFFSET_MASK;
231
232 return ret;
233 }
234
235
236 static inline unsigned int extract_sweep_id(unsigned int pattern)
237
238 {
239 unsigned int ret;
240
241 ret = (pattern >> SWEEP_ID_SHIFT) & SWEEP_ID_MASK;
242
243 return ret;
244 }
245
246
247
248
249
250
/* Directory that receives the per-thread verification logs. */
#define LOGDIR_NAME_SIZE 100
static char logdir[LOGDIR_NAME_SIZE];

/* Open log stream of each thread, indexed by thread id. */
static FILE *fp[MAX_THREADS];
/* printf-style template for a thread's log file name; takes the thread id. */
static const char logfilename[] ="Thread-%02d-Chunk";
256
257 static inline void start_verification_log(unsigned int tid,
258 unsigned int *addr,
259 unsigned int cur_sweep_id,
260 unsigned int prev_sweep_id)
261 {
262 FILE *f;
263 char logfile[30];
264 char path[LOGDIR_NAME_SIZE + 30];
265 char separator[2] = "/";
266 char *chunk_start = compute_chunk_start_addr(tid);
267 unsigned int size = RIM_CHUNK_SIZE;
268
269 sprintf(logfile, logfilename, tid);
270 strcpy(path, logdir);
271 strcat(path, separator);
272 strcat(path, logfile);
273 f = fopen(path, "w");
274
275 if (!f) {
276 err_msg("Unable to create logfile\n");
277 }
278
279 fp[tid] = f;
280
281 fprintf(f, "----------------------------------------------------------\n");
282 fprintf(f, "PID = %d\n", rim_process_pid);
283 fprintf(f, "Thread id = %02d\n", tid);
284 fprintf(f, "Chunk Start Addr = 0x%016lx\n", (unsigned long)chunk_start);
285 fprintf(f, "Chunk Size = %d\n", size);
286 fprintf(f, "Next Store Addr = 0x%016lx\n", (unsigned long)addr);
287 fprintf(f, "Current sweep-id = 0x%08x\n", cur_sweep_id);
288 fprintf(f, "Previous sweep-id = 0x%08x\n", prev_sweep_id);
289 fprintf(f, "----------------------------------------------------------\n");
290 }
291
292 static inline void log_anamoly(unsigned int tid, unsigned int *addr,
293 unsigned int expected, unsigned int observed)
294 {
295 FILE *f = fp[tid];
296
297 fprintf(f, "Thread %02d: Addr 0x%lx: Expected 0x%x, Observed 0x%x\n",
298 tid, (unsigned long)addr, expected, observed);
299 fprintf(f, "Thread %02d: Expected Thread id = %02d\n", tid, extract_tid(expected));
300 fprintf(f, "Thread %02d: Observed Thread id = %02d\n", tid, extract_tid(observed));
301 fprintf(f, "Thread %02d: Expected Word offset = %03d\n", tid, extract_word_offset(expected));
302 fprintf(f, "Thread %02d: Observed Word offset = %03d\n", tid, extract_word_offset(observed));
303 fprintf(f, "Thread %02d: Expected sweep-id = 0x%x\n", tid, extract_sweep_id(expected));
304 fprintf(f, "Thread %02d: Observed sweep-id = 0x%x\n", tid, extract_sweep_id(observed));
305 fprintf(f, "----------------------------------------------------------\n");
306 }
307
308 static inline void end_verification_log(unsigned int tid, unsigned nr_anamolies)
309 {
310 FILE *f = fp[tid];
311 char logfile[30];
312 char path[LOGDIR_NAME_SIZE + 30];
313 char separator[] = "/";
314
315 fclose(f);
316
317 if (nr_anamolies == 0) {
318 remove(path);
319 return;
320 }
321
322 sprintf(logfile, logfilename, tid);
323 strcpy(path, logdir);
324 strcat(path, separator);
325 strcat(path, logfile);
326
327 printf("Thread %02d chunk has %d corrupted words. For details check %s\n",
328 tid, nr_anamolies, path);
329 }
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358 static void verify_chunk(unsigned int tid, unsigned int *next_store_addr,
359 unsigned int cur_sweep_id,
360 unsigned int prev_sweep_id)
361 {
362 unsigned int *iter_ptr;
363 unsigned int size = RIM_CHUNK_SIZE;
364 unsigned int expected;
365 unsigned int observed;
366 char *chunk_start = compute_chunk_start_addr(tid);
367
368 int nr_anamolies = 0;
369
370 start_verification_log(tid, next_store_addr,
371 cur_sweep_id, prev_sweep_id);
372
373 for (iter_ptr = (unsigned int *)chunk_start;
374 (unsigned long)iter_ptr < (unsigned long)chunk_start + size;
375 iter_ptr++) {
376 unsigned int expected_sweep_id;
377
378 if (iter_ptr < next_store_addr) {
379 expected_sweep_id = cur_sweep_id;
380 } else {
381 expected_sweep_id = prev_sweep_id;
382 }
383
384 expected = compute_store_pattern(tid, iter_ptr, expected_sweep_id);
385
386 dcbf((volatile unsigned int*)iter_ptr);
387 observed = *iter_ptr;
388
389 if (observed != expected) {
390 nr_anamolies++;
391 log_anamoly(tid, iter_ptr, expected, observed);
392 }
393 }
394
395 end_verification_log(tid, nr_anamolies);
396 }
397
/*
 * Restrict thread th to run on the given cpu only. A failure to set the
 * affinity is reported on stderr but is not fatal.
 */
static void set_pthread_cpu(pthread_t th, int cpu)
{
	cpu_set_t run_cpu_mask;
	struct sched_param param;
	int rc;

	CPU_ZERO(&run_cpu_mask);
	CPU_SET(cpu, &run_cpu_mask);
	rc = pthread_setaffinity_np(th, sizeof(cpu_set_t), &run_cpu_mask);
	if (rc != 0) {
		/* pthread functions return the error number directly. */
		fprintf(stderr, "could not pin thread to cpu %d: %s\n",
			cpu, strerror(rc));
	}

	param.sched_priority = 1;
	/* SCHED_FIFO is deliberately disabled by the constant 0. */
	if (0 && sched_setscheduler(0, SCHED_FIFO, &param) == -1) {
		fprintf(stderr, "could not set SCHED_FIFO, run as root?\n");
	}
}
413
/*
 * Restrict the calling process/thread to run on the given cpu only. A
 * failure to set the affinity is reported on stderr but is not fatal.
 */
static void set_mycpu(int cpu)
{
	cpu_set_t run_cpu_mask;
	struct sched_param param;

	CPU_ZERO(&run_cpu_mask);
	CPU_SET(cpu, &run_cpu_mask);
	if (sched_setaffinity(0, sizeof(cpu_set_t), &run_cpu_mask) == -1) {
		perror("sched_setaffinity");
	}

	param.sched_priority = 1;
	/* SCHED_FIFO is deliberately disabled by the constant 0. */
	if (0 && sched_setscheduler(0, SCHED_FIFO, &param) == -1) {
		fprintf(stderr, "could not set SCHED_FIFO, run as root?\n");
	}
}
428
429 static volatile int segv_wait;
430
431 static void segv_handler(int signo, siginfo_t *info, void *extra)
432 {
433 while (segv_wait) {
434 sched_yield();
435 }
436
437 }
438
439 static void set_segv_handler(void)
440 {
441 struct sigaction sa;
442
443 sa.sa_flags = SA_SIGINFO;
444 sa.sa_sigaction = segv_handler;
445
446 if (sigaction(SIGSEGV, &sa, NULL) == -1) {
447 perror("sigaction");
448 exit(EXIT_FAILURE);
449 }
450 }
451
/*
 * Set to 1 by the SIGALRM handler when the run time has elapsed and polled
 * by the worker and snapshot threads. It is written from a signal handler,
 * hence volatile sig_atomic_t.
 */
volatile sig_atomic_t timeout = 0;
453
454
455
456
457
458
/*
 * Worker thread body. arg points to the unsigned int thread id.
 *
 * The thread fills its own chunk with the sweep-0 pattern and then sweeps
 * over the chunk again and again. For every word of a sweep it flushes the
 * word, reads it back, compares it with the pattern of the previous sweep
 * and finally overwrites it with the pattern of the current sweep. A
 * mismatch sets corruption_found. As soon as corruption_found or timeout is
 * seen, the whole chunk is verified and logged and the thread returns.
 */
static void *rim_fn(void *arg)
{
	unsigned int tid = *((unsigned int *)arg);

	int size = RIM_CHUNK_SIZE;
	char *chunk_start = compute_chunk_start_addr(tid);

	unsigned int prev_sweep_id;
	unsigned int cur_sweep_id = 0;

	/* The value to store is staged in a local and copied via a pointer. */
	unsigned int pattern = cur_sweep_id;
	unsigned int *pattern_ptr = &pattern;
	unsigned int *w_ptr, read_data;

	set_segv_handler();

	/* Initial fill: write the sweep-0 pattern into every word of the chunk. */
	for (w_ptr = (unsigned int *)chunk_start;
	     (unsigned long)w_ptr < (unsigned long)(chunk_start) + size;
	     w_ptr++) {

		*pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id);
		*w_ptr = *pattern_ptr;
	}

	while (!corruption_found && !timeout) {
		prev_sweep_id = cur_sweep_id;
		cur_sweep_id = cur_sweep_id + 1;

		for (w_ptr = (unsigned int *)chunk_start;
		     (unsigned long)w_ptr < (unsigned long)(chunk_start) + size;
		     w_ptr++) {
			unsigned int old_pattern;

			/* What the previous sweep stored at this word. */
			old_pattern = compute_store_pattern(tid, w_ptr, prev_sweep_id);

			/* Flush the word before reading it back. */
			dcbf((volatile unsigned int*)w_ptr);

			read_data = *w_ptr;

			/* The word must still hold the previous sweep's pattern. */
			if (read_data != old_pattern) {

				corruption_found = 1;
			}

			/*
			 * Stop on corruption seen by any thread or on timeout.
			 * w_ptr has not been overwritten in this sweep yet, so it
			 * is passed as the next store address.
			 */
			if (corruption_found || timeout) {

				verify_chunk(tid, w_ptr, cur_sweep_id, prev_sweep_id);

				return 0;
			}

			/* Compute the pattern of the current sweep ... */
			*pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id);

			/* ... and store it into the chunk. */
			*w_ptr = *pattern_ptr;
		}
	}

	return NULL;
}
561
562
/* First CPU to pin threads to; set with -r. */
static unsigned long start_cpu = 0;
/* Number of worker threads; set with -n. */
static unsigned long nrthreads = 4;

/* Handle of the thread running mem_snapshot_fn(). */
static pthread_t mem_snapshot_thread;
567
568 static void *mem_snapshot_fn(void *arg)
569 {
570 int page_size = getpagesize();
571 size_t size = page_size;
572 void *tmp = malloc(size);
573
574 while (!corruption_found && !timeout) {
575
576 segv_wait = 1;
577
578 mprotect(map1, size, PROT_READ);
579
580
581
582
583
584 memcpy(tmp, map1, size);
585
586
587
588
589
590
591
592 memcpy(map2, tmp, size);
593
594
595
596
597 asm volatile("sync" ::: "memory");
598 mprotect(map1, size, PROT_READ|PROT_WRITE);
599 asm volatile("sync" ::: "memory");
600 segv_wait = 0;
601
602 usleep(1);
603 }
604
605 return 0;
606 }
607
608 void alrm_sighandler(int sig)
609 {
610 timeout = 1;
611 }
612
613 int main(int argc, char *argv[])
614 {
615 int c;
616 int page_size = getpagesize();
617 time_t now;
618 int i, dir_error;
619 pthread_attr_t attr;
620 key_t shm_key = (key_t) getpid();
621 int shmid, run_time = 20 * 60;
622 struct sigaction sa_alrm;
623
624 snprintf(logdir, LOGDIR_NAME_SIZE,
625 "/tmp/logdir-%u", (unsigned int)getpid());
626 while ((c = getopt(argc, argv, "r:hn:l:t:")) != -1) {
627 switch(c) {
628 case 'r':
629 start_cpu = strtoul(optarg, NULL, 10);
630 break;
631 case 'h':
632 printf("%s [-r <start_cpu>] [-n <nrthreads>] [-l <logdir>] [-t <timeout>]\n", argv[0]);
633 exit(0);
634 break;
635 case 'n':
636 nrthreads = strtoul(optarg, NULL, 10);
637 break;
638 case 'l':
639 strncpy(logdir, optarg, LOGDIR_NAME_SIZE - 1);
640 break;
641 case 't':
642 run_time = strtoul(optarg, NULL, 10);
643 break;
644 default:
645 printf("invalid option\n");
646 exit(0);
647 break;
648 }
649 }
650
651 if (nrthreads > MAX_THREADS)
652 nrthreads = MAX_THREADS;
653
654 shmid = shmget(shm_key, page_size, IPC_CREAT|0666);
655 if (shmid < 0) {
656 err_msg("Failed shmget\n");
657 }
658
659 map1 = shmat(shmid, NULL, 0);
660 if (map1 == (void *) -1) {
661 err_msg("Failed shmat");
662 }
663
664 map2 = shmat(shmid, NULL, 0);
665 if (map2 == (void *) -1) {
666 err_msg("Failed shmat");
667 }
668
669 dir_error = mkdir(logdir, 0755);
670
671 if (dir_error) {
672 err_msg("Failed mkdir");
673 }
674
675 printf("start_cpu list:%lu\n", start_cpu);
676 printf("number of worker threads:%lu + 1 snapshot thread\n", nrthreads);
677 printf("Allocated address:0x%016lx + secondary map:0x%016lx\n", (unsigned long)map1, (unsigned long)map2);
678 printf("logdir at : %s\n", logdir);
679 printf("Timeout: %d seconds\n", run_time);
680
681 time(&now);
682 printf("=================================\n");
683 printf(" Starting Test\n");
684 printf(" %s", ctime(&now));
685 printf("=================================\n");
686
687 for (i = 0; i < nrthreads; i++) {
688 if (1 && !fork()) {
689 prctl(PR_SET_PDEATHSIG, SIGKILL);
690 set_mycpu(start_cpu + i);
691 for (;;)
692 sched_yield();
693 exit(0);
694 }
695 }
696
697
698 sa_alrm.sa_handler = &alrm_sighandler;
699 sigemptyset(&sa_alrm.sa_mask);
700 sa_alrm.sa_flags = 0;
701
702 if (sigaction(SIGALRM, &sa_alrm, 0) == -1) {
703 err_msg("Failed signal handler registration\n");
704 }
705
706 alarm(run_time);
707
708 pthread_attr_init(&attr);
709 for (i = 0; i < nrthreads; i++) {
710 rim_thread_ids[i] = i;
711 pthread_create(&rim_threads[i], &attr, rim_fn, &rim_thread_ids[i]);
712 set_pthread_cpu(rim_threads[i], start_cpu + i);
713 }
714
715 pthread_create(&mem_snapshot_thread, &attr, mem_snapshot_fn, map1);
716 set_pthread_cpu(mem_snapshot_thread, start_cpu + i);
717
718
719 pthread_join(mem_snapshot_thread, NULL);
720 for (i = 0; i < nrthreads; i++) {
721 pthread_join(rim_threads[i], NULL);
722 }
723
724 if (!timeout) {
725 time(&now);
726 printf("=================================\n");
727 printf(" Data Corruption Detected\n");
728 printf(" %s", ctime(&now));
729 printf(" See logfiles in %s\n", logdir);
730 printf("=================================\n");
731 return 1;
732 }
733 return 0;
734 }