1 #ifndef _TOOLS_LINUX_RING_BUFFER_H_ 2 #define _TOOLS_LINUX_RING_BUFFER_H_ 3 4 #include <asm/barrier.h> 5 #include <linux/perf_event.h> 6 7 /* 8 * Contract with kernel for walking the perf ring buffer from 9 * user space requires the following barrier pairing (quote 10 * from kernel/events/ring_buffer.c): 11 * 12 * Since the mmap() consumer (userspace) can run on a 13 * different CPU: 14 * 15 * kernel user 16 * 17 * if (LOAD ->data_tail) { LOAD ->data_head 18 * (A) smp_rmb() (C) 19 * STORE $data LOAD $data 20 * smp_wmb() (B) smp_mb() (D) 21 * STORE ->data_head STORE ->data_tail 22 * } 23 * 24 * Where A pairs with D, and B pairs with C. 25 * 26 * In our case A is a control dependency that separates the 27 * load of the ->data_tail and the stores of $data. In case 28 * ->data_tail indicates there is no room in the buffer to 29 * store $data we do not. 30 * 31 * D needs to be a full barrier since it separates the data 32 * READ from the tail WRITE. 33 * 34 * For B a WMB is sufficient since it separates two WRITEs, 35 * and for C an RMB is sufficient since it separates two READs. 36 * 37 * Note, instead of B, C, D we could also use smp_store_release() 38 * in B and D as well as smp_load_acquire() in C. 39 * 40 * However, this optimization does not make sense for all kernel 41 * supported architectures since for a fair number it would 42 * resolve into READ_ONCE() + smp_mb() pair for smp_load_acquire(), 43 * and smp_mb() + WRITE_ONCE() pair for smp_store_release(). 44 * 45 * Thus for those smp_wmb() in B and smp_rmb() in C would still 46 * be less expensive. For the case of D this has either the same 47 * cost or is less expensive, for example, due to TSO x86 can 48 * avoid the CPU barrier entirely. 49 */ 50 51 static inline u64 ring_buffer_read_head(struct perf_event_mmap_page *base) 52 { 53 /* 54 * Architectures where smp_load_acquire() does not fallback to 55 * READ_ONCE() + smp_mb() pair. 56 */ 57 #if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \ 58 defined(__ia64__) || defined(__sparc__) && defined(__arch64__) 59 return smp_load_acquire(&base->data_head); 60 #else 61 u64 head = READ_ONCE(base->data_head); 62 63 smp_rmb(); 64 return head; 65 #endif 66 } 67 68 static inline void ring_buffer_write_tail(struct perf_event_mmap_page *base, 69 u64 tail) 70 { 71 smp_store_release(&base->data_tail, tail); 72 } 73 74 #endif /* _TOOLS_LINUX_RING_BUFFER_H_ */