1/*
2 * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
3 *
4 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation;
8 * version 2.1 of the License (not later!)
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 * GNU Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
20 */
21#include <stdio.h>
22#include <stdlib.h>
23#include <string.h>
24
25#include "kbuffer.h"
26
/*
 * Flag bits found in the commit word of a sub-buffer header (tested in
 * kbuffer_load_subbuffer()).  They are unsigned constants so that the
 * shift into bit 31 is well defined and the masks do not sign-extend
 * when combined with a 64-bit value.
 */
#define MISSING_EVENTS (1U << 31)
#define MISSING_STORED (1U << 30)

/* Low 27 bits of the commit word; stored as the kbuffer data size */
#define COMMIT_MASK ((1 << 27) - 1)
31
/* Bits kept in kbuffer->flags */
enum {
	KBUFFER_FL_HOST_BIG_ENDIAN	= 0x1,	/* host_is_bigendian() was true */
	KBUFFER_FL_BIG_ENDIAN		= 0x2,	/* buffer data is big endian */
	KBUFFER_FL_LONG_8		= 0x4,	/* buffer long words are 8 bytes */
	KBUFFER_FL_OLD_FORMAT		= 0x8,	/* kbuffer_set_old_format() called */
};

/* Both endianness bits; see do_swap() */
#define ENDIAN_MASK (KBUFFER_FL_BIG_ENDIAN | KBUFFER_FL_HOST_BIG_ENDIAN)
40
/** kbuffer - parsing state for one loaded sub-buffer
 * @timestamp		- timestamp of the current event
 * @lost_events		- # of events lost between this subbuffer and the
 *			   previous one (-1 if lost but the count was not stored)
 * @flags		- KBUFFER_FL_* bits describing the buffer
 * @subbuffer		- pointer to the sub-buffer page
 * @data		- pointer to the start of data on the sub-buffer page
 * @index		- offset from @data to the payload of the current event
 * @curr		- offset from @data to the start of the current event
 *			   (includes metadata)
 * @next		- offset from @data to the start of the next event
 * @size		- number of data bytes available at @data
 * @start		- offset from @subbuffer where @data lives
 *
 * @read_4		- reads 4 raw bytes (may byte swap)
 * @read_8		- reads 8 raw bytes (may byte swap)
 * @read_long		- reads a long word (4 or 8 bytes, swapped if needed)
 * @next_event		- advances to the next event; returns a negative value
 *			   when no event is left (new or old format parser)
 */
struct kbuffer {
	/* event bookkeeping */
	unsigned long long 	timestamp;
	long long		lost_events;
	unsigned long		flags;

	/* location of the loaded page and its data area */
	void			*subbuffer;
	void			*data;

	/* offsets, all relative to @data except @start */
	unsigned int		index;
	unsigned int		curr;
	unsigned int		next;
	unsigned int		size;
	unsigned int		start;

	/* accessors selected when the kbuffer is set up */
	unsigned int (*read_4)(void *ptr);
	unsigned long long (*read_8)(void *ptr);
	unsigned long long (*read_long)(struct kbuffer *kbuf, void *ptr);
	int (*next_event)(struct kbuffer *kbuf);
};
75
/* Allocate @size bytes of zero-filled memory; returns what calloc() returns. */
static void *zmalloc(size_t size)
{
	void *mem = calloc(1, size);

	return mem;
}
80
/*
 * Returns 1 if the host stores the most significant byte first,
 * 0 otherwise.
 *
 * The probe bytes are copied into an integer with memcpy() instead of
 * being read through a casted pointer, which avoids both alignment and
 * aliasing problems with the byte array.
 */
static int host_is_bigendian(void)
{
	static const unsigned char probe[] = { 0x1, 0x2, 0x3, 0x4 };
	unsigned int value = 0;

	memcpy(&value, probe, sizeof(probe));
	return value == 0x01020304;
}
89
90static int do_swap(struct kbuffer *kbuf)
91{
92	return ((kbuf->flags & KBUFFER_FL_HOST_BIG_ENDIAN) + kbuf->flags) &
93		ENDIAN_MASK;
94}
95
/*
 * Read 8 bytes at @ptr in host byte order.
 *
 * memcpy() is used so that @ptr does not have to be 8-byte aligned.
 */
static unsigned long long __read_8(void *ptr)
{
	unsigned long long data;

	memcpy(&data, ptr, sizeof(data));

	return data;
}
102
/*
 * Read 8 bytes at @ptr and return them with the byte order reversed.
 *
 * The load goes through memcpy() so that @ptr does not have to be
 * 8-byte aligned.
 */
static unsigned long long __read_8_sw(void *ptr)
{
	unsigned long long data;

	memcpy(&data, ptr, sizeof(data));

	return ((data & 0x00000000000000ffULL) << 56) |
	       ((data & 0x000000000000ff00ULL) << 40) |
	       ((data & 0x0000000000ff0000ULL) << 24) |
	       ((data & 0x00000000ff000000ULL) << 8) |
	       ((data & 0x000000ff00000000ULL) >> 8) |
	       ((data & 0x0000ff0000000000ULL) >> 24) |
	       ((data & 0x00ff000000000000ULL) >> 40) |
	       ((data & 0xff00000000000000ULL) >> 56);
}
119
/*
 * Read 4 bytes at @ptr in host byte order.
 *
 * memcpy() is used so that @ptr does not have to be 4-byte aligned.
 */
static unsigned int __read_4(void *ptr)
{
	unsigned int data;

	memcpy(&data, ptr, sizeof(data));

	return data;
}
126
/*
 * Read 4 bytes at @ptr and return them with the byte order reversed.
 *
 * The load goes through memcpy() so that @ptr does not have to be
 * 4-byte aligned.
 */
static unsigned int __read_4_sw(void *ptr)
{
	unsigned int data;

	memcpy(&data, ptr, sizeof(data));

	return ((data & 0x000000ffU) << 24) |
	       ((data & 0x0000ff00U) << 8) |
	       ((data & 0x00ff0000U) >> 8) |
	       ((data & 0xff000000U) >> 24);
}
139
140static unsigned long long read_8(struct kbuffer *kbuf, void *ptr)
141{
142	return kbuf->read_8(ptr);
143}
144
145static unsigned int read_4(struct kbuffer *kbuf, void *ptr)
146{
147	return kbuf->read_4(ptr);
148}
149
150static unsigned long long __read_long_8(struct kbuffer *kbuf, void *ptr)
151{
152	return kbuf->read_8(ptr);
153}
154
155static unsigned long long __read_long_4(struct kbuffer *kbuf, void *ptr)
156{
157	return kbuf->read_4(ptr);
158}
159
160static unsigned long long read_long(struct kbuffer *kbuf, void *ptr)
161{
162	return kbuf->read_long(kbuf, ptr);
163}
164
165static int calc_index(struct kbuffer *kbuf, void *ptr)
166{
167	return (unsigned long)ptr - (unsigned long)kbuf->data;
168}
169
/* Defined below; declared early because kbuffer_alloc() installs it. */
static int __next_event(struct kbuffer *kbuf);
171
172/**
173 * kbuffer_alloc - allocat a new kbuffer
174 * @size;	enum to denote size of word
175 * @endian:	enum to denote endianness
176 *
177 * Allocates and returns a new kbuffer.
178 */
179struct kbuffer *
180kbuffer_alloc(enum kbuffer_long_size size, enum kbuffer_endian endian)
181{
182	struct kbuffer *kbuf;
183	int flags = 0;
184
185	switch (size) {
186	case KBUFFER_LSIZE_4:
187		break;
188	case KBUFFER_LSIZE_8:
189		flags |= KBUFFER_FL_LONG_8;
190		break;
191	default:
192		return NULL;
193	}
194
195	switch (endian) {
196	case KBUFFER_ENDIAN_LITTLE:
197		break;
198	case KBUFFER_ENDIAN_BIG:
199		flags |= KBUFFER_FL_BIG_ENDIAN;
200		break;
201	default:
202		return NULL;
203	}
204
205	kbuf = zmalloc(sizeof(*kbuf));
206	if (!kbuf)
207		return NULL;
208
209	kbuf->flags = flags;
210
211	if (host_is_bigendian())
212		kbuf->flags |= KBUFFER_FL_HOST_BIG_ENDIAN;
213
214	if (do_swap(kbuf)) {
215		kbuf->read_8 = __read_8_sw;
216		kbuf->read_4 = __read_4_sw;
217	} else {
218		kbuf->read_8 = __read_8;
219		kbuf->read_4 = __read_4;
220	}
221
222	if (kbuf->flags & KBUFFER_FL_LONG_8)
223		kbuf->read_long = __read_long_8;
224	else
225		kbuf->read_long = __read_long_4;
226
227	/* May be changed by kbuffer_set_old_format() */
228	kbuf->next_event = __next_event;
229
230	return kbuf;
231}
232
/** kbuffer_free - release a kbuffer obtained from kbuffer_alloc()
 * @kbuf:	The kbuffer to free (may be NULL)
 *
 * Only the kbuffer itself is released; nothing it points to is freed.
 */
void kbuffer_free(struct kbuffer *kbuf)
{
	/* free() accepts NULL, so no guard is needed */
	free(kbuf);
}
242
243static unsigned int type4host(struct kbuffer *kbuf,
244			      unsigned int type_len_ts)
245{
246	if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
247		return (type_len_ts >> 29) & 3;
248	else
249		return type_len_ts & 3;
250}
251
252static unsigned int len4host(struct kbuffer *kbuf,
253			     unsigned int type_len_ts)
254{
255	if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
256		return (type_len_ts >> 27) & 7;
257	else
258		return (type_len_ts >> 2) & 7;
259}
260
261static unsigned int type_len4host(struct kbuffer *kbuf,
262				  unsigned int type_len_ts)
263{
264	if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
265		return (type_len_ts >> 27) & ((1 << 5) - 1);
266	else
267		return type_len_ts & ((1 << 5) - 1);
268}
269
270static unsigned int ts4host(struct kbuffer *kbuf,
271			    unsigned int type_len_ts)
272{
273	if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
274		return type_len_ts & ((1 << 27) - 1);
275	else
276		return type_len_ts >> 5;
277}
278
/*
 * Linux 2.6.30 and earlier (not much earlier) had a different
 * ring buffer format. It should be obsolete, but we handle it anyway.
 *
 * These are the values of the 2-bit type field in that format.
 */
enum old_ring_buffer_type {
	OLD_RINGBUF_TYPE_PADDING	= 0,
	OLD_RINGBUF_TYPE_TIME_EXTEND	= 1,
	OLD_RINGBUF_TYPE_TIME_STAMP	= 2,
	OLD_RINGBUF_TYPE_DATA		= 3,
};
289
290static unsigned int old_update_pointers(struct kbuffer *kbuf)
291{
292	unsigned long long extend;
293	unsigned int type_len_ts;
294	unsigned int type;
295	unsigned int len;
296	unsigned int delta;
297	unsigned int length;
298	void *ptr = kbuf->data + kbuf->curr;
299
300	type_len_ts = read_4(kbuf, ptr);
301	ptr += 4;
302
303	type = type4host(kbuf, type_len_ts);
304	len = len4host(kbuf, type_len_ts);
305	delta = ts4host(kbuf, type_len_ts);
306
307	switch (type) {
308	case OLD_RINGBUF_TYPE_PADDING:
309		kbuf->next = kbuf->size;
310		return 0;
311
312	case OLD_RINGBUF_TYPE_TIME_EXTEND:
313		extend = read_4(kbuf, ptr);
314		extend <<= TS_SHIFT;
315		extend += delta;
316		delta = extend;
317		ptr += 4;
318		break;
319
320	case OLD_RINGBUF_TYPE_TIME_STAMP:
321		/* should never happen! */
322		kbuf->curr = kbuf->size;
323		kbuf->next = kbuf->size;
324		kbuf->index = kbuf->size;
325		return -1;
326	default:
327		if (len)
328			length = len * 4;
329		else {
330			length = read_4(kbuf, ptr);
331			length -= 4;
332			ptr += 4;
333		}
334		break;
335	}
336
337	kbuf->timestamp += delta;
338	kbuf->index = calc_index(kbuf, ptr);
339	kbuf->next = kbuf->index + length;
340
341	return type;
342}
343
344static int __old_next_event(struct kbuffer *kbuf)
345{
346	int type;
347
348	do {
349		kbuf->curr = kbuf->next;
350		if (kbuf->next >= kbuf->size)
351			return -1;
352		type = old_update_pointers(kbuf);
353	} while (type == OLD_RINGBUF_TYPE_TIME_EXTEND || type == OLD_RINGBUF_TYPE_PADDING);
354
355	return 0;
356}
357
358static unsigned int
359translate_data(struct kbuffer *kbuf, void *data, void **rptr,
360	       unsigned long long *delta, int *length)
361{
362	unsigned long long extend;
363	unsigned int type_len_ts;
364	unsigned int type_len;
365
366	type_len_ts = read_4(kbuf, data);
367	data += 4;
368
369	type_len = type_len4host(kbuf, type_len_ts);
370	*delta = ts4host(kbuf, type_len_ts);
371
372	switch (type_len) {
373	case KBUFFER_TYPE_PADDING:
374		*length = read_4(kbuf, data);
375		break;
376
377	case KBUFFER_TYPE_TIME_EXTEND:
378		extend = read_4(kbuf, data);
379		data += 4;
380		extend <<= TS_SHIFT;
381		extend += *delta;
382		*delta = extend;
383		*length = 0;
384		break;
385
386	case KBUFFER_TYPE_TIME_STAMP:
387		data += 12;
388		*length = 0;
389		break;
390	case 0:
391		*length = read_4(kbuf, data) - 4;
392		*length = (*length + 3) & ~3;
393		data += 4;
394		break;
395	default:
396		*length = type_len * 4;
397		break;
398	}
399
400	*rptr = data;
401
402	return type_len;
403}
404
405static unsigned int update_pointers(struct kbuffer *kbuf)
406{
407	unsigned long long delta;
408	unsigned int type_len;
409	int length;
410	void *ptr = kbuf->data + kbuf->curr;
411
412	type_len = translate_data(kbuf, ptr, &ptr, &delta, &length);
413
414	kbuf->timestamp += delta;
415	kbuf->index = calc_index(kbuf, ptr);
416	kbuf->next = kbuf->index + length;
417
418	return type_len;
419}
420
421/**
422 * kbuffer_translate_data - read raw data to get a record
423 * @swap:	Set to 1 if bytes in words need to be swapped when read
424 * @data:	The raw data to read
425 * @size:	Address to store the size of the event data.
426 *
427 * Returns a pointer to the event data. To determine the entire
428 * record size (record metadata + data) just add the difference between
429 * @data and the returned value to @size.
430 */
431void *kbuffer_translate_data(int swap, void *data, unsigned int *size)
432{
433	unsigned long long delta;
434	struct kbuffer kbuf;
435	int type_len;
436	int length;
437	void *ptr;
438
439	if (swap) {
440		kbuf.read_8 = __read_8_sw;
441		kbuf.read_4 = __read_4_sw;
442		kbuf.flags = host_is_bigendian() ? 0 : KBUFFER_FL_BIG_ENDIAN;
443	} else {
444		kbuf.read_8 = __read_8;
445		kbuf.read_4 = __read_4;
446		kbuf.flags = host_is_bigendian() ? KBUFFER_FL_BIG_ENDIAN: 0;
447	}
448
449	type_len = translate_data(&kbuf, data, &ptr, &delta, &length);
450	switch (type_len) {
451	case KBUFFER_TYPE_PADDING:
452	case KBUFFER_TYPE_TIME_EXTEND:
453	case KBUFFER_TYPE_TIME_STAMP:
454		return NULL;
455	};
456
457	*size = length;
458
459	return ptr;
460}
461
462static int __next_event(struct kbuffer *kbuf)
463{
464	int type;
465
466	do {
467		kbuf->curr = kbuf->next;
468		if (kbuf->next >= kbuf->size)
469			return -1;
470		type = update_pointers(kbuf);
471	} while (type == KBUFFER_TYPE_TIME_EXTEND || type == KBUFFER_TYPE_PADDING);
472
473	return 0;
474}
475
476static int next_event(struct kbuffer *kbuf)
477{
478	return kbuf->next_event(kbuf);
479}
480
481/**
482 * kbuffer_next_event - increment the current pointer
483 * @kbuf:	The kbuffer to read
484 * @ts:		Address to store the next record's timestamp (may be NULL to ignore)
485 *
486 * Increments the pointers into the subbuffer of the kbuffer to point to the
487 * next event so that the next kbuffer_read_event() will return a
488 * new event.
489 *
490 * Returns the data of the next event if a new event exists on the subbuffer,
491 * NULL otherwise.
492 */
493void *kbuffer_next_event(struct kbuffer *kbuf, unsigned long long *ts)
494{
495	int ret;
496
497	if (!kbuf || !kbuf->subbuffer)
498		return NULL;
499
500	ret = next_event(kbuf);
501	if (ret < 0)
502		return NULL;
503
504	if (ts)
505		*ts = kbuf->timestamp;
506
507	return kbuf->data + kbuf->index;
508}
509
510/**
511 * kbuffer_load_subbuffer - load a new subbuffer into the kbuffer
512 * @kbuf:	The kbuffer to load
513 * @subbuffer:	The subbuffer to load into @kbuf.
514 *
515 * Load a new subbuffer (page) into @kbuf. This will reset all
516 * the pointers and update the @kbuf timestamp. The next read will
517 * return the first event on @subbuffer.
518 *
519 * Returns 0 on succes, -1 otherwise.
520 */
521int kbuffer_load_subbuffer(struct kbuffer *kbuf, void *subbuffer)
522{
523	unsigned long long flags;
524	void *ptr = subbuffer;
525
526	if (!kbuf || !subbuffer)
527		return -1;
528
529	kbuf->subbuffer = subbuffer;
530
531	kbuf->timestamp = read_8(kbuf, ptr);
532	ptr += 8;
533
534	kbuf->curr = 0;
535
536	if (kbuf->flags & KBUFFER_FL_LONG_8)
537		kbuf->start = 16;
538	else
539		kbuf->start = 12;
540
541	kbuf->data = subbuffer + kbuf->start;
542
543	flags = read_long(kbuf, ptr);
544	kbuf->size = (unsigned int)flags & COMMIT_MASK;
545
546	if (flags & MISSING_EVENTS) {
547		if (flags & MISSING_STORED) {
548			ptr = kbuf->data + kbuf->size;
549			kbuf->lost_events = read_long(kbuf, ptr);
550		} else
551			kbuf->lost_events = -1;
552	} else
553		kbuf->lost_events = 0;
554
555	kbuf->index = 0;
556	kbuf->next = 0;
557
558	next_event(kbuf);
559
560	return 0;
561}
562
563/**
564 * kbuffer_read_event - read the next event in the kbuffer subbuffer
565 * @kbuf:	The kbuffer to read from
566 * @ts:		The address to store the timestamp of the event (may be NULL to ignore)
567 *
568 * Returns a pointer to the data part of the current event.
569 * NULL if no event is left on the subbuffer.
570 */
571void *kbuffer_read_event(struct kbuffer *kbuf, unsigned long long *ts)
572{
573	if (!kbuf || !kbuf->subbuffer)
574		return NULL;
575
576	if (kbuf->curr >= kbuf->size)
577		return NULL;
578
579	if (ts)
580		*ts = kbuf->timestamp;
581	return kbuf->data + kbuf->index;
582}
583
584/**
585 * kbuffer_timestamp - Return the timestamp of the current event
586 * @kbuf:	The kbuffer to read from
587 *
588 * Returns the timestamp of the current (next) event.
589 */
590unsigned long long kbuffer_timestamp(struct kbuffer *kbuf)
591{
592	return kbuf->timestamp;
593}
594
595/**
596 * kbuffer_read_at_offset - read the event that is at offset
597 * @kbuf:	The kbuffer to read from
598 * @offset:	The offset into the subbuffer
599 * @ts:		The address to store the timestamp of the event (may be NULL to ignore)
600 *
601 * The @offset must be an index from the @kbuf subbuffer beginning.
602 * If @offset is bigger than the stored subbuffer, NULL will be returned.
603 *
604 * Returns the data of the record that is at @offset. Note, @offset does
605 * not need to be the start of the record, the offset just needs to be
606 * in the record (or beginning of it).
607 *
608 * Note, the kbuf timestamp and pointers are updated to the
609 * returned record. That is, kbuffer_read_event() will return the same
610 * data and timestamp, and kbuffer_next_event() will increment from
611 * this record.
612 */
613void *kbuffer_read_at_offset(struct kbuffer *kbuf, int offset,
614			     unsigned long long *ts)
615{
616	void *data;
617
618	if (offset < kbuf->start)
619		offset = 0;
620	else
621		offset -= kbuf->start;
622
623	/* Reset the buffer */
624	kbuffer_load_subbuffer(kbuf, kbuf->subbuffer);
625
626	while (kbuf->curr < offset) {
627		data = kbuffer_next_event(kbuf, ts);
628		if (!data)
629			break;
630	}
631
632	return data;
633}
634
635/**
636 * kbuffer_subbuffer_size - the size of the loaded subbuffer
637 * @kbuf:	The kbuffer to read from
638 *
639 * Returns the size of the subbuffer. Note, this size is
640 * where the last event resides. The stored subbuffer may actually be
641 * bigger due to padding and such.
642 */
643int kbuffer_subbuffer_size(struct kbuffer *kbuf)
644{
645	return kbuf->size;
646}
647
648/**
649 * kbuffer_curr_index - Return the index of the record
650 * @kbuf:	The kbuffer to read from
651 *
652 * Returns the index from the start of the data part of
653 * the subbuffer to the current location. Note this is not
654 * from the start of the subbuffer. An index of zero will
655 * point to the first record. Use kbuffer_curr_offset() for
656 * the actually offset (that can be used by kbuffer_read_at_offset())
657 */
658int kbuffer_curr_index(struct kbuffer *kbuf)
659{
660	return kbuf->curr;
661}
662
663/**
664 * kbuffer_curr_offset - Return the offset of the record
665 * @kbuf:	The kbuffer to read from
666 *
667 * Returns the offset from the start of the subbuffer to the
668 * current location.
669 */
670int kbuffer_curr_offset(struct kbuffer *kbuf)
671{
672	return kbuf->curr + kbuf->start;
673}
674
675/**
676 * kbuffer_event_size - return the size of the event data
677 * @kbuf:	The kbuffer to read
678 *
679 * Returns the size of the event data (the payload not counting
680 * the meta data of the record) of the current event.
681 */
682int kbuffer_event_size(struct kbuffer *kbuf)
683{
684	return kbuf->next - kbuf->index;
685}
686
687/**
688 * kbuffer_curr_size - return the size of the entire record
689 * @kbuf:	The kbuffer to read
690 *
691 * Returns the size of the entire record (meta data and payload)
692 * of the current event.
693 */
694int kbuffer_curr_size(struct kbuffer *kbuf)
695{
696	return kbuf->next - kbuf->curr;
697}
698
699/**
700 * kbuffer_missed_events - return the # of missed events from last event.
701 * @kbuf: 	The kbuffer to read from
702 *
703 * Returns the # of missed events (if recorded) before the current
704 * event. Note, only events on the beginning of a subbuffer can
705 * have missed events, all other events within the buffer will be
706 * zero.
707 */
708int kbuffer_missed_events(struct kbuffer *kbuf)
709{
710	/* Only the first event can have missed events */
711	if (kbuf->curr)
712		return 0;
713
714	return kbuf->lost_events;
715}
716
717/**
718 * kbuffer_set_old_forma - set the kbuffer to use the old format parsing
719 * @kbuf:	The kbuffer to set
720 *
721 * This is obsolete (or should be). The first kernels to use the
722 * new ring buffer had a slightly different ring buffer format
723 * (2.6.30 and earlier). It is still somewhat supported by kbuffer,
724 * but should not be counted on in the future.
725 */
726void kbuffer_set_old_format(struct kbuffer *kbuf)
727{
728	kbuf->flags |= KBUFFER_FL_OLD_FORMAT;
729
730	kbuf->next_event = __old_next_event;
731}
732
733/**
734 * kbuffer_start_of_data - return offset of where data starts on subbuffer
735 * @kbuf:	The kbuffer
736 *
737 * Returns the location on the subbuffer where the data starts.
738 */
739int kbuffer_start_of_data(struct kbuffer *kbuf)
740{
741	return kbuf->start;
742}
743