1# export-to-postgresql.py: export perf data to a postgresql database
2# Copyright (c) 2014, Intel Corporation.
3#
4# This program is free software; you can redistribute it and/or modify it
5# under the terms and conditions of the GNU General Public License,
6# version 2, as published by the Free Software Foundation.
7#
8# This program is distributed in the hope it will be useful, but WITHOUT
9# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11# more details.
12
13import os
14import sys
15import struct
16import datetime
17
18from PySide.QtSql import *
19
20# Need to access PostgreSQL C library directly to use COPY FROM STDIN
21from ctypes import *
22libpq = CDLL("libpq.so.5")
23PQconnectdb = libpq.PQconnectdb
24PQconnectdb.restype = c_void_p
25PQfinish = libpq.PQfinish
26PQstatus = libpq.PQstatus
27PQexec = libpq.PQexec
28PQexec.restype = c_void_p
29PQresultStatus = libpq.PQresultStatus
30PQputCopyData = libpq.PQputCopyData
31PQputCopyData.argtypes = [ c_void_p, c_void_p, c_int ]
32PQputCopyEnd = libpq.PQputCopyEnd
33PQputCopyEnd.argtypes = [ c_void_p, c_void_p ]
34
35sys.path.append(os.environ['PERF_EXEC_PATH'] + \
36	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
37
38# These perf imports are not used at present
39#from perf_trace_context import *
40#from Core import *
41
42perf_db_export_mode = True
43perf_db_export_calls = False
44
45def usage():
46	print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>]"
47	print >> sys.stderr, "where:	columns		'all' or 'branches'"
48	print >> sys.stderr, "		calls		'calls' => create calls table"
49	raise Exception("Too few arguments")
50
51if (len(sys.argv) < 2):
52	usage()
53
54dbname = sys.argv[1]
55
56if (len(sys.argv) >= 3):
57	columns = sys.argv[2]
58else:
59	columns = "all"
60
61if columns not in ("all", "branches"):
62	usage()
63
64branches = (columns == "branches")
65
66if (len(sys.argv) >= 4):
67	if (sys.argv[3] == "calls"):
68		perf_db_export_calls = True
69	else:
70		usage()
71
72output_dir_name = os.getcwd() + "/" + dbname + "-perf-data"
73os.mkdir(output_dir_name)
74
75def do_query(q, s):
76	if (q.exec_(s)):
77		return
78	raise Exception("Query failed: " + q.lastError().text())
79
80print datetime.datetime.today(), "Creating database..."
81
82db = QSqlDatabase.addDatabase('QPSQL')
83query = QSqlQuery(db)
84db.setDatabaseName('postgres')
85db.open()
86try:
87	do_query(query, 'CREATE DATABASE ' + dbname)
88except:
89	os.rmdir(output_dir_name)
90	raise
91query.finish()
92query.clear()
93db.close()
94
95db.setDatabaseName(dbname)
96db.open()
97
98query = QSqlQuery(db)
99do_query(query, 'SET client_min_messages TO WARNING')
100
101do_query(query, 'CREATE TABLE selected_events ('
102		'id		bigint		NOT NULL,'
103		'name		varchar(80))')
104do_query(query, 'CREATE TABLE machines ('
105		'id		bigint		NOT NULL,'
106		'pid		integer,'
107		'root_dir 	varchar(4096))')
108do_query(query, 'CREATE TABLE threads ('
109		'id		bigint		NOT NULL,'
110		'machine_id	bigint,'
111		'process_id	bigint,'
112		'pid		integer,'
113		'tid		integer)')
114do_query(query, 'CREATE TABLE comms ('
115		'id		bigint		NOT NULL,'
116		'comm		varchar(16))')
117do_query(query, 'CREATE TABLE comm_threads ('
118		'id		bigint		NOT NULL,'
119		'comm_id	bigint,'
120		'thread_id	bigint)')
121do_query(query, 'CREATE TABLE dsos ('
122		'id		bigint		NOT NULL,'
123		'machine_id	bigint,'
124		'short_name	varchar(256),'
125		'long_name	varchar(4096),'
126		'build_id	varchar(64))')
127do_query(query, 'CREATE TABLE symbols ('
128		'id		bigint		NOT NULL,'
129		'dso_id		bigint,'
130		'sym_start	bigint,'
131		'sym_end	bigint,'
132		'binding	integer,'
133		'name		varchar(2048))')
134do_query(query, 'CREATE TABLE branch_types ('
135		'id		integer		NOT NULL,'
136		'name		varchar(80))')
137
138if branches:
139	do_query(query, 'CREATE TABLE samples ('
140		'id		bigint		NOT NULL,'
141		'evsel_id	bigint,'
142		'machine_id	bigint,'
143		'thread_id	bigint,'
144		'comm_id	bigint,'
145		'dso_id		bigint,'
146		'symbol_id	bigint,'
147		'sym_offset	bigint,'
148		'ip		bigint,'
149		'time		bigint,'
150		'cpu		integer,'
151		'to_dso_id	bigint,'
152		'to_symbol_id	bigint,'
153		'to_sym_offset	bigint,'
154		'to_ip		bigint,'
155		'branch_type	integer,'
156		'in_tx		boolean)')
157else:
158	do_query(query, 'CREATE TABLE samples ('
159		'id		bigint		NOT NULL,'
160		'evsel_id	bigint,'
161		'machine_id	bigint,'
162		'thread_id	bigint,'
163		'comm_id	bigint,'
164		'dso_id		bigint,'
165		'symbol_id	bigint,'
166		'sym_offset	bigint,'
167		'ip		bigint,'
168		'time		bigint,'
169		'cpu		integer,'
170		'to_dso_id	bigint,'
171		'to_symbol_id	bigint,'
172		'to_sym_offset	bigint,'
173		'to_ip		bigint,'
174		'period		bigint,'
175		'weight		bigint,'
176		'transaction	bigint,'
177		'data_src	bigint,'
178		'branch_type	integer,'
179		'in_tx		boolean)')
180
181if perf_db_export_calls:
182	do_query(query, 'CREATE TABLE call_paths ('
183		'id		bigint		NOT NULL,'
184		'parent_id	bigint,'
185		'symbol_id	bigint,'
186		'ip		bigint)')
187	do_query(query, 'CREATE TABLE calls ('
188		'id		bigint		NOT NULL,'
189		'thread_id	bigint,'
190		'comm_id	bigint,'
191		'call_path_id	bigint,'
192		'call_time	bigint,'
193		'return_time	bigint,'
194		'branch_count	bigint,'
195		'call_id	bigint,'
196		'return_id	bigint,'
197		'parent_call_path_id	bigint,'
198		'flags		integer)')
199
200do_query(query, 'CREATE VIEW samples_view AS '
201	'SELECT '
202		'id,'
203		'time,'
204		'cpu,'
205		'(SELECT pid FROM threads WHERE id = thread_id) AS pid,'
206		'(SELECT tid FROM threads WHERE id = thread_id) AS tid,'
207		'(SELECT comm FROM comms WHERE id = comm_id) AS command,'
208		'(SELECT name FROM selected_events WHERE id = evsel_id) AS event,'
209		'to_hex(ip) AS ip_hex,'
210		'(SELECT name FROM symbols WHERE id = symbol_id) AS symbol,'
211		'sym_offset,'
212		'(SELECT short_name FROM dsos WHERE id = dso_id) AS dso_short_name,'
213		'to_hex(to_ip) AS to_ip_hex,'
214		'(SELECT name FROM symbols WHERE id = to_symbol_id) AS to_symbol,'
215		'to_sym_offset,'
216		'(SELECT short_name FROM dsos WHERE id = to_dso_id) AS to_dso_short_name,'
217		'(SELECT name FROM branch_types WHERE id = branch_type) AS branch_type_name,'
218		'in_tx'
219	' FROM samples')
220
221
222file_header = struct.pack("!11sii", "PGCOPY\n\377\r\n\0", 0, 0)
223file_trailer = "\377\377"
224
225def open_output_file(file_name):
226	path_name = output_dir_name + "/" + file_name
227	file = open(path_name, "w+")
228	file.write(file_header)
229	return file
230
231def close_output_file(file):
232	file.write(file_trailer)
233	file.close()
234
235def copy_output_file_direct(file, table_name):
236	close_output_file(file)
237	sql = "COPY " + table_name + " FROM '" + file.name + "' (FORMAT 'binary')"
238	do_query(query, sql)
239
240# Use COPY FROM STDIN because security may prevent postgres from accessing the files directly
241def copy_output_file(file, table_name):
242	conn = PQconnectdb("dbname = " + dbname)
243	if (PQstatus(conn)):
244		raise Exception("COPY FROM STDIN PQconnectdb failed")
245	file.write(file_trailer)
246	file.seek(0)
247	sql = "COPY " + table_name + " FROM STDIN (FORMAT 'binary')"
248	res = PQexec(conn, sql)
249	if (PQresultStatus(res) != 4):
250		raise Exception("COPY FROM STDIN PQexec failed")
251	data = file.read(65536)
252	while (len(data)):
253		ret = PQputCopyData(conn, data, len(data))
254		if (ret != 1):
255			raise Exception("COPY FROM STDIN PQputCopyData failed, error " + str(ret))
256		data = file.read(65536)
257	ret = PQputCopyEnd(conn, None)
258	if (ret != 1):
259		raise Exception("COPY FROM STDIN PQputCopyEnd failed, error " + str(ret))
260	PQfinish(conn)
261
262def remove_output_file(file):
263	name = file.name
264	file.close()
265	os.unlink(name)
266
267evsel_file		= open_output_file("evsel_table.bin")
268machine_file		= open_output_file("machine_table.bin")
269thread_file		= open_output_file("thread_table.bin")
270comm_file		= open_output_file("comm_table.bin")
271comm_thread_file	= open_output_file("comm_thread_table.bin")
272dso_file		= open_output_file("dso_table.bin")
273symbol_file		= open_output_file("symbol_table.bin")
274branch_type_file	= open_output_file("branch_type_table.bin")
275sample_file		= open_output_file("sample_table.bin")
276if perf_db_export_calls:
277	call_path_file		= open_output_file("call_path_table.bin")
278	call_file		= open_output_file("call_table.bin")
279
280def trace_begin():
281	print datetime.datetime.today(), "Writing to intermediate files..."
282	# id == 0 means unknown.  It is easier to create records for them than replace the zeroes with NULLs
283	evsel_table(0, "unknown")
284	machine_table(0, 0, "unknown")
285	thread_table(0, 0, 0, -1, -1)
286	comm_table(0, "unknown")
287	dso_table(0, 0, "unknown", "unknown", "")
288	symbol_table(0, 0, 0, 0, 0, "unknown")
289	sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
290	if perf_db_export_calls:
291		call_path_table(0, 0, 0, 0)
292
293unhandled_count = 0
294
295def trace_end():
296	print datetime.datetime.today(), "Copying to database..."
297	copy_output_file(evsel_file,		"selected_events")
298	copy_output_file(machine_file,		"machines")
299	copy_output_file(thread_file,		"threads")
300	copy_output_file(comm_file,		"comms")
301	copy_output_file(comm_thread_file,	"comm_threads")
302	copy_output_file(dso_file,		"dsos")
303	copy_output_file(symbol_file,		"symbols")
304	copy_output_file(branch_type_file,	"branch_types")
305	copy_output_file(sample_file,		"samples")
306	if perf_db_export_calls:
307		copy_output_file(call_path_file,	"call_paths")
308		copy_output_file(call_file,		"calls")
309
310	print datetime.datetime.today(), "Removing intermediate files..."
311	remove_output_file(evsel_file)
312	remove_output_file(machine_file)
313	remove_output_file(thread_file)
314	remove_output_file(comm_file)
315	remove_output_file(comm_thread_file)
316	remove_output_file(dso_file)
317	remove_output_file(symbol_file)
318	remove_output_file(branch_type_file)
319	remove_output_file(sample_file)
320	if perf_db_export_calls:
321		remove_output_file(call_path_file)
322		remove_output_file(call_file)
323	os.rmdir(output_dir_name)
324	print datetime.datetime.today(), "Adding primary keys"
325	do_query(query, 'ALTER TABLE selected_events ADD PRIMARY KEY (id)')
326	do_query(query, 'ALTER TABLE machines        ADD PRIMARY KEY (id)')
327	do_query(query, 'ALTER TABLE threads         ADD PRIMARY KEY (id)')
328	do_query(query, 'ALTER TABLE comms           ADD PRIMARY KEY (id)')
329	do_query(query, 'ALTER TABLE comm_threads    ADD PRIMARY KEY (id)')
330	do_query(query, 'ALTER TABLE dsos            ADD PRIMARY KEY (id)')
331	do_query(query, 'ALTER TABLE symbols         ADD PRIMARY KEY (id)')
332	do_query(query, 'ALTER TABLE branch_types    ADD PRIMARY KEY (id)')
333	do_query(query, 'ALTER TABLE samples         ADD PRIMARY KEY (id)')
334	if perf_db_export_calls:
335		do_query(query, 'ALTER TABLE call_paths      ADD PRIMARY KEY (id)')
336		do_query(query, 'ALTER TABLE calls           ADD PRIMARY KEY (id)')
337
338	print datetime.datetime.today(), "Adding foreign keys"
339	do_query(query, 'ALTER TABLE threads '
340					'ADD CONSTRAINT machinefk  FOREIGN KEY (machine_id)   REFERENCES machines   (id),'
341					'ADD CONSTRAINT processfk  FOREIGN KEY (process_id)   REFERENCES threads    (id)')
342	do_query(query, 'ALTER TABLE comm_threads '
343					'ADD CONSTRAINT commfk     FOREIGN KEY (comm_id)      REFERENCES comms      (id),'
344					'ADD CONSTRAINT threadfk   FOREIGN KEY (thread_id)    REFERENCES threads    (id)')
345	do_query(query, 'ALTER TABLE dsos '
346					'ADD CONSTRAINT machinefk  FOREIGN KEY (machine_id)   REFERENCES machines   (id)')
347	do_query(query, 'ALTER TABLE symbols '
348					'ADD CONSTRAINT dsofk      FOREIGN KEY (dso_id)       REFERENCES dsos       (id)')
349	do_query(query, 'ALTER TABLE samples '
350					'ADD CONSTRAINT evselfk    FOREIGN KEY (evsel_id)     REFERENCES selected_events (id),'
351					'ADD CONSTRAINT machinefk  FOREIGN KEY (machine_id)   REFERENCES machines   (id),'
352					'ADD CONSTRAINT threadfk   FOREIGN KEY (thread_id)    REFERENCES threads    (id),'
353					'ADD CONSTRAINT commfk     FOREIGN KEY (comm_id)      REFERENCES comms      (id),'
354					'ADD CONSTRAINT dsofk      FOREIGN KEY (dso_id)       REFERENCES dsos       (id),'
355					'ADD CONSTRAINT symbolfk   FOREIGN KEY (symbol_id)    REFERENCES symbols    (id),'
356					'ADD CONSTRAINT todsofk    FOREIGN KEY (to_dso_id)    REFERENCES dsos       (id),'
357					'ADD CONSTRAINT tosymbolfk FOREIGN KEY (to_symbol_id) REFERENCES symbols    (id)')
358	if perf_db_export_calls:
359		do_query(query, 'ALTER TABLE call_paths '
360					'ADD CONSTRAINT parentfk    FOREIGN KEY (parent_id)    REFERENCES call_paths (id),'
361					'ADD CONSTRAINT symbolfk    FOREIGN KEY (symbol_id)    REFERENCES symbols    (id)')
362		do_query(query, 'ALTER TABLE calls '
363					'ADD CONSTRAINT threadfk    FOREIGN KEY (thread_id)    REFERENCES threads    (id),'
364					'ADD CONSTRAINT commfk      FOREIGN KEY (comm_id)      REFERENCES comms      (id),'
365					'ADD CONSTRAINT call_pathfk FOREIGN KEY (call_path_id) REFERENCES call_paths (id),'
366					'ADD CONSTRAINT callfk      FOREIGN KEY (call_id)      REFERENCES samples    (id),'
367					'ADD CONSTRAINT returnfk    FOREIGN KEY (return_id)    REFERENCES samples    (id),'
368					'ADD CONSTRAINT parent_call_pathfk FOREIGN KEY (parent_call_path_id) REFERENCES call_paths (id)')
369		do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)')
370
371	if (unhandled_count):
372		print datetime.datetime.today(), "Warning: ", unhandled_count, " unhandled events"
373	print datetime.datetime.today(), "Done"
374
375def trace_unhandled(event_name, context, event_fields_dict):
376	global unhandled_count
377	unhandled_count += 1
378
379def sched__sched_switch(*x):
380	pass
381
382def evsel_table(evsel_id, evsel_name, *x):
383	n = len(evsel_name)
384	fmt = "!hiqi" + str(n) + "s"
385	value = struct.pack(fmt, 2, 8, evsel_id, n, evsel_name)
386	evsel_file.write(value)
387
388def machine_table(machine_id, pid, root_dir, *x):
389	n = len(root_dir)
390	fmt = "!hiqiii" + str(n) + "s"
391	value = struct.pack(fmt, 3, 8, machine_id, 4, pid, n, root_dir)
392	machine_file.write(value)
393
394def thread_table(thread_id, machine_id, process_id, pid, tid, *x):
395	value = struct.pack("!hiqiqiqiiii", 5, 8, thread_id, 8, machine_id, 8, process_id, 4, pid, 4, tid)
396	thread_file.write(value)
397
398def comm_table(comm_id, comm_str, *x):
399	n = len(comm_str)
400	fmt = "!hiqi" + str(n) + "s"
401	value = struct.pack(fmt, 2, 8, comm_id, n, comm_str)
402	comm_file.write(value)
403
404def comm_thread_table(comm_thread_id, comm_id, thread_id, *x):
405	fmt = "!hiqiqiq"
406	value = struct.pack(fmt, 3, 8, comm_thread_id, 8, comm_id, 8, thread_id)
407	comm_thread_file.write(value)
408
409def dso_table(dso_id, machine_id, short_name, long_name, build_id, *x):
410	n1 = len(short_name)
411	n2 = len(long_name)
412	n3 = len(build_id)
413	fmt = "!hiqiqi" + str(n1) + "si"  + str(n2) + "si" + str(n3) + "s"
414	value = struct.pack(fmt, 5, 8, dso_id, 8, machine_id, n1, short_name, n2, long_name, n3, build_id)
415	dso_file.write(value)
416
417def symbol_table(symbol_id, dso_id, sym_start, sym_end, binding, symbol_name, *x):
418	n = len(symbol_name)
419	fmt = "!hiqiqiqiqiii" + str(n) + "s"
420	value = struct.pack(fmt, 6, 8, symbol_id, 8, dso_id, 8, sym_start, 8, sym_end, 4, binding, n, symbol_name)
421	symbol_file.write(value)
422
423def branch_type_table(branch_type, name, *x):
424	n = len(name)
425	fmt = "!hiii" + str(n) + "s"
426	value = struct.pack(fmt, 2, 4, branch_type, n, name)
427	branch_type_file.write(value)
428
429def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, *x):
430	if branches:
431		value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiB", 17, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx)
432	else:
433		value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiB", 21, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx)
434	sample_file.write(value)
435
436def call_path_table(cp_id, parent_id, symbol_id, ip, *x):
437	fmt = "!hiqiqiqiq"
438	value = struct.pack(fmt, 4, 8, cp_id, 8, parent_id, 8, symbol_id, 8, ip)
439	call_path_file.write(value)
440
441def call_return_table(cr_id, thread_id, comm_id, call_path_id, call_time, return_time, branch_count, call_id, return_id, parent_call_path_id, flags, *x):
442	fmt = "!hiqiqiqiqiqiqiqiqiqiqii"
443	value = struct.pack(fmt, 11, 8, cr_id, 8, thread_id, 8, comm_id, 8, call_path_id, 8, call_time, 8, return_time, 8, branch_count, 8, call_id, 8, return_id, 8, parent_call_path_id, 4, flags)
444	call_file.write(value)
445