Skip to content
This repository was archived by the owner on Jan 10, 2023. It is now read-only.

Commit a6493b2

Browse files
committed
Call-graph from samples, initial pprof support
PProf importer; call-graph generation from samples; kernel stack collection on Mac; better Java detection; suspending execution to give collectors time to ramp up (Mac and Linux); better process naming on Mac; more effective handling of ITT strings; multi-counter support for other formats.
1 parent 848d255 commit a6493b2

File tree

15 files changed

+1277
-78
lines changed

15 files changed

+1277
-78
lines changed

InstrumentationExample.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ void workerthread(int data)
131131

132132
if (rand() % 5 == 1)
133133
{
134-
ITT_SCOPE_TRACK(nullptr, "GPU");
134+
ITT_SCOPE_TRACK(nullptr, "USER_SCOPE");
135135
unsigned long long end = TClock::now().time_since_epoch().count();
136136
unsigned long long length = end - start;
137137
__itt_task_begin_ex(g_domain, clock_domain, start + length / 4, __itt_null, id, handle_gpu);

buildall.py

+20-2
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,11 @@ def enum_nodes(curpath, level):
107107
return enum_nodes(path, depth)
108108

109109

110+
def locate_exact(what):
    """Return the paths known to `locate` whose trailing component(s) equal `what`.

    `locate` does substring matching, so its output is filtered down to
    entries that are exactly `what` or end with '/' + `what`. A bare
    suffix test is not enough: it would also accept e.g. '/usr/bin/xjavac'
    when looking for 'javac'.

    :param what: file name (or trailing path fragment) to look for.
    :return: list of matching paths; empty when nothing matches or when
        `locate` cannot be used.
    """
    try:
        output = subprocess.check_output(['locate', what])
    except (subprocess.CalledProcessError, OSError):
        # `locate` exits non-zero when nothing matches or its database has
        # not been built; OSError means the binary is not installed.
        # Callers treat an empty list as "not found" and fall back.
        return []
    suffix = '/' + what
    paths = output.decode("utf-8").split('\n')
    return [path for path in paths if path == what or path.endswith(suffix)]
113+
114+
110115
def GetJDKPath():
111116
if sys.platform == 'win32':
112117
bush = read_registry(r'HKLM\SOFTWARE\JavaSoft\Java Development Kit')
@@ -119,8 +124,21 @@ def GetJDKPath():
119124
if err or not path:
120125
return None
121126
if sys.platform == 'darwin':
122-
path = subprocess.check_output("/usr/libexec/java_home").decode("utf-8").split('\n')[0]
123-
return path if os.path.exists(path) else None
127+
javacs = locate_exact('javac')
128+
if not javacs:
129+
return None
130+
jnis = locate_exact('jni.h')
131+
if jnis:
132+
longest = {'prefix': '', 'jni': '', 'java': ''}
133+
for jni in jnis:
134+
for java in javacs:
135+
prefix = os.path.commonprefix([jni, java])
136+
if len(prefix) > len(longest['prefix']):
137+
longest = {'prefix': prefix, 'jni': jni, 'java': java}
138+
return longest['prefix'].rstrip('/')
139+
else:
140+
path = subprocess.check_output("/usr/libexec/java_home").decode("utf-8").split('\n')[0]
141+
return path if os.path.exists(path) else None
124142
else:
125143
matches = []
126144
for root, dirnames, filenames in os.walk('/usr/lib/jvm'):

main.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -271,9 +271,9 @@ int main(int argc, char* argv[])
271271
const char* api_ver = __itt_api_version();
272272
VerbosePrint("ITT Version: %s\n", api_ver ? api_ver : "Not loaded");
273273

274-
std::thread thrd(ChangePaths);
274+
//std::thread thrd(ChangePaths); //only for stress testing
275275
Main(work_seconds);
276-
thrd.join();
276+
//thrd.join();
277277
return 0;
278278
}
279279

runtool/collectors/osx.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,14 @@
2929
"""
3030

3131
OFF_CPU_STACKS = r"""
32-
printf("%x\tstack\t%x\t%x:", machtimestamp, pid, tid);
32+
printf("%x\tkstack\t%x\t%x:", machtimestamp, pid, tid);
33+
stack();
34+
printf("\n%x\tustack\t%x\t%x:", machtimestamp, pid, tid);
3335
ustack();
36+
/*
37+
printf("\n%x\tjstack\t%x\t%x:", machtimestamp, pid, tid);
38+
jstack(); //TODO: enable better support for jstack-s
39+
*/
3440
printf("\n");
3541
"""
3642

@@ -70,7 +76,9 @@
7076
7177
"""
7278

79+
7380
class DTraceCollector(Collector):
81+
7482
def __init__(self, args):
7583
Collector.__init__(self, args)
7684
self.pid = None
@@ -142,6 +150,7 @@ def start(self):
142150
('bufresize', 'auto'),
143151
('bufsize', '%dm' % (self.args.ring * 10))
144152
]))
153+
145154
dtrace_script.append(dtrace_context_switch)
146155
dtrace_script.append(self.gen_gpu_hooks(probes))
147156

@@ -157,7 +166,7 @@ def start(self):
157166
with open(script, 'w') as file:
158167
file.write(dtrace_script)
159168

160-
proc = subprocess.Popen(cmd, shell=True, stdin=None, stdout=sys.stdout, stderr=sys.stderr, env=os.environ)
169+
proc = subprocess.Popen(cmd, shell=True, stdin=None, stdout=self.output, stderr=self.output, env=os.environ)
161170
self.pid = proc.pid
162171
self.log(cmd)
163172
self.log("pid: %d" % proc.pid)

runtool/decoders/SteamVR.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,12 @@ def finish_task(self, events, call_data, data, id):
126126
call_data.update({'str': data, 'type': 0})
127127
call_data['time'] = self.parser.convert_time(events[id])
128128
end_data['type'] = 1
129-
self.callbacks.complete_task('task', call_data, end_data)
129+
130+
lane_task = self.callbacks.process(call_data['pid']).\
131+
thread(call_data['tid']).lane(call_data['str'], call_data['domain']).\
132+
frame_begin(call_data['time'], call_data['str'])
133+
lane_task.end(end_data['time'])
134+
130135
del events[id]
131136

132137
@classmethod

runtool/exporters/ChromeTracing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ class GoogleTrace(TaskCombiner):
1818
class ContextSwitch:
1919
def __init__(self, parent, file):
2020
self.parent = parent
21-
self.file = file
21+
self.file = file + '.ftrace'
2222
self.ftrace = None
2323

2424
def write(self, time, cpu, prev_tid, prev_state, next_tid, prev_prio=0, next_prio=0, prev_name=None, next_name=None):

runtool/exporters/QtCreatorProfiler.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,10 @@ def complete_task(self, type, begin, end):
7070

7171
args = {}
7272
if type == "counter":
73-
args['value'] = begin['delta']
73+
if 'delta' in begin:
74+
args['value'] = begin['delta']
75+
else: # TODO: add multi-value support
76+
return
7477
if begin.has_key('args'):
7578
args = begin['args']
7679
if end.has_key('args'):

runtool/importers/etw.py

+2
Original file line numberDiff line numberDiff line change
@@ -876,6 +876,8 @@ def on_event(self, system, data, info):
876876
if 'Info' in opcode: return
877877
call_data['id'] = int(data['Handle'], 16)
878878
"""
879+
elif call_data['str'] in ['AdapterAllocation', 'DeviceAllocation']: # Microsoft-Windows-DxgKrnl
880+
return # TODO: it might have important information for building memory charts
879881
else:
880882
if 'Start' in opcode or 'Stop' in opcode:
881883
call_data['id'] = hash(call_data['str'])

runtool/importers/osx.py

+31-9
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,17 @@ def __init__(self, args, gt, callbacks):
1717
self.gpu_transition = {}
1818
self.gpu_frame = {'catch': [0, 0], 'task': None}
1919
self.prepares = {}
20+
self.pid_names = {}
21+
self.tid_map = {}
2022
for callback in self.callbacks.callbacks:
2123
if 'ContextSwitch' in dir(callback):
2224
self.cs = callback.ContextSwitch(callback, args.input + '.ftrace')
2325
callback("metadata_add", {'domain': 'GPU', 'str': '__process__', 'pid': -1, 'tid': -1, 'data': 'GPU Engines', 'time': 0, 'delta': -2})
2426

27+
def add_tid_name(self, tid, name):
28+
if tid in self.tid_map:
29+
self.pid_names[self.tid_map[tid]] = name
30+
2531
def handle_record(self, time, cmd, args):
2632
if cmd == 'off':
2733
if not self.cs or not self.callbacks.check_time_in_limits(time):
@@ -33,13 +39,16 @@ def handle_record(self, time, cmd, args):
3339
prev_tid = '0'
3440
if next_prio == '0' and next_name == 'kernel_task':
3541
next_tid = '0'
36-
42+
prev_tid = int(prev_tid, 16)
43+
next_tid = int(next_tid, 16)
3744
self.cs.write(
3845
time=time, cpu=int(cpu, 16),
39-
prev_tid=int(prev_tid, 16), prev_state='S', next_tid=int(next_tid, 16),
46+
prev_tid=prev_tid, prev_state='S', next_tid=next_tid,
4047
prev_prio=int(prev_prio, 16), next_prio=int(next_prio, 16),
4148
prev_name=prev_name.replace(' ', '_'), next_name=next_name.replace(' ', '_')
4249
)
50+
self.add_tid_name(prev_tid, prev_name)
51+
self.add_tid_name(next_tid, next_name)
4352
elif cmd.startswith('dtHook'):
4453
if not self.ignore_gpu:
4554
pid, tid = args[0:2]
@@ -52,8 +61,10 @@ def handle_record(self, time, cmd, args):
5261
else:
5362
print "unsupported cmd:", cmd, args
5463

55-
def handle_stack(self, time, pid, tid, stack):
64+
def handle_stack(self, kind, time, pid, tid, stack):
5665
pid = int(pid, 16)
66+
tid = int(tid, 16)
67+
self.tid_map[tid] = pid
5768
if not self.callbacks.check_time_in_limits(time) or not self.callbacks.check_pid_allowed(pid):
5869
return
5970
parsed = []
@@ -63,9 +74,10 @@ def handle_stack(self, time, pid, tid, stack):
6374
parsed.append({'ptr': hash(name), 'module': module, 'str': name})
6475
else:
6576
parsed.append({'ptr': int(frame, 16), 'module': '', 'str': ''})
66-
self.callbacks.handle_stack(pid, int(tid, 16), time, parsed)
77+
self.callbacks.handle_stack(pid, tid, time, parsed, kind)
6778

6879
def task(self, time, pid, tid, starts, domain, name, args):
80+
self.tid_map[tid] = pid
6981
if name in ['IGAccelGLContext::BlitFramebuffer', 'CGLFlushDrawable']:
7082
self.gpu_frame['catch'][0 if starts else 1] = time
7183
if name == 'CGLFlushDrawable':
@@ -184,8 +196,10 @@ def gpu_call(self, time, cmd, pid, tid, args):
184196
pass
185197
elif 'WriteStamp' == cmd:
186198
pass
199+
elif 'DidFlip' == cmd:
200+
pass
187201
else:
188-
print cmd
202+
print "Unhandled gpu_call:", cmd
189203

190204
def on_gpu_frame(self, time, pid, tid):
191205
self.callbacks.on_event("marker", {'pid': pid, 'tid': tid, 'domain': 'gits', 'time': time, 'str': "GPU Frame", 'type': 5, 'data': 'task'})
@@ -195,6 +209,9 @@ def finalize(self):
195209
for callback in self.callbacks.callbacks:
196210
thread_name = name.replace('\\"', '').replace('"', '')
197211
callback("metadata_add", {'domain': 'IntelSEAPI', 'str': '__thread__', 'pid': pid, 'tid': tid, 'data': '%s (%d)' % (thread_name, tid)})
212+
for pid, name in self.pid_names.iteritems():
213+
self.callbacks.set_process_name(pid, name)
214+
self.callbacks.set_process_name(-pid, 'Sampling: ' + name)
198215

199216

200217
def transform_dtrace(args):
@@ -212,6 +229,8 @@ def transform_dtrace(args):
212229
reading_stack = None
213230
stack = []
214231
for line in file:
232+
count += 1
233+
ends_with_vt = (11 == ord(line[-1])) if len(line) else False
215234
line = line.strip()
216235
if not line:
217236
if reading_stack:
@@ -220,16 +239,19 @@ def transform_dtrace(args):
220239
stack = []
221240
continue
222241
if reading_stack:
223-
stack.append(line)
242+
if ends_with_vt: # Vertical Tab signifies too long stack frame description
243+
line += '...'
244+
end_of_line = file.readline() # it is also treated as line end by codecs.open
245+
line += end_of_line.strip()
246+
stack.append(line.replace('\t', ' '))
224247
continue
225248
parts = line.split('\t')
226-
if parts[1] == 'stack':
227-
reading_stack = [int(parts[0], 16), parts[2], parts[3].rstrip(':')]
249+
if parts[1] in ['ustack', 'kstack', 'jstack']:
250+
reading_stack = [parts[1], int(parts[0], 16), parts[2], parts[3].rstrip(':')]
228251
continue
229252
dtrace.handle_record(int(parts[0], 16), parts[1], parts[2:])
230253
if not count % 1000:
231254
progress.tick(file.tell())
232-
count += 1
233255
dtrace.finalize()
234256
return callbacks.get_result()
235257

0 commit comments

Comments
 (0)