Skip to content

Commit 1d691f2

Browse files
authored
Merge 79e1c45 into 72525ef
2 parents 72525ef + 79e1c45 commit 1d691f2

File tree

11 files changed

+509
-0
lines changed

11 files changed

+509
-0
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Scripts used for comparing dqrun results
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
#!/usr/bin/env python3
2+
import sys
3+
import signal
4+
import traceback
5+
import html
6+
from pathlib import Path
7+
8+
import cyson as yson
9+
10+
11+
class _ResultMismatch(Exception):
    """Raised when a query result is missing or differs from the reference."""


def _read_summary(name):
    """Parse one summary.tsv file.

    The first line is the command line of the run.  Every following line is
    tab-separated and starts with: query name, user time, system time,
    max RSS, exit code, elapsed time in nanoseconds.

    Returns ``(cmdline, rows)`` where each row is the tuple
    ``(q, elapsed_seconds, utime, stime, maxrss, exitcode)``.
    """
    rows = []
    with open(name) as f:
        cmdline = f.readline()
        for line in f:
            fields = line.rstrip('\n').split('\t')
            if len(fields) < 6:
                # Blank or truncated row (e.g. an interrupted run): report and
                # skip it instead of aborting the whole report.
                if line.strip():
                    print('{}: skipping malformed row {!r}'.format(name, line), file=sys.stderr)
                continue
            (q, utime, stime, maxrss, exitcode, elapsed) = fields[:6]
            rows.append((q, int(elapsed)*1e-9, float(utime), float(stime), int(maxrss), int(exitcode)))
    return cmdline, rows


def _load_result(path):
    """Return ``(type, data)`` of the last result fragment stored in the YSON file *path*."""
    val_type = None
    val_data = None
    with open(path, 'rb') as f:
        for result in yson.list_fragments(yson.InputStream.from_file(f)):
            val_type = result[0][b'Write'][0][b'Type']
            val_data = result[0][b'Write'][0][b'Data']
    if val_type is None:
        raise _ResultMismatch('No result found in {}'.format(path))
    return val_type, val_data


def _compare_results(val_type, val_data, ref_type, ref_data):
    """Compare a result set against the reference one, cell by cell.

    Double columns are compared with a relative/absolute tolerance of 1e-5,
    everything else must be equal.  Raises _ResultMismatch on the first
    difference.  Explicit raises are used instead of ``assert`` so that the
    check still works when Python runs with ``-O``.
    """
    if val_type[0] != b'ListType' or val_type[1][0] != b'StructType':
        raise _ResultMismatch('Unsupported result type, expected a list of structs')
    stypes = val_type[1][1]
    ref_stypes = ref_type[1][1]
    ncols = len(stypes)
    if ncols != len(ref_stypes):
        raise _ResultMismatch('Column number mismatch {} != {}'.format(ncols, len(ref_stypes)))
    nrows = len(val_data)
    if nrows != len(ref_data):
        raise _ResultMismatch('Row number mismatch {} != {}'.format(nrows, len(ref_data)))
    for col in range(ncols):
        stype = stypes[col][1]
        is_optional = stype[0] == b'OptionalType'
        if is_optional:
            stype = stype[1]
        if stype[0] != b'DataType':
            raise _ResultMismatch('Unsupported column type at column {}'.format(col))
        is_double = stype[1] == b'Double'
        for row in range(nrows):
            val = val_data[row][col]
            ref = ref_data[row][col]
            if is_optional:
                # Optional values are wrapped: an empty wrapper means NULL.
                if len(ref) == 0:
                    if len(val) != 0:
                        raise _ResultMismatch('NULL != NOT NULL at {}, {}'.format(row, col))
                    continue
                if len(val) != 1:
                    raise _ResultMismatch('NOT NULL != NULL at {}, {}'.format(row, col))
                ref = ref[0]
                val = val[0]
            if is_double:
                val = float(val)
                ref = float(ref)
                if not abs(val - ref) <= 1e-5*max(abs(val), abs(ref), 1):
                    raise _ResultMismatch('abs({} - {}) > eps at {}, {}'.format(val, ref, row, col))
            elif val != ref:
                raise _ResultMismatch('{} != {} type {} at {}, {}'.format(val, ref, stypes[col][1][1], row, col))


def main():
    """Print an HTML report comparing the runs found under the given result directories.

    Every ``summary.tsv`` found (recursively) under the directories named on
    the command line becomes one group of five table columns: status, real
    time, user time, RSS and the outcome of comparing the query result with
    the result of the first (reference) group.  The rows are the testcases of
    the reference group.
    """
    if len(sys.argv) < 2:
        print('Usage: {} resultdir... >report.htm'.format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)

    rdirs = sys.argv[1:]

    # Collect everything first: the header must have one group per summary
    # file, and a directory may contain any number of them (including none).
    columns = []
    for dirname in rdirs:
        for name in sorted(map(str, Path(dirname).glob('**/summary.tsv'))):
            cmdline, rows = _read_summary(name)
            columns.append((dirname, name, cmdline, rows))

    print('''
<html><head><style>.signal { color: blue; } .errcode { color: red; } .ok { color: green; } .mismatch { color: yellow; } .tabnum { text-align: right; } </style></head>
''')
    print('<table border="1">')
    print('<tr><th>' + ''.join('<th colspan="5">' + html.escape(column[0]) for column in columns) + '</tr>')
    print('<tr><th>Testcase' + '<th>Status<th>Real time, s<th>User time, s<th>RSS, MB<th>'*len(columns) + '</tr>')
    print('<tr><th>')
    for (_, name, cmdline, _) in columns:
        print('<th colspan="4"><span title="{}">{}</span><th>'.format(html.escape(cmdline, quote=True), html.escape(name)))
    print('</tr>')

    ref_rows = columns[0][3] if columns else []
    for i, ref_row in enumerate(ref_rows):
        print('<tr><td>{}'.format(html.escape(ref_row[0])), end='')
        reference = None  # (type, data) of the reference result, once loaded
        for c, (dirname, _, _, rows) in enumerate(columns):
            if i >= len(rows):
                # This run has fewer testcases than the reference one.
                print('<td class="errcode">N/A<td><td><td><td class="errcode">N/A')
                continue
            (q, elapsed, utime, stime, maxrss, exitcode) = rows[i]
            if exitcode < 0:
                print('<td><span class="signal" title="{}">SIG</span>'.format(html.escape(signal.strsignal(-exitcode), quote=True)))
            elif exitcode > 0:
                print('<td><span class="errcode" title="{}">ERR</span>'.format(exitcode))
            else:
                print('<td><span class="ok">OK</span>')
            print('<td class="tabnum">{:.1f}<td class="tabnum">{:.1f}<td class="tabnum">{:.1f}'.format(elapsed, utime, maxrss/1024))
            if exitcode != 0:
                print('<td class="errcode">N/A')
                continue
            try:
                result = _load_result(dirname + '/' + q + '-result.yson')
                if c == 0:
                    reference = result
                    print('<td>')
                elif reference is None:
                    raise _ResultMismatch('No reference result to compare with')
                else:
                    _compare_results(result[0], result[1], reference[0], reference[1])
                    print('<td class="ok">MATCH</td>')
            except _ResultMismatch as e:
                print('<td class="mismatch">Comparison failed: {}'.format(html.escape(str(e))))
            except Exception:
                # Report boundary: an unreadable or unexpectedly shaped result
                # file must not abort the report.  The traceback is escaped so
                # it cannot break the markup.
                print('<td class="errcode">Comparison failed: <pre>{}</pre>'.format(html.escape(traceback.format_exc())))

        print('</tr>')

    print('</table>')
    print('</html>')
118+
119+
120+
if __name__ == '__main__':
121+
main()
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
PY3_PROGRAM()
2+
3+
OWNER(
4+
yumkam7
5+
g:yql
6+
g:yql_ydb_core
7+
)
8+
9+
PY_SRCS(
10+
MAIN compare.py
11+
)
12+
13+
PEERDIR(
14+
library/python/cyson
15+
)
16+
17+
END()
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
#!/usr/bin/env python3
2+
import sys
3+
import signal
4+
import html
5+
from pathlib import Path
6+
7+
8+
def main():
    """Print an HTML table with the timings recorded in summary.tsv files.

    Every ``summary.tsv`` found (recursively) under the directories named on
    the command line becomes one group of four columns: status, real time,
    user time and RSS.  The first line of a summary file is the command line
    of the run; each following line is tab-separated: query name, user time,
    system time, max RSS, exit code and, optionally, elapsed nanoseconds.
    The rows of the table are the testcases of the first summary file.
    """
    if len(sys.argv) < 2:
        print('Usage: {} resultdir... >report.htm'.format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)

    # Collect first so the header has one group per summary file; sorted so
    # the column order does not depend on directory iteration order.
    columns = []
    for dirname in sys.argv[1:]:
        for name in sorted(map(str, Path(dirname).glob('**/summary.tsv'))):
            rows = []
            with open(name) as f:
                cmdline = f.readline()
                for line in f:
                    fields = line.strip().split('\t')
                    if len(fields) < 5:
                        # Blank or truncated row: report and skip it.
                        if line.strip():
                            print('{}: skipping malformed row {!r}'.format(name, line), file=sys.stderr)
                        continue
                    (q, utime, stime, maxrss, exitcode) = fields[:5]
                    # Summaries without the elapsed column are accepted.
                    elapsed = int(fields[5])*1e-9 if len(fields) > 5 else None
                    rows.append((q, elapsed, float(utime), float(stime), int(maxrss), int(exitcode)))
            columns.append((name, cmdline, rows))

    print('''
<html><head><style>.signal { color: blue; } .errcode { color: red; } .ok { color: green; } .mismatch { color: yellow; } .tabnum { text-align: right; } </style></head>
''')
    print('<table border="1">')
    print('<tr><th>Testcase' + '<th>Status<th>Real time, s<th>User time, s<th>RSS, MB'*len(columns) + '</tr>')
    print('<tr><th>')
    for (name, cmdline, _) in columns:
        print('<th colspan="4"><span title="{}">{}</span>'.format(html.escape(cmdline, quote=True), html.escape(name)))
    print('</tr>')

    first_rows = columns[0][2] if columns else []
    for i, first_row in enumerate(first_rows):
        print('<tr><td>{}'.format(html.escape(first_row[0])), end='')
        for (_, _, rows) in columns:
            if i >= len(rows):
                # This run has fewer testcases than the first one.
                print('<td class="errcode">N/A<td><td><td>')
                continue
            (q, elapsed, utime, stime, maxrss, exitcode) = rows[i]
            if exitcode < 0:
                print('<td><span class="signal" title="{}">SIG</span>'.format(html.escape(signal.strsignal(-exitcode), quote=True)))
            elif exitcode > 0:
                print('<td><span class="errcode" title="{}">ERR</span>'.format(exitcode))
            else:
                print('<td><span class="ok">OK</span>')
            real = 'N/A' if elapsed is None else '{:.1f}'.format(elapsed)
            print('<td class="tabnum">{}<td class="tabnum">{:.1f}<td class="tabnum">{:.1f}'.format(real, utime, maxrss/1024))
        print('</tr>')

    print("</table>")
    print("</html>")
62+
63+
64+
if __name__ == "__main__":
65+
main()
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
PY3_PROGRAM()
2+
3+
OWNER(
4+
yumkam7
5+
g:yql
6+
g:yql_ydb_core
7+
)
8+
9+
PY_SRCS(
10+
MAIN gen-report.py
11+
)
12+
13+
PEERDIR(
14+
)
15+
16+
END()
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#!/bin/sh -ex
# Build the benchmark tools, dqrun and the UDFs it needs, prepare the TPC-H
# data and the generated queries for the requested data size, then run the
# query sets and store the results under a timestamped results-* directory.
#
# Usage: <this script> [datasize]      (datasize defaults to 1)
# datasize, script_path and ydb_path may be preset through the environment.
: ${datasize=${1-1}}
# script_path is used both before and after `cd`, so it has to be absolute:
# with a relative $0 the paths derived from it would resolve against the wrong
# directory once the working directory changes.
if [ -z "${script_path+set}" ]; then
    script_path=$(cd "$(dirname "$0")" && pwd)
fi
: ${ydb_path=$script_path/../../../..}

dqrun_dir=$ydb_path/ydb/library/yql/tools/dqrun
udfs_dir=$ydb_path/ydb/library/yql/udfs/common
gen_queries_dir=$ydb_path/ydb/library/benchmarks/gen_queries

(cd "$script_path" && "$ydb_path/ya" make --build relwithdebinfo)
(cd "$dqrun_dir" && "$ydb_path/ya" make --build relwithdebinfo)
(cd "$udfs_dir" && "$ydb_path/ya" make --build relwithdebinfo datetime datetime2 string re2 set)
(cd "$gen_queries_dir" && "$ydb_path/ya" make --build relwithdebinfo)

# Download the data set unless it is already present.
[ -d "tpc/h/$datasize" ] ||
    "$ydb_path/ydb/library/benchmarks/runner/download_files_h_${datasize}.sh"

# Default settings.
[ -f "ql-$datasize/h/bindings.json" ] ||
    "$gen_queries_dir/gen_queries" \
        --output "ql-$datasize" --variant h --syntax yql --dataset-size "$datasize"

# LLVM disabled.
# Pragmas that are currently switched off (kept outside the command so that no
# line continuation runs into a comment):
#   --pragma dq.UseFinalizeByKey=true
#   --pragma dq.UseOOBTransport=true
[ -f "q-$datasize/h/bindings.json" ] ||
    "$gen_queries_dir/gen_queries" \
        --output "q-$datasize" --variant h --syntax yql --dataset-size "$datasize" \
        --pragma dq.MaxTasksPerStage=1 \
        --pragma dq.ComputeActorType="async" \
        --pragma config.flags=LLVM_OFF

# LLVM disabled, for the run with spilling enabled.
# Pragma that is currently switched off:
#   --pragma dq.UseOOBTransport=true
[ -f "qs-$datasize/h/bindings.json" ] ||
    "$gen_queries_dir/gen_queries" \
        --output "qs-$datasize" --variant h --syntax yql --dataset-size "$datasize" \
        --pragma dq.MaxTasksPerStage=1 \
        --pragma config.flags=LLVM_OFF \
        --pragma dq.ComputeActorType="async" \
        --pragma dq.UseFinalizeByKey=true

outdir=results-`date -u +%Y%m%dT%H%M%S`-$datasize
# The LLVM run is disabled for now.
if false; then
echo LLVM && \
command time "$script_path/runner/runner" "ql-$datasize/h" "ql-$datasize/h/bindings.json" "$outdir" "$dqrun_dir/dqrun" -s --fs-cfg "$dqrun_dir/examples/fs.conf" --gateways-cfg "$script_path/test-gateways.conf" --udfs-dir "$udfs_dir/"
fi
echo NO LLVM && \
command time "$script_path/runner/runner" "q-$datasize/h" "q-$datasize/h/bindings.json" "$outdir" "$dqrun_dir/dqrun" -s --fs-cfg "$dqrun_dir/examples/fs.conf" --gateways-cfg "$script_path/test-gateways.conf" --udfs-dir "$udfs_dir/"
echo Spilling && \
command time "$script_path/runner/runner" "qs-$datasize/h" "qs-$datasize/h/bindings.json" "$outdir" "$dqrun_dir/dqrun" -s --enable-spilling --fs-cfg "$dqrun_dir/examples/fs.conf" --gateways-cfg "$script_path/test-gateways.conf" --udfs-dir "$udfs_dir/"
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
#!/usr/bin/env python3
2+
import os
3+
import sys
4+
from pathlib import Path
5+
import re
6+
import datetime
7+
8+
# Elapsed time must come from a monotonic clock so that wall-clock adjustments
# during a run cannot distort (or make negative) the measured duration.
try:
    from time import clock_gettime_ns, CLOCK_MONOTONIC

    def time_ns():
        """Return the current CLOCK_MONOTONIC reading in nanoseconds."""
        return clock_gettime_ns(CLOCK_MONOTONIC)

except ImportError:
    # clock_gettime_ns / CLOCK_MONOTONIC are not available on this platform;
    # monotonic_ns is still a monotonic clock, unlike the wall-clock time_ns.
    from time import monotonic_ns as time_ns


# A run of decimal digits; the capturing group makes re.split() keep the
# digit runs in its result.
re_digits = re.compile(r'([0-9]+)')
20+
21+
22+
def run(argv, err, out):
    """Run a command to completion and measure it.

    The command ``argv`` (``argv[0]`` is the path of the executable) is spawned
    with an empty environment; its stdout is written to the file ``out`` and
    its stderr to the file ``err``.

    Returns ``(exitcode, rusage, elapsed)``: the exit code as computed by
    ``os.waitstatus_to_exitcode`` (negative signal number when the process was
    killed by a signal), the resource usage reported by ``os.wait4`` and the
    elapsed time in nanoseconds.
    """
    # O_TRUNC: without it a pre-existing, longer output file would keep its
    # stale tail after the new output.
    open_flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC

    start_time = time_ns()
    pid = os.posix_spawn(argv[0], argv, {}, file_actions=(
        (os.POSIX_SPAWN_OPEN, 1, out, open_flags, 0o666),
        (os.POSIX_SPAWN_OPEN, 2, err, open_flags, 0o666),
    ))
    (_, status, rusage) = os.wait4(pid, 0)
    elapsed = time_ns() - start_time
    exitcode = os.waitstatus_to_exitcode(status)
    return exitcode, rusage, elapsed
34+
35+
36+
def main():
    """Run every ``*.sql`` query of a directory through a command and record a summary.

    Command line: ``querydir bindings outdir command [args...]``.  An empty
    string for one of the first three arguments selects its default
    (``q/scalar``, ``bindings.json``, ``result-<timestamp>``).

    For each query (visited in natural order, so that ``q2`` precedes ``q10``)
    the command is run with options naming the per-query result, plan, error
    and expression files under ``outdir``; its stdout and stderr are saved next
    to them.  ``outdir/querydir/summary.tsv`` receives the command line on the
    first line and then one tab-separated row per query: query, user time,
    system time, max RSS, exit code, elapsed nanoseconds, minor and major page
    faults, block input and output operations, voluntary and involuntary
    context switches.
    """
    if len(sys.argv) < 5:
        print('Usage: {} querydir bindings outdir command [args...]'.format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)

    qdir = sys.argv[1] or 'q/scalar'
    bindings = sys.argv[2] or 'bindings.json'
    outdir = sys.argv[3] or "result-{:%Y%m%dT%H%M%S}".format(datetime.datetime.now())
    argv = sys.argv[4:]
    querydir = Path(qdir)
    os.makedirs(outdir + '/' + qdir, exist_ok=True)

    def natural_key(path):
        # Split into alternating text / digit runs and compare the digit runs
        # numerically.
        return tuple(int(part) if re_digits.fullmatch(part) else part
                     for part in re_digits.split(str(path)))

    with open(outdir + '/' + qdir + "/summary.tsv", "w") as outf:
        print(' '.join(argv + ['-p', qdir, '--bindings-file', bindings]), file=outf)
        for query in sorted(querydir.glob('**/*.sql'), key=natural_key):
            q = str(query)
            name = outdir + '/' + q
            # Queries may live in subdirectories of querydir; their output
            # directory has to exist before the command can write there.
            os.makedirs(os.path.dirname(name), exist_ok=True)
            outname = name + '-result.yson'
            # Keyword arguments: run() takes ``err`` before ``out``.
            exitcode, rusage, elapsed = run(
                argv + [
                    '--result-file', outname,
                    '--bindings-file', bindings,
                    '--plan-file', name + '-plan.yson',
                    '--err-file', name + '-err.txt',
                    '--expr-file', name + '-expr.txt',
                    '-p', q
                ],
                out=name + '-stdout.txt',
                err=name + '-stderr.txt')
            row = (
                q,
                rusage.ru_utime,
                rusage.ru_stime,
                rusage.ru_maxrss,
                exitcode,
                elapsed,
                rusage.ru_minflt,
                rusage.ru_majflt,
                rusage.ru_inblock,
                rusage.ru_oublock,
                rusage.ru_nvcsw,
                rusage.ru_nivcsw,
            )
            # The whole row is written at once (each field followed by a tab),
            # so an interrupted run never leaves a truncated row behind.
            print('\t'.join(map(str, row)), end='\t\n', file=outf)
            outf.flush()
82+
83+
84+
if __name__ == "__main__":
85+
main()
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
PY3_PROGRAM()
2+
3+
OWNER(
4+
yumkam7
5+
g:yql
6+
g:yql_ydb_core
7+
)
8+
9+
PY_SRCS(
10+
MAIN runner.py
11+
)
12+
13+
PEERDIR(
14+
)
15+
16+
END()

0 commit comments

Comments
 (0)