Skip to content

Commit da209ec

Browse files
authored
dqrun comparator: initial version (#4545)
1 parent 55a96e6 commit da209ec

File tree

10 files changed

+517
-0
lines changed

10 files changed

+517
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
#!/usr/bin/env python3
2+
import sys
3+
import signal
4+
import traceback
5+
import html
6+
import math
7+
from pathlib import Path
8+
9+
import cyson as yson
10+
11+
12+
def _signal_title(signum):
    """Return a human-readable description of signal number *signum*.

    ``signal.strsignal`` returns ``None`` for signals it does not recognize
    and raises ``ValueError`` for out-of-range numbers; both cases fall back
    to a generic ``signal N`` label so the report can always be rendered.
    """
    try:
        description = signal.strsignal(signum)
    except ValueError:
        description = None
    return description or 'signal {}'.format(signum)


def _require(condition, message, *args):
    """Raise ``ValueError(message.format(*args))`` unless *condition* holds.

    Used instead of ``assert`` so the checks survive ``python -O``.
    """
    if not condition:
        raise ValueError(message.format(*args))


def _read_summary(f, dirname):
    """Parse the data rows of an open summary.tsv (header line already read).

    Returns a list of ``[dirname, q, elapsed, utime, stime, maxrss,
    exitcode]`` rows, with ``elapsed`` converted from nanoseconds to seconds.
    Rows with fewer than six tab-separated fields are skipped.
    """
    rows = []
    for line in f:
        fields = line.split('\t')
        if len(fields) < 6:
            # Incomplete row (e.g. the run was interrupted mid-query).
            continue
        q, utime, stime, maxrss, exitcode, elapsed = fields[:6]
        rows.append([
            dirname,
            q,
            int(elapsed) * 1e-9,
            float(utime),
            float(stime),
            int(maxrss),
            int(exitcode),
        ])
    return rows


def _load_result(outname):
    """Read a ``*-result.yson`` file and return its ``(Type, Data)`` pair.

    If the file holds several fragments the values of the last one win;
    ``(None, None)`` is returned when it holds none.
    """
    val_type = None
    val_data = None
    with open(outname, 'rb') as f:
        for result in yson.list_fragments(yson.InputStream.from_file(f)):
            val_type = result[0][b'Write'][0][b'Type']
            val_data = result[0][b'Write'][0][b'Data']
    return val_type, val_data


def _compare_results(val_type, val_data, ref_type, ref_data):
    """Compare one query result against the reference result.

    Both results must be lists of structs with the same number of columns
    and rows. Values are compared cell by cell: optional columns must agree
    on NULL-ness, ``Double`` columns are compared with a relative tolerance
    of 1e-5 (NaN matches NaN, equal infinities match), everything else must
    be equal.

    Raises:
        ValueError: on the first difference found.
    """
    _require(val_type is not None and ref_type is not None, 'No result data')
    _require(val_type[0] == b'ListType', 'Unexpected result type {}', val_type[0])
    _require(val_type[1][0] == b'StructType', 'Unexpected item type {}', val_type[1][0])
    stypes = val_type[1][1]
    refstypes = ref_type[1][1]
    ncols = len(stypes)
    _require(ncols == len(refstypes), 'Column number mismatch {} != {}', ncols, len(refstypes))
    nrows = len(val_data)
    _require(nrows == len(ref_data), 'Row number mismatch {} != {}', nrows, len(ref_data))

    for col in range(ncols):
        stype = stypes[col][1]
        is_optional = stype[0] == b'OptionalType'
        if is_optional:
            stype = stype[1]
        _require(stype[0] == b'DataType', 'Unsupported column type {} at column {}', stype[0], col)
        is_double = stype[1] == b'Double'
        for row in range(nrows):
            val = val_data[row][col]
            ref = ref_data[row][col]
            if is_optional:
                if ref is None:
                    _require(val is None, 'NULL != NOT NULL at {}, {}', row, col)
                    continue
                _require(val is not None, 'NOT NULL != NULL at {}, {}', row, col)
                ref = ref[0]
                val = val[0]
            if is_double:
                val = float(val)
                ref = float(ref)
                if math.isnan(val) or math.isnan(ref):
                    _require(math.isnan(val) and math.isnan(ref),
                             '{} != {} at {}, {}', val, ref, row, col)
                    continue
                if val == ref:
                    # Also covers equal infinities, where val - ref is NaN.
                    continue
                _require(abs(val - ref) <= 1e-5 * max(abs(val), abs(ref), 1),
                         'abs({} - {}) > eps at {}, {}', val, ref, row, col)
            else:
                _require(val == ref, '{} != {} type {} at {}, {}',
                         val, ref, stypes[col][1][1], row, col)


def main():
    """Write an HTML report comparing query results across result directories.

    Every ``summary.tsv`` found under the directories given on the command
    line becomes one group of columns (status, timings, RSS, comparison).
    The first summary is the reference: for each of its test cases, the
    ``<dir>/<query>-result.yson`` of every other column is compared against
    the reference result. The report goes to stdout.
    """
    if len(sys.argv) < 2:
        print('Usage: {} resultdir... >report.htm'.format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)

    rdirs = sys.argv[1:]
    data = []

    print('\n<html><head><style>.signal { color: blue; } .errcode { color: red; } '
          '.ok { color: green; } .mismatch { color: yellow; } '
          '.tabnum { text-align: right; } </style></head>\n')
    print('<table border="1">')
    print('<tr><th>' + ''.join('<th colspan="5">' + html.escape(d) for d in rdirs))
    print('<tr><th>Testcase'
          + '<th>Status<th>Real time, s<th>User time, s<th>RSS, MB<th>' * len(rdirs)
          + '</tr>')
    print('<tr><th>')

    for dirname in rdirs:
        for name in sorted(map(str, Path(dirname).glob('**/summary.tsv'))):
            with open(name) as f:
                # The first line of a summary is the command line used for the run.
                cmdline = f.readline()
                print('<th colspan="4"><span title="{}">{}</span><th>'.format(
                    html.escape(cmdline, quote=True), html.escape(name)))
                data.append(_read_summary(f, dirname))

    # The first summary defines the list of test cases; without any summary
    # the table simply has no data rows.
    reference_rows = data[0] if data else []
    for i, ref_row in enumerate(reference_rows):
        print('<tr><td>{}'.format(html.escape(ref_row[1])), end='')
        reference = None
        for c, column in enumerate(data):
            if i >= len(column):
                # This summary has fewer rows than the reference one.
                print('<td class="errcode" colspan="5">N/A')
                continue
            dirname, q, elapsed, utime, _stime, maxrss, exitcode = column[i]
            if exitcode < 0:
                print('<td><span class="signal" title="{}">SIG</span>'.format(
                    html.escape(_signal_title(-exitcode), quote=True)))
            elif exitcode > 0:
                print('<td><span class="errcode" title="{}">ERR</span>'.format(exitcode))
            else:
                print('<td><span class="ok">OK</span>')
            print('<td class="tabnum">{:.1f}<td class="tabnum">{:.1f}<td class="tabnum">{:.1f}'.format(
                elapsed, utime, maxrss / 1024))
            if exitcode != 0:
                print('<td class="errcode">N/A')
                continue
            try:
                val_type, val_data = _load_result(dirname + '/' + q + '-result.yson')
                if c == 0:
                    reference = (val_type, val_data)
                    print('<td>')
                else:
                    _require(reference is not None, 'No reference result in the first column')
                    _compare_results(val_type, val_data, *reference)
                    print('<td class="ok">MATCH</td>')
            except Exception:
                # Per-cell boundary: report the failure in the cell and keep
                # going. The traceback may contain '<', '>' and '&'.
                print('<td class="errcode">Comparison failed: ',
                      html.escape(traceback.format_exc()))

        print('</tr>')

    print('</table>')
    print('</html>')


if __name__ == '__main__':
    main()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Build manifest for the result comparator script (its main source is
# compare.py, see PY_SRCS below).
# NOTE(review): the bare "N+" lines are diff-gutter residue from the commit
# page this text was captured from, not ya.make syntax.
PY3_PROGRAM()
2+
3+
# Owners of this target: one user and two groups (g: prefix).
OWNER(
4+
yumkam7
5+
g:yql
6+
g:yql_ydb_core
7+
)
8+
9+
# compare.py is the program's MAIN source.
PY_SRCS(
10+
MAIN compare.py
11+
)
12+
13+
# Presumably provides the `cyson` module that the comparator imports as
# `yson` - TODO confirm.
PEERDIR(
14+
library/python/cyson
15+
)
16+
17+
END()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
#!/usr/bin/env python3
2+
import sys
3+
import signal
4+
import html
5+
from pathlib import Path
6+
7+
8+
def _signal_title(signum):
    """Return a human-readable description of signal number *signum*.

    ``signal.strsignal`` returns ``None`` for signals it does not recognize
    and raises ``ValueError`` for out-of-range numbers; both cases fall back
    to a generic ``signal N`` label so the report can always be rendered.
    """
    try:
        description = signal.strsignal(signum)
    except ValueError:
        description = None
    return description or 'signal {}'.format(signum)


def _read_summary(f):
    """Parse the data rows of an open summary.tsv (header line already read).

    Returns a list of ``(q, elapsed, utime, stime, maxrss, exitcode)``
    tuples, with ``elapsed`` converted from nanoseconds to seconds. Rows with
    fewer than six tab-separated fields are skipped.
    """
    rows = []
    for line in f:
        fields = line.strip().split('\t')
        if len(fields) < 6:
            # Incomplete row (e.g. the run was interrupted mid-query).
            continue
        q, utime, stime, maxrss, exitcode, elapsed = fields[:6]
        rows.append((q, int(elapsed) * 1e-9, float(utime), float(stime),
                     int(maxrss), int(exitcode)))
    return rows


def main():
    """Write an HTML table of run statistics for the given result directories.

    Every ``summary.tsv`` found under the directories given on the command
    line becomes one group of columns (status, real time, user time, RSS).
    The first summary defines the list of test cases. The report goes to
    stdout.
    """
    if len(sys.argv) < 2:
        print('Usage: {} resultdir... >report.htm'.format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)
    args = sys.argv[1:]

    print('\n<html><head><style>.signal { color: blue; } .errcode { color: red; } '
          '.ok { color: green; } .mismatch { color: yellow; } '
          '.tabnum { text-align: right; } </style></head>\n')
    print('<table border="1">')
    print('<tr><th>Testcase'
          + '<th>Status<th>Real time, s<th>User time, s<th>RSS, MB' * len(args)
          + '</tr>')

    data = []
    print('<tr><th>')
    for dirname in args:
        for name in sorted(map(str, Path(dirname).glob('**/summary.tsv'))):
            with open(name) as f:
                # The first line of a summary is the command line used for the run.
                cmdline = f.readline()
                print('<th colspan="4"><span title="{}">{}</span>'.format(
                    html.escape(cmdline, quote=True), html.escape(name)))
                data.append(_read_summary(f))

    # Without any summary the table simply has no data rows.
    reference_rows = data[0] if data else []
    for i, ref_row in enumerate(reference_rows):
        # The test-case name comes from a file and must be escaped.
        print('<tr><td>', html.escape(ref_row[0]), end='')
        for column in data:
            if i >= len(column):
                # This summary has fewer rows than the first one.
                print('<td class="errcode" colspan="4">N/A')
                continue
            _q, elapsed, utime, _stime, maxrss, exitcode = column[i]
            if exitcode < 0:
                print('<td><span class="signal" title="{}">SIG</span>'.format(
                    html.escape(_signal_title(-exitcode), quote=True)))
            elif exitcode > 0:
                print('<td><span class="errcode" title="{}">ERR</span>'.format(exitcode))
            else:
                print('<td><span class="ok">OK</span>')
            print('<td class="tabnum">{:.1f}<td class="tabnum">{:.1f}<td class="tabnum">{:.1f}'.format(
                elapsed, utime, maxrss / 1024))
        print('</tr>')

    print("</table>")
    print("</html>")


if __name__ == "__main__":
    main()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Build manifest for the report generator script (its main source is
# gen-report.py, see PY_SRCS below).
# NOTE(review): the bare "N+" lines are diff-gutter residue from the commit
# page this text was captured from, not ya.make syntax.
PY3_PROGRAM()
2+
3+
# Owners of this target: one user and two groups (g: prefix).
OWNER(
4+
yumkam7
5+
g:yql
6+
g:yql_ydb_core
7+
)
8+
9+
# gen-report.py is the program's MAIN source.
PY_SRCS(
10+
MAIN gen-report.py
11+
)
12+
13+
# No peer directories are listed; the list is empty.
PEERDIR(
14+
)
15+
16+
END()
+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
#!/bin/sh -ex
# Build the benchmark tools, generate the query sets and run them through
# dqrun, collecting everything under a timestamped results-* directory.
#
# Usage: <this script> [datasize [variant]]    (defaults: datasize=1, variant=h)
# datasize, variant, script_path and ydb_path may also be preset in the
# environment.
: ${datasize=${1:-1}}
: ${variant=${2:-h}}
# dirname (unlike ${0%/*}) also works when $0 contains no slash.
: ${script_path=$(dirname "$0")}
: ${ydb_path=$script_path/../../../..}
# Make both paths absolute: the build steps below cd into other directories
# before using $ydb_path, which would silently break a relative path.
script_path=$(cd "$script_path" && pwd)
ydb_path=$(cd "$ydb_path" && pwd)

gen_queries=$ydb_path/ydb/library/benchmarks/gen_queries/gen_queries
dqrun_dir=$ydb_path/ydb/library/yql/tools/dqrun

(cd "$script_path" && "$ydb_path/ya" make --build relwithdebinfo)
(cd "$dqrun_dir" && "$ydb_path/ya" make --build relwithdebinfo)
(cd "$ydb_path/ydb/library/yql/udfs/common" && "$ydb_path/ya" make --build relwithdebinfo datetime datetime2 string re2 set math unicode_base)
(cd "$ydb_path/ydb/library/benchmarks/gen_queries" && "$ydb_path/ya" make --build relwithdebinfo)

# Fetch the data set unless it is already present in the current directory.
[ -d "tpc/$variant/$datasize" ] ||
    "$ydb_path/ydb/library/benchmarks/runner/download_files_${variant}_${datasize}.sh"

if [ "$variant" = ds ]; then
    xpragma="--pragma AnsiOptionalAs"
else
    xpragma=""
fi

# NOTE: $xpragma is intentionally left unquoted below - it must expand to
# either no words at all or to two separate words.

# Query set for the (currently disabled) LLVM run.
[ -f "ql-$datasize/$variant/bindings.json" ] ||
    "$gen_queries" \
        --output "ql-$datasize" --variant "$variant" --syntax yql --dataset-size "$datasize" \
        $xpragma

# Query set for the run without LLVM.
# Not enabled here: --pragma dq.UseFinalizeByKey=true
#                   --pragma dq.UseOOBTransport=true
[ -f "q-$datasize/$variant/bindings.json" ] ||
    "$gen_queries" \
        --output "q-$datasize" --variant "$variant" --syntax yql --dataset-size "$datasize" \
        --pragma dq.MaxTasksPerStage=1 \
        --pragma dq.ComputeActorType="async" \
        --pragma config.flags=LLVM_OFF \
        $xpragma

# Query set for the spilling run.
# Not enabled here: --pragma dq.UseOOBTransport=true
[ -f "qs-$datasize/$variant/bindings.json" ] ||
    "$gen_queries" \
        --output "qs-$datasize" --variant "$variant" --syntax yql --dataset-size "$datasize" \
        --pragma dq.MaxTasksPerStage=1 \
        --pragma config.flags=LLVM_OFF \
        --pragma dq.ComputeActorType="async" \
        --pragma dq.UseFinalizeByKey=true \
        $xpragma

outdir=results-$(date -u +%Y%m%dT%H%M%S)-$datasize

# run_suite QUERYSET [DQRUN_OPTION...]
# Run the queries of QUERYSET/$variant through the runner with dqrun as the
# command; extra dqrun options are inserted right after -s. Results go to
# $outdir.
run_suite() {
    queryset=$1
    shift
    command time "$script_path/runner/runner" \
        "$queryset/$variant" "$queryset/$variant/bindings.json" "$outdir" \
        "$dqrun_dir/dqrun" -s "$@" \
        --fs-cfg "$dqrun_dir/examples/fs.conf" \
        --gateways-cfg "$script_path/runner/test-gateways.conf" \
        --udfs-dir "$ydb_path/ydb/library/yql/udfs/common/"
}

# The LLVM run is switched off; flip the condition to enable it.
if false; then
    echo LLVM &&
        run_suite "ql-$datasize"
fi
echo NO LLVM &&
    run_suite "q-$datasize"
echo Spilling &&
    run_suite "qs-$datasize" --enable-spilling
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
#!/usr/bin/env python3
2+
import os
3+
import sys
4+
from pathlib import Path
5+
import re
6+
import datetime
7+
8+
from time import monotonic_ns


def time_ns():
    """Return a monotonic timestamp in integer nanoseconds.

    Only differences between two calls are meaningful. ``monotonic_ns`` is
    used so the measurement never falls back to the wall clock, which can
    jump when the system time is adjusted.
    """
    return monotonic_ns()


# The capturing group makes re.split() keep the digit runs in its result,
# which main() relies on to sort query files in natural (numeric) order.
RE_DIGITS = re.compile(r'([0-9]+)')
20+
21+
22+
def run(argv, out, err):
    """Run a command and measure it.

    The program ``argv[0]`` is spawned with argument vector *argv* and an
    empty environment. Its stdout is written to the file *out* and its stderr
    to the file *err*; both files are created if missing and truncated if
    they already exist, so a re-run never leaves stale bytes behind.

    Returns:
        ``(exitcode, rusage, elapsed)`` where *exitcode* is the value of
        ``os.waitstatus_to_exitcode`` (negative signal number if the child
        was killed by a signal), *rusage* is the resource usage reported by
        ``os.wait4`` and *elapsed* is the run time in nanoseconds.
    """
    open_flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
    file_actions = (
        (os.POSIX_SPAWN_OPEN, 1, out, open_flags, 0o666),
        (os.POSIX_SPAWN_OPEN, 2, err, open_flags, 0o666),
    )
    start_time = time_ns()
    pid = os.posix_spawn(argv[0], argv, {}, file_actions=file_actions)
    _, status, rusage = os.wait4(pid, 0)
    elapsed = time_ns() - start_time
    return os.waitstatus_to_exitcode(status), rusage, elapsed
34+
35+
36+
def _natural_key(path):
    """Sort key ordering digit runs in *path* numerically (q2 before q10)."""
    parts = re.split(RE_DIGITS, str(path))
    # RE_DIGITS has one capturing group, so odd positions are the digit runs.
    return tuple(int(part) if index % 2 else part for index, part in enumerate(parts))


def main():
    """Run every ``*.sql`` query of a directory and record per-query statistics.

    Command line: ``querydir bindings outdir command [args...]``. An empty
    string for one of the first three arguments selects its default
    (``q/scalar``, ``bindings.json``, ``result-<timestamp>``).

    For each query the command is run with result/plan/error/expression
    files placed under ``outdir``, and one tab-separated row (query name, CPU
    times, max RSS, exit code, elapsed nanoseconds, page-fault, block I/O
    and context-switch counters) is appended to
    ``outdir/querydir/summary.tsv``. The first line of that file is the
    command line.
    """
    if len(sys.argv) < 5:
        print('Usage: {} querydir bindings outdir command [args...]'.format(sys.argv[0]),
              file=sys.stderr)
        sys.exit(1)

    qdir = sys.argv[1] or 'q/scalar'
    bindings = sys.argv[2] or 'bindings.json'
    outdir = sys.argv[3] or "result-{:%Y%m%dT%H%M%S}".format(datetime.datetime.now())
    argv = sys.argv[4:]
    querydir = Path(qdir)
    os.makedirs(outdir + '/' + qdir, exist_ok=True)
    with open(outdir + '/' + qdir + "/summary.tsv", "w") as outf:
        print(' '.join(argv + ['-p', qdir, '--bindings-file', bindings]), file=outf)
        for query in sorted(querydir.glob('**/*.sql'), key=_natural_key):
            q = str(query)
            print(q, end='\t', file=outf)
            name = outdir + '/' + q
            # The glob is recursive, so the query may live in a subdirectory
            # that does not exist under outdir yet.
            os.makedirs(os.path.dirname(name), exist_ok=True)
            outname = name + '-result.yson'
            exitcode, rusage, elapsed = run(
                argv + [
                    '--result-file', outname,
                    '--bindings-file', bindings,
                    '--plan-file', name + '-plan.yson',
                    '--err-file', name + '-err.txt',
                    '--expr-file', name + '-expr.txt',
                    '-p', q
                ],
                name + '-stdout.txt',
                name + '-stderr.txt')
            # Column order is part of the file format read by the report
            # tools; every field, including the last, is followed by a tab.
            fields = (
                rusage.ru_utime,
                rusage.ru_stime,
                rusage.ru_maxrss,
                exitcode,
                elapsed,
                rusage.ru_minflt,
                rusage.ru_majflt,
                rusage.ru_inblock,
                rusage.ru_oublock,
                rusage.ru_nvcsw,
                rusage.ru_nivcsw,
            )
            print(*fields, sep='\t', end='\t\n', file=outf)
            # Flush per query so the summary is usable if the run is aborted.
            outf.flush()


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)