Skip to content

Commit 2f00d9f

Browse files
authored
Merge c0db3dd into 4b2a95c
2 parents 4b2a95c + c0db3dd commit 2f00d9f

File tree

6 files changed

+255
-20
lines changed

6 files changed

+255
-20
lines changed
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
import argparse
2+
import subprocess
3+
import pathlib
4+
import os
5+
from sys import stderr
6+
7+
8+
def variant(string):
    """Validate a ``--variant`` value for use as an argparse ``type`` callback.

    Returns ``string`` unchanged when it is ``"h"`` or ``"ds"``; raises
    ``ValueError`` for anything else.
    """
    if string in ("h", "ds"):
        return string
    raise ValueError("variant must be h or ds")
12+
13+
def paths(string):
    """Split a ``;``-separated string into a list of ``pathlib.Path`` objects."""
    return [pathlib.Path(piece) for piece in string.split(";")]
15+
16+
def parse_args(passed=None):
    """Parse the command line of the benchmark driver.

    ``passed`` is handed straight to ``ArgumentParser.parse_args``; with the
    default ``None`` argparse reads ``sys.argv``. Returns the resulting
    ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser()

    # Benchmark shape.
    parser.add_argument('--datasize', type=int, default=1)
    parser.add_argument('--variant', type=variant, default='h')
    parser.add_argument('--tasks', type=int, default=1)

    # Tool and config locations; all default to None when omitted.
    # Registration order is kept because it determines the --help layout.
    location_options = (
        ('--dqrun', pathlib.Path),
        ('--gen-queries', pathlib.Path),
        ('--downloaders-dir', pathlib.Path),
        ('--udfs-dir', paths),  # ';'-separated list of directories
        ('--fs-cfg', pathlib.Path),
        ('--flame-graph', pathlib.Path),
        ('--result-compare', pathlib.Path),
        ('--gateways-cfg', pathlib.Path),
        ('--runner-path', pathlib.Path),
    )
    for flag, converter in location_options:
        parser.add_argument(flag, type=converter)

    # Output and selection.
    parser.add_argument('-o', '--output', default="./results")
    parser.add_argument('--clean-old', action="store_true", default=False)
    parser.add_argument('--query-filter', action="append", default=[])

    return parser.parse_args(passed)
39+
40+
class Runner:
    """Prepare inputs for one benchmark pass and run it.

    A pass is either "with spilling" or "no spilling" (``enable_spilling``).
    Constructing a ``Runner`` has side effects: it may invoke the query
    generator and the data download script, may create a ``./tpc`` symlink,
    and creates the result directory.
    """

    def prepare_queries_dir(self, custom_pragmas):
        """Generate queries into ``self.queries_dir`` via ``args.gen_queries``.

        ``custom_pragmas`` is an iterable of strings; each one is forwarded
        to the generator as a separate ``--pragma`` option.
        """
        print("Preparing queries...", file=stderr)
        self.queries_dir.mkdir(parents=True, exist_ok=True)
        print("queries dir: ", self.queries_dir.resolve(), file=stderr)
        cmd = [str(self.args.gen_queries)]
        cmd += ["--output", f"{self.queries_dir}"]
        cmd += ["--variant", f"{self.args.variant}"]
        cmd += ["--syntax", "yql"]
        cmd += ["--dataset-size", f"{self.args.datasize}"]
        for it in custom_pragmas:
            cmd += ["--pragma", it]
        print(cmd, file=stderr)
        subprocess.run(cmd)

    def prepare_tpc_dir(self):
        """Run the ``download_files_<variant>_<datasize>.sh`` script from ``args.downloaders_dir``."""
        print("Preparing tpc...", file=stderr)
        cmd = [f"{self.args.downloaders_dir}/download_files_{self.args.variant}_{self.args.datasize}.sh"]
        print(cmd, file=stderr)
        subprocess.run(cmd)

    def _pragmas(self):
        """Return the pragmas passed to the query generator for this pass.

        The task limit and LLVM pragmas apply to both passes; the two
        spilling pragmas are appended only when spilling is enabled. (A bare
        ``a + b if cond else []`` would parse as ``(a + b) if cond else []``
        and drop the common pragmas from the non-spilling pass.)
        """
        pragmas = [
            f"dq.MaxTasksPerStage={self.args.tasks}",
            "dq.OptLLVM=ON",
        ]
        if self.enable_spilling:
            pragmas += [
                "dq.UseFinalizeByKey=true",
                "dq.EnableSpillingNodes=All",
            ]
        return pragmas

    def __init__(self, args, enable_spilling):
        """Store the configuration and prepare queries, data and result directory.

        ``args`` is the namespace produced by ``parse_args``;
        ``enable_spilling`` selects which of the two passes this is.
        """
        self.args = args
        self.enable_spilling = enable_spilling

        # Conditional parts are computed outside the f-strings: nesting the
        # same quote character inside an f-string needs Python 3.12+.
        spilling_sign = "+" if self.enable_spilling else "-"
        self.queries_dir = pathlib.Path(
            f"queries{spilling_sign}spilling-{args.datasize}-{args.tasks}"
        ).resolve()
        if self.args.clean_old or not self.queries_dir.exists():
            self.prepare_queries_dir(self._pragmas())

        self.tpc_dir = pathlib.Path(
            f"{self.args.downloaders_dir}/tpc/{self.args.variant}/{self.args.datasize}"
        ).resolve()
        if self.args.clean_old or not self.tpc_dir.exists():
            self.prepare_tpc_dir()
        tpc_link = pathlib.Path("./tpc")
        if not tpc_link.exists():
            os.symlink(f"{self.args.downloaders_dir}/tpc", str(tpc_link), target_is_directory=True)

        spilling_label = "with" if self.enable_spilling else "no"
        self.result_dir = pathlib.Path(
            f"{self.args.output}/{spilling_label}-spilling/{args.variant}-{args.datasize}-{args.tasks}"
        ).resolve()
        self.result_dir.mkdir(parents=True, exist_ok=True)

    def _run_command(self):
        """Build the argv list that ``run`` executes.

        The options up to ``--flame-graph`` are placed before the dqrun
        binary path; everything from the dqrun path onwards follows it.
        """
        queries = f"{str(self.queries_dir)}/{self.args.variant}"
        cmd = ["/usr/bin/time", str(self.args.runner_path)]
        cmd += ["--perf"]
        for it in self.args.query_filter:
            cmd += ["--include-q", it]
        cmd += ["--query-dir", queries]
        cmd += ["--bindings", f"{queries}/bindings.json"]
        cmd += ["--result-dir", str(self.result_dir)]
        # The flame-graph location lives on the parsed args; the instance
        # itself never had a ``flame_graph`` attribute.
        cmd += ["--flame-graph", str(self.args.flame_graph)]
        cmd += [f"{self.args.dqrun}", "-s"]
        if self.enable_spilling:
            cmd += ["--enable-spilling"]
        cmd += ["--udfs-dir", ";".join(map(str, self.args.udfs_dir))]
        cmd += ["--fs-cfg", str(self.args.fs_cfg)]
        cmd += ["--gateways-cfg", str(self.args.gateways_cfg)]
        return cmd

    def run(self):
        """Execute the benchmark runner under ``/usr/bin/time``.

        Returns ``self.result_dir``. The exit status of the child process is
        not checked.
        """
        subprocess.run(self._run_command())

        return self.result_dir
101+
102+
def result_compare(args, to_compare):
    """Run the result-compare tool over ``to_compare`` and save its stdout.

    The tool at ``args.result_compare`` is invoked with ``-v`` followed by the
    entries of ``to_compare``; its standard output is written to
    ``<output>/result-<variant>-<datasize>-<tasks>.htm``.
    """
    cmd = [f"{args.result_compare}", "-v", *to_compare]
    print(cmd, file=stderr)
    report_path = f"{args.output}/result-{args.variant}-{args.datasize}-{args.tasks}.htm"
    with open(report_path, "w") as result_table:
        subprocess.run(cmd, stdout=result_table)
109+
110+
def run(passed=None):
    """Run the benchmark with and then without spilling, and compare results.

    ``passed`` is forwarded to ``parse_args``. The result directories of the
    two passes are handed to ``result_compare`` in that order.
    """
    args = parse_args(passed)

    print(args.query_filter)

    results = []
    passes = (
        ("With spilling...", True),
        ("No spilling...", False),
    )
    for banner, spilling in passes:
        print(banner, file=stderr)
        results.append(Runner(args, spilling).run())

    print(results, file=stderr)

    result_compare(args, results)
124+
125+
def main():
    """Program entry point: run the benchmark using ``sys.argv`` arguments."""
    run()


if __name__ == "__main__":
    main()
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
PY3_PROGRAM()
2+
3+
PY_SRCS(
4+
MAIN run_tests.py
5+
)
6+
7+
PEERDIR(
8+
)
9+
10+
END()

ydb/library/benchmarks/runner/runner/runner.py

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -65,25 +65,25 @@ def run(argv, out, err, timeout=30*60, hard_timeout=5):
6565

6666

6767
def main():
68-
6968
parser = argparse.ArgumentParser()
7069
parser.add_argument('--query-dir', type=str, default='q/scalar')
7170
parser.add_argument('--bindings', type=str, default='bindings.json')
72-
parser.add_argument('--result-dir', type=str, default="result-{:%Y%m%dT%H%M%S}".format(datetime.datetime.now()))
71+
parser.add_argument('--result-dir', type=Path, default="result-{:%Y%m%dT%H%M%S}".format(datetime.datetime.now()))
7372
parser.add_argument('--timeout', type=int, default=30*60)
7473
parser.add_argument('--perf', action='store_true')
75-
parser.add_argument('--arc-path', type=str, default='{}/arcadia'.format(os.environ['HOME']))
74+
parser.add_argument('--flame-graph', type=Path, default=None)
7675
parser.add_argument('--include-q', default=[], action='append')
7776
parser.add_argument('--exclude-q', default=[], action='append')
77+
7878
args, argv = parser.parse_known_intermixed_args()
79+
7980
qdir = args.query_dir
8081
bindings = args.bindings
8182
outdir = args.result_dir
8283
assert len(argv)
8384
querydir = Path(qdir)
84-
os.makedirs(outdir + '/' + qdir, exist_ok=True)
85-
with open(outdir + '/' + qdir + "/summary.tsv", "w") as outf, \
86-
open(outdir + '/' + qdir + "/summary.json", "w") as outj:
85+
with open(outdir / "summary.tsv", "w") as outf, \
86+
open(outdir / "summary.json", "w") as outj:
8787
print(' '.join(argv + ['-p', qdir, '--bindings-file', bindings]), file=outf)
8888
print(json.dumps({
8989
'cmdline': argv,
@@ -92,26 +92,28 @@ def main():
9292
'version': 100
9393
}), file=outj)
9494
for query in sorted(querydir.glob('**/*.sql'), key=lambda x: tuple(map(lambda y: int(y) if re.match(RE_DIGITS, y) else y, re.split(RE_DIGITS, str(x))))):
95-
q = str(query)
96-
name = outdir + '/' + q
95+
q = str(query.stem)
96+
print(f"{q}", end="", flush=True)
97+
name = str(outdir / q)
9798
if len(args.include_q):
9899
include = False
99100
for r in args.include_q:
100-
if re.search(r, name):
101+
if re.search(r, str(query)):
101102
include = True
102103
break
103104
if not include:
104105
continue
105106
if len(args.exclude_q):
106107
include = True
107108
for r in args.exclude_q:
108-
if re.search(r, name):
109+
if re.search(r, str(query)):
109110
include = False
110111
break
111112
if not include:
112113
continue
113114
print(q, end='\t', file=outf)
114115
outname = name + '-result.yson'
116+
print(".", end="", flush=True)
115117
exitcode, rusage, elapsed, iostat = run(
116118
argv + [
117119
'--result-file', outname,
@@ -120,7 +122,7 @@ def main():
120122
'--err-file', name + '-err.txt',
121123
'--expr-file', name + '-expr.txt',
122124
'--stat', name + '-stat.yson',
123-
'-p', q
125+
'-p', str(query)
124126
],
125127
name + '-stdout.txt',
126128
name + '-stderr.txt',
@@ -164,25 +166,27 @@ def main():
164166
}
165167
}), file=outj)
166168
outj.flush()
169+
print(".", end="", flush=True)
167170
if args.perf:
168171
exitcode, rusage, elapsed, iostat = run(
169-
['{}/ya'.format(args.arc_path), 'tool', 'perf', 'record', '-F250', '-g', '--call-graph', 'dwarf', '-o', '{}/perf.data'.format(outdir), '--'] +
172+
['/usr/bin/perf', 'record', '-F250', '-g', '--call-graph', 'dwarf', '-o', '{}/perf.data'.format(outdir), '--'] +
170173
argv + [
171174
'--result-file', '/dev/null',
172175
'--bindings-file', bindings,
173176
'--plan-file', '/dev/null',
174177
'--err-file', '/dev/null',
175178
'--expr-file', '/dev/null',
176-
'-p', q
179+
'-p', str(query)
177180
],
178181
name + '-stdout-perf.txt',
179182
name + '-stderr-perf.txt',
180183
timeout=args.timeout)
181184
os.system('''
182-
{0}/ya tool perf script -i {2}/perf.data --header |
183-
{0}/contrib/tools/flame-graph/stackcollapse-perf.pl |
184-
{0}/contrib/tools/flame-graph/flamegraph.pl > {1}.svg
185-
'''.format(args.arc_path, name, outdir))
185+
perf script -i {2}/perf.data --header |
186+
{0}/stackcollapse-perf.pl |
187+
{0}/flamegraph.pl > {1}.svg
188+
'''.format(args.flame_graph, name, outdir))
189+
print(".", flush=True)
186190

187191

188192
if __name__ == "__main__":

ydb/library/benchmarks/runner/runner/ya.make

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,4 @@ PY_SRCS(
44
MAIN runner.py
55
)
66

7-
PEERDIR(
8-
)
9-
107
END()
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
import run_tests.run_tests as run_tests
2+
import yatest.common
3+
import pathlib
4+
import sys
5+
6+
7+
class TestRunner:
    """Assemble the ``run_tests`` command line from yatest-provided paths.

    ``__init__`` resolves the dependencies, UDF directories and data files
    through ``yatest.common`` and stores the shared options in ``self.cmd``;
    ``wrapped_run`` adds per-run options and calls ``run_tests.run``.
    """

    # Helper class, not a test case: the name starts with "Test" and the
    # class defines __init__, so tell pytest not to try collecting it.
    __test__ = False

    # Option name -> build-output directory of the tool.
    DEPS = {
        "dqrun": "ydb/library/yql/tools/dqrun",
        "gen-queries": "ydb/library/benchmarks/gen_queries",
        "result-compare": "ydb/library/benchmarks/runner/result_compare",
        "runner": "ydb/library/benchmarks/runner/runner",
    }

    # Option name -> path in the source tree.
    DATA = {
        "fs-cfg": "ydb/library/yql/tools/dqrun/examples/fs.conf",
        "gateways-cfg": "ydb/library/benchmarks/runner/runner/test-gateways.conf",
        "flame-graph": "contrib/tools/flame-graph",
    }

    UDFS = [
        "ydb/library/yql/udfs/common/set",
        "ydb/library/yql/udfs/common/url_base",
        "ydb/library/yql/udfs/common/datetime2",
        "ydb/library/yql/udfs/common/re2",
    ]

    # Source-tree directory passed as --downloaders-dir. This replaces a
    # path hard-coded to one developer's checkout of the same directory.
    # NOTE(review): presumably this is where download_files_*.sh live - confirm.
    DOWNLOADERS_DIR = "ydb/library/benchmarks/runner"

    def __init__(self):
        """Resolve all paths and build the option list shared by every run."""
        self.deps = {name: pathlib.Path(yatest.common.binary_path(path)) for name, path in self.DEPS.items()}
        self.udfs = [pathlib.Path(yatest.common.binary_path(path)) for path in self.UDFS]
        self.data = {name: pathlib.Path(yatest.common.source_path(path)) for name, path in self.DATA.items()}
        self.output = pathlib.Path(yatest.common.output_path()).resolve()
        self.results_path = self.output / "results"
        # Tolerate a second instance created within the same output directory.
        self.results_path.mkdir(exist_ok=True)

        self.cmd = []
        self.cmd += ["--dqrun", str(self.deps["dqrun"]) + "/dqrun"]
        self.cmd += ["--gen-queries", str(self.deps["gen-queries"]) + "/gen_queries"]
        self.cmd += ["--result-compare", str(self.deps["result-compare"]) + "/result_compare"]
        self.cmd += ["--downloaders-dir", str(yatest.common.source_path(self.DOWNLOADERS_DIR))]
        # Spell the option in full instead of relying on argparse prefix matching.
        self.cmd += ["--runner-path", str(self.deps["runner"]) + "/runner"]
        self.cmd += ["--flame-graph", str(self.data["flame-graph"])]
        self.cmd += ["--udfs-dir", ";".join(map(str, self.udfs))]
        self.cmd += ["--fs-cfg", str(self.data["fs-cfg"])]
        self.cmd += ["--gateways-cfg", str(self.data["gateways-cfg"])]
        self.cmd += ["-o", str(self.results_path)]

    def _build_cmd(self, variant, datasize, tasks, query_filter):
        """Return a new argument list: the shared options plus per-run ones.

        ``self.cmd`` is left untouched so repeated runs do not accumulate
        options from earlier calls.
        """
        return self.cmd + [
            "--variant", f"{variant}",
            "--datasize", f"{datasize}",
            "--tasks", f"{tasks}",
            "--query-filter", f"{query_filter}",
        ]

    def wrapped_run(self, variant, datasize, tasks, query_filter):
        """Log the full command line to stderr and execute ``run_tests.run`` with it."""
        cmd = self._build_cmd(variant, datasize, tasks, query_filter)
        print(" ".join(cmd), file=sys.stderr)
        run_tests.run(cmd)
57+
58+
59+
def test_tpc():
    """Run variant ``h`` at datasize 1 with one task, filtered to ``q1.sql``."""
    runner = TestRunner()
    runner.wrapped_run(variant="h", datasize=1, tasks=1, query_filter=r"q1\.sql")
    print("results path:", runner.results_path.resolve(), file=sys.stderr)

ydb/library/benchmarks/runner/ya.make

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,37 @@
1+
PY3TEST()
2+
3+
SIZE(MEDIUM)
4+
5+
PY_SRCS(
6+
run_tests/run_tests.py
7+
)
8+
9+
TEST_SRCS(
10+
tpc_tests.py
11+
)
12+
13+
DEPENDS(
14+
ydb/library/yql/tools/dqrun
15+
ydb/library/benchmarks/gen_queries
16+
ydb/library/benchmarks/runner/result_compare
17+
ydb/library/benchmarks/runner/runner
18+
19+
ydb/library/yql/udfs/common/set
20+
ydb/library/yql/udfs/common/url_base
21+
ydb/library/yql/udfs/common/datetime2
22+
ydb/library/yql/udfs/common/re2
23+
)
24+
25+
DATA(
26+
arcadia/ydb/library/yql/tools/dqrun/examples/fs.conf
27+
arcadia/ydb/library/benchmarks/runner/runner/test-gateways.conf
28+
contrib/tools/flame-graph
29+
)
30+
31+
END()
32+
133
RECURSE(
34+
run_tests
235
runner
336
result_convert
437
result_compare

0 commit comments

Comments
 (0)