Skip to content

Commit b2251c5

Browse files
author
Andrea Spacca
authored
Remove benchmark generate corpus command (#1553)
* remove benchmark generate corpus command * minor fixes in benchmark rally * make check-static * fix docs for benchmark rally command
1 parent 2c241f1 commit b2251c5

File tree

13 files changed

+79
-576
lines changed

13 files changed

+79
-576
lines changed

README.md

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -165,16 +165,6 @@ These benchmarks allow you to benchmark an integration end to end.
165165

166166
For details on how to configure system benchmarks for a package, review the [HOWTO guide](./docs/howto/system_benchmarking.md).
167167

168-
### `elastic-package benchmark generate-corpus`
169-
170-
_Context: package_
171-
172-
173-
*BEWARE*: this command is in beta and its behaviour may change in the future.
174-
Use this command to generate benchmarks corpus data for a package.
175-
Currently, only data for what we have related assets on https://github.com/elastic/elastic-integration-corpus-generator-tool are supported.
176-
For details on how to run this command, review the [HOWTO guide](./docs/howto/generate_corpus.md).
177-
178168
### `elastic-package benchmark pipeline`
179169

180170
_Context: package_

cmd/benchmark.go

Lines changed: 0 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,6 @@ import (
1212
"strings"
1313
"time"
1414

15-
"github.com/dustin/go-humanize"
16-
17-
"github.com/elastic/elastic-package/internal/corpusgenerator"
1815
"github.com/elastic/elastic-package/internal/elasticsearch"
1916
"github.com/elastic/elastic-package/internal/install"
2017
"github.com/elastic/elastic-package/internal/logger"
@@ -36,12 +33,6 @@ import (
3633
"github.com/elastic/elastic-package/internal/testrunner"
3734
)
3835

39-
const generateLongDescription = `
40-
*BEWARE*: this command is in beta and it's behaviour may change in the future.
41-
Use this command to generate benchmarks corpus data for a package.
42-
Currently, only data for what we have related assets on https://github.com/elastic/elastic-integration-corpus-generator-tool are supported.
43-
For details on how to run this command, review the [HOWTO guide](./docs/howto/generate_corpus.md).`
44-
4536
const benchLongDescription = `Use this command to run benchmarks on a package. Currently, the following types of benchmarks are available:
4637
4738
#### Pipeline Benchmarks
@@ -80,9 +71,6 @@ func setupBenchmarkCommand() *cobraext.Command {
8071
systemCmd := getSystemCommand()
8172
cmd.AddCommand(systemCmd)
8273

83-
generateCorpusCmd := getGenerateCorpusCommand()
84-
cmd.AddCommand(generateCorpusCmd)
85-
8674
return cobraext.NewCommand(cmd, cobraext.ContextPackage)
8775
}
8876

@@ -257,16 +245,6 @@ func rallyCommandAction(cmd *cobra.Command, args []string) error {
257245
return cobraext.FlagParsingError(err, cobraext.BenchNameFlagName)
258246
}
259247

260-
deferCleanup, err := cmd.Flags().GetDuration(cobraext.DeferCleanupFlagName)
261-
if err != nil {
262-
return cobraext.FlagParsingError(err, cobraext.DeferCleanupFlagName)
263-
}
264-
265-
metricsInterval, err := cmd.Flags().GetDuration(cobraext.BenchMetricsIntervalFlagName)
266-
if err != nil {
267-
return cobraext.FlagParsingError(err, cobraext.BenchMetricsIntervalFlagName)
268-
}
269-
270248
dataReindex, err := cmd.Flags().GetBool(cobraext.BenchReindexToMetricstoreFlagName)
271249
if err != nil {
272250
return cobraext.FlagParsingError(err, cobraext.BenchReindexToMetricstoreFlagName)
@@ -314,8 +292,6 @@ func rallyCommandAction(cmd *cobra.Command, args []string) error {
314292
withOpts := []rally.OptionFunc{
315293
rally.WithVariant(variant),
316294
rally.WithBenchmarkName(benchName),
317-
rally.WithDeferCleanup(deferCleanup),
318-
rally.WithMetricsInterval(metricsInterval),
319295
rally.WithDataReindexing(dataReindex),
320296
rally.WithPackageRootPath(packageRootPath),
321297
rally.WithESAPI(esClient.API),
@@ -496,75 +472,6 @@ func systemCommandAction(cmd *cobra.Command, args []string) error {
496472
return nil
497473
}
498474

499-
func getGenerateCorpusCommand() *cobra.Command {
500-
generateCorpusCmd := &cobra.Command{
501-
Use: "generate-corpus",
502-
Short: "Generate benchmarks corpus data for the package",
503-
Long: generateLongDescription,
504-
Args: cobra.NoArgs,
505-
RunE: generateDataStreamCorpusCommandAction,
506-
}
507-
508-
generateCorpusCmd.Flags().StringP(cobraext.PackageFlagName, cobraext.PackageFlagShorthand, "", cobraext.PackageFlagDescription)
509-
generateCorpusCmd.Flags().StringP(cobraext.GenerateCorpusDataSetFlagName, cobraext.GenerateCorpusDataSetFlagShorthand, "", cobraext.GenerateCorpusDataSetFlagDescription)
510-
generateCorpusCmd.Flags().StringP(cobraext.GenerateCorpusSizeFlagName, cobraext.GenerateCorpusSizeFlagShorthand, "", cobraext.GenerateCorpusSizeFlagDescription)
511-
generateCorpusCmd.Flags().StringP(cobraext.GenerateCorpusCommitFlagName, cobraext.GenerateCorpusCommitFlagShorthand, "main", cobraext.GenerateCorpusCommitFlagDescription)
512-
generateCorpusCmd.Flags().StringP(cobraext.GenerateCorpusRallyTrackOutputDirFlagName, cobraext.GenerateCorpusRallyTrackOutputDirFlagShorthand, "", cobraext.GenerateCorpusRallyTrackOutputDirFlagDescription)
513-
514-
return generateCorpusCmd
515-
}
516-
517-
func generateDataStreamCorpusCommandAction(cmd *cobra.Command, _ []string) error {
518-
packageName, err := cmd.Flags().GetString(cobraext.PackageFlagName)
519-
if err != nil {
520-
return cobraext.FlagParsingError(err, cobraext.PackageFlagName)
521-
}
522-
523-
dataSetName, err := cmd.Flags().GetString(cobraext.GenerateCorpusDataSetFlagName)
524-
if err != nil {
525-
return cobraext.FlagParsingError(err, cobraext.GenerateCorpusDataSetFlagName)
526-
}
527-
528-
totSize, err := cmd.Flags().GetString(cobraext.GenerateCorpusSizeFlagName)
529-
if err != nil {
530-
return cobraext.FlagParsingError(err, cobraext.GenerateCorpusSizeFlagName)
531-
}
532-
533-
totSizeInBytes, err := humanize.ParseBytes(totSize)
534-
if err != nil {
535-
return cobraext.FlagParsingError(err, cobraext.GenerateCorpusSizeFlagName)
536-
}
537-
538-
commit, err := cmd.Flags().GetString(cobraext.GenerateCorpusCommitFlagName)
539-
if err != nil {
540-
return cobraext.FlagParsingError(err, cobraext.GenerateCorpusCommitFlagName)
541-
}
542-
543-
if len(commit) == 0 {
544-
commit = "main"
545-
}
546-
547-
rallyTrackOutputDir, err := cmd.Flags().GetString(cobraext.GenerateCorpusRallyTrackOutputDirFlagName)
548-
if err != nil {
549-
return cobraext.FlagParsingError(err, cobraext.GenerateCorpusRallyTrackOutputDirFlagName)
550-
}
551-
552-
genLibClient := corpusgenerator.NewClient(commit)
553-
generator, err := corpusgenerator.NewGenerator(genLibClient, packageName, dataSetName, totSizeInBytes)
554-
if err != nil {
555-
return fmt.Errorf("can't generate benchmarks data corpus for data stream: %w", err)
556-
}
557-
558-
// TODO: we need a way to extract the type from the package and dataset, currently hardcode to `metrics`
559-
dataStream := fmt.Sprintf("metrics-%s.%s-default", packageName, dataSetName)
560-
err = corpusgenerator.RunGenerator(generator, dataStream, rallyTrackOutputDir)
561-
if err != nil {
562-
return fmt.Errorf("can't generate benchmarks data corpus for data stream: %w", err)
563-
}
564-
565-
return nil
566-
}
567-
568475
func initializeESMetricsClient(ctx context.Context) (*elasticsearch.Client, error) {
569476
address := os.Getenv(benchcommon.ESMetricstoreHostEnv)
570477
user := os.Getenv(benchcommon.ESMetricstoreUsernameEnv)

docs/howto/generate_corpus.md

Lines changed: 0 additions & 41 deletions
This file was deleted.

docs/howto/rally_benchmarking.md

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# HOWTO: Writing system benchmarks for a package
1+
# HOWTO: Writing rally benchmarks for a package
22

33
## Introduction
44
Elastic Packages are comprised of data streams. A rally benchmark runs an `esrally` track with a corpus of data into an Elasticsearch data stream, and reports rally stats as well as retrieving performance metrics from the Elasticsearch nodes.
@@ -10,11 +10,11 @@ Conceptually, running a rally benchmark involves the following steps:
1010
1. Deploy the Elastic Stack, including Elasticsearch, Kibana, and the Elastic Agent(s). This step takes time so it should typically be done once as a pre-requisite to running a rally benchmark scenario.
1111
1. Install a package that configures its assets for every data stream in the package.
1212
1. Metrics collection from the cluster starts. (**TODO**: record metrics from all Elastic Agents involved using the `system` integration.)
13-
1. Send the collected metrics to the ES Metricstore if set.
1413
1. Generate data (it uses the [corpus-generator-tool](https://github.com/elastic/elastic-integration-corpus-generator-tool))
1514
1. Run an `esrally` track with the corpus of generated data. `esrally` must be installed on the system where the `elastic-package` is run and available in the `PATH`.
1615
1. Wait for the `esrally` track to be executed.
1716
1. Metrics collection ends and a summary report is created.
17+
1. Send the collected metrics to the ES Metricstore if set.
1818
1. Delete test artifacts.
1919
1. Optionally reindex all ingested data into the ES Metricstore for further analysis.
2020
1. **TODO**: Optionally compare results against another benchmark run.
@@ -60,7 +60,6 @@ Example:
6060
description: Benchmark 20000 events ingested
6161
data_stream:
6262
name: testds
63-
warmup_time_period: 10s
6463
corpora:
6564
generator:
6665
total_events: 900000
@@ -275,7 +274,7 @@ In the directory of the `rally-track-output-dir` flag two files are saved:
275274
Both files are required to replay the rally benchmark. The first file references the second in its content.
276275
The command to run for replaying the track is the following:
277276
```shell
278-
rally --target-hosts='{"default":["%es_cluster_host:es_cluster_port%"]}' --track-path=%path/to/saved-track-json% --client-options='{"default":{"basic_auth_user":"%es_user%","basic_auth_password":"%es_user%","use_ssl":true,"verify_certs":false}}' --pipeline=benchmark-only
277+
esrally --target-hosts='{"default":["%es_cluster_host:es_cluster_port%"]}' --track-path=%path/to/saved-track-json% --client-options='{"default":{"basic_auth_user":"%es_user%","basic_auth_password":"%es_user%","use_ssl":true,"verify_certs":false}}' --pipeline=benchmark-only
279278
```
280279

281280
Please refer to [esrally CLI reference](https://esrally.readthedocs.io/en/stable/command_line_reference.html) for more details.

internal/benchrunner/runners/rally/options.go

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -67,18 +67,6 @@ func WithBenchmarkName(name string) OptionFunc {
6767
}
6868
}
6969

70-
func WithDeferCleanup(d time.Duration) OptionFunc {
71-
return func(opts *Options) {
72-
opts.DeferCleanup = d
73-
}
74-
}
75-
76-
func WithMetricsInterval(d time.Duration) OptionFunc {
77-
return func(opts *Options) {
78-
opts.MetricsInterval = d
79-
}
80-
}
81-
8270
func WithDataReindexing(b bool) OptionFunc {
8371
return func(opts *Options) {
8472
opts.ReindexData = b

internal/benchrunner/runners/rally/runner.go

Lines changed: 69 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,13 @@ import (
1717
"os/exec"
1818
"path/filepath"
1919
"strings"
20+
"text/template"
2021
"time"
2122

2223
"github.com/elastic/elastic-package/internal/packages/installer"
2324

2425
"github.com/magefile/mage/sh"
2526

26-
"github.com/elastic/elastic-package/internal/corpusgenerator"
2727
"github.com/elastic/elastic-package/internal/stack"
2828

2929
"github.com/google/uuid"
@@ -51,6 +51,42 @@ const (
5151

5252
// BenchType defining rally benchmark
5353
BenchType benchrunner.Type = "rally"
54+
55+
rallyTrackTemplate = `{% import "rally.helpers" as rally with context %}
56+
{
57+
"version": 2,
58+
"description": "Track for [[.DataStream]]",
59+
"datastream": [
60+
{
61+
"name": "[[.DataStream]]",
62+
"body": "[[.CorpusFilename]]"
63+
}
64+
],
65+
"corpora": [
66+
{
67+
"name": "[[.CorpusFilename]]",
68+
"documents": [
69+
{
70+
"target-data-stream": "[[.DataStream]]",
71+
"source-file": "[[.CorpusFilename]]",
72+
"document-count": [[.CorpusDocsCount]],
73+
"uncompressed-bytes": [[.CorpusSizeInBytes]]
74+
}
75+
]
76+
}
77+
],
78+
"schedule": [
79+
{
80+
"operation": {
81+
"operation-type": "bulk",
82+
"bulk-size": {{bulk_size | default(5000)}},
83+
"ingest-percentage": {{ingest_percentage | default(100)}}
84+
},
85+
"clients": {{bulk_indexing_clients | default(8)}}
86+
}
87+
]
88+
}
89+
`
5490
)
5591

5692
var ErrDryRun = errors.New("dry run: rally benchmark not executed")
@@ -524,7 +560,7 @@ func (r *runner) runGenerator(destDir string) error {
524560
return fmt.Errorf("cannot not create rally track file: %w", err)
525561
}
526562
r.trackFile = trackFile.Name()
527-
rallyTrackContent, err := corpusgenerator.GenerateRallyTrack(r.runtimeDataStream, corpusFile, corpusDocsCount)
563+
rallyTrackContent, err := generateRallyTrack(r.runtimeDataStream, corpusFile, corpusDocsCount)
528564
if err != nil {
529565
return fmt.Errorf("cannot not generate rally track content: %w", err)
530566
}
@@ -909,3 +945,34 @@ func createRunID() string {
909945
func getDataStreamPath(packageRoot, dataStream string) string {
910946
return filepath.Join(packageRoot, "data_stream", dataStream)
911947
}
948+
949+
func generateRallyTrack(dataStream string, corpusFile *os.File, corpusDocsCount uint64) ([]byte, error) {
950+
t := template.New("rallytrack")
951+
952+
parsedTpl, err := t.Delims("[[", "]]").Parse(rallyTrackTemplate)
953+
if err != nil {
954+
return nil, fmt.Errorf("error while parsing rally track template: %w", err)
955+
}
956+
957+
fi, err := corpusFile.Stat()
958+
if err != nil {
959+
return nil, fmt.Errorf("error with stat on rally corpus file: %w", err)
960+
}
961+
962+
corpusSizeInBytes := fi.Size()
963+
964+
buf := new(bytes.Buffer)
965+
templateData := map[string]any{
966+
"DataStream": dataStream,
967+
"CorpusFilename": filepath.Base(corpusFile.Name()),
968+
"CorpusDocsCount": corpusDocsCount,
969+
"CorpusSizeInBytes": corpusSizeInBytes,
970+
}
971+
972+
err = parsedTpl.Execute(buf, templateData)
973+
if err != nil {
974+
return nil, fmt.Errorf("error on parsin on rally track template: %w", err)
975+
}
976+
977+
return buf.Bytes(), nil
978+
}

internal/benchrunner/runners/rally/scenario.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ package rally
77
import (
88
"errors"
99
"fmt"
10-
"os"
1110
"path/filepath"
1211

1312
"github.com/elastic/go-ucfg/yaml"
@@ -56,7 +55,7 @@ func readConfig(path, scenario, packageName, packageVersion string) (*scenario,
5655
configPath := filepath.Join(path, devPath, fmt.Sprintf("%s.yml", scenario))
5756
c := defaultConfig()
5857
cfg, err := yaml.NewConfigWithFile(configPath)
59-
if err != nil && !errors.Is(err, os.ErrNotExist) {
58+
if err != nil {
6059
return nil, fmt.Errorf("can't load benchmark configuration: %s: %w", configPath, err)
6160
}
6261

@@ -69,5 +68,9 @@ func readConfig(path, scenario, packageName, packageVersion string) (*scenario,
6968
c.Package = packageName
7069
c.Version = packageVersion
7170

71+
if c.DataStream.Name == "" {
72+
return nil, errors.New("can't read data stream name from benchmark configuration: empty")
73+
}
74+
7275
return c, nil
7376
}

0 commit comments

Comments
 (0)