Skip to content

Commit d13d1ed

Browse files
authored
Add validation to check if time series data streams have dimensions (#655)
Time series data streams without dimensions are rejected by Elasticsearch; detect the issue earlier, during package validation.
1 parent 6a1d395 commit d13d1ed

File tree

15 files changed

+363
-29
lines changed

15 files changed

+363
-29
lines changed
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
2+
// or more contributor license agreements. Licensed under the Elastic License;
3+
// you may not use this file except in compliance with the Elastic License.
4+
5+
package semantic
6+
7+
import (
8+
"fmt"
9+
"io/fs"
10+
"path"
11+
12+
"github.com/elastic/package-spec/v3/code/go/internal/fspath"
13+
"github.com/elastic/package-spec/v3/code/go/pkg/specerrors"
14+
"gopkg.in/yaml.v3"
15+
)
16+
17+
// ValidateDimensionsPresent verifies that every data stream whose manifest
// enables time series index mode has at least one field marked as a dimension.
//
// It returns the errors reported by validateFields if there are any; otherwise
// it returns one validation error per time series data stream for which no
// dimension field was seen. A failure to list the data streams or to read a
// data stream manifest is returned as a single error with UnassignedCode.
18+
func ValidateDimensionsPresent(fsys fspath.FS) specerrors.ValidationErrors {
19+
// Set of data stream names for which at least one dimension field was seen.
dimensionPresent := make(map[string]struct{})
20+
// The callback never reports an error itself; it only records which data
// streams declare a dimension.
// NOTE(review): validateFields is defined elsewhere; presumably it invokes
// the callback for each field definition in the package — confirm.
errs := validateFields(fsys, func(metadata fieldFileMetadata, f field) specerrors.ValidationErrors {
21+
if f.Dimension {
22+
dimensionPresent[metadata.dataStream] = struct{}{}
23+
}
24+
return nil
25+
})
26+
// Errors from validateFields are returned as-is, without running the
// per-data-stream check below.
if len(errs) > 0 {
27+
return errs
28+
}
29+
30+
dataStreams, err := listDataStreams(fsys)
31+
if err != nil {
32+
return specerrors.ValidationErrors{specerrors.NewStructuredError(err, specerrors.UnassignedCode)}
33+
}
34+
for _, dataStream := range dataStreams {
35+
tsEnabled, err := isTimeSeriesModeEnabled(fsys, dataStream)
36+
if err != nil {
37+
// NOTE(review): this returns only the manifest error; any
// missing-dimension errors already appended to errs are dropped.
return specerrors.ValidationErrors{specerrors.NewStructuredError(err, specerrors.UnassignedCode)}
38+
}
39+
_, hasDimensions := dimensionPresent[dataStream]
40+
// Only data streams in time series mode are required to have dimensions.
if tsEnabled && !hasDimensions {
41+
errs = append(errs, specerrors.NewStructuredErrorf(
42+
`file "%s" is invalid: time series mode enabled but no dimensions configured`,
43+
fsys.Path("data_stream", dataStream, "manifest.yml"),
44+
))
45+
}
46+
}
47+
return errs
48+
}
49+
50+
// isTimeSeriesModeEnabled reports whether the manifest of the given data
// stream sets elasticsearch.index_mode to "time_series".
//
// The manifest is read from data_stream/<dataStream>/manifest.yml inside fsys.
// It returns false together with a wrapped error if the manifest cannot be
// read or cannot be parsed as YAML.
func isTimeSeriesModeEnabled(fsys fspath.FS, dataStream string) (bool, error) {
51+
// path.Join (not filepath.Join) is used because the path is passed to fs.ReadFile.
manifestPath := path.Join("data_stream", dataStream, "manifest.yml")
52+
d, err := fs.ReadFile(fsys, manifestPath)
53+
if err != nil {
54+
return false, fmt.Errorf("failed to read data stream manifest in %q: %w", fsys.Path(manifestPath), err)
55+
}
56+
57+
// Only the index mode is decoded; every other manifest key is ignored.
var manifest struct {
58+
Elasticsearch struct {
59+
IndexMode string `yaml:"index_mode"`
60+
} `yaml:"elasticsearch"`
61+
}
62+
err = yaml.Unmarshal(d, &manifest)
63+
if err != nil {
64+
return false, fmt.Errorf("failed to parse data stream manifest in %q: %w", fsys.Path(manifestPath), err)
65+
}
66+
67+
// IndexMode starts as the empty string, so any value other than
// "time_series" yields false.
return manifest.Elasticsearch.IndexMode == "time_series", nil
68+
}

code/go/internal/validator/spec.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ func (s Spec) rules(pkgType string, rootSpec spectypes.ItemSpec) validationRules
163163
{fn: semantic.ValidateKibanaNoDanglingObjectIDs, since: semver.MustParse("3.0.0")},
164164
{fn: semantic.ValidateKibanaFilterPresent, since: semver.MustParse("3.0.0")},
165165
{fn: semantic.ValidateKibanaNoLegacyVisualizations, types: []string{"integration"}, since: semver.MustParse("3.0.0")},
166+
{fn: semantic.ValidateDimensionsPresent, types: []string{"integration"}, since: semver.MustParse("3.0.1")},
166167
}
167168

168169
var validationRules validationRules

code/go/pkg/validator/validator_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,12 @@ func TestValidateFile(t *testing.T) {
240240
"\"Dashboard with mixed by-value visualizations\" contains legacy visualization: \"Timelion time series\" (timelion, Timelion)",
241241
},
242242
},
243+
"bad_time_series_missing_dimensions": {
244+
"data_stream/missing_dimension/manifest.yml",
245+
[]string{
246+
"time series mode enabled but no dimensions configured",
247+
},
248+
},
243249
}
244250

245251
filter := specerrors.NewFilter(&specerrors.ConfigFilter{

spec/changelog.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
- description: 'Allow to set index: false in dynamic templates defined in data stream manifests'
1111
type: enhancement
1212
link: https://github.com/elastic/package-spec/issues/650
13+
- description: Validate that data streams with time series enabled have configured dimensions
14+
type: enhancement
15+
link: https://github.com/elastic/package-spec/issues/655
1316
- version: 3.0.0
1417
changes:
1518
- description: Validate processors used in ingest pipelines
Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,32 @@
11
- name: example
22
type: group
33
fields:
4-
- name: agent.id
5-
type: keyword
6-
dimension: true
7-
- name: agent.call_count
8-
type: long
9-
metric_type: counter
10-
- name: agent.current_count
11-
type: long
12-
metric_type: gauge
13-
- name: agent.call_duration
14-
type: histogram
15-
metric_type: gauge
16-
dimension: true # This should fail, a histogram cannot be a dimension.
17-
- name: no_valid_type
18-
type: boolean
19-
metric_type: gauge
20-
- name: no_type
21-
metric_type: gauge
22-
- name: field_object_type_double
23-
type: double
24-
object_type: double
25-
metric_type: gauge
26-
- name: field_object_type_boolean
27-
type: object
28-
object_type: boolean
29-
metric_type: gauge
30-
- name: field_object
31-
type: object
32-
metric_type: gauge
4+
- name: agent.id
5+
type: keyword
6+
dimension: true
7+
- name: agent.call_count
8+
type: long
9+
metric_type: counter
10+
- name: agent.current_count
11+
type: long
12+
metric_type: gauge
13+
- name: agent.call_duration
14+
type: histogram
15+
metric_type: gauge
16+
dimension: true # This should fail, a histogram cannot be a dimension.
17+
- name: no_valid_type
18+
type: boolean
19+
metric_type: gauge
20+
- name: no_type
21+
metric_type: gauge
22+
- name: field_object_type_double
23+
type: double
24+
object_type: double
25+
metric_type: gauge
26+
- name: field_object_type_boolean
27+
type: object
28+
object_type: boolean
29+
metric_type: gauge
30+
- name: field_object
31+
type: object
32+
metric_type: gauge

test/packages/bad_time_series/data_stream/example/manifest.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,7 @@ streams:
99
type: text
1010
title: Period
1111
default: 10s
12+
13+
elasticsearch:
14+
source_mode: synthetic
15+
index_mode: time_series
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
Elastic License 2.0
2+
3+
URL: https://www.elastic.co/licensing/elastic-license
4+
5+
## Acceptance
6+
7+
By using the software, you agree to all of the terms and conditions below.
8+
9+
## Copyright License
10+
11+
The licensor grants you a non-exclusive, royalty-free, worldwide,
12+
non-sublicensable, non-transferable license to use, copy, distribute, make
13+
available, and prepare derivative works of the software, in each case subject to
14+
the limitations and conditions below.
15+
16+
## Limitations
17+
18+
You may not provide the software to third parties as a hosted or managed
19+
service, where the service provides users with access to any substantial set of
20+
the features or functionality of the software.
21+
22+
You may not move, change, disable, or circumvent the license key functionality
23+
in the software, and you may not remove or obscure any functionality in the
24+
software that is protected by the license key.
25+
26+
You may not alter, remove, or obscure any licensing, copyright, or other notices
27+
of the licensor in the software. Any use of the licensor’s trademarks is subject
28+
to applicable law.
29+
30+
## Patents
31+
32+
The licensor grants you a license, under any patent claims the licensor can
33+
license, or becomes able to license, to make, have made, use, sell, offer for
34+
sale, import and have imported the software, in each case subject to the
35+
limitations and conditions in this license. This license does not cover any
36+
patent claims that you cause to be infringed by modifications or additions to
37+
the software. If you or your company make any written claim that the software
38+
infringes or contributes to infringement of any patent, your patent license for
39+
the software granted under these terms ends immediately. If your company makes
40+
such a claim, your patent license ends immediately for work on behalf of your
41+
company.
42+
43+
## Notices
44+
45+
You must ensure that anyone who gets a copy of any part of the software from you
46+
also gets a copy of these terms.
47+
48+
If you modify the software, you must include in any modified copies of the
49+
software prominent notices stating that you have modified the software.
50+
51+
## No Other Rights
52+
53+
These terms do not imply any licenses other than those expressly granted in
54+
these terms.
55+
56+
## Termination
57+
58+
If you use the software in violation of these terms, such use is not licensed,
59+
and your licenses will automatically terminate. If the licensor provides you
60+
with a notice of your violation, and you cease all violation of this license no
61+
later than 30 days after you receive that notice, your licenses will be
62+
reinstated retroactively. However, if you violate these terms after such
63+
reinstatement, any additional violation of these terms will cause your licenses
64+
to terminate automatically and permanently.
65+
66+
## No Liability
67+
68+
*As far as the law allows, the software comes as is, without any warranty or
69+
condition, and the licensor will not be liable to you for any damages arising
70+
out of these terms or the use or nature of the software, under any kind of
71+
legal claim.*
72+
73+
## Definitions
74+
75+
The **licensor** is the entity offering these terms, and the **software** is the
76+
software the licensor makes available under these terms, including any portion
77+
of it.
78+
79+
**you** refers to the individual or entity agreeing to these terms.
80+
81+
**your company** is any legal entity, sole proprietorship, or other kind of
82+
organization that you work for, plus all organizations that have control over,
83+
are under the control of, or are under common control with that
84+
organization. **control** means ownership of substantially all the assets of an
85+
entity, or the power to direct its management and policies by vote, contract, or
86+
otherwise. Control can be direct or indirect.
87+
88+
**your licenses** are all the licenses granted to you for the software under
89+
these terms.
90+
91+
**use** means anything you do with the software requiring one of your licenses.
92+
93+
**trademark** means trademarks, service marks, and similar rights.
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# newer versions go on top
2+
- version: "0.1.0"
3+
changes:
4+
- description: Initial draft of the package
5+
type: enhancement
6+
link: https://github.com/elastic/integrations/pull/1 # FIXME Replace with the real PR link
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
metricsets: ["sample_metricset"]
2+
hosts:
3+
{{#each hosts}}
4+
- {{this}}
5+
{{/each}}
6+
period: {{period}}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
- name: data_stream.type
2+
type: constant_keyword
3+
description: Data stream type.
4+
- name: data_stream.dataset
5+
type: constant_keyword
6+
description: Data stream dataset.
7+
- name: data_stream.namespace
8+
type: constant_keyword
9+
description: Data stream namespace.
10+
- name: '@timestamp'
11+
type: date
12+
description: Event timestamp.
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
title: "Data stream with time series but no dimensions."
2+
type: metrics
3+
streams:
4+
- input: sample/metrics
5+
title: Sample metrics
6+
description: Collect sample metrics
7+
vars:
8+
- name: period
9+
type: text
10+
title: Period
11+
default: 10s
12+
elasticsearch:
13+
source_mode: synthetic
14+
index_mode: time_series
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
<!-- Use this template language as a starting point, replacing {placeholder text} with details about the integration. -->
2+
<!-- Find more detailed documentation guidelines in https://github.com/elastic/integrations/blob/main/docs/documentation_guidelines.md -->
3+
4+
# Missing Dimensions
5+
6+
<!-- The Missing Dimensions integration allows you to monitor {name of service}. {name of service} is {describe service}.
7+
8+
Use the Missing Dimensions integration to {purpose}. Then visualize that data in Kibana, create alerts to notify you if something goes wrong, and reference {data stream type} when troubleshooting an issue.
9+
10+
For example, if you wanted to {sample use case} you could {action}. Then you can {visualize|alert|troubleshoot} by {action}. -->
11+
12+
## Data streams
13+
14+
<!-- The Missing Dimensions integration collects {one|two} type{s} of data streams: {logs and/or metrics}. -->
15+
16+
<!-- If applicable -->
17+
<!-- **Logs** help you keep a record of events happening in {service}.
18+
Log data streams collected by the {name} integration include {sample data stream(s)} and more. See more details in the [Logs](#logs-reference). -->
19+
20+
<!-- If applicable -->
21+
<!-- **Metrics** give you insight into the state of {service}.
22+
Metric data streams collected by the {name} integration include {sample data stream(s)} and more. See more details in the [Metrics](#metrics-reference). -->
23+
24+
<!-- Optional: Any additional notes on data streams -->
25+
26+
## Requirements
27+
28+
You need Elasticsearch for storing and searching your data and Kibana for visualizing and managing it.
29+
You can use our hosted Elasticsearch Service on Elastic Cloud, which is recommended, or self-manage the Elastic Stack on your own hardware.
30+
31+
<!--
32+
Optional: Other requirements including:
33+
* System compatibility
34+
* Supported versions of third-party products
35+
* Permissions needed
36+
* Anything else that could block a user from successfully using the integration
37+
-->
38+
39+
## Setup
40+
41+
<!-- Any prerequisite instructions -->
42+
43+
For step-by-step instructions on how to set up an integration, see the
44+
[Getting started](https://www.elastic.co/guide/en/welcome-to-elastic/current/getting-started-observability.html) guide.
45+
46+
<!-- Additional set up instructions -->
47+
48+
<!-- If applicable -->
49+
<!-- ## Logs reference -->
50+
51+
<!-- Repeat for each data stream of the current type -->
52+
<!-- ### {Data stream name}
53+
54+
The `{data stream name}` data stream provides events from {source} of the following types: {list types}. -->
55+
56+
<!-- Optional -->
57+
<!-- #### Example
58+
59+
An example event for `{data stream name}` looks as follows:
60+
61+
{code block with example} -->
62+
63+
<!-- #### Exported fields
64+
65+
{insert table} -->
66+
67+
<!-- If applicable -->
68+
<!-- ## Metrics reference -->
69+
70+
<!-- Repeat for each data stream of the current type -->
71+
<!-- ### {Data stream name}
72+
73+
The `{data stream name}` data stream provides events from {source} of the following types: {list types}. -->
74+
75+
<!-- Optional -->
76+
<!-- #### Example
77+
78+
An example event for `{data stream name}` looks as follows:
79+
80+
{code block with example} -->
81+
82+
<!-- #### Exported fields
83+
84+
{insert table} -->
Lines changed: 1 addition & 0 deletions
Loading
Loading

0 commit comments

Comments
 (0)