Skip to content

Commit e93a250

Browse files
authored
Reject recording rules with incompatible fields (#2000)
* Reject recording rules with incompatible fields * Hack validation in place around SDK limitations * Update docs * Fix diff for empty values
1 parent efd0ac2 commit e93a250

File tree

4 files changed

+134
-27
lines changed

4 files changed

+134
-27
lines changed

docs/resources/rule_group.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -132,18 +132,18 @@ EOT
132132

133133
Required:
134134

135-
- `condition` (String) The `ref_id` of the query node in the `data` field to use as the alert condition.
136135
- `data` (Block List, Min: 1) A sequence of stages that describe the contents of the rule. (see [below for nested schema](#nestedblock--rule--data))
137136
- `name` (String) The name of the alert rule.
138137

139138
Optional:
140139

141140
- `annotations` (Map of String) Key-value pairs of metadata to attach to the alert rule. They add additional information, such as a `summary` or `runbook_url`, to help identify and investigate alerts. The `dashboardUId` and `panelId` annotations, which link alerts to a panel, must be set together. Defaults to `map[]`.
142-
- `exec_err_state` (String) Describes what state to enter when the rule's query is invalid and the rule cannot be executed. Options are OK, Error, KeepLast, and Alerting. Defaults to `Alerting`.
141+
- `condition` (String) The `ref_id` of the query node in the `data` field to use as the alert condition.
142+
- `exec_err_state` (String) Describes what state to enter when the rule's query is invalid and the rule cannot be executed. Options are OK, Error, KeepLast, and Alerting. Defaults to Alerting if not set.
143143
- `for` (String) The amount of time for which the rule must be breached for the rule to be considered to be Firing. Before this time has elapsed, the rule is only considered to be Pending. Defaults to `0`.
144144
- `is_paused` (Boolean) Sets whether the alert should be paused or not. Defaults to `false`.
145145
- `labels` (Map of String) Key-value pairs to attach to the alert rule that can be used in matching, grouping, and routing. Defaults to `map[]`.
146-
- `no_data_state` (String) Describes what state to enter when the rule's query returns No Data. Options are OK, NoData, KeepLast, and Alerting. Defaults to `NoData`.
146+
- `no_data_state` (String) Describes what state to enter when the rule's query returns No Data. Options are OK, NoData, KeepLast, and Alerting. Defaults to NoData if not set.
147147
- `notification_settings` (Block List, Max: 1) Notification settings for the rule. If specified, it overrides the notification policies. Available since Grafana 10.4, requires feature flag 'alertingSimplifiedRouting' to be enabled. (see [below for nested schema](#nestedblock--rule--notification_settings))
148148
- `record` (Block List, Max: 1) Settings for a recording rule. Available since Grafana 11.2, requires feature flag 'grafanaManagedRecordingRules' to be enabled. (see [below for nested schema](#nestedblock--rule--record))
149149

internal/resources/grafana/resource_alerting_mute_timing_test.go

+9-9
Original file line numberDiff line numberDiff line change
@@ -118,29 +118,29 @@ func TestAccMuteTiming_RemoveInUse(t *testing.T) {
118118
locals {
119119
use_mute = %t
120120
}
121-
121+
122122
resource "grafana_organization" "my_org" {
123123
name = "mute-timing-test"
124124
}
125-
125+
126126
resource "grafana_contact_point" "default_policy" {
127127
org_id = grafana_organization.my_org.id
128128
name = "default-policy"
129129
email {
130130
addresses = ["[email protected]"]
131131
}
132132
}
133-
133+
134134
resource "grafana_notification_policy" "org_policy" {
135135
org_id = grafana_organization.my_org.id
136136
group_by = ["..."]
137137
group_wait = "45s"
138138
group_interval = "6m"
139139
repeat_interval = "3h"
140140
contact_point = grafana_contact_point.default_policy.name
141-
141+
142142
policy {
143-
mute_timings = local.use_mute ? [grafana_mute_timing.test[0].name] : []
143+
mute_timings = local.use_mute ? [grafana_mute_timing.test[0].name] : []
144144
contact_point = grafana_contact_point.default_policy.name
145145
}
146146
}
@@ -167,7 +167,7 @@ func TestAccMuteTiming_RemoveInUse(t *testing.T) {
167167
}
168168

169169
func TestAccMuteTiming_RemoveInUseInAlertRule(t *testing.T) {
170-
testutils.CheckCloudInstanceTestsEnabled(t) // TODO: Switch to OSS when this is released: https://github.com/grafana/grafana/pull/90500
170+
testutils.CheckOSSTestsEnabled(t, ">=11.2.0")
171171

172172
randomStr := acctest.RandString(6)
173173

@@ -176,11 +176,11 @@ func TestAccMuteTiming_RemoveInUseInAlertRule(t *testing.T) {
176176
locals {
177177
use_mute = %[2]t
178178
}
179-
179+
180180
resource "grafana_folder" "rule_folder" {
181181
title = "%[1]s"
182182
}
183-
183+
184184
resource "grafana_contact_point" "default_policy" {
185185
name = "%[1]s"
186186
email {
@@ -219,7 +219,7 @@ func TestAccMuteTiming_RemoveInUseInAlertRule(t *testing.T) {
219219
}
220220
}
221221
222-
222+
223223
resource "grafana_mute_timing" "test" {
224224
count = local.use_mute ? 1 : 0
225225
name = "%[1]s"

internal/resources/grafana/resource_alerting_rule_group.go

+56-8
Original file line numberDiff line numberDiff line change
@@ -106,18 +106,30 @@ This resource requires Grafana 9.1.0 or later.
106106
"no_data_state": {
107107
Type: schema.TypeString,
108108
Optional: true,
109-
Default: "NoData",
110-
Description: "Describes what state to enter when the rule's query returns No Data. Options are OK, NoData, KeepLast, and Alerting.",
109+
Description: "Describes what state to enter when the rule's query returns No Data. Options are OK, NoData, KeepLast, and Alerting. Defaults to NoData if not set.",
110+
DiffSuppressFunc: func(k, oldValue, newValue string, d *schema.ResourceData) bool {
111+
// We default to this value later in the pipeline, so we need to account for that here.
112+
if newValue == "" {
113+
return oldValue == "NoData"
114+
}
115+
return oldValue == newValue
116+
},
111117
},
112118
"exec_err_state": {
113119
Type: schema.TypeString,
114120
Optional: true,
115-
Default: "Alerting",
116-
Description: "Describes what state to enter when the rule's query is invalid and the rule cannot be executed. Options are OK, Error, KeepLast, and Alerting.",
121+
Description: "Describes what state to enter when the rule's query is invalid and the rule cannot be executed. Options are OK, Error, KeepLast, and Alerting. Defaults to Alerting if not set.",
122+
DiffSuppressFunc: func(k, oldValue, newValue string, d *schema.ResourceData) bool {
123+
// We default to this value later in the pipeline, so we need to account for that here.
124+
if newValue == "" {
125+
return oldValue == "Alerting"
126+
}
127+
return oldValue == newValue
128+
},
117129
},
118130
"condition": {
119131
Type: schema.TypeString,
120-
Required: true,
132+
Optional: true,
121133
Description: "The `ref_id` of the query node in the `data` field to use as the alert condition.",
122134
},
123135
"data": {
@@ -527,17 +539,53 @@ func unpackAlertRule(raw interface{}, groupName string, folderUID string, orgID
527539
return nil, err
528540
}
529541

542+
// Check for conflicting fields before unpacking the rest of the rule.
543+
// This is a workaround due to the lack of support for ConflictsWith in Lists in the SDK.
544+
errState := json["exec_err_state"].(string)
545+
noDataState := json["no_data_state"].(string)
546+
condition := json["condition"].(string)
547+
548+
record := unpackRecord(json["record"])
549+
if record != nil {
550+
incompatFieldMsgFmt := `conflicting fields "record" and "%s"`
551+
if forDuration != 0 {
552+
return nil, fmt.Errorf(incompatFieldMsgFmt, "for")
553+
}
554+
if noDataState != "" {
555+
return nil, fmt.Errorf(incompatFieldMsgFmt, "no_data_state")
556+
}
557+
if errState != "" {
558+
return nil, fmt.Errorf(incompatFieldMsgFmt, "exec_err_state")
559+
}
560+
if condition != "" {
561+
return nil, fmt.Errorf(incompatFieldMsgFmt, "condition")
562+
}
563+
}
564+
if record == nil {
565+
if condition == "" {
566+
return nil, fmt.Errorf(`"condition" is required`)
567+
}
568+
// Compute defaults here to avoid issues related to the above, setting a default in the schema will cause these
569+
// to be set before we can validate the configuration.
570+
if noDataState == "" {
571+
noDataState = "NoData"
572+
}
573+
if errState == "" {
574+
errState = "Alerting"
575+
}
576+
}
577+
530578
rule := models.ProvisionedAlertRule{
531579
UID: json["uid"].(string),
532580
Title: common.Ref(json["name"].(string)),
533581
FolderUID: common.Ref(folderUID),
534582
RuleGroup: common.Ref(groupName),
535583
OrgID: common.Ref(orgID),
536-
ExecErrState: common.Ref(json["exec_err_state"].(string)),
537-
NoDataState: common.Ref(json["no_data_state"].(string)),
584+
ExecErrState: common.Ref(errState),
585+
NoDataState: common.Ref(noDataState),
538586
For: common.Ref(strfmt.Duration(forDuration)),
539587
Data: data,
540-
Condition: common.Ref(json["condition"].(string)),
588+
Condition: common.Ref(condition),
541589
Labels: unpackMap(json["labels"]),
542590
Annotations: unpackMap(json["annotations"]),
543591
IsPaused: json["is_paused"].(bool),

internal/resources/grafana/resource_alerting_rule_group_test.go

+66-7
Original file line numberDiff line numberDiff line change
@@ -676,7 +676,7 @@ func TestAccAlertRule_NotificationSettings(t *testing.T) {
676676
}
677677

678678
func TestAccRecordingRule(t *testing.T) {
679-
testutils.CheckCloudInstanceTestsEnabled(t) // TODO: change to 11.3.1 when available
679+
testutils.CheckOSSTestsEnabled(t, ">=11.4.0")
680680

681681
var group models.AlertRuleGroup
682682
var name = acctest.RandString(10)
@@ -696,17 +696,34 @@ func TestAccRecordingRule(t *testing.T) {
696696
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.data.0.model", "{\"refId\":\"A\"}"),
697697
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.record.0.metric", metric),
698698
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.record.0.from", "A"),
699-
// ensure fields are cleared as expected
700-
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.for", "2m0s"),
701-
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.condition", "A"),
702-
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.no_data_state", "NoData"),
703-
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.exec_err_state", "Alerting"),
699+
// ensure fields are empty as expected
700+
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.for", "0s"),
701+
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.condition", ""),
702+
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.no_data_state", ""),
703+
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.exec_err_state", ""),
704704
),
705705
},
706706
},
707707
})
708708
}
709709

710+
func TestAccRecordingRule_incompatibleSettings(t *testing.T) {
711+
testutils.CheckOSSTestsEnabled(t, ">=11.4.0")
712+
713+
var name = acctest.RandString(10)
714+
var metric = "valid_metric"
715+
716+
resource.ParallelTest(t, resource.TestCase{
717+
ProtoV5ProviderFactories: testutils.ProtoV5ProviderFactories,
718+
Steps: []resource.TestStep{
719+
{
720+
Config: testAccRecordingRuleInvalid(name, metric, "A"),
721+
ExpectError: regexp.MustCompile(`conflicting fields "record" and "for"`),
722+
},
723+
},
724+
})
725+
}
726+
710727
func testAccAlertRuleGroupInOrgConfig(name string, interval int, disableProvenance bool) string {
711728
return fmt.Sprintf(`
712729
resource "grafana_organization" "test" {
@@ -877,7 +894,49 @@ resource "grafana_rule_group" "my_rule_group" {
877894
878895
rule {
879896
name = "My Random Walk Alert"
880-
// following should be cleared by Grafana
897+
898+
// Query the datasource.
899+
data {
900+
ref_id = "A"
901+
relative_time_range {
902+
from = 600
903+
to = 0
904+
}
905+
datasource_uid = grafana_data_source.testdata_datasource.uid
906+
model = jsonencode({
907+
intervalMs = 1000
908+
maxDataPoints = 43200
909+
refId = "A"
910+
})
911+
}
912+
record {
913+
metric = "%[2]s"
914+
from = "%[3]s"
915+
}
916+
}
917+
}`, name, metric, refID)
918+
}
919+
920+
func testAccRecordingRuleInvalid(name string, metric string, refID string) string {
921+
return fmt.Sprintf(`
922+
resource "grafana_folder" "rule_folder" {
923+
title = "%[1]s"
924+
}
925+
926+
resource "grafana_data_source" "testdata_datasource" {
927+
name = "%[1]s"
928+
type = "grafana-testdata-datasource"
929+
url = "http://localhost:3333"
930+
}
931+
932+
resource "grafana_rule_group" "my_rule_group" {
933+
name = "%[1]s"
934+
folder_uid = grafana_folder.rule_folder.uid
935+
interval_seconds = 60
936+
937+
rule {
938+
name = "My Random Walk Alert"
939+
// following should be rejected
881940
condition = "A"
882941
no_data_state = "NoData"
883942
exec_err_state = "Alerting"

0 commit comments

Comments
 (0)