From 64c30c75f40586331ed54034b89cd745f0e06f01 Mon Sep 17 00:00:00 2001 From: Mika Ayenson Date: Mon, 6 Feb 2023 19:46:05 -0500 Subject: [PATCH 1/6] validate against both beats and integrations --- detection_rules/rule_validators.py | 95 ++++++++++++++++-------------- 1 file changed, 52 insertions(+), 43 deletions(-) diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index e2ae9f2177c..29c6e732365 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -11,7 +11,7 @@ import kql -from . import ecs, endgame +from . import beats, ecs, endgame from .integrations import get_integration_schema_data, load_integrations_manifests from .rule import QueryRuleData, QueryValidator, RuleMeta, TOMLRuleContents @@ -43,28 +43,33 @@ def validate(self, data: QueryRuleData, meta: RuleMeta) -> None: if package_integrations: # validate the query against related integration fields self.validate_integration(data, meta, package_integrations) - else: - for stack_version, mapping in meta.get_validation_stack_versions().items(): - beats_version = mapping['beats'] - ecs_version = mapping['ecs'] - err_trailer = f'stack: {stack_version}, beats: {beats_version}, ecs: {ecs_version}' - - beat_types, beat_schema, schema = self.get_beats_schema(data.index or [], - beats_version, ecs_version) - - try: - kql.parse(self.query, schema=schema) - except kql.KqlParseError as exc: - message = exc.error_msg - trailer = err_trailer - if "Unknown field" in message and beat_types: - trailer = f"\nTry adding event.module or event.dataset to specify beats module\n\n{trailer}" - - raise kql.KqlParseError(exc.error_msg, exc.line, exc.column, exc.source, - len(exc.caret.lstrip()), trailer=trailer) from None - except Exception: - print(err_trailer) - raise + + if beats.parse_beats_from_index(data.index or []): + self.validate_beats(data, meta) + + def validate_beats(self, data: QueryRuleData, meta: RuleMeta) -> None: + """Validate the query against the beats schema.""" + for stack_version, mapping in meta.get_validation_stack_versions().items(): + beats_version = mapping['beats'] + ecs_version = mapping['ecs'] + err_trailer = f'stack: {stack_version}, beats: {beats_version}, ecs: {ecs_version}' + + beat_types, beat_schema, schema = self.get_beats_schema(data.index or [], + beats_version, ecs_version) + + try: + kql.parse(self.query, schema=schema) + except kql.KqlParseError as exc: + message = exc.error_msg + trailer = err_trailer + if "Unknown field" in message and beat_types: + trailer = f"\nTry adding event.module or event.dataset to specify beats module\n\n{trailer}" + + raise kql.KqlParseError(exc.error_msg, exc.line, exc.column, exc.source, + len(exc.caret.lstrip()), trailer=trailer) from None + except Exception: + print(err_trailer) + raise def validate_integration(self, data: QueryRuleData, meta: RuleMeta, package_integrations: List[dict]) -> None: """Validate the query, called from the parent which contains [metadata] information.""" @@ -158,26 +163,30 @@ def validate(self, data: 'QueryRuleData', meta: RuleMeta) -> None: # validate the query against related integration fields self.validate_integration(data, meta, package_integrations) - else: - for stack_version, mapping in meta.get_validation_stack_versions().items(): - beats_version = mapping['beats'] - ecs_version = mapping['ecs'] - endgame_version = mapping['endgame'] - err_trailer = f'stack: {stack_version}, beats: {beats_version},' \ - f'ecs: {ecs_version}, endgame: {endgame_version}' - - beat_types, beat_schema, schema = self.get_beats_schema(data.index or [], - beats_version, ecs_version) - endgame_schema = self.get_endgame_schema(data.index, endgame_version) - eql_schema = ecs.KqlSchema2Eql(schema) - - # validate query against the beats and eql schema - self.validate_query_with_schema(data=data, schema=eql_schema, err_trailer=err_trailer, - beat_types=beat_types) - - if endgame_schema: - # validate query against the endgame schema - self.validate_query_with_schema(data=data, schema=endgame_schema, err_trailer=err_trailer) + if beats.parse_beats_from_index(data.index or []): + self.validate_beats(data, meta) + + def validate_beats(self, data: QueryRuleData, meta: RuleMeta) -> None: + """Validate the query against the beats schema.""" + for stack_version, mapping in meta.get_validation_stack_versions().items(): + beats_version = mapping['beats'] + ecs_version = mapping['ecs'] + endgame_version = mapping['endgame'] + err_trailer = f'stack: {stack_version}, beats: {beats_version},' \ + f'ecs: {ecs_version}, endgame: {endgame_version}' + + beat_types, beat_schema, schema = self.get_beats_schema(data.index or [], + beats_version, ecs_version) + endgame_schema = self.get_endgame_schema(data.index, endgame_version) + eql_schema = ecs.KqlSchema2Eql(schema) + + # validate query against the beats and eql schema + self.validate_query_with_schema(data=data, schema=eql_schema, err_trailer=err_trailer, + beat_types=beat_types) + + if endgame_schema: + # validate query against the endgame schema + self.validate_query_with_schema(data=data, schema=endgame_schema, err_trailer=err_trailer) def validate_integration(self, data: QueryRuleData, meta: RuleMeta, package_integrations: List[dict]) -> None: """Validate an EQL query while checking TOMLRule against integration schemas.""" From f19bf5633ed9c181682de908f439a5c40817937b Mon Sep 17 00:00:00 2001 From: Mika Ayenson Date: Mon, 6 Feb 2023 19:47:02 -0500 Subject: [PATCH 2/6] add comment --- detection_rules/rule_validators.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index 29c6e732365..7eafd421a41 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -45,6 +45,7 @@ def validate(self, data: QueryRuleData, meta: RuleMeta) -> None: self.validate_integration(data, meta, package_integrations) if beats.parse_beats_from_index(data.index or []): + # validate the query against fields within beats self.validate_beats(data, meta) def validate_beats(self, data: QueryRuleData, meta: RuleMeta) -> None: @@ -164,6 +165,7 @@ def validate(self, data: 'QueryRuleData', meta: RuleMeta) -> None: self.validate_integration(data, meta, package_integrations) if beats.parse_beats_from_index(data.index or []): + # validate the query against fields within beats self.validate_beats(data, meta) def validate_beats(self, data: QueryRuleData, meta: RuleMeta) -> None: From 335fca2fbaf042a307bbe3ba2ab1eb5c2e96cea4 Mon Sep 17 00:00:00 2001 From: Mika Ayenson Date: Mon, 6 Feb 2023 20:30:22 -0500 Subject: [PATCH 3/6] verbose warning only on unit tests --- detection_rules/rule_validators.py | 6 ++++-- tests/base.py | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index 7eafd421a41..d6dff188891 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -111,7 +111,8 @@ def validate_integration(self, data: QueryRuleData, meta: RuleMeta, package_inte f"{stack_version=}, {ecs_version=}" ) error_fields[field] = {"error": exc, "trailer": trailer} - print(f"\nWarning: `{field}` in `{data.name}` not found in schema. {trailer}") + if data.get("notify", False): + print(f"\nWarning: `{field}` in `{data.name}` not found in schema. {trailer}") else: raise kql.KqlParseError(exc.error_msg, exc.line, exc.column, exc.source, len(exc.caret.lstrip()), trailer=trailer) from None @@ -234,7 +235,8 @@ def validate_integration(self, data: QueryRuleData, meta: RuleMeta, package_inte f"{stack_version=}, {ecs_version=}" ) error_fields[field] = {"error": exc, "trailer": trailer} - print(f"\nWarning: `{field}` in `{data.name}` not found in schema. {trailer}") + if data.get("notify", False): + print(f"\nWarning: `{field}` in `{data.name}` not found in schema. {trailer}") else: raise exc diff --git a/tests/base.py b/tests/base.py index 823714aa7fd..f090f66b162 100644 --- a/tests/base.py +++ b/tests/base.py @@ -5,6 +5,7 @@ """Shared resources for tests.""" +import os import unittest from typing import Union @@ -17,6 +18,7 @@ class BaseRuleTest(unittest.TestCase): @classmethod def setUpClass(cls): + os.environ["DR_NOTIFY_INTEGRATION_UPDATE_AVAILABLE"] = "1" rc = RuleCollection.default() cls.all_rules = rc.rules cls.rule_lookup = rc.id_map From e67ff143a4d3e9fc3deff4b99b329bcf974d9673 Mon Sep 17 00:00:00 2001 From: Mika Ayenson Date: Mon, 6 Feb 2023 20:35:15 -0500 Subject: [PATCH 4/6] validate ecs, then beats all the time. --- detection_rules/rule_validators.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index d6dff188891..e89286146fe 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -40,14 +40,13 @@ def validate(self, data: QueryRuleData, meta: RuleMeta) -> None: packages_manifest = load_integrations_manifests() package_integrations = TOMLRuleContents.get_packaged_integrations(data, meta, packages_manifest) + # validate the query against fields within beats + self.validate_beats(data, meta) + if package_integrations: # validate the query against related integration fields self.validate_integration(data, meta, package_integrations) - if beats.parse_beats_from_index(data.index or []): - # validate the query against fields within beats - self.validate_beats(data, meta) - def validate_beats(self, data: QueryRuleData, meta: RuleMeta) -> None: """Validate the query against the beats schema.""" for stack_version, mapping in meta.get_validation_stack_versions().items(): @@ -161,14 +160,13 @@ def validate(self, data: 'QueryRuleData', meta: RuleMeta) -> None: packages_manifest = load_integrations_manifests() package_integrations = TOMLRuleContents.get_packaged_integrations(data, meta, packages_manifest) + # validate the query against fields within beats + self.validate_beats(data, meta) + if package_integrations: # validate the query against related integration fields self.validate_integration(data, meta, package_integrations) - if beats.parse_beats_from_index(data.index or []): - # validate the query against fields within beats - self.validate_beats(data, meta) - def validate_beats(self, data: QueryRuleData, meta: RuleMeta) -> None: """Validate the query against the beats schema.""" for stack_version, mapping in meta.get_validation_stack_versions().items(): @@ -240,13 +238,6 @@ def validate_integration(self, data: QueryRuleData, meta: RuleMeta, package_inte else: raise exc - # Still need to check endgame if it's in the index - endgame_schema = self.get_endgame_schema(data.index, endgame_version) - if endgame_schema: - # validate query against the endgame schema - err_trailer = f'stack: {stack_version}, endgame: {endgame_version}' - self.validate_query_with_schema(data=data, schema=endgame_schema, err_trailer=err_trailer) - # don't error on fields that are in another integration schema for field in list(error_fields.keys()): if field in combined_schema: From 476cdfb3e07140e2281aaa43a20b73082da6fbf6 Mon Sep 17 00:00:00 2001 From: Mika Ayenson Date: Mon, 6 Feb 2023 20:42:51 -0500 Subject: [PATCH 5/6] linting --- detection_rules/rule_validators.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index e89286146fe..d5d633050f7 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -11,7 +11,7 @@ import kql -from . import beats, ecs, endgame +from . import ecs, endgame from .integrations import get_integration_schema_data, load_integrations_manifests from .rule import QueryRuleData, QueryValidator, RuleMeta, TOMLRuleContents @@ -205,7 +205,6 @@ def validate_integration(self, data: QueryRuleData, meta: RuleMeta, package_inte package_version = integration_schema_data['package_version'] integration_schema = integration_schema_data['schema'] stack_version = integration_schema_data['stack_version'] - endgame_version = integration_schema_data['endgame_version'] if stack_version != current_stack_version: # reset the combined schema for each stack version From 99e0d4854a5a534c8b126bff9211217f668428fe Mon Sep 17 00:00:00 2001 From: Mika Ayenson Date: Mon, 6 Feb 2023 20:59:35 -0500 Subject: [PATCH 6/6] rename validate_beats --- detection_rules/rule_validators.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index d5d633050f7..94601e988aa 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -41,14 +41,14 @@ def validate(self, data: QueryRuleData, meta: RuleMeta) -> None: package_integrations = TOMLRuleContents.get_packaged_integrations(data, meta, packages_manifest) # validate the query against fields within beats - self.validate_beats(data, meta) + self.validate_stack_combos(data, meta) if package_integrations: # validate the query against related integration fields self.validate_integration(data, meta, package_integrations) - def validate_beats(self, data: QueryRuleData, meta: RuleMeta) -> None: - """Validate the query against the beats schema.""" + def validate_stack_combos(self, data: QueryRuleData, meta: RuleMeta) -> None: + """Validate the query against ECS and beats schemas across stack combinations.""" for stack_version, mapping in meta.get_validation_stack_versions().items(): beats_version = mapping['beats'] ecs_version = mapping['ecs'] @@ -161,14 +161,14 @@ def validate(self, data: 'QueryRuleData', meta: RuleMeta) -> None: package_integrations = TOMLRuleContents.get_packaged_integrations(data, meta, packages_manifest) # validate the query against fields within beats - self.validate_beats(data, meta) + self.validate_stack_combos(data, meta) if package_integrations: # validate the query against related integration fields self.validate_integration(data, meta, package_integrations) - def validate_beats(self, data: QueryRuleData, meta: RuleMeta) -> None: - """Validate the query against the beats schema.""" + def validate_stack_combos(self, data: QueryRuleData, meta: RuleMeta) -> None: + """Validate the query against ECS and beats schemas across stack combinations.""" for stack_version, mapping in meta.get_validation_stack_versions().items(): beats_version = mapping['beats'] ecs_version = mapping['ecs']