import numpy as np
import pandas as pd
import networkx as nx

from typing import List

from keep.api.models.db.alert import Alert


def mine_incidents(alerts: List[Alert], incident_sliding_window_size: int = 6 * 24 * 60 * 60,
                   statistic_sliding_window_size: int = 60 * 60,
                   jaccard_threshold: float = 0.0, fingerprint_threshold: int = 1):
    """
    Mine incidents from alerts by grouping alerts whose fingerprints
    co-occur inside a sliding time window.
    """
    # Flatten the alerts into a (fingerprint, timestamp) DataFrame.
    alert_dict = {
        'fingerprint': [alert.fingerprint for alert in alerts],
        'timestamp': [alert.timestamp for alert in alerts],
    }
    alert_df = pd.DataFrame(alert_dict)
    mined_incidents = shape_incidents(alert_df, 'fingerprint', incident_sliding_window_size,
                                      statistic_sliding_window_size,
                                      jaccard_threshold, fingerprint_threshold)

    # Map each mined incident back to the original Alert objects.
    return [
        {
            "incident_fingerprint": incident['incident_fingerprint'],
            "alerts": [alert for alert in alerts if alert.fingerprint in incident['alert_fingerprints']],
        }
        for incident in mined_incidents
    ]


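# A minimal usage sketch, assuming the alerts expose `.fingerprint` and
# `.timestamp` (the only attributes mine_incidents reads). The defaults give a
# six-day incident window and a one-hour statistic window:
#
#   incidents = mine_incidents(alerts)
#   for incident in incidents:
#       print(incident["incident_fingerprint"], len(incident["alerts"]))

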
def get_batched_alert_counts(alerts: pd.DataFrame, unique_alert_identifier: str,
                             sliding_window_size: int) -> np.ndarray:
    """
    Count each alert's occurrences per sliding window.

    Alerts are bucketed into bins of half the window size, then summed over a
    rolling window of the full size, yielding half-overlapping windows.
    """
    resampled_alert_counts = alerts.set_index('timestamp').resample(
        f'{sliding_window_size // 2}s')[unique_alert_identifier].value_counts().unstack(fill_value=0)
    rolling_counts = resampled_alert_counts.rolling(
        window=f'{sliding_window_size}s', min_periods=1).sum()
    alert_counts = rolling_counts.to_numpy()

    return alert_counts


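# A small worked example of the windowing trick, assuming a 7200 s (2 h)
# window; the values below are illustrative, not real data. Events land in
# 3600 s bins, and the rolling sum then spans two adjacent bins:
#
#   >>> df = pd.DataFrame({
#   ...     'timestamp': pd.to_datetime(['2024-01-01 00:10', '2024-01-01 00:40',
#   ...                                  '2024-01-01 01:10']),
#   ...     'fingerprint': ['a', 'a', 'b'],
#   ... })
#   >>> get_batched_alert_counts(df, 'fingerprint', 7200)
#   array([[2., 0.],
#          [2., 1.]])

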
def get_batched_alert_occurrences(alerts: pd.DataFrame, unique_alert_identifier: str,
                                  sliding_window_size: int) -> np.ndarray:
    """
    Get a binary occurrence matrix (windows x alerts): 1 if the alert fired
    at least once within the window, 0 otherwise.
    """
    alert_counts = get_batched_alert_counts(
        alerts, unique_alert_identifier, sliding_window_size)
    alert_occurrences = np.where(alert_counts > 0, 1, 0)

    return alert_occurrences


def get_jaccard_scores(P_a: np.ndarray, P_aa: np.ndarray) -> np.ndarray:
    """
    Calculate the pairwise Jaccard similarity scores between alerts:
    J(a, b) = P(a & b) / (P(a) + P(b) - P(a & b)).
    """
    # Union probability via inclusion-exclusion.
    P_a_matrix = P_a[:, None] + P_a
    union_matrix = P_a_matrix - P_aa

    # Guard against division by zero for pairs that never occur.
    with np.errstate(divide='ignore', invalid='ignore'):
        jaccard_matrix = np.where(union_matrix != 0, P_aa / union_matrix, 0)

    # Every alert is perfectly similar to itself.
    np.fill_diagonal(jaccard_matrix, 1)

    return jaccard_matrix


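# A tiny worked example with illustrative probabilities: two alerts occurring
# in 50% and 40% of windows and co-occurring in 20% give
# J = 0.2 / (0.5 + 0.4 - 0.2) ≈ 0.286:
#
#   >>> get_jaccard_scores(np.array([0.5, 0.4]),
#   ...                    np.array([[0.5, 0.2], [0.2, 0.4]]))
#   array([[1.        , 0.28571429],
#          [0.28571429, 1.        ]])

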
def get_alert_jaccard_matrix(alerts: pd.DataFrame, unique_alert_identifier: str,
                             sliding_window_size: int) -> np.ndarray:
    """
    Calculate the pairwise Jaccard similarity matrix for all alerts,
    estimating probabilities from windowed occurrences.
    """
    alert_occurrences = get_batched_alert_occurrences(
        alerts, unique_alert_identifier, sliding_window_size)
    # P(a): fraction of windows in which each alert occurs.
    alert_probabilities = np.mean(alert_occurrences, axis=0)
    # P(a & b): fraction of windows in which both alerts occur.
    joint_alert_occurrences = np.dot(alert_occurrences.T, alert_occurrences)
    pairwise_alert_probabilities = joint_alert_occurrences / alert_occurrences.shape[0]

    return get_jaccard_scores(alert_probabilities, pairwise_alert_probabilities)


def build_graph_from_occurrence(occurrence_row: pd.Series, jaccard_matrix: np.ndarray,
                                unique_alert_identifiers: List[str],
                                jaccard_threshold: float = 0.05) -> nx.Graph:
    """
    Build a weighted graph from one occurrence row and the Jaccard matrix:
    nodes are the alerts present in the window, and edges connect pairs whose
    Jaccard similarity meets the threshold.
    """
    present_indices = np.where(occurrence_row > 0)[0]

    G = nx.Graph()

    for idx in present_indices:
        alert_desc = unique_alert_identifiers[idx]
        G.add_node(alert_desc)

    for i in present_indices:
        for j in present_indices:
            if i != j and jaccard_matrix[i, j] >= jaccard_threshold:
                alert_i = unique_alert_identifiers[i]
                alert_j = unique_alert_identifiers[j]
                # The graph is undirected, so adding both (i, j) and (j, i)
                # is idempotent.
                G.add_edge(alert_i, alert_j, weight=jaccard_matrix[i, j])

    return G


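# A minimal sketch with illustrative values: three alerts present in one
# window, of which only 'a' and 'b' are similar enough to be linked:
#
#   >>> row = pd.Series([1, 1, 1], index=['a', 'b', 'c'])
#   >>> jm = np.array([[1.0, 0.6, 0.0],
#   ...                [0.6, 1.0, 0.0],
#   ...                [0.0, 0.0, 1.0]])
#   >>> g = build_graph_from_occurrence(row, jm, ['a', 'b', 'c'], jaccard_threshold=0.5)
#   >>> sorted(nx.connected_components(g), key=len, reverse=True)
#   [{'a', 'b'}, {'c'}]

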
def shape_incidents(alerts: pd.DataFrame, unique_alert_identifier: str,
                    incident_sliding_window_size: int, statistic_sliding_window_size: int,
                    jaccard_threshold: float = 0.2, fingerprint_threshold: int = 5) -> List[dict]:
    """
    Shape incidents from alerts: for each half-overlapping incident window,
    link co-occurring alerts by Jaccard similarity and keep the largest
    connected component if it exceeds the fingerprint threshold.
    """
    incidents = []
    incident_number = 0

    resampled_alert_counts = alerts.set_index('timestamp').resample(
        f'{incident_sliding_window_size // 2}s')[unique_alert_identifier].value_counts().unstack(fill_value=0)
    jaccard_matrix = get_alert_jaccard_matrix(
        alerts, unique_alert_identifier, statistic_sliding_window_size)

    for idx in range(resampled_alert_counts.shape[0]):
        graph = build_graph_from_occurrence(
            resampled_alert_counts.iloc[idx], jaccard_matrix, resampled_alert_counts.columns,
            jaccard_threshold=jaccard_threshold)
        # Skip empty windows: max() raises ValueError on an empty sequence.
        components = list(nx.connected_components(graph))
        if not components:
            continue
        max_component = max(components, key=len)

        min_starts_at = resampled_alert_counts.index[idx]
        max_starts_at = min_starts_at + \
            pd.Timedelta(seconds=incident_sliding_window_size)

        local_alerts = alerts[(alerts['timestamp'] >= min_starts_at) &
                              (alerts['timestamp'] <= max_starts_at)]
        local_alerts = local_alerts[local_alerts[unique_alert_identifier].isin(
            max_component)]

        if len(max_component) > fingerprint_threshold:
            incidents.append({
                'incident_fingerprint': f'Incident #{incident_number}',
                'alert_fingerprints': local_alerts[unique_alert_identifier].unique().tolist(),
            })
            # Advance the counter so each incident gets a distinct fingerprint.
            incident_number += 1

    return incidents
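

# A self-contained smoke test, runnable without Keep's DB models: a
# hypothetical stand-in with `fingerprint` and `timestamp` attributes
# suffices, since those are the only fields mine_incidents touches.
if __name__ == "__main__":
    from collections import namedtuple
    from datetime import datetime, timedelta

    FakeAlert = namedtuple("FakeAlert", ["fingerprint", "timestamp"])

    base = datetime(2024, 1, 1)
    # Two alert fingerprints firing together every 10 minutes for 5 hours.
    fake_alerts = [
        FakeAlert(fp, base + timedelta(minutes=10 * i))
        for i in range(30)
        for fp in ("db-down", "api-5xx")
    ]

    incidents = mine_incidents(fake_alerts)
    for incident in incidents:
        print(incident["incident_fingerprint"], len(incident["alerts"]))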