1
1
package com .datadog .appsec .gateway ;
2
2
3
3
import static com .datadog .appsec .event .data .MapDataBundle .Builder .CAPACITY_0_2 ;
4
+ import static com .datadog .appsec .event .data .MapDataBundle .Builder .CAPACITY_3_4 ;
4
5
import static com .datadog .appsec .event .data .MapDataBundle .Builder .CAPACITY_6_10 ;
5
6
import static com .datadog .appsec .gateway .AppSecRequestContext .DEFAULT_REQUEST_HEADERS_ALLOW_LIST ;
6
7
import static com .datadog .appsec .gateway .AppSecRequestContext .REQUEST_HEADERS_ALLOW_LIST ;
7
8
import static com .datadog .appsec .gateway .AppSecRequestContext .RESPONSE_HEADERS_ALLOW_LIST ;
9
+ import static datadog .trace .api .UserIdCollectionMode .ANONYMIZATION ;
10
+ import static datadog .trace .api .UserIdCollectionMode .DISABLED ;
11
+ import static datadog .trace .api .UserIdCollectionMode .SDK ;
12
+ import static datadog .trace .api .telemetry .LogCollector .SEND_TELEMETRY ;
13
+ import static datadog .trace .util .Strings .toHexString ;
8
14
9
15
import com .datadog .appsec .AppSecSystem ;
10
16
import com .datadog .appsec .api .security .ApiSecurityRequestSampler ;
22
28
import com .datadog .appsec .report .AppSecEventWrapper ;
23
29
import datadog .trace .api .Config ;
24
30
import datadog .trace .api .UserIdCollectionMode ;
25
- import datadog .trace .api .function .TriFunction ;
26
31
import datadog .trace .api .gateway .Events ;
27
32
import datadog .trace .api .gateway .Flow ;
28
33
import datadog .trace .api .gateway .IGSpanInfo ;
41
46
import java .net .URISyntaxException ;
42
47
import java .nio .charset .Charset ;
43
48
import java .nio .charset .StandardCharsets ;
49
+ import java .security .MessageDigest ;
50
+ import java .security .NoSuchAlgorithmException ;
44
51
import java .util .ArrayList ;
45
52
import java .util .Arrays ;
46
53
import java .util .Collection ;
51
58
import java .util .Map ;
52
59
import java .util .Set ;
53
60
import java .util .concurrent .ConcurrentHashMap ;
61
+ import java .util .concurrent .atomic .AtomicBoolean ;
54
62
import java .util .regex .Pattern ;
63
+ import java .util .stream .Collectors ;
55
64
import org .slf4j .Logger ;
56
65
import org .slf4j .LoggerFactory ;
57
66
@@ -65,6 +74,10 @@ public class GatewayBridge {
65
74
private static final Pattern QUERY_PARAM_SPLITTER = Pattern .compile ("&" );
66
75
private static final Map <String , List <String >> EMPTY_QUERY_PARAMS = Collections .emptyMap ();
67
76
77
+ private static final int HASH_SIZE_BYTES = 16 ; // 128 bits
78
+ private static final String ANON_PREFIX = "anon_" ;
79
+ private static final AtomicBoolean SHA_MISSING_REPORTED = new AtomicBoolean (false );
80
+
68
81
/** User tracking tags that will force the collection of request headers */
69
82
private static final String [] USER_TRACKING_TAGS = {
70
83
"appsec.events.users.login.success.track" , "appsec.events.users.login.failure.track"
@@ -91,7 +104,8 @@ public class GatewayBridge {
91
104
private volatile DataSubscriberInfo ioNetUrlSubInfo ;
92
105
private volatile DataSubscriberInfo ioFileSubInfo ;
93
106
private volatile DataSubscriberInfo sessionIdSubInfo ;
94
- private final ConcurrentHashMap <Address <String >, DataSubscriberInfo > userIdSubInfo =
107
+ private volatile DataSubscriberInfo userIdSubInfo ;
108
+ private final ConcurrentHashMap <String , DataSubscriberInfo > loginEventSubInfo =
95
109
new ConcurrentHashMap <>();
96
110
97
111
public GatewayBridge (
@@ -134,11 +148,8 @@ public void init() {
134
148
subscriptionService .registerCallback (EVENTS .networkConnection (), this ::onNetworkConnection );
135
149
subscriptionService .registerCallback (EVENTS .fileLoaded (), this ::onFileLoaded );
136
150
subscriptionService .registerCallback (EVENTS .requestSession (), this ::onRequestSession );
137
- subscriptionService .registerCallback (EVENTS .userId (), this .onUserEvent (KnownAddresses .USER_ID ));
138
- subscriptionService .registerCallback (
139
- EVENTS .loginSuccess (), this .onUserEvent (KnownAddresses .LOGIN_SUCCESS ));
140
- subscriptionService .registerCallback (
141
- EVENTS .loginFailure (), this .onUserEvent (KnownAddresses .LOGIN_FAILURE ));
151
+ subscriptionService .registerCallback (EVENTS .user (), this ::onUser );
152
+ subscriptionService .registerCallback (EVENTS .loginEvent (), this ::onLoginEvent );
142
153
143
154
if (additionalIGEvents .contains (EVENTS .requestPathParams ())) {
144
155
subscriptionService .registerCallback (EVENTS .requestPathParams (), this ::onRequestPathParams );
@@ -149,55 +160,157 @@ public void init() {
149
160
}
150
161
}
151
162
152
- private TriFunction <RequestContext , UserIdCollectionMode , String , Flow <Void >> onUserEvent (
153
- final Address <String > address ) {
154
- return (ctx_ , mode , userId ) -> {
155
- final AppSecRequestContext ctx = ctx_ .getData (RequestContextSlot .APPSEC );
156
- if (userId == null || ctx == null ) {
163
+ private Flow <Void > onUser (
164
+ final RequestContext ctx_ , final UserIdCollectionMode mode , final String originalUser ) {
165
+ if (mode == DISABLED ) {
166
+ return NoopFlow .INSTANCE ;
167
+ }
168
+ final String user = anonymizeUser (mode , originalUser );
169
+ if (user == null ) {
170
+ return NoopFlow .INSTANCE ;
171
+ }
172
+ final AppSecRequestContext ctx = ctx_ .getData (RequestContextSlot .APPSEC );
173
+ if (ctx == null ) {
174
+ return NoopFlow .INSTANCE ;
175
+ }
176
+ final TraceSegment segment = ctx_ .getTraceSegment ();
177
+
178
+ // span with ASM data
179
+ segment .setTagTop (Tags .ASM_KEEP , true );
180
+ segment .setTagTop (Tags .PROPAGATED_APPSEC , true );
181
+
182
+ // skip event if we have an SDK one
183
+ if (mode != SDK ) {
184
+ segment .setTagTop ("_dd.appsec.usr.id" , user );
185
+ if (ctx .getUserIdSource () == SDK ) {
157
186
return NoopFlow .INSTANCE ;
158
187
}
159
- final TraceSegment segment = ctx_ .getTraceSegment ();
160
- // user id can be set by the SDK overriding the auto event, always update the segment
161
- segment .setTagTop ("usr.id" , userId );
162
- segment .setTagTop ("_dd.appsec.user.collection_mode" , mode .shortName ());
163
- final List <Address <?>> addresses = new ArrayList <>(2 );
164
- final boolean newUserId = !userId .equals (ctx .getUserId ());
165
- if (newUserId ) {
166
- // unlikely that multiple threads will update the value at the same time
167
- ctx .setUserId (userId );
168
- addresses .add (KnownAddresses .USER_ID );
169
- }
170
- if (address != KnownAddresses .USER_ID ) {
171
- addresses .add (address );
172
- }
173
- if (addresses .isEmpty ()) {
174
- // nothing to publish so short-circuit here
188
+ }
189
+
190
+ // update span tags
191
+ segment .setTagTop ("usr.id" , user );
192
+ segment .setTagTop ("_dd.appsec.user.collection_mode" , mode .fullName ());
193
+
194
+ // update current context with new user id
195
+ ctx .setUserIdSource (mode );
196
+ final boolean newUserId = !user .equals (ctx .getUserId ());
197
+ if (!newUserId ) {
198
+ return NoopFlow .INSTANCE ;
199
+ }
200
+ ctx .setUserId (user );
201
+
202
+ // call waf if we have a new user id
203
+ while (true ) {
204
+ DataSubscriberInfo subInfo = userIdSubInfo ;
205
+ if (subInfo == null ) {
206
+ subInfo = producerService .getDataSubscribers (KnownAddresses .USER_ID );
207
+ userIdSubInfo = subInfo ;
208
+ }
209
+ if (subInfo == null || subInfo .isEmpty ()) {
175
210
return NoopFlow .INSTANCE ;
176
211
}
177
- final Address <?>[] addressArray = addresses .toArray (new Address [0 ]);
178
- while (true ) {
179
- DataSubscriberInfo subInfo =
180
- userIdSubInfo .computeIfAbsent (
181
- address , k -> producerService .getDataSubscribers (addressArray ));
182
- if (subInfo == null || subInfo .isEmpty ()) {
183
- return NoopFlow .INSTANCE ;
184
- }
185
- MapDataBundle .Builder bundle = new MapDataBundle .Builder (CAPACITY_0_2 );
186
- if (newUserId ) {
187
- bundle .add (KnownAddresses .USER_ID , userId );
188
- }
189
- if (address != KnownAddresses .USER_ID ) {
190
- // we don't support null values for the address so we use an invalid placeholder here
191
- bundle .add (address , "invalid" );
192
- }
193
- try {
194
- GatewayContext gwCtx = new GatewayContext (false );
195
- return producerService .publishDataEvent (subInfo , ctx , bundle .build (), gwCtx );
196
- } catch (ExpiredSubscriberInfoException e ) {
197
- userIdSubInfo .remove (address );
198
- }
212
+ DataBundle bundle =
213
+ new MapDataBundle .Builder (CAPACITY_0_2 ).add (KnownAddresses .USER_ID , user ).build ();
214
+ try {
215
+ GatewayContext gwCtx = new GatewayContext (false );
216
+ return producerService .publishDataEvent (subInfo , ctx , bundle , gwCtx );
217
+ } catch (ExpiredSubscriberInfoException e ) {
218
+ userIdSubInfo = null ;
219
+ }
220
+ }
221
+ }
222
+
223
+ private Flow <Void > onLoginEvent (
224
+ final RequestContext ctx_ ,
225
+ final UserIdCollectionMode mode ,
226
+ final String eventName ,
227
+ final Boolean exists ,
228
+ final String originalUser ,
229
+ final Map <String , String > metadata ) {
230
+ if (mode == DISABLED ) {
231
+ return NoopFlow .INSTANCE ;
232
+ }
233
+ final String user = anonymizeUser (mode , originalUser );
234
+ if (user == null ) {
235
+ return NoopFlow .INSTANCE ;
236
+ }
237
+ final AppSecRequestContext ctx = ctx_ .getData (RequestContextSlot .APPSEC );
238
+ if (ctx == null ) {
239
+ return NoopFlow .INSTANCE ;
240
+ }
241
+ final TraceSegment segment = ctx_ .getTraceSegment ();
242
+
243
+ // span with ASM data
244
+ segment .setTagTop (Tags .ASM_KEEP , true );
245
+ segment .setTagTop (Tags .PROPAGATED_APPSEC , true );
246
+
247
+ // skip event if we have an SDK one
248
+ if (mode != SDK ) {
249
+ segment .setTagTop ("_dd.appsec.usr.login" , user );
250
+ segment .setTagTop ("_dd.appsec.usr.id" , user );
251
+ segment .setTagTop (
252
+ "_dd.appsec.events.users." + eventName + ".auto.mode" , mode .fullName (), true );
253
+ if (ctx .getUserLoginSource () == SDK ) {
254
+ return NoopFlow .INSTANCE ;
255
+ }
256
+ } else {
257
+ segment .setTagTop ("_dd.appsec.events.users." + eventName + ".sdk" , true , true );
258
+ }
259
+
260
+ // update span tags
261
+ segment .setTagTop ("appsec.events.users." + eventName + ".usr.login" , user , true );
262
+ segment .setTagTop ("appsec.events.users." + eventName + ".usr.id" , user , true );
263
+ segment .setTagTop ("appsec.events.users." + eventName + ".track" , true , true );
264
+ if (exists != null ) {
265
+ segment .setTagTop ("appsec.events.users." + eventName + ".usr.exists" , exists , true );
266
+ }
267
+ if (metadata != null && !metadata .isEmpty ()) {
268
+ segment .setTagTop ("appsec.events.users." + eventName , metadata , true );
269
+ }
270
+
271
+ // update current context with new user login
272
+ ctx .setUserLoginSource (mode );
273
+ final boolean newUserLogin = !user .equals (ctx .getUserLogin ());
274
+ if (!newUserLogin ) {
275
+ return NoopFlow .INSTANCE ;
276
+ }
277
+ ctx .setUserLogin (user );
278
+
279
+ // call waf if we have a new user login
280
+ final List <Address <?>> addresses = new ArrayList <>(3 );
281
+ addresses .add (KnownAddresses .USER_LOGIN );
282
+ addresses .add (KnownAddresses .USER_ID );
283
+ if (KnownAddresses .LOGIN_SUCCESS .getKey ().endsWith (eventName )) {
284
+ addresses .add (KnownAddresses .LOGIN_SUCCESS );
285
+ } else if (KnownAddresses .LOGIN_FAILURE .getKey ().endsWith (eventName )) {
286
+ addresses .add (KnownAddresses .LOGIN_FAILURE );
287
+ }
288
+ final MapDataBundle .Builder bundleBuilder =
289
+ new MapDataBundle .Builder (addresses .size () == 2 ? CAPACITY_0_2 : CAPACITY_3_4 );
290
+ bundleBuilder .add (KnownAddresses .USER_ID , user );
291
+ bundleBuilder .add (KnownAddresses .USER_LOGIN , user );
292
+ if (addresses .size () == 3 ) {
293
+ // we don't support null values for the address so we use an invalid placeholder here
294
+ bundleBuilder .add (addresses .get (2 ), "invalid" );
295
+ }
296
+ final DataBundle bundle = bundleBuilder .build ();
297
+ final String subInfoKey =
298
+ addresses .stream ().map (Address ::getKey ).collect (Collectors .joining ("|" ));
299
+ while (true ) {
300
+ DataSubscriberInfo subInfo =
301
+ loginEventSubInfo .computeIfAbsent (
302
+ subInfoKey ,
303
+ t -> producerService .getDataSubscribers (addresses .toArray (new Address [0 ])));
304
+ if (subInfo == null || subInfo .isEmpty ()) {
305
+ return NoopFlow .INSTANCE ;
306
+ }
307
+ try {
308
+ GatewayContext gwCtx = new GatewayContext (false );
309
+ return producerService .publishDataEvent (subInfo , ctx , bundle , gwCtx );
310
+ } catch (ExpiredSubscriberInfoException e ) {
311
+ loginEventSubInfo .remove (subInfoKey );
199
312
}
200
- };
313
+ }
201
314
}
202
315
203
316
private Flow <Void > onRequestSession (final RequestContext ctx_ , final String sessionId ) {
@@ -940,6 +1053,33 @@ private static int byteToDigit(byte b) {
940
1053
return -1 ;
941
1054
}
942
1055
1056
+ protected static String anonymizeUser (final UserIdCollectionMode mode , final String userId ) {
1057
+ if (mode != ANONYMIZATION || userId == null ) {
1058
+ return userId ;
1059
+ }
1060
+ MessageDigest digest ;
1061
+ try {
1062
+ // TODO avoid lookup a new instance every time
1063
+ digest = MessageDigest .getInstance ("SHA-256" );
1064
+ } catch (NoSuchAlgorithmException e ) {
1065
+ if (!SHA_MISSING_REPORTED .getAndSet (true )) {
1066
+ log .error (
1067
+ SEND_TELEMETRY ,
1068
+ "Missing SHA-256 digest, user collection in 'anon' mode cannot continue" ,
1069
+ e );
1070
+ }
1071
+ return null ;
1072
+ }
1073
+ digest .update (userId .getBytes ());
1074
+ byte [] hash = digest .digest ();
1075
+ if (hash .length > HASH_SIZE_BYTES ) {
1076
+ byte [] temp = new byte [HASH_SIZE_BYTES ];
1077
+ System .arraycopy (hash , 0 , temp , 0 , temp .length );
1078
+ hash = temp ;
1079
+ }
1080
+ return ANON_PREFIX + toHexString (hash );
1081
+ }
1082
+
943
1083
private static class IGAppSecEventDependencies {
944
1084
945
1085
private static final Map <Address <?>, Collection <datadog .trace .api .gateway .EventType <?>>>
0 commit comments