18
18
19
19
import cd .go .contrib .elasticagent .model .JobIdentifier ;
20
20
import cd .go .contrib .elasticagent .requests .CreateAgentRequest ;
21
+ import cd .go .contrib .elasticagent .KubernetesInstance .AgentState ;
22
+ import cd .go .contrib .elasticagent .utils .Util ;
21
23
import io .fabric8 .kubernetes .api .model .Pod ;
22
- import io .fabric8 .kubernetes .api .model .PodList ;
23
24
import io .fabric8 .kubernetes .client .KubernetesClient ;
24
25
25
26
import java .net .SocketTimeoutException ;
26
27
import java .time .Duration ;
27
28
import java .time .Instant ;
28
- import java .util .ArrayList ;
29
- import java .util .Map ;
29
+ import java .util .*;
30
30
import java .util .concurrent .ConcurrentHashMap ;
31
- import java .util .concurrent .Semaphore ;
31
+ import java .util .function .Function ;
32
+ import java .util .stream .Collectors ;
32
33
33
34
import static cd .go .contrib .elasticagent .KubernetesPlugin .LOG ;
34
35
import static java .text .MessageFormat .format ;
35
36
36
37
public class KubernetesAgentInstances implements AgentInstances <KubernetesInstance > {
37
- private final ConcurrentHashMap <String , KubernetesInstance > instances = new ConcurrentHashMap <>() ;
38
+ private final ConcurrentHashMap <String , KubernetesInstance > instances ;
38
39
public Clock clock = Clock .DEFAULT ;
39
- final Semaphore semaphore = new Semaphore (0 , true );
40
40
41
41
private KubernetesClientFactory factory ;
42
42
private KubernetesInstanceFactory kubernetesInstanceFactory ;
@@ -50,55 +50,127 @@ public KubernetesAgentInstances(KubernetesClientFactory factory) {
50
50
}
51
51
52
52
/**
 * Creates a registry that starts out with no known agent pods.
 *
 * @param factory                   supplies the Kubernetes client used for cluster calls
 * @param kubernetesInstanceFactory creates pods and converts cluster pods into instances
 */
public KubernetesAgentInstances(KubernetesClientFactory factory, KubernetesInstanceFactory kubernetesInstanceFactory) {
    // An empty seed map: the three-argument constructor copies it into the registry.
    this(factory, kubernetesInstanceFactory, Map.of());
}
55
+
56
/**
 * Creates a registry pre-populated with already-known agent pods.
 *
 * @param factory                   supplies the Kubernetes client used for cluster calls
 * @param kubernetesInstanceFactory creates pods and converts cluster pods into instances
 * @param initialInstances          instances to seed the registry with; the entries are copied,
 *                                  so later changes to the supplied map are not reflected here
 *                                  (presumably keyed by pod name, as {@code register} does — confirm with callers)
 */
public KubernetesAgentInstances(KubernetesClientFactory factory, KubernetesInstanceFactory kubernetesInstanceFactory, Map<String, KubernetesInstance> initialInstances) {
    this.instances = new ConcurrentHashMap<>(initialInstances);
    this.kubernetesInstanceFactory = kubernetesInstanceFactory;
    this.factory = factory;
}
56
61
57
62
@ Override
58
- public KubernetesInstance create (CreateAgentRequest request , PluginSettings settings , PluginRequest pluginRequest , ConsoleLogAppender consoleLogAppender ) {
59
- final Integer maxAllowedContainers = settings .getMaxPendingPods ();
63
+ public Optional < KubernetesInstance > requestCreateAgent (CreateAgentRequest request , PluginSettings settings , PluginRequest pluginRequest , ConsoleLogAppender consoleLogAppender ) {
64
+ final Integer maxAllowedPods = settings .getMaxPendingPods ();
60
65
synchronized (instances ) {
61
- refreshAll (settings );
62
- doWithLockOnSemaphore (new SetupSemaphore (maxAllowedContainers , instances , semaphore ));
63
- consoleLogAppender .accept ("Waiting to create agent pod." );
64
- if (semaphore .tryAcquire ()) {
65
- return createKubernetesInstance (request , settings , pluginRequest , consoleLogAppender );
66
+ if (instances .size () < maxAllowedPods ) {
67
+ return requestCreateAgentHelper (request , settings , pluginRequest , consoleLogAppender );
66
68
} else {
67
- String message = format ("[Create Agent Request] The number of pending kubernetes pods is currently at the maximum permissible limit ({0}). Total kubernetes pods ({1}). Not creating any more containers." , maxAllowedContainers , instances .size ());
69
+ String message = String .format ("[Create Agent Request] The number of pending kubernetes pods is currently at the maximum permissible limit (%s). Total kubernetes pods (%s). Not creating any more pods." ,
70
+ maxAllowedPods ,
71
+ instances .size ());
68
72
LOG .warn (message );
69
73
consoleLogAppender .accept (message );
70
- return null ;
74
+ return Optional . empty () ;
71
75
}
72
76
}
73
77
}
74
78
75
- private void doWithLockOnSemaphore (Runnable runnable ) {
76
- synchronized (semaphore ) {
77
- runnable .run ();
79
+ private List <KubernetesInstance > findPodsEligibleForReuse (CreateAgentRequest request ) {
80
+ Long jobId = request .jobIdentifier ().getJobId ();
81
+ String jobElasticConfigHash = KubernetesInstanceFactory .agentConfigHash (
82
+ request .clusterProfileProperties (), request .elasticProfileProperties ());
83
+
84
+ List <KubernetesInstance > eligiblePods = new ArrayList <>();
85
+
86
+ for (KubernetesInstance instance : instances .values ()) {
87
+ if (instance .getJobId ().equals (jobId )) {
88
+ eligiblePods .add (instance );
89
+ continue ;
90
+ }
91
+
92
+ String podElasticConfigHash = instance .getPodAnnotations ().get (KubernetesInstance .ELASTIC_CONFIG_HASH );
93
+ boolean sameElasticConfig = Objects .equals (podElasticConfigHash , jobElasticConfigHash );
94
+ boolean instanceIsIdle = instance .getAgentState ().equals (KubernetesInstance .AgentState .Idle );
95
+ boolean podIsRunning = instance .getPodState ().equals (PodState .Running );
96
+ boolean isReusable = sameElasticConfig && instanceIsIdle && podIsRunning ;
97
+
98
+ LOG .info (
99
+ "[reuse] Is pod {} reusable for job {}? {}. Job has {}={}; pod has {}={}, agentState={}, podState={}" ,
100
+ instance .getPodName (),
101
+ jobId ,
102
+ isReusable ,
103
+ KubernetesInstance .ELASTIC_CONFIG_HASH ,
104
+ jobElasticConfigHash ,
105
+ KubernetesInstance .ELASTIC_CONFIG_HASH ,
106
+ podElasticConfigHash ,
107
+ instance .getAgentState (),
108
+ instance .getPodState ()
109
+ );
110
+
111
+ if (isReusable ) {
112
+ eligiblePods .add (instance );
113
+ }
78
114
}
115
+
116
+ return eligiblePods ;
79
117
}
80
118
81
- private KubernetesInstance createKubernetesInstance (CreateAgentRequest request , PluginSettings settings , PluginRequest pluginRequest , ConsoleLogAppender consoleLogAppender ) {
119
+
120
+ private Optional <KubernetesInstance > requestCreateAgentHelper (
121
+ CreateAgentRequest request ,
122
+ PluginSettings settings ,
123
+ PluginRequest pluginRequest ,
124
+ ConsoleLogAppender consoleLogAppender ) {
82
125
JobIdentifier jobIdentifier = request .jobIdentifier ();
83
- if (isAgentCreatedForJob (jobIdentifier .getJobId ())) {
84
- String message = format ("[Create Agent Request] Request for creating an agent for Job Identifier [{0}] has already been scheduled. Skipping current request." , jobIdentifier );
85
- LOG .warn (message );
86
- consoleLogAppender .accept (message );
87
- return null ;
126
+ Long jobId = jobIdentifier .getJobId ();
127
+
128
+ // Agent reuse disabled - create a new pod only if one hasn't already been created for this job ID.
129
+ if (!settings .getEnableAgentReuse ()) {
130
+ // Already created a pod for this job ID.
131
+ if (isAgentCreatedForJob (jobId )) {
132
+ String message = format ("[Create Agent Request] Request for creating an agent for Job Identifier [{0}] has already been scheduled. Skipping current request." , jobIdentifier );
133
+ LOG .warn (message );
134
+ consoleLogAppender .accept (message );
135
+ return Optional .empty ();
136
+ }
137
+ // No pod created yet for this job ID. Create one.
138
+ KubernetesClient client = factory .client (settings );
139
+ KubernetesInstance instance = kubernetesInstanceFactory .create (request , settings , client , pluginRequest );
140
+ consoleLogAppender .accept (String .format ("Created pod: %s" , instance .getPodName ()));
141
+ instance = instance .toBuilder ().agentState (AgentState .Building ).build ();
142
+ register (instance );
143
+ consoleLogAppender .accept (String .format ("Agent pod %s created. Waiting for it to register to the GoCD server." , instance .getPodName ()));
144
+ return Optional .of (instance );
88
145
}
89
146
90
- KubernetesClient client = factory .client (settings );
91
- KubernetesInstance instance = kubernetesInstanceFactory .create (request , settings , client , pluginRequest );
92
- consoleLogAppender .accept (String .format ("Creating pod: %s" , instance .name ()));
93
- register (instance );
94
- consoleLogAppender .accept (String .format ("Agent pod %s created. Waiting for it to register to the GoCD server." , instance .name ()));
147
+ // Agent reuse enabled - look for any extant pods that match this job,
148
+ // and create a new one only if there are none.
149
+ List <KubernetesInstance > reusablePods = findPodsEligibleForReuse (request );
150
+ LOG .info ("[reuse] Found {} pods eligible for reuse for CreateAgentRequest for job {}: {}" ,
151
+ reusablePods .size (),
152
+ jobId ,
153
+ reusablePods .stream ().map (pod -> pod .getPodName ()).collect (Collectors .toList ()));
95
154
96
- return instance ;
155
+ if (reusablePods .isEmpty ()) {
156
+ KubernetesClient client = factory .client (settings );
157
+ KubernetesInstance instance = kubernetesInstanceFactory .create (request , settings , client , pluginRequest );
158
+ consoleLogAppender .accept (String .format ("Created pod: %s" , instance .getPodName ()));
159
+ instance = instance .toBuilder ().agentState (AgentState .Building ).build ();
160
+ register (instance );
161
+ consoleLogAppender .accept (String .format ("Agent pod %s created. Waiting for it to register to the GoCD server." , instance .getPodName ()));
162
+ return Optional .of (instance );
163
+ } else {
164
+ String message = String .format ("[reuse] Not creating a new pod - found %s eligible for reuse." , reusablePods .size ());
165
+ consoleLogAppender .accept (message );
166
+ LOG .info (message );
167
+ return Optional .empty ();
168
+ }
97
169
}
98
170
99
171
private boolean isAgentCreatedForJob (Long jobId ) {
100
172
for (KubernetesInstance instance : instances .values ()) {
101
- if (instance .jobId ().equals (jobId )) {
173
+ if (instance .getJobId ().equals (jobId )) {
102
174
return true ;
103
175
}
104
176
}
@@ -111,7 +183,7 @@ public void terminate(String agentId, PluginSettings settings) {
111
183
KubernetesInstance instance = instances .get (agentId );
112
184
if (instance != null ) {
113
185
KubernetesClient client = factory .client (settings );
114
- instance .terminate ( client );
186
+ client . pods (). withName ( instance .getPodName ()). delete ( );
115
187
} else {
116
188
LOG .warn (format ("Requested to terminate an instance that does not exist {0}." , agentId ));
117
189
}
@@ -140,56 +212,77 @@ public Agents instancesCreatedAfterTimeout(PluginSettings settings, Agents agent
140
212
continue ;
141
213
}
142
214
143
- if (clock .now ().isAfter (instance .createdAt ().plus (settings .getAutoRegisterPeriod ()))) {
215
+ if (clock .now ().isAfter (instance .getCreatedAt ().plus (settings .getAutoRegisterPeriod ()))) {
144
216
oldAgents .add (agent );
145
217
}
146
218
}
147
219
return new Agents (oldAgents );
148
220
}
149
221
222
+ public List <Pod > listAgentPods (KubernetesClient client ) {
223
+ if (client == null ) {
224
+ throw new IllegalArgumentException ("client is null" );
225
+ }
226
+ return client .pods ()
227
+ .withLabel (Constants .KUBERNETES_POD_KIND_LABEL_KEY , Constants .KUBERNETES_POD_KIND_LABEL_VALUE )
228
+ .list ()
229
+ .getItems ();
230
+ }
231
+
150
232
@ Override
151
233
public void refreshAll (PluginSettings properties ) {
152
234
LOG .debug ("[Refresh Instances] Syncing k8s elastic agent pod information for cluster {}." , properties );
153
- PodList list = null ;
235
+ List < Pod > pods = null ;
154
236
try {
155
237
KubernetesClient client = factory .client (properties );
156
- list = client . pods (). list ( );
238
+ pods = listAgentPods ( client );
157
239
} catch (Exception e ) {
158
240
LOG .error ("Error occurred while trying to list kubernetes pods:" , e );
159
241
160
242
if (e .getCause () instanceof SocketTimeoutException ) {
161
243
LOG .error ("Error caused due to SocketTimeoutException. This generally happens due to stale kubernetes client. Clearing out existing kubernetes client and creating a new one!" );
162
244
factory .clearOutExistingClient ();
163
245
KubernetesClient client = factory .client (properties );
164
- list = client . pods (). list ( );
246
+ pods = listAgentPods ( client );
165
247
}
166
248
}
167
249
168
- if (list == null ) {
250
+ if (pods == null ) {
169
251
LOG .info ("Did not find any running kubernetes pods." );
170
252
return ;
171
253
}
172
254
255
+ Map <String , KubernetesInstance > oldInstances = Map .copyOf (instances );
173
256
instances .clear ();
174
- for (Pod pod : list .getItems ()) {
175
- Map <String , String > podLabels = pod .getMetadata ().getLabels ();
176
- if (podLabels != null ) {
177
- if (Constants .KUBERNETES_POD_KIND_LABEL_VALUE .equals (podLabels .get (Constants .KUBERNETES_POD_KIND_LABEL_KEY ))) {
178
- register (kubernetesInstanceFactory .fromKubernetesPod (pod ));
179
- }
257
+
258
+ for (Pod pod : pods ) {
259
+ String podName = pod .getMetadata ().getName ();
260
+ // preserve pod's agent state
261
+ KubernetesInstance newInstance = kubernetesInstanceFactory .fromKubernetesPod (pod );
262
+ KubernetesInstance oldInstance = oldInstances .get (podName );
263
+ if (oldInstance != null ) {
264
+ AgentState oldAgentState = oldInstances .get (podName ).getAgentState ();
265
+ newInstance = newInstance .toBuilder ().agentState (oldAgentState ).build ();
266
+ LOG .debug ("[reuse] Preserved AgentState {} upon refresh of pod {}" , oldAgentState , podName );
180
267
}
268
+ register (newInstance );
181
269
}
182
270
183
271
LOG .info (String .format ("[refresh-pod-state] Pod information successfully synced. All(Running/Pending) pod count is %d." , instances .size ()));
184
272
}
185
273
274
+ @ Override
275
+ public KubernetesInstance updateAgent (String agentId , Function <KubernetesInstance , KubernetesInstance > updateFn ) {
276
+ return instances .compute (agentId , (_agentId , instance ) -> updateFn .apply (instance ));
277
+ }
278
+
186
279
@ Override
187
280
public KubernetesInstance find (String agentId ) {
188
281
return instances .get (agentId );
189
282
}
190
283
191
284
public void register (KubernetesInstance instance ) {
192
- instances .put (instance .name (), instance );
285
+ instances .put (instance .getPodName (), instance );
193
286
}
194
287
195
288
private KubernetesAgentInstances unregisteredAfterTimeout (PluginSettings settings , Agents knownAgents ) throws Exception {
0 commit comments