Skip to content

Commit 0cee31c

Browse files
committed
[JENKINS-48149] Slave Pod Retention
New configuration to control how the kubernetes-plugin deletes slave build pods. * Support for cloud and pod template configurations in Jenkins UI * Groovy support for pipeline builds * Describable implementation for future extensions OpenShift Bug 1515940
1 parent 06371e3 commit 0cee31c

File tree

25 files changed

+777
-78
lines changed

25 files changed

+777
-78
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,8 @@ Either way it provides access to the following fields:
130130
* **annotations** Annotations to apply to the pod.
131131
* **inheritFrom** List of one or more pod templates to inherit from *(more details below)*.
132132
* **slaveConnectTimeout** Timeout in seconds for an agent to be online.
133-
* **activeDeadlineSeconds** Pod is deleted after this deadline is passed.
133+
* **podRetention** Controls the behavior of keeping slave pods. Can be 'never()', 'onFailure()', 'always()', or 'default()'. If empty, it defaults to deleting the pod after `activeDeadlineSeconds` has passed.
134+
* **activeDeadlineSeconds** If `podRetention` is set to 'never()' or 'onFailure()', pod is deleted after this deadline is passed.
134135
* **idleMinutes** Allows the Pod to remain active for reuse until the configured number of minutes has passed since the last step was executed on it.
135136

136137
The `containerTemplate` is a template of container that will be added to the pod. Again, it's configurable via the user interface or via pipeline and allows you to set the following fields:

src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesCloud.java

Lines changed: 60 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
import org.apache.commons.codec.binary.Base64;
2929
import org.apache.commons.lang.StringUtils;
3030
import org.csanchez.jenkins.plugins.kubernetes.pipeline.PodTemplateMap;
31+
import org.csanchez.jenkins.plugins.kubernetes.pod.retention.Default;
32+
import org.csanchez.jenkins.plugins.kubernetes.pod.retention.PodRetention;
3133
import org.jenkinsci.plugins.plaincredentials.FileCredentials;
3234
import org.jenkinsci.plugins.plaincredentials.StringCredentials;
3335
import org.jenkinsci.plugins.plaincredentials.impl.StringCredentialsImpl;
@@ -43,8 +45,6 @@
4345
import com.cloudbees.plugins.credentials.common.StandardListBoxModel;
4446
import com.cloudbees.plugins.credentials.common.StandardUsernamePasswordCredentials;
4547
import com.cloudbees.plugins.credentials.domains.URIRequirementBuilder;
46-
import com.ctc.wstx.util.StringUtil;
47-
import com.google.common.base.Preconditions;
4848
import com.google.common.base.Strings;
4949
import com.google.common.collect.ImmutableMap;
5050

@@ -55,6 +55,7 @@
5555
import hudson.init.InitMilestone;
5656
import hudson.init.Initializer;
5757
import hudson.model.Descriptor;
58+
import hudson.model.DescriptorVisibilityFilter;
5859
import hudson.model.Label;
5960
import hudson.security.ACL;
6061
import hudson.slaves.Cloud;
@@ -115,6 +116,8 @@ public class KubernetesCloud extends Cloud {
115116

116117
private transient KubernetesClient client;
117118
private int maxRequestsPerHost;
119+
@CheckForNull
120+
private PodRetention podRetention = PodRetention.getKubernetesCloudDefault();
118121

119122
@DataBoundConstructor
120123
public KubernetesCloud(String name) {
@@ -143,6 +146,7 @@ public KubernetesCloud(@NonNull String name, @NonNull KubernetesCloud source) {
143146
this.containerCap = source.containerCap;
144147
this.retentionTimeout = source.retentionTimeout;
145148
this.connectTimeout = source.connectTimeout;
149+
this.podRetention = source.podRetention;
146150
}
147151

148152
@Deprecated
@@ -251,10 +255,12 @@ public String getJenkinsUrl() {
251255
}
252256

253257
@DataBoundSetter
258+
@Deprecated
254259
public void setCapOnlyOnAlivePods(boolean capOnlyOnAlivePods) {
255260
this.capOnlyOnAlivePods = capOnlyOnAlivePods;
256261
}
257262

263+
@Deprecated
258264
public boolean isCapOnlyOnAlivePods() {
259265
return capOnlyOnAlivePods;
260266
}
@@ -378,6 +384,26 @@ public void setConnectTimeout(int connectTimeout) {
378384
this.connectTimeout = connectTimeout;
379385
}
380386

387+
/**
388+
* Gets the global pod retention policy for the plugin.
389+
*/
390+
public PodRetention getPodRetention() {
391+
return this.podRetention;
392+
}
393+
394+
/**
395+
* Set the global pod retention policy for the plugin.
396+
*
397+
* @param podRetention the pod retention policy for the plugin.
398+
*/
399+
@DataBoundSetter
400+
public void setPodRetention(PodRetention podRetention) {
401+
if (podRetention == null || podRetention instanceof Default) {
402+
podRetention = PodRetention.getKubernetesCloudDefault();
403+
}
404+
this.podRetention = podRetention;
405+
}
406+
381407
/**
382408
* Connects to Kubernetes.
383409
*
@@ -454,42 +480,30 @@ private boolean addProvisionedSlave(@Nonnull PodTemplate template, @CheckForNull
454480
}
455481

456482
PodList slaveList = client.pods().inNamespace(templateNamespace).withLabels(getLabels()).list();
457-
List<Pod> slaveListItems = slaveList.getItems();
483+
List<Pod> allActiveSlavePods = slaveList.getItems().stream()
484+
.filter(x -> x.getStatus().getPhase().toLowerCase().matches("(running|pending)"))
485+
.collect(Collectors.toList());
458486

459487
Map<String, String> labelsMap = new HashMap<>(this.getLabels());
460488
labelsMap.putAll(template.getLabelsMap());
461-
PodList namedList = client.pods().inNamespace(templateNamespace).withLabels(labelsMap).list();
462-
List<Pod> namedListItems = namedList.getItems();
463-
464-
if (this.isCapOnlyOnAlivePods()) {
465-
slaveListItems = slaveListItems.stream()
466-
.filter(x -> x.getStatus()
467-
.getPhase().toLowerCase()
468-
.matches("(running|pending)"))
469-
.collect(Collectors.toList());
470-
}
471-
472-
if (template.isCapOnlyOnAlivePods()) {
473-
namedListItems = namedListItems.stream()
474-
.filter(x -> x.getStatus()
475-
.getPhase().toLowerCase()
476-
.matches("(running|pending)"))
477-
.collect(Collectors.toList());
478-
}
489+
PodList templateSlaveList = client.pods().inNamespace(templateNamespace).withLabels(labelsMap).list();
490+
List<Pod> activeTemplateSlavePods = templateSlaveList.getItems().stream()
491+
.filter(x -> x.getStatus().getPhase().toLowerCase().matches("(running|pending)"))
492+
.collect(Collectors.toList());
479493

480-
if (slaveListItems != null && containerCap <= slaveListItems.size()) {
494+
if (allActiveSlavePods != null && containerCap <= allActiveSlavePods.size()) {
481495
LOGGER.log(Level.INFO,
482-
"Total container cap of {0} reached, not provisioning: {1} running or errored in namespace {2} with Kubernetes labels {3}",
483-
new Object[] { containerCap, slaveListItems.size(), templateNamespace, getLabels() });
496+
"Total container cap of {0} reached, not provisioning: {1} running or pending in namespace {2} with Kubernetes labels {3}",
497+
new Object[] { containerCap, allActiveSlavePods.size(), templateNamespace, getLabels() });
484498
return false;
485499
}
486500

487-
if (namedListItems != null && slaveListItems != null && template.getInstanceCap() <= namedListItems.size()) {
501+
if (activeTemplateSlavePods != null && allActiveSlavePods != null && template.getInstanceCap() <= activeTemplateSlavePods.size()) {
488502
LOGGER.log(Level.INFO,
489-
"Template instance cap of {0} reached for template {1}, not provisioning: {2} running or errored in namespace {3} with label \"{4}\" and Kubernetes labels {5}",
490-
new Object[] { template.getInstanceCap(), template.getName(), slaveListItems.size(),
503+
"Template instance cap of {0} reached for template {1}, not provisioning: {2} running or pending in namespace {3} with label \"{4}\" and Kubernetes labels {5}",
504+
new Object[] { template.getInstanceCap(), template.getName(), allActiveSlavePods.size(),
491505
templateNamespace, label == null ? "" : label.toString(), labelsMap });
492-
return false; // maxed out
506+
return false;
493507
}
494508
return true;
495509
}
@@ -652,6 +666,24 @@ public FormValidation doCheckMaxRequestsPerHostStr(@QueryParameter String value)
652666
return FormValidation.error("Please supply an integer");
653667
}
654668
}
669+
670+
public List<Descriptor<PodRetention>> getAllowedPodRetentions() {
671+
Jenkins jenkins = Jenkins.getInstanceOrNull();
672+
if (jenkins == null) {
673+
return new ArrayList<>(0);
674+
}
675+
return DescriptorVisibilityFilter.apply(this, jenkins.getDescriptorList(PodRetention.class));
676+
}
677+
678+
@SuppressWarnings("rawtypes")
679+
public Descriptor getDefaultPodRetention() {
680+
Jenkins jenkins = Jenkins.getInstanceOrNull();
681+
if (jenkins == null) {
682+
return null;
683+
}
684+
return jenkins.getDescriptor(PodRetention.getKubernetesCloudDefault().getClass());
685+
}
686+
655687
}
656688

657689
@Override

src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesSlave.java

Lines changed: 80 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import org.apache.commons.lang.RandomStringUtils;
1818
import org.apache.commons.lang.StringUtils;
1919
import org.apache.commons.lang.Validate;
20+
import org.csanchez.jenkins.plugins.kubernetes.pod.retention.PodRetention;
2021
import org.jenkinsci.plugins.durabletask.executors.OnceRetentionStrategy;
2122
import org.jvnet.localizer.Localizable;
2223
import org.jvnet.localizer.ResourceBundleHolder;
@@ -33,15 +34,19 @@
3334
import hudson.model.Node;
3435
import hudson.model.Queue;
3536
import hudson.model.TaskListener;
37+
import hudson.remoting.Engine;
38+
import hudson.remoting.VirtualChannel;
3639
import hudson.slaves.AbstractCloudSlave;
3740
import hudson.slaves.Cloud;
3841
import hudson.slaves.CloudRetentionStrategy;
3942
import hudson.slaves.ComputerLauncher;
4043
import hudson.slaves.OfflineCause;
4144
import hudson.slaves.RetentionStrategy;
45+
import io.fabric8.kubernetes.api.model.Pod;
4246
import io.fabric8.kubernetes.client.KubernetesClient;
4347
import io.fabric8.kubernetes.client.KubernetesClientException;
4448
import jenkins.model.Jenkins;
49+
import jenkins.security.MasterToSlaveCallable;
4550

4651
/**
4752
* @author Carlos Sanchez [email protected]
@@ -178,10 +183,45 @@ public KubernetesComputer createComputer() {
178183
return new KubernetesComputer(this);
179184
}
180185

186+
public PodRetention getPodRetention(KubernetesCloud cloud) {
187+
PodRetention retentionPolicy = cloud.getPodRetention();
188+
if (template != null) {
189+
retentionPolicy = template.getPodRetention();
190+
}
191+
return retentionPolicy;
192+
}
193+
181194
@Override
182195
protected void _terminate(TaskListener listener) throws IOException, InterruptedException {
183196
LOGGER.log(Level.INFO, "Terminating Kubernetes instance for agent {0}", name);
197+
198+
KubernetesCloud cloud;
199+
try {
200+
cloud = getKubernetesCloud();
201+
} catch (IllegalStateException e) {
202+
e.printStackTrace(listener.fatalError("Unable to terminate agent. Cloud may have been removed. There may be leftover resources on the Kubernetes cluster."));
203+
LOGGER.log(Level.SEVERE, String.format("Unable to terminate agent %s. Cloud may have been removed. There may be leftover resources on the Kubernetes cluster.", name));
204+
return;
205+
}
184206

207+
KubernetesClient client;
208+
try {
209+
client = cloud.connect();
210+
} catch (UnrecoverableKeyException | CertificateEncodingException | NoSuchAlgorithmException
211+
| KeyStoreException e) {
212+
String msg = String.format("Failed to connect to cloud %s. There may be leftover resources on the Kubernetes cluster.", getCloudName());
213+
e.printStackTrace(listener.fatalError(msg));
214+
LOGGER.log(Level.SEVERE, msg);
215+
return;
216+
}
217+
218+
// Prior to termination, determine if we should delete the slave pod based on
219+
// the slave pod's current state and the pod retention policy.
220+
// Healthy slave pods should still have a JNLP agent running at this point.
221+
String actualNamespace = getNamespace() == null ? client.getNamespace() : getNamespace();
222+
Pod pod = client.pods().inNamespace(actualNamespace).withName(name).get();
223+
boolean deletePod = getPodRetention(cloud).shouldDeletePod(cloud, pod);
224+
185225
Computer computer = toComputer();
186226
if (computer == null) {
187227
String msg = String.format("Computer for agent is null: %s", name);
@@ -190,6 +230,13 @@ protected void _terminate(TaskListener listener) throws IOException, Interrupted
190230
return;
191231
}
192232

233+
// Tell the slave to stop JNLP reconnects.
234+
VirtualChannel ch = computer.getChannel();
235+
if (ch != null) {
236+
ch.call(new SlaveDisconnector());
237+
}
238+
239+
// Disconnect the master from the slave agent
193240
OfflineCause offlineCause = OfflineCause.create(new Localizable(HOLDER, "offline"));
194241

195242
Future<?> disconnected = computer.disconnect(offlineCause);
@@ -207,24 +254,20 @@ protected void _terminate(TaskListener listener) throws IOException, Interrupted
207254
listener.fatalError(msg);
208255
return;
209256
}
210-
KubernetesCloud cloud;
211-
try {
212-
cloud = getKubernetesCloud();
213-
} catch (IllegalStateException e) {
214-
e.printStackTrace(listener.fatalError("Unable to terminate agent. Cloud may have been removed. There may be leftover resources on the Kubernetes cluster."));
215-
LOGGER.log(Level.SEVERE, String.format("Unable to terminate agent %s. Cloud may have been removed. There may be leftover resources on the Kubernetes cluster.", name));
216-
return;
217-
}
218-
KubernetesClient client;
219-
try {
220-
client = cloud.connect();
221-
} catch (UnrecoverableKeyException | CertificateEncodingException | NoSuchAlgorithmException
222-
| KeyStoreException e) {
223-
String msg = String.format("Failed to connect to cloud %s", getCloudName());
224-
e.printStackTrace(listener.fatalError(msg));
225-
return;
257+
258+
if (deletePod) {
259+
deleteSlavePod(listener, client);
260+
} else {
261+
// Log warning, as the slave pod may still be running
262+
LOGGER.log(Level.WARNING, "Slave pod {0} was not deleted due to retention policy {1}.",
263+
new Object[] { name, getPodRetention(cloud) });
226264
}
265+
String msg = String.format("Disconnected computer %s", name);
266+
LOGGER.log(Level.INFO, msg);
267+
listener.getLogger().println(msg);
268+
}
227269

270+
private void deleteSlavePod(TaskListener listener, KubernetesClient client) throws IOException {
228271
String actualNamespace = getNamespace() == null ? client.getNamespace() : getNamespace();
229272
try {
230273
Boolean deleted = client.pods().inNamespace(actualNamespace).withName(name).delete();
@@ -245,7 +288,6 @@ protected void _terminate(TaskListener listener) throws IOException, Interrupted
245288
String msg = String.format("Terminated Kubernetes instance for agent %s/%s", actualNamespace, name);
246289
LOGGER.log(Level.INFO, msg);
247290
listener.getLogger().println(msg);
248-
LOGGER.log(Level.INFO, "Disconnected computer {0}", name);
249291
}
250292

251293
@Override
@@ -424,4 +466,25 @@ public boolean isInstantiable() {
424466
}
425467

426468
}
469+
470+
private static class SlaveDisconnector extends MasterToSlaveCallable<Void, IOException> {
471+
472+
private static final long serialVersionUID = 8683427258340193283L;
473+
474+
private static final Logger LOGGER = Logger.getLogger(SlaveDisconnector.class.getName());
475+
476+
@Override
477+
public Void call() throws IOException {
478+
Engine e = Engine.current();
479+
// No engine, do nothing.
480+
if (e == null) {
481+
return null;
482+
}
483+
// Tell the slave JNLP agent to not attempt further reconnects.
484+
e.setNoReconnect(true);
485+
LOGGER.log(Level.INFO, "Disabled slave engine reconnects.");
486+
return null;
487+
}
488+
489+
}
427490
}

0 commit comments

Comments
 (0)