Skip to content

Commit 7c757a1

Browse files
committed
fix for generation aware retries issue
1 parent 311db65 commit 7c757a1

File tree

3 files changed

+71
-16
lines changed

3 files changed

+71
-16
lines changed

Diff for: operator-framework/src/main/java/com/github/containersolutions/operator/processing/EventScheduler.java

+32-14
Original file line numberDiff line numberDiff line change
@@ -64,19 +64,22 @@ void scheduleEventFromApi(CustomResourceEvent event) {
6464
try {
6565
lock.lock();
6666
log.debug("Scheduling event from Api: {}", event);
67-
if (event.getAction() == Action.DELETED) {
68-
// This removes data from memory for deleted resource (prevent memory leak basically).
69-
// Its quite interesting that this is always sufficient here (no finalizer or other mechanism needs to be involved).
70-
// Thus, if operator is running we get DELETE the event, if not the memory is already gone anyways.
71-
eventStore.removeLastGenerationForDeletedResource(event.resourceUid());
72-
if (event.getResource().getMetadata().getDeletionTimestamp() != null) {
73-
// Note that we always use finalizers, we want to process delete event just in corner case,
74-
// when we are not able to add finalizer (lets say because of optimistic locking error, and the resource was deleted instantly).
75-
// We want to skip in case of finalizer was there since we don't want to execute delete method always at least 2x,
76-
// which would be the result if we don't skip here. (there is no deletion timestamp if resource deleted without finalizer.)
77-
log.debug("Skipping delete event since deletion timestamp is present on resource, so finalizer was in place.");
78-
return;
79-
}
67+
if (event.getAction() == Action.DELETED && event.getResource().getMetadata().getDeletionTimestamp() != null) {
68+
// This removes data from memory for deleted resource (prevent memory leak).
69+
// There is am extreme corner case when there is no finalizer, we ignore this situation now.
70+
eventStore.cleanup(event.resourceUid());
71+
// Note that we always use finalizers, we want to process delete event just in corner case,
72+
// when we are not able to add finalizer (lets say because of optimistic locking error, and the resource was deleted instantly).
73+
// We want to skip in case of finalizer was there since we don't want to execute delete method always at least 2x,
74+
// which would be the result if we don't skip here. (there is no deletion timestamp if resource deleted without finalizer.)
75+
log.debug("Skipping delete event since deletion timestamp is present on resource, so finalizer was in place.");
76+
return;
77+
}
78+
if (generationAware) {
79+
// we have to store the last event for generation aware retries, since if we received new events since
80+
// the execution, which did not have increased generation we will fail automatically on a conflict
81+
// on a retry.
82+
eventStore.addLastEventForGenerationAwareRetry(event);
8083
}
8184
// In case of generation aware processing, we want to replace this even if generation not increased,
8285
// to have the most recent copy of the event.
@@ -145,13 +148,28 @@ void eventProcessingFailed(CustomResourceEvent event) {
145148
scheduleNotYetScheduledEventForExecution(event.resourceUid());
146149
} else {
147150
log.debug("Event processing failed. Attempting to re-schedule the event: {}", event);
148-
scheduleEventForExecution(event);
151+
if (generationAware) {
152+
CustomResourceEvent eventToRetry = selectEventToRetry(event);
153+
scheduleEventForExecution(eventToRetry);
154+
} else {
155+
scheduleEventForExecution(event);
156+
}
149157
}
150158
} finally {
151159
lock.unlock();
152160
}
153161
}
154162

163+
private CustomResourceEvent selectEventToRetry(CustomResourceEvent event) {
164+
CustomResourceEvent lastEvent = eventStore.getReceivedLastEventForGenerationAwareRetry(event.resourceUid());
165+
if (!event.getResource().getMetadata().getResourceVersion()
166+
.equals(lastEvent.getResource().getMetadata().getResourceVersion())) {
167+
return lastEvent;
168+
} else {
169+
return event;
170+
}
171+
}
172+
155173
private void scheduleNotYetScheduledEventForExecution(String uuid) {
156174
CustomResourceEvent notScheduledEvent = eventStore.removeEventNotScheduled(uuid);
157175
scheduleEventForExecution(notScheduledEvent);

Diff for: operator-framework/src/main/java/com/github/containersolutions/operator/processing/EventStore.java

+11-1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ public class EventStore {
88
private final Map<String, CustomResourceEvent> eventsNotScheduled = new HashMap<>();
99
private final Map<String, CustomResourceEvent> eventsUnderProcessing = new HashMap<>();
1010
private final Map<String, Long> lastGeneration = new HashMap<>();
11+
private final Map<String, CustomResourceEvent> receivedLastEventForGenerationAwareRetry = new HashMap<>();
1112

1213
public boolean containsNotScheduledEvent(String uuid) {
1314
return eventsNotScheduled.containsKey(uuid);
@@ -52,7 +53,16 @@ public Long getLastStoredGeneration(CustomResourceEvent event) {
5253
return lastGeneration.get(event.getResource().getMetadata().getUid());
5354
}
5455

55-
public void removeLastGenerationForDeletedResource(String uuid) {
56+
public void addLastEventForGenerationAwareRetry(CustomResourceEvent event) {
57+
receivedLastEventForGenerationAwareRetry.put(event.resourceUid(), event);
58+
}
59+
60+
public CustomResourceEvent getReceivedLastEventForGenerationAwareRetry(String uuid) {
61+
return receivedLastEventForGenerationAwareRetry.get(uuid);
62+
}
63+
64+
public void cleanup(String uuid) {
5665
lastGeneration.remove(uuid);
66+
receivedLastEventForGenerationAwareRetry.remove(uuid);
5767
}
5868
}

Diff for: operator-framework/src/test/java/com/github/containersolutions/operator/ControllerExecutionIT.java

+28-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import org.slf4j.Logger;
1010
import org.slf4j.LoggerFactory;
1111

12+
import java.util.HashMap;
1213
import java.util.concurrent.TimeUnit;
1314

1415
import static com.github.containersolutions.operator.IntegrationTestSupport.TEST_NAMESPACE;
@@ -36,6 +37,7 @@ public void configMapGetsCreatedForTestCustomResource() {
3637

3738
awaitResourcesCreatedOrUpdated();
3839
awaitStatusUpdated();
40+
assertThat(integrationTestSupport.numberOfControllerExecutions()).isEqualTo(2);
3941
});
4042
}
4143

@@ -52,6 +54,26 @@ public void eventIsSkippedChangedOnMetadataOnlyUpdate() {
5254
});
5355
}
5456

57+
// We test the scenario when we receive 2 events, while the generation is not increased by the other.
58+
// This will cause a conflict, and on retry the new version of the resource needs to be scheduled
59+
// to avoid repeating conflicts
60+
@Test
61+
public void generationAwareRetryConflict() {
62+
initAndCleanup(true);
63+
integrationTestSupport.teardownIfSuccess(() -> {
64+
TestCustomResource resource = testCustomResource();
65+
TestCustomResource resource2 = testCustomResource();
66+
resource2.getMetadata().getAnnotations().put("testannotation", "val");
67+
68+
integrationTestSupport.getCrOperations().inNamespace(TEST_NAMESPACE).create(resource);
69+
integrationTestSupport.getCrOperations().inNamespace(TEST_NAMESPACE).createOrReplace(resource2);
70+
71+
awaitResourcesCreatedOrUpdated();
72+
awaitStatusUpdated(10);
73+
});
74+
}
75+
76+
5577
void awaitResourcesCreatedOrUpdated() {
5678
await("configmap created").atMost(5, TimeUnit.SECONDS)
5779
.untilAsserted(() -> {
@@ -63,7 +85,11 @@ void awaitResourcesCreatedOrUpdated() {
6385
}
6486

6587
void awaitStatusUpdated() {
66-
await("cr status updated").atMost(5, TimeUnit.SECONDS)
88+
awaitStatusUpdated(5);
89+
}
90+
91+
void awaitStatusUpdated(int timeout) {
92+
await("cr status updated").atMost(timeout, TimeUnit.SECONDS)
6793
.untilAsserted(() -> {
6894
TestCustomResource cr = integrationTestSupport.getCrOperations().inNamespace(TEST_NAMESPACE).withName("test-custom-resource").get();
6995
assertThat(cr).isNotNull();
@@ -78,6 +104,7 @@ private TestCustomResource testCustomResource() {
78104
.withName("test-custom-resource")
79105
.withNamespace(TEST_NAMESPACE)
80106
.build());
107+
resource.getMetadata().setAnnotations(new HashMap<>());
81108
resource.setKind("CustomService");
82109
resource.setSpec(new TestCustomResourceSpec());
83110
resource.getSpec().setConfigMapName("test-config-map");

0 commit comments

Comments
 (0)