Skip to content

fix for generation aware retries issue #100

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 6, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -64,19 +64,22 @@ void scheduleEventFromApi(CustomResourceEvent event) {
try {
lock.lock();
log.debug("Scheduling event from Api: {}", event);
if (event.getAction() == Action.DELETED) {
// This removes data from memory for deleted resource (prevent memory leak basically).
// Its quite interesting that this is always sufficient here (no finalizer or other mechanism needs to be involved).
// Thus, if operator is running we get DELETE the event, if not the memory is already gone anyways.
eventStore.removeLastGenerationForDeletedResource(event.resourceUid());
if (event.getResource().getMetadata().getDeletionTimestamp() != null) {
// Note that we always use finalizers, we want to process delete event just in corner case,
// when we are not able to add finalizer (lets say because of optimistic locking error, and the resource was deleted instantly).
// We want to skip in case of finalizer was there since we don't want to execute delete method always at least 2x,
// which would be the result if we don't skip here. (there is no deletion timestamp if resource deleted without finalizer.)
log.debug("Skipping delete event since deletion timestamp is present on resource, so finalizer was in place.");
return;
}
if (event.getAction() == Action.DELETED && event.getResource().getMetadata().getDeletionTimestamp() != null) {
// This removes data from memory for deleted resource (prevent memory leak).
// There is an extreme corner case when there is no finalizer; we ignore this situation for now.
eventStore.cleanup(event.resourceUid());
// Note that we always use finalizers; we want to process the delete event only in the corner case
// when we were not able to add the finalizer (let's say because of an optimistic locking error, and the resource was deleted instantly).
// We want to skip if the finalizer was there, since we don't want to always execute the delete method at least 2x,
// which would be the result if we didn't skip here. (There is no deletion timestamp if the resource was deleted without a finalizer.)
log.debug("Skipping delete event since deletion timestamp is present on resource, so finalizer was in place.");
return;
}
if (generationAware) {
// we have to store the last event for generation aware retries, since if we received new events since
// the execution, which did not have increased generation we will fail automatically on a conflict
// on a retry.
eventStore.addLastEventForGenerationAwareRetry(event);
}
// In case of generation aware processing, we want to replace this even if generation not increased,
// to have the most recent copy of the event.
Expand Down Expand Up @@ -145,13 +148,28 @@ void eventProcessingFailed(CustomResourceEvent event) {
scheduleNotYetScheduledEventForExecution(event.resourceUid());
} else {
log.debug("Event processing failed. Attempting to re-schedule the event: {}", event);
scheduleEventForExecution(event);
if (generationAware) {
CustomResourceEvent eventToRetry = selectEventToRetry(event);
scheduleEventForExecution(eventToRetry);
} else {
scheduleEventForExecution(event);
}
}
} finally {
lock.unlock();
}
}

/**
 * Selects the event to re-schedule after a failed execution when generation aware processing is on.
 *
 * <p>Looks up the last event stored in the event store for the resource of the failed event. If the
 * stored event carries a different resource version than the failed one, the stored event is
 * returned; otherwise the failed event itself is returned.
 *
 * <p>The event store may hold no event for the resource (its {@code cleanup} method removes the
 * entry). In that case the failed event is returned instead of dereferencing {@code null}.
 *
 * @param event the event whose processing failed
 * @return the event that should be scheduled for the retry, never {@code null}
 */
private CustomResourceEvent selectEventToRetry(CustomResourceEvent event) {
    CustomResourceEvent lastEvent = eventStore.getReceivedLastEventForGenerationAwareRetry(event.resourceUid());
    if (lastEvent == null) {
        // Nothing stored for this resource, so the failed event is the only candidate.
        return event;
    }
    boolean sameResourceVersion = event.getResource().getMetadata().getResourceVersion()
            .equals(lastEvent.getResource().getMetadata().getResourceVersion());
    // A differing resource version means the stored event holds another copy of the resource;
    // retry with that one rather than the failed one.
    return sameResourceVersion ? event : lastEvent;
}

private void scheduleNotYetScheduledEventForExecution(String uuid) {
CustomResourceEvent notScheduledEvent = eventStore.removeEventNotScheduled(uuid);
scheduleEventForExecution(notScheduledEvent);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ public class EventStore {
private final Map<String, CustomResourceEvent> eventsNotScheduled = new HashMap<>();
private final Map<String, CustomResourceEvent> eventsUnderProcessing = new HashMap<>();
private final Map<String, Long> lastGeneration = new HashMap<>();
private final Map<String, CustomResourceEvent> receivedLastEventForGenerationAwareRetry = new HashMap<>();

public boolean containsNotScheduledEvent(String uuid) {
return eventsNotScheduled.containsKey(uuid);
Expand Down Expand Up @@ -52,7 +53,16 @@ public Long getLastStoredGeneration(CustomResourceEvent event) {
return lastGeneration.get(event.getResource().getMetadata().getUid());
}

public void removeLastGenerationForDeletedResource(String uuid) {
/**
 * Stores the given event as the last received event of its resource, keyed by the resource uid.
 * An event stored earlier under the same uid is replaced.
 *
 * @param event the event to remember
 */
public void addLastEventForGenerationAwareRetry(CustomResourceEvent event) {
    String resourceUid = event.resourceUid();
    receivedLastEventForGenerationAwareRetry.put(resourceUid, event);
}

/**
 * Looks up the last received event stored for the given resource uid.
 *
 * @param uuid uid of the resource
 * @return the stored event, or {@code null} when nothing is stored for that uid
 */
public CustomResourceEvent getReceivedLastEventForGenerationAwareRetry(String uuid) {
    CustomResourceEvent lastReceived = receivedLastEventForGenerationAwareRetry.get(uuid);
    return lastReceived;
}

/**
 * Drops the per-resource bookkeeping held for the given uid: the last received event
 * and the last stored generation.
 *
 * @param uuid uid of the resource whose entries are removed
 */
public void cleanup(String uuid) {
    receivedLastEventForGenerationAwareRetry.remove(uuid);
    lastGeneration.remove(uuid);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.HashMap;
import java.util.concurrent.TimeUnit;

import static com.github.containersolutions.operator.IntegrationTestSupport.TEST_NAMESPACE;
Expand Down Expand Up @@ -36,6 +37,7 @@ public void configMapGetsCreatedForTestCustomResource() {

awaitResourcesCreatedOrUpdated();
awaitStatusUpdated();
assertThat(integrationTestSupport.numberOfControllerExecutions()).isEqualTo(2);
});
}

Expand All @@ -52,6 +54,26 @@ public void eventIsSkippedChangedOnMetadataOnlyUpdate() {
});
}

// Scenario: two events are received for the same resource, and the second one does not increase
// the generation compared to the first.
// This will cause a conflict, and on retry the new version of the resource needs to be scheduled
// to avoid repeating conflicts.
@Test
public void generationAwareRetryConflict() {
// NOTE(review): the flag presumably enables generation aware processing - confirm against initAndCleanup.
initAndCleanup(true);
integrationTestSupport.teardownIfSuccess(() -> {
TestCustomResource resource = testCustomResource();
// Second copy built by the same factory, with an extra annotation added.
TestCustomResource resource2 = testCustomResource();
resource2.getMetadata().getAnnotations().put("testannotation", "val");

// Create the resource, then right away create-or-replace it with the annotated copy.
integrationTestSupport.getCrOperations().inNamespace(TEST_NAMESPACE).create(resource);
integrationTestSupport.getCrOperations().inNamespace(TEST_NAMESPACE).createOrReplace(resource2);

awaitResourcesCreatedOrUpdated();
// Waits up to 10 seconds rather than the 5 used by the no-arg overload; presumably to leave room for the retry.
awaitStatusUpdated(10);
});
}


void awaitResourcesCreatedOrUpdated() {
await("configmap created").atMost(5, TimeUnit.SECONDS)
.untilAsserted(() -> {
Expand All @@ -63,7 +85,11 @@ void awaitResourcesCreatedOrUpdated() {
}

void awaitStatusUpdated() {
await("cr status updated").atMost(5, TimeUnit.SECONDS)
awaitStatusUpdated(5);
}

void awaitStatusUpdated(int timeout) {
await("cr status updated").atMost(timeout, TimeUnit.SECONDS)
.untilAsserted(() -> {
TestCustomResource cr = integrationTestSupport.getCrOperations().inNamespace(TEST_NAMESPACE).withName("test-custom-resource").get();
assertThat(cr).isNotNull();
Expand All @@ -78,6 +104,7 @@ private TestCustomResource testCustomResource() {
.withName("test-custom-resource")
.withNamespace(TEST_NAMESPACE)
.build());
resource.getMetadata().setAnnotations(new HashMap<>());
resource.setKind("CustomService");
resource.setSpec(new TestCustomResourceSpec());
resource.getSpec().setConfigMapName("test-config-map");
Expand Down