Skip to content

Commit eb17e8c

Browse files
committed
rls: Fix a local and remote race
The local race passes `rlsPicker` to the channel before CachingRlsLbClient is finished constructing. `RlsPicker` can use several fields that are not yet initialized at that point. This does not seem to be happening in practice, because it looks like it would break things very loudly (e.g., NPE). The remote race seems incredibly hard to hit, because it requires an RPC to complete before the pending data tracking the RPC is added to a map. But if a system is at 100% CPU utilization, maybe it can be hit. If it is hit, all RPCs needing the impacted cache entry will forever be buffered.
1 parent a1515f9 commit eb17e8c

File tree

1 file changed

+13
-13
lines changed

1 file changed

+13
-13
lines changed

rls/src/main/java/io/grpc/rls/CachingRlsLbClient.java

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,6 @@ private CachingRlsLbClient(Builder builder) {
166166
rlsChannelBuilder.disableServiceConfigLookUp();
167167
}
168168
rlsChannel = rlsChannelBuilder.build();
169-
helper.updateBalancingState(ConnectivityState.CONNECTING, rlsPicker);
170169
rlsStub = RouteLookupServiceGrpc.newStub(rlsChannel);
171170
childLbResolvedAddressFactory =
172171
checkNotNull(builder.resolvedAddressFactory, "resolvedAddressFactory");
@@ -285,7 +284,11 @@ private CachedRouteLookupResponse handleNewRequest(RouteLookupRequest request) {
285284
ListenableFuture<RouteLookupResponse> asyncCall = asyncRlsCall(request);
286285
if (!asyncCall.isDone()) {
287286
pendingEntry = new PendingCacheEntry(request, asyncCall);
287+
// Add the entry to the map before adding the Listener, because the listener removes the
288+
// entry from the map
288289
pendingCallCache.put(request, pendingEntry);
290+
// Beware that the listener can run immediately on the current thread
291+
asyncCall.addListener(pendingEntry::handleDoneFuture, synchronizationContext);
289292
return CachedRouteLookupResponse.pendingResponse(pendingEntry);
290293
} else {
291294
// async call returned finished future is most likely throttled
@@ -462,17 +465,9 @@ final class PendingCacheEntry {
462465
this.request = checkNotNull(request, "request");
463466
this.pendingCall = pendingCall;
464467
this.backoffPolicy = backoffPolicy == null ? backoffProvider.get() : backoffPolicy;
465-
pendingCall.addListener(
466-
new Runnable() {
467-
@Override
468-
public void run() {
469-
handleDoneFuture();
470-
}
471-
},
472-
synchronizationContext);
473468
}
474469

475-
private void handleDoneFuture() {
470+
void handleDoneFuture() {
476471
synchronized (lock) {
477472
pendingCallCache.remove(request);
478473
if (pendingCall.isCancelled()) {
@@ -589,7 +584,9 @@ void maybeRefresh() {
589584
}
590585
final ListenableFuture<RouteLookupResponse> asyncCall = asyncRlsCall(request);
591586
if (!asyncCall.isDone()) {
592-
pendingCallCache.put(request, new PendingCacheEntry(request, asyncCall));
587+
PendingCacheEntry pendingEntry = new PendingCacheEntry(request, asyncCall);
588+
pendingCallCache.put(request, pendingEntry);
589+
asyncCall.addListener(pendingEntry::handleDoneFuture, synchronizationContext);
593590
} else {
594591
// async call returned finished future is most likely throttled
595592
try {
@@ -727,9 +724,10 @@ private void transitionToPending() {
727724
}
728725
ListenableFuture<RouteLookupResponse> call = asyncRlsCall(request);
729726
if (!call.isDone()) {
727+
linkedHashLruCache.invalidate(request);
730728
PendingCacheEntry pendingEntry = new PendingCacheEntry(request, call, backoffPolicy);
731729
pendingCallCache.put(request, pendingEntry);
732-
linkedHashLruCache.invalidate(request);
730+
call.addListener(pendingEntry::handleDoneFuture, synchronizationContext);
733731
} else {
734732
try {
735733
RouteLookupResponse response = call.get();
@@ -837,7 +835,9 @@ Builder setBackoffProvider(BackoffPolicy.Provider provider) {
837835
}
838836

839837
CachingRlsLbClient build() {
840-
return new CachingRlsLbClient(this);
838+
CachingRlsLbClient client = new CachingRlsLbClient(this);
839+
helper.updateBalancingState(ConnectivityState.CONNECTING, client.rlsPicker);
840+
return client;
841841
}
842842
}
843843

0 commit comments

Comments
 (0)