From 3e09a8820d13f765d9bdd91d5820ce928d1b4b3b Mon Sep 17 00:00:00 2001 From: Igor Berntein Date: Tue, 27 Feb 2024 13:16:46 -0500 Subject: [PATCH 1/3] feat: improve batching summary errors To avoid data loss, the Batching api throws an exception when the batcher is closed when at least 1 entry failed. To help debugging, the BatchingException tries to be helpful by giving some details about the errors. Since the Batcher lifetime can extend indefinitely, the Batcher implementation tries to keep a bound on the amount of resources it uses to track the errors. Previously it would only track exception types and counts. The idea being that if the customer has the need for fine grained details, the customer can retrieve the details from the ApiFuture that was returned when an entry was added. However this hasn't panned out and creates confusion. This PR stores a sample of the error messages. The sample is by default capped to 50 entry and 50 rpc error messages. This can be adjusted by setting system properties --- .../google/api/gax/batching/BatcherStats.java | 20 +++++++++++++++++++ .../api/gax/batching/BatcherStatsTest.java | 11 +++++++--- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/gax-java/gax/src/main/java/com/google/api/gax/batching/BatcherStats.java b/gax-java/gax/src/main/java/com/google/api/gax/batching/BatcherStats.java index 784af6f599..8463b95053 100644 --- a/gax-java/gax/src/main/java/com/google/api/gax/batching/BatcherStats.java +++ b/gax-java/gax/src/main/java/com/google/api/gax/batching/BatcherStats.java @@ -31,7 +31,9 @@ import com.google.api.gax.rpc.ApiException; import com.google.api.gax.rpc.StatusCode.Code; +import com.google.common.base.Joiner; import com.google.common.base.MoreObjects; +import com.google.common.collect.EvictingQueue; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -52,6 +54,9 @@ class BatcherStats { private final Map entryExceptionCounts = new HashMap<>(); private final Map entryStatusCounts = new HashMap<>(); + private final EvictingQueue sampleOfRpcErrors = EvictingQueue.create(Integer.getInteger("com.google.api.gax.batching.errors.max-samples", 50)); + private final EvictingQueue sampleOfEntryErrors = EvictingQueue.create(Integer.getInteger("com.google.api.gax.batching.errors.max-samples", 50)); + /** * Records the count of the exception and it's type when a complete batch failed to apply. * @@ -69,6 +74,8 @@ synchronized void recordBatchFailure(Throwable throwable) { requestStatusCounts.put(code, oldStatusCount + 1); } + sampleOfRpcErrors.add(throwable.toString()); + int oldExceptionCount = MoreObjects.firstNonNull(requestExceptionCounts.get(exceptionClass), 0); requestExceptionCounts.put(exceptionClass, oldExceptionCount + 1); } @@ -96,6 +103,8 @@ synchronized void recordBatchElementsCompletion( Throwable actualCause = throwable.getCause(); Class exceptionClass = actualCause.getClass(); + sampleOfEntryErrors.add(actualCause.toString()); + if (actualCause instanceof ApiException) { Code code = ((ApiException) actualCause).getStatusCode().getCode(); exceptionClass = ApiException.class; @@ -144,6 +153,17 @@ synchronized BatchingException asException() { .append(buildExceptionList(entryExceptionCounts, entryStatusCounts)) .append("."); } + + if (!sampleOfRpcErrors.isEmpty()) { + messageBuilder.append(" Sample of RPC errors: "); + messageBuilder.append(Joiner.on(", ").join(sampleOfRpcErrors)); + messageBuilder.append("."); + } + if (!sampleOfEntryErrors.isEmpty()) { + messageBuilder.append(" Sample of entry errors: "); + messageBuilder.append(Joiner.on(", ").join(sampleOfEntryErrors)); + messageBuilder.append("."); + } return new BatchingException(messageBuilder.toString()); } diff --git a/gax-java/gax/src/test/java/com/google/api/gax/batching/BatcherStatsTest.java b/gax-java/gax/src/test/java/com/google/api/gax/batching/BatcherStatsTest.java index 1a95e4d3cb..b0742ec0ed 100644 --- a/gax-java/gax/src/test/java/com/google/api/gax/batching/BatcherStatsTest.java +++ b/gax-java/gax/src/test/java/com/google/api/gax/batching/BatcherStatsTest.java @@ -55,7 +55,7 @@ public void testRequestFailuresOnly() { batcherStats.recordBatchFailure( ApiExceptionFactory.createException( - new RuntimeException(), FakeStatusCode.of(StatusCode.Code.INVALID_ARGUMENT), false)); + "fake api error", new RuntimeException(), FakeStatusCode.of(StatusCode.Code.INVALID_ARGUMENT), false)); batcherStats.recordBatchFailure(new RuntimeException("Request failed")); @@ -65,6 +65,8 @@ public void testRequestFailuresOnly() { assertThat(exception).hasMessageThat().contains("1 RuntimeException"); assertThat(exception).hasMessageThat().contains("1 ApiException(1 INVALID_ARGUMENT)"); assertThat(exception).hasMessageThat().contains("and 0 partial failures."); + assertThat(exception).hasMessageThat().contains("com.google.api.gax.rpc.InvalidArgumentException: fake api error, java.lang.RuntimeException: Request failed."); + } @Test @@ -79,7 +81,7 @@ public void testEntryFailureOnly() { SettableApiFuture batchTwoResult = SettableApiFuture.create(); batchTwoResult.setException( ApiExceptionFactory.createException( - new RuntimeException(), FakeStatusCode.of(StatusCode.Code.UNAVAILABLE), false)); + "fake entry error", new RuntimeException(), FakeStatusCode.of(StatusCode.Code.UNAVAILABLE), false)); batcherStats.recordBatchElementsCompletion( ImmutableList.of(BatchEntry.create(2, batchTwoResult))); @@ -89,6 +91,7 @@ public void testEntryFailureOnly() { .contains("The 2 partial failures contained 2 entries that failed with:"); assertThat(ex).hasMessageThat().contains("1 ApiException(1 UNAVAILABLE)"); assertThat(ex).hasMessageThat().contains("1 IllegalStateException"); + assertThat(ex).hasMessageThat().contains("Sample of entry errors: java.lang.IllegalStateException: local element failure, com.google.api.gax.rpc.UnavailableException: fake entry error."); } @Test @@ -110,6 +113,8 @@ public void testRequestAndEntryFailures() { .contains( "Batching finished with 1 batches failed to apply due to: 1 RuntimeException and 1 " + "partial failures. The 1 partial failures contained 1 entries that failed with:" - + " 1 ApiException(1 ALREADY_EXISTS)."); + + " 1 ApiException(1 ALREADY_EXISTS)." + + " Sample of RPC errors: java.lang.RuntimeException: Batch failure." + + " Sample of entry errors: com.google.api.gax.rpc.AlreadyExistsException: java.lang.RuntimeException."); } } From 8c96fcdb2cfe1ae0d5e8e9ebfed2dc92590c20fe Mon Sep 17 00:00:00 2001 From: Igor Berntein Date: Tue, 27 Feb 2024 13:28:46 -0500 Subject: [PATCH 2/3] formatting --- .../google/api/gax/batching/BatcherStats.java | 8 +++++-- .../api/gax/batching/BatcherStatsTest.java | 21 ++++++++++++++----- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/gax-java/gax/src/main/java/com/google/api/gax/batching/BatcherStats.java b/gax-java/gax/src/main/java/com/google/api/gax/batching/BatcherStats.java index 8463b95053..a5301d48a0 100644 --- a/gax-java/gax/src/main/java/com/google/api/gax/batching/BatcherStats.java +++ b/gax-java/gax/src/main/java/com/google/api/gax/batching/BatcherStats.java @@ -54,8 +54,12 @@ class BatcherStats { private final Map entryExceptionCounts = new HashMap<>(); private final Map entryStatusCounts = new HashMap<>(); - private final EvictingQueue sampleOfRpcErrors = EvictingQueue.create(Integer.getInteger("com.google.api.gax.batching.errors.max-samples", 50)); - private final EvictingQueue sampleOfEntryErrors = EvictingQueue.create(Integer.getInteger("com.google.api.gax.batching.errors.max-samples", 50)); + private final EvictingQueue sampleOfRpcErrors = + EvictingQueue.create( + Integer.getInteger("com.google.api.gax.batching.errors.max-samples", 50)); + private final EvictingQueue sampleOfEntryErrors = + EvictingQueue.create( + Integer.getInteger("com.google.api.gax.batching.errors.max-samples", 50)); /** * Records the count of the exception and it's type when a complete batch failed to apply. diff --git a/gax-java/gax/src/test/java/com/google/api/gax/batching/BatcherStatsTest.java b/gax-java/gax/src/test/java/com/google/api/gax/batching/BatcherStatsTest.java index b0742ec0ed..1d10917aeb 100644 --- a/gax-java/gax/src/test/java/com/google/api/gax/batching/BatcherStatsTest.java +++ b/gax-java/gax/src/test/java/com/google/api/gax/batching/BatcherStatsTest.java @@ -55,7 +55,10 @@ public void testRequestFailuresOnly() { batcherStats.recordBatchFailure( ApiExceptionFactory.createException( - "fake api error", new RuntimeException(), FakeStatusCode.of(StatusCode.Code.INVALID_ARGUMENT), false)); + "fake api error", + new RuntimeException(), + FakeStatusCode.of(StatusCode.Code.INVALID_ARGUMENT), + false)); batcherStats.recordBatchFailure(new RuntimeException("Request failed")); @@ -65,8 +68,10 @@ public void testRequestFailuresOnly() { assertThat(exception).hasMessageThat().contains("1 RuntimeException"); assertThat(exception).hasMessageThat().contains("1 ApiException(1 INVALID_ARGUMENT)"); assertThat(exception).hasMessageThat().contains("and 0 partial failures."); - assertThat(exception).hasMessageThat().contains("com.google.api.gax.rpc.InvalidArgumentException: fake api error, java.lang.RuntimeException: Request failed."); - + assertThat(exception) + .hasMessageThat() + .contains( + "com.google.api.gax.rpc.InvalidArgumentException: fake api error, java.lang.RuntimeException: Request failed."); } @Test @@ -81,7 +86,10 @@ public void testEntryFailureOnly() { SettableApiFuture batchTwoResult = SettableApiFuture.create(); batchTwoResult.setException( ApiExceptionFactory.createException( - "fake entry error", new RuntimeException(), FakeStatusCode.of(StatusCode.Code.UNAVAILABLE), false)); + "fake entry error", + new RuntimeException(), + FakeStatusCode.of(StatusCode.Code.UNAVAILABLE), + false)); batcherStats.recordBatchElementsCompletion( ImmutableList.of(BatchEntry.create(2, batchTwoResult))); @@ -91,7 +99,10 @@ public void testEntryFailureOnly() { .contains("The 2 partial failures contained 2 entries that failed with:"); assertThat(ex).hasMessageThat().contains("1 ApiException(1 UNAVAILABLE)"); assertThat(ex).hasMessageThat().contains("1 IllegalStateException"); - assertThat(ex).hasMessageThat().contains("Sample of entry errors: java.lang.IllegalStateException: local element failure, com.google.api.gax.rpc.UnavailableException: fake entry error."); + assertThat(ex) + .hasMessageThat() + .contains( + "Sample of entry errors: java.lang.IllegalStateException: local element failure, com.google.api.gax.rpc.UnavailableException: fake entry error."); } @Test From d1efd527dd1cb3bade7c56a7bf49724f246ef586 Mon Sep 17 00:00:00 2001 From: Igor Berntein Date: Wed, 28 Feb 2024 10:15:28 -0500 Subject: [PATCH 3/3] document internal sys prop --- .../google/api/gax/batching/BatcherStats.java | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/gax-java/gax/src/main/java/com/google/api/gax/batching/BatcherStats.java b/gax-java/gax/src/main/java/com/google/api/gax/batching/BatcherStats.java index a5301d48a0..20b0f95954 100644 --- a/gax-java/gax/src/main/java/com/google/api/gax/batching/BatcherStats.java +++ b/gax-java/gax/src/main/java/com/google/api/gax/batching/BatcherStats.java @@ -54,12 +54,20 @@ class BatcherStats { private final Map entryExceptionCounts = new HashMap<>(); private final Map entryStatusCounts = new HashMap<>(); + /** + * The maximum number of error messages that a Batcher instance will retain. By default, a Batcher + * instance will retain 50 entry error messages and 50 RPC error messages. This limit can be + * temporarily increased by setting the {@code com.google.api.gax.batching.errors.max-samples} + * system property. This should only be needed in very rare situations and should not be + * considered part of the public api. + */ + private final int MAX_ERROR_MSG_SAMPLES = + Integer.getInteger("com.google.api.gax.batching.errors.max-samples", 50); + private final EvictingQueue sampleOfRpcErrors = - EvictingQueue.create( - Integer.getInteger("com.google.api.gax.batching.errors.max-samples", 50)); + EvictingQueue.create(MAX_ERROR_MSG_SAMPLES); private final EvictingQueue sampleOfEntryErrors = - EvictingQueue.create( - Integer.getInteger("com.google.api.gax.batching.errors.max-samples", 50)); + EvictingQueue.create(MAX_ERROR_MSG_SAMPLES); /** * Records the count of the exception and it's type when a complete batch failed to apply.