Skip to content

Revert "feat: built in metrics for afe latency and connectivity error" #3818

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 4 additions & 11 deletions google-cloud-spanner/clirr-ignored-differences.xml
Original file line number Diff line number Diff line change
Expand Up @@ -751,13 +751,6 @@
<method>boolean isEnableBuiltInMetrics()</method>
</difference>

<!-- Added AFE Server Timing option -->
<difference>
<differenceType>7012</differenceType>
<className>com/google/cloud/spanner/SpannerOptions$SpannerEnvironment</className>
<method>boolean isEnableAFEServerTiming()</method>
</difference>

<!-- Added Monitoring host option -->
<difference>
<differenceType>7012</differenceType>
Expand Down Expand Up @@ -814,7 +807,7 @@
<className>com/google/cloud/spanner/connection/Connection</className>
<method>boolean isKeepTransactionAlive()</method>
</difference>

<!-- Automatic DML batching -->
<difference>
<differenceType>7012</differenceType>
Expand Down Expand Up @@ -846,7 +839,7 @@
<className>com/google/cloud/spanner/connection/Connection</className>
<method>boolean isAutoBatchDmlUpdateCountVerification()</method>
</difference>

<!-- Retry DML as Partitioned DML -->
<difference>
<differenceType>7012</differenceType>
Expand All @@ -870,7 +863,7 @@
<className>com/google/cloud/spanner/connection/Connection</className>
<method>java.lang.Object runTransaction(com.google.cloud.spanner.connection.Connection$TransactionCallable)</method>
</difference>

<!-- Added experimental host option -->
<difference>
<differenceType>7012</differenceType>
Expand Down Expand Up @@ -899,7 +892,7 @@
<className>com/google/cloud/spanner/connection/Connection</className>
<method>java.lang.String getDefaultSequenceKind()</method>
</difference>

<!-- Default isolation level -->
<difference>
<differenceType>7012</differenceType>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import io.opentelemetry.sdk.metrics.InstrumentSelector;
import io.opentelemetry.sdk.metrics.InstrumentType;
import io.opentelemetry.sdk.metrics.View;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
Expand All @@ -38,9 +37,6 @@ public class BuiltInMetricsConstant {
public static final String GAX_METER_NAME = OpenTelemetryMetricsRecorder.GAX_METER_NAME;
static final String SPANNER_METER_NAME = "spanner-java";
static final String GFE_LATENCIES_NAME = "gfe_latencies";
static final String AFE_LATENCIES_NAME = "afe_latencies";
static final String GFE_CONNECTIVITY_ERROR_NAME = "gfe_connectivity_error_count";
static final String AFE_CONNECTIVITY_ERROR_NAME = "afe_connectivity_error_count";
static final String OPERATION_LATENCIES_NAME = "operation_latencies";
static final String ATTEMPT_LATENCIES_NAME = "attempt_latencies";
static final String OPERATION_LATENCY_NAME = "operation_latency";
Expand All @@ -54,10 +50,7 @@ public class BuiltInMetricsConstant {
ATTEMPT_LATENCIES_NAME,
OPERATION_COUNT_NAME,
ATTEMPT_COUNT_NAME,
GFE_LATENCIES_NAME,
AFE_LATENCIES_NAME,
GFE_CONNECTIVITY_ERROR_NAME,
AFE_CONNECTIVITY_ERROR_NAME)
GFE_LATENCIES_NAME)
.stream()
.map(m -> METER_NAME + '/' + m)
.collect(Collectors.toSet());
Expand Down Expand Up @@ -109,14 +102,14 @@ public class BuiltInMetricsConstant {
DIRECT_PATH_ENABLED_KEY,
DIRECT_PATH_USED_KEY);

static List<Double> BUCKET_BOUNDARIES =
ImmutableList.of(
0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
16.0, 17.0, 18.0, 19.0, 20.0, 25.0, 30.0, 40.0, 50.0, 65.0, 80.0, 100.0, 130.0, 160.0,
200.0, 250.0, 300.0, 400.0, 500.0, 650.0, 800.0, 1000.0, 2000.0, 5000.0, 10000.0, 20000.0,
50000.0, 100000.0, 200000.0, 400000.0, 800000.0, 1600000.0, 3200000.0);
static Aggregation AGGREGATION_WITH_MILLIS_HISTOGRAM =
Aggregation.explicitBucketHistogram(BUCKET_BOUNDARIES);
Aggregation.explicitBucketHistogram(
ImmutableList.of(
0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 25.0, 30.0, 40.0, 50.0, 65.0, 80.0, 100.0, 130.0,
160.0, 200.0, 250.0, 300.0, 400.0, 500.0, 650.0, 800.0, 1000.0, 2000.0, 5000.0,
10000.0, 20000.0, 50000.0, 100000.0, 200000.0, 400000.0, 800000.0, 1600000.0,
3200000.0));

static Map<InstrumentSelector, View> getAllViews() {
ImmutableMap.Builder<InstrumentSelector, View> views = ImmutableMap.builder();
Expand All @@ -136,6 +129,14 @@ static Map<InstrumentSelector, View> getAllViews() {
BuiltInMetricsConstant.AGGREGATION_WITH_MILLIS_HISTOGRAM,
InstrumentType.HISTOGRAM,
"ms");
defineView(
views,
BuiltInMetricsConstant.SPANNER_METER_NAME,
BuiltInMetricsConstant.GFE_LATENCIES_NAME,
BuiltInMetricsConstant.GFE_LATENCIES_NAME,
BuiltInMetricsConstant.AGGREGATION_WITH_MILLIS_HISTOGRAM,
InstrumentType.HISTOGRAM,
"ms");
defineView(
views,
BuiltInMetricsConstant.GAX_METER_NAME,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.common.AttributesBuilder;
import io.opentelemetry.api.metrics.DoubleHistogram;
import io.opentelemetry.api.metrics.LongCounter;
import io.opentelemetry.api.metrics.Meter;
import java.util.Map;

Expand All @@ -36,9 +35,6 @@
class BuiltInMetricsRecorder extends OpenTelemetryMetricsRecorder {

private final DoubleHistogram gfeLatencyRecorder;
private final DoubleHistogram afeLatencyRecorder;
private final LongCounter gfeHeaderMissingCountRecorder;
private final LongCounter afeHeaderMissingCountRecorder;

/**
* Creates the following instruments for the following metrics:
Expand All @@ -63,27 +59,6 @@ class BuiltInMetricsRecorder extends OpenTelemetryMetricsRecorder {
.setDescription(
"Latency between Google's network receiving an RPC and reading back the first byte of the response")
.setUnit("ms")
.setExplicitBucketBoundariesAdvice(BuiltInMetricsConstant.BUCKET_BOUNDARIES)
.build();
this.afeLatencyRecorder =
meter
.histogramBuilder(serviceName + '/' + BuiltInMetricsConstant.AFE_LATENCIES_NAME)
.setDescription(
"Latency between Spanner API Frontend receiving an RPC and starting to write back the response.")
.setExplicitBucketBoundariesAdvice(BuiltInMetricsConstant.BUCKET_BOUNDARIES)
.setUnit("ms")
.build();
this.gfeHeaderMissingCountRecorder =
meter
.counterBuilder(serviceName + '/' + BuiltInMetricsConstant.GFE_CONNECTIVITY_ERROR_NAME)
.setDescription("Number of requests that failed to reach the Google network.")
.setUnit("1")
.build();
this.afeHeaderMissingCountRecorder =
meter
.counterBuilder(serviceName + '/' + BuiltInMetricsConstant.AFE_CONNECTIVITY_ERROR_NAME)
.setDescription("Number of requests that failed to reach the Spanner API Frontend.")
.setUnit("1")
.build();
}

Expand All @@ -94,25 +69,8 @@ class BuiltInMetricsRecorder extends OpenTelemetryMetricsRecorder {
* @param gfeLatency Attempt Latency in ms
* @param attributes Map of the attributes to store
*/
void recordServerTimingHeaderMetrics(
Long gfeLatency,
Long afeLatency,
Long gfeHeaderMissingCount,
Long afeHeaderMissingCount,
Map<String, String> attributes) {
io.opentelemetry.api.common.Attributes otelAttributes = toOtelAttributes(attributes);
if (gfeLatency != null) {
gfeLatencyRecorder.record(gfeLatency, otelAttributes);
}
if (gfeHeaderMissingCount > 0) {
gfeHeaderMissingCountRecorder.add(gfeHeaderMissingCount, otelAttributes);
}
if (afeLatency != null) {
afeLatencyRecorder.record(afeLatency, otelAttributes);
}
if (afeHeaderMissingCount > 0) {
afeHeaderMissingCountRecorder.add(afeHeaderMissingCount, otelAttributes);
}
void recordGFELatency(double gfeLatency, Map<String, String> attributes) {
gfeLatencyRecorder.record(gfeLatency, toOtelAttributes(attributes));
}

Attributes toOtelAttributes(Map<String, String> attributes) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,8 @@ class BuiltInMetricsTracer extends MetricsTracer implements ApiTracer {
private final BuiltInMetricsRecorder builtInOpenTelemetryMetricsRecorder;
// These are RPC specific attributes and pertain to a specific API Trace
private final Map<String, String> attributes = new HashMap<>();

private Long gfeLatency = null;
private Long afeLatency = null;
private long gfeHeaderMissingCount = 0;
private long afeHeaderMissingCount = 0;

BuiltInMetricsTracer(
MethodName methodName, BuiltInMetricsRecorder builtInOpenTelemetryMetricsRecorder) {
Expand All @@ -56,9 +54,10 @@ class BuiltInMetricsTracer extends MetricsTracer implements ApiTracer {
@Override
public void attemptSucceeded() {
super.attemptSucceeded();
attributes.put(STATUS_ATTRIBUTE, StatusCode.Code.OK.toString());
builtInOpenTelemetryMetricsRecorder.recordServerTimingHeaderMetrics(
gfeLatency, afeLatency, gfeHeaderMissingCount, afeHeaderMissingCount, attributes);
if (gfeLatency != null) {
attributes.put(STATUS_ATTRIBUTE, StatusCode.Code.OK.toString());
builtInOpenTelemetryMetricsRecorder.recordGFELatency(gfeLatency, attributes);
}
}

/**
Expand All @@ -68,9 +67,10 @@ public void attemptSucceeded() {
@Override
public void attemptCancelled() {
super.attemptCancelled();
attributes.put(STATUS_ATTRIBUTE, StatusCode.Code.CANCELLED.toString());
builtInOpenTelemetryMetricsRecorder.recordServerTimingHeaderMetrics(
gfeLatency, afeLatency, gfeHeaderMissingCount, afeHeaderMissingCount, attributes);
if (gfeLatency != null) {
attributes.put(STATUS_ATTRIBUTE, StatusCode.Code.CANCELLED.toString());
builtInOpenTelemetryMetricsRecorder.recordGFELatency(gfeLatency, attributes);
}
}

/**
Expand All @@ -84,9 +84,10 @@ public void attemptCancelled() {
@Override
public void attemptFailedDuration(Throwable error, java.time.Duration delay) {
super.attemptFailedDuration(error, delay);
attributes.put(STATUS_ATTRIBUTE, extractStatus(error));
builtInOpenTelemetryMetricsRecorder.recordServerTimingHeaderMetrics(
gfeLatency, afeLatency, gfeHeaderMissingCount, afeHeaderMissingCount, attributes);
if (gfeLatency != null) {
attributes.put(STATUS_ATTRIBUTE, extractStatus(error));
builtInOpenTelemetryMetricsRecorder.recordGFELatency(gfeLatency, attributes);
}
}

/**
Expand All @@ -99,9 +100,10 @@ public void attemptFailedDuration(Throwable error, java.time.Duration delay) {
@Override
public void attemptFailedRetriesExhausted(Throwable error) {
super.attemptFailedRetriesExhausted(error);
attributes.put(STATUS_ATTRIBUTE, extractStatus(error));
builtInOpenTelemetryMetricsRecorder.recordServerTimingHeaderMetrics(
gfeLatency, afeLatency, gfeHeaderMissingCount, afeHeaderMissingCount, attributes);
if (gfeLatency != null) {
attributes.put(STATUS_ATTRIBUTE, extractStatus(error));
builtInOpenTelemetryMetricsRecorder.recordGFELatency(gfeLatency, attributes);
}
}

/**
Expand All @@ -114,27 +116,16 @@ public void attemptFailedRetriesExhausted(Throwable error) {
@Override
public void attemptPermanentFailure(Throwable error) {
super.attemptPermanentFailure(error);
attributes.put(STATUS_ATTRIBUTE, extractStatus(error));
builtInOpenTelemetryMetricsRecorder.recordServerTimingHeaderMetrics(
gfeLatency, afeLatency, gfeHeaderMissingCount, afeHeaderMissingCount, attributes);
if (gfeLatency != null) {
attributes.put(STATUS_ATTRIBUTE, extractStatus(error));
builtInOpenTelemetryMetricsRecorder.recordGFELatency(gfeLatency, attributes);
}
}

void recordGFELatency(Long gfeLatency) {
this.gfeLatency = gfeLatency;
}

void recordAFELatency(Long afeLatency) {
this.afeLatency = afeLatency;
}

void recordGfeHeaderMissingCount(Long value) {
this.gfeHeaderMissingCount = value;
}

void recordAfeHeaderMissingCount(Long value) {
this.afeHeaderMissingCount = value;
}

@Override
public void addAttributes(Map<String, String> attributes) {
super.addAttributes(attributes);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -198,28 +198,4 @@ public void recordGFELatency(Long gfeLatency) {
}
}
}

public void recordGfeHeaderMissingCount(Long value) {
for (ApiTracer child : children) {
if (child instanceof BuiltInMetricsTracer) {
((BuiltInMetricsTracer) child).recordGfeHeaderMissingCount(value);
}
}
}

public void recordAFELatency(Long afeLatency) {
for (ApiTracer child : children) {
if (child instanceof BuiltInMetricsTracer) {
((BuiltInMetricsTracer) child).recordAFELatency(afeLatency);
}
}
}

public void recordAfeHeaderMissingCount(Long value) {
for (ApiTracer child : children) {
if (child instanceof BuiltInMetricsTracer) {
((BuiltInMetricsTracer) child).recordAfeHeaderMissingCount(value);
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -683,10 +683,6 @@ private static boolean isEmulatorEnabled(SpannerOptions options, String emulator
&& options.getHost().endsWith(emulatorHost);
}

public static boolean isEnableAFEServerTiming() {
return !Boolean.parseBoolean(System.getenv("SPANNER_DISABLE_AFE_SERVER_TIMING"));
}

private static final RetrySettings ADMIN_REQUESTS_LIMIT_EXCEEDED_RETRY_SETTINGS =
RetrySettings.newBuilder()
.setInitialRetryDelayDuration(Duration.ofSeconds(5L))
Expand Down Expand Up @@ -2034,9 +2030,6 @@ <ReqT, RespT> GrpcCallContext newCallContext(
if (endToEndTracingEnabled) {
context = context.withExtraHeaders(metadataProvider.newEndToEndTracingHeader());
}
if (isEnableAFEServerTiming()) {
context = context.withExtraHeaders(metadataProvider.newAfeServerTimingHeader());
}
if (callCredentialsProvider != null) {
CallCredentials callCredentials = callCredentialsProvider.getCallCredentials();
if (callCredentials != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ class HeaderInterceptor implements ClientInterceptor {
private static final Metadata.Key<String> SERVER_TIMING_HEADER_KEY =
Metadata.Key.of("server-timing", Metadata.ASCII_STRING_MARSHALLER);
private static final String GFE_TIMING_HEADER = "gfet4t7";
private static final String AFE_TIMING_HEADER = "afet4t7";
private static final Metadata.Key<String> GOOGLE_CLOUD_RESOURCE_PREFIX_KEY =
Metadata.Key.of("google-cloud-resource-prefix", Metadata.ASCII_STRING_MARSHALLER);
private static final Pattern SERVER_TIMING_PATTERN =
Expand Down Expand Up @@ -175,25 +174,13 @@ private void processHeader(
if (compositeTracer != null) {
compositeTracer.recordGFELatency(gfeLatency);
}

if (span != null) {
span.setAttribute("gfe_latency", String.valueOf(gfeLatency));
}
} else {
measureMap.put(SPANNER_GFE_HEADER_MISSING_COUNT, 1L).record(tagContext);
spannerRpcMetrics.recordGfeHeaderMissingCount(1L, attributes);
if (compositeTracer != null) {
compositeTracer.recordGfeHeaderMissingCount(1L);
}
}

// Record AFE metrics
if (compositeTracer != null && GapicSpannerRpc.isEnableAFEServerTiming()) {
if (serverTimingMetrics.containsKey(AFE_TIMING_HEADER)) {
long afeLatency = serverTimingMetrics.get(AFE_TIMING_HEADER);
compositeTracer.recordAFELatency(afeLatency);
} else {
compositeTracer.recordAfeHeaderMissingCount(1L);
}
}
} catch (NumberFormatException e) {
LOGGER.log(LEVEL, "Invalid server-timing object in header: {}", serverTiming);
Expand Down
Loading
Loading