Skip to content

Commit 45b6b5b

Browse files
authored
Revert "Revert "feat: built in metrics for afe latency and connectivity error…"
This reverts commit b6c9c6e.
1 parent b6c9c6e commit 45b6b5b

11 files changed

+245
-50
lines changed

google-cloud-spanner/clirr-ignored-differences.xml

+11-4
Original file line numberDiff line numberDiff line change
@@ -751,6 +751,13 @@
751751
<method>boolean isEnableBuiltInMetrics()</method>
752752
</difference>
753753

754+
<!-- Added AFE Server Timing option -->
755+
<difference>
756+
<differenceType>7012</differenceType>
757+
<className>com/google/cloud/spanner/SpannerOptions$SpannerEnvironment</className>
758+
<method>boolean isEnableAFEServerTiming()</method>
759+
</difference>
760+
754761
<!-- Added Monitoring host option -->
755762
<difference>
756763
<differenceType>7012</differenceType>
@@ -807,7 +814,7 @@
807814
<className>com/google/cloud/spanner/connection/Connection</className>
808815
<method>boolean isKeepTransactionAlive()</method>
809816
</difference>
810-
817+
811818
<!-- Automatic DML batching -->
812819
<difference>
813820
<differenceType>7012</differenceType>
@@ -839,7 +846,7 @@
839846
<className>com/google/cloud/spanner/connection/Connection</className>
840847
<method>boolean isAutoBatchDmlUpdateCountVerification()</method>
841848
</difference>
842-
849+
843850
<!-- Retry DML as Partitioned DML -->
844851
<difference>
845852
<differenceType>7012</differenceType>
@@ -863,7 +870,7 @@
863870
<className>com/google/cloud/spanner/connection/Connection</className>
864871
<method>java.lang.Object runTransaction(com.google.cloud.spanner.connection.Connection$TransactionCallable)</method>
865872
</difference>
866-
873+
867874
<!-- Added experimental host option -->
868875
<difference>
869876
<differenceType>7012</differenceType>
@@ -892,7 +899,7 @@
892899
<className>com/google/cloud/spanner/connection/Connection</className>
893900
<method>java.lang.String getDefaultSequenceKind()</method>
894901
</difference>
895-
902+
896903
<!-- Default isolation level -->
897904
<difference>
898905
<differenceType>7012</differenceType>

google-cloud-spanner/src/main/java/com/google/cloud/spanner/BuiltInMetricsConstant.java

+15-16
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import io.opentelemetry.sdk.metrics.InstrumentSelector;
2727
import io.opentelemetry.sdk.metrics.InstrumentType;
2828
import io.opentelemetry.sdk.metrics.View;
29+
import java.util.List;
2930
import java.util.Map;
3031
import java.util.Set;
3132
import java.util.stream.Collectors;
@@ -37,6 +38,9 @@ public class BuiltInMetricsConstant {
3738
public static final String GAX_METER_NAME = OpenTelemetryMetricsRecorder.GAX_METER_NAME;
3839
static final String SPANNER_METER_NAME = "spanner-java";
3940
static final String GFE_LATENCIES_NAME = "gfe_latencies";
41+
static final String AFE_LATENCIES_NAME = "afe_latencies";
42+
static final String GFE_CONNECTIVITY_ERROR_NAME = "gfe_connectivity_error_count";
43+
static final String AFE_CONNECTIVITY_ERROR_NAME = "afe_connectivity_error_count";
4044
static final String OPERATION_LATENCIES_NAME = "operation_latencies";
4145
static final String ATTEMPT_LATENCIES_NAME = "attempt_latencies";
4246
static final String OPERATION_LATENCY_NAME = "operation_latency";
@@ -50,7 +54,10 @@ public class BuiltInMetricsConstant {
5054
ATTEMPT_LATENCIES_NAME,
5155
OPERATION_COUNT_NAME,
5256
ATTEMPT_COUNT_NAME,
53-
GFE_LATENCIES_NAME)
57+
GFE_LATENCIES_NAME,
58+
AFE_LATENCIES_NAME,
59+
GFE_CONNECTIVITY_ERROR_NAME,
60+
AFE_CONNECTIVITY_ERROR_NAME)
5461
.stream()
5562
.map(m -> METER_NAME + '/' + m)
5663
.collect(Collectors.toSet());
@@ -102,14 +109,14 @@ public class BuiltInMetricsConstant {
102109
DIRECT_PATH_ENABLED_KEY,
103110
DIRECT_PATH_USED_KEY);
104111

112+
static List<Double> BUCKET_BOUNDARIES =
113+
ImmutableList.of(
114+
0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
115+
16.0, 17.0, 18.0, 19.0, 20.0, 25.0, 30.0, 40.0, 50.0, 65.0, 80.0, 100.0, 130.0, 160.0,
116+
200.0, 250.0, 300.0, 400.0, 500.0, 650.0, 800.0, 1000.0, 2000.0, 5000.0, 10000.0, 20000.0,
117+
50000.0, 100000.0, 200000.0, 400000.0, 800000.0, 1600000.0, 3200000.0);
105118
static Aggregation AGGREGATION_WITH_MILLIS_HISTOGRAM =
106-
Aggregation.explicitBucketHistogram(
107-
ImmutableList.of(
108-
0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
109-
15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 25.0, 30.0, 40.0, 50.0, 65.0, 80.0, 100.0, 130.0,
110-
160.0, 200.0, 250.0, 300.0, 400.0, 500.0, 650.0, 800.0, 1000.0, 2000.0, 5000.0,
111-
10000.0, 20000.0, 50000.0, 100000.0, 200000.0, 400000.0, 800000.0, 1600000.0,
112-
3200000.0));
119+
Aggregation.explicitBucketHistogram(BUCKET_BOUNDARIES);
113120

114121
static Map<InstrumentSelector, View> getAllViews() {
115122
ImmutableMap.Builder<InstrumentSelector, View> views = ImmutableMap.builder();
@@ -129,14 +136,6 @@ static Map<InstrumentSelector, View> getAllViews() {
129136
BuiltInMetricsConstant.AGGREGATION_WITH_MILLIS_HISTOGRAM,
130137
InstrumentType.HISTOGRAM,
131138
"ms");
132-
defineView(
133-
views,
134-
BuiltInMetricsConstant.SPANNER_METER_NAME,
135-
BuiltInMetricsConstant.GFE_LATENCIES_NAME,
136-
BuiltInMetricsConstant.GFE_LATENCIES_NAME,
137-
BuiltInMetricsConstant.AGGREGATION_WITH_MILLIS_HISTOGRAM,
138-
InstrumentType.HISTOGRAM,
139-
"ms");
140139
defineView(
141140
views,
142141
BuiltInMetricsConstant.GAX_METER_NAME,

google-cloud-spanner/src/main/java/com/google/cloud/spanner/BuiltInMetricsRecorder.java

+44-2
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import io.opentelemetry.api.common.Attributes;
2424
import io.opentelemetry.api.common.AttributesBuilder;
2525
import io.opentelemetry.api.metrics.DoubleHistogram;
26+
import io.opentelemetry.api.metrics.LongCounter;
2627
import io.opentelemetry.api.metrics.Meter;
2728
import java.util.Map;
2829

@@ -35,6 +36,9 @@
3536
class BuiltInMetricsRecorder extends OpenTelemetryMetricsRecorder {
3637

3738
private final DoubleHistogram gfeLatencyRecorder;
39+
private final DoubleHistogram afeLatencyRecorder;
40+
private final LongCounter gfeHeaderMissingCountRecorder;
41+
private final LongCounter afeHeaderMissingCountRecorder;
3842

3943
/**
4044
* Creates the following instruments for the following metrics:
@@ -59,6 +63,27 @@ class BuiltInMetricsRecorder extends OpenTelemetryMetricsRecorder {
5963
.setDescription(
6064
"Latency between Google's network receiving an RPC and reading back the first byte of the response")
6165
.setUnit("ms")
66+
.setExplicitBucketBoundariesAdvice(BuiltInMetricsConstant.BUCKET_BOUNDARIES)
67+
.build();
68+
this.afeLatencyRecorder =
69+
meter
70+
.histogramBuilder(serviceName + '/' + BuiltInMetricsConstant.AFE_LATENCIES_NAME)
71+
.setDescription(
72+
"Latency between Spanner API Frontend receiving an RPC and starting to write back the response.")
73+
.setExplicitBucketBoundariesAdvice(BuiltInMetricsConstant.BUCKET_BOUNDARIES)
74+
.setUnit("ms")
75+
.build();
76+
this.gfeHeaderMissingCountRecorder =
77+
meter
78+
.counterBuilder(serviceName + '/' + BuiltInMetricsConstant.GFE_CONNECTIVITY_ERROR_NAME)
79+
.setDescription("Number of requests that failed to reach the Google network.")
80+
.setUnit("1")
81+
.build();
82+
this.afeHeaderMissingCountRecorder =
83+
meter
84+
.counterBuilder(serviceName + '/' + BuiltInMetricsConstant.AFE_CONNECTIVITY_ERROR_NAME)
85+
.setDescription("Number of requests that failed to reach the Spanner API Frontend.")
86+
.setUnit("1")
6287
.build();
6388
}
6489

@@ -69,8 +94,25 @@ class BuiltInMetricsRecorder extends OpenTelemetryMetricsRecorder {
6994
* @param gfeLatency Attempt Latency in ms
7095
* @param attributes Map of the attributes to store
7196
*/
72-
void recordGFELatency(double gfeLatency, Map<String, String> attributes) {
73-
gfeLatencyRecorder.record(gfeLatency, toOtelAttributes(attributes));
97+
void recordServerTimingHeaderMetrics(
98+
Long gfeLatency,
99+
Long afeLatency,
100+
Long gfeHeaderMissingCount,
101+
Long afeHeaderMissingCount,
102+
Map<String, String> attributes) {
103+
io.opentelemetry.api.common.Attributes otelAttributes = toOtelAttributes(attributes);
104+
if (gfeLatency != null) {
105+
gfeLatencyRecorder.record(gfeLatency, otelAttributes);
106+
}
107+
if (gfeHeaderMissingCount > 0) {
108+
gfeHeaderMissingCountRecorder.add(gfeHeaderMissingCount, otelAttributes);
109+
}
110+
if (afeLatency != null) {
111+
afeLatencyRecorder.record(afeLatency, otelAttributes);
112+
}
113+
if (afeHeaderMissingCount > 0) {
114+
afeHeaderMissingCountRecorder.add(afeHeaderMissingCount, otelAttributes);
115+
}
74116
}
75117

76118
Attributes toOtelAttributes(Map<String, String> attributes) {

google-cloud-spanner/src/main/java/com/google/cloud/spanner/BuiltInMetricsTracer.java

+30-21
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,10 @@ class BuiltInMetricsTracer extends MetricsTracer implements ApiTracer {
3737
private final BuiltInMetricsRecorder builtInOpenTelemetryMetricsRecorder;
3838
// These are RPC specific attributes and pertain to a specific API Trace
3939
private final Map<String, String> attributes = new HashMap<>();
40-
4140
private Long gfeLatency = null;
41+
private Long afeLatency = null;
42+
private long gfeHeaderMissingCount = 0;
43+
private long afeHeaderMissingCount = 0;
4244

4345
BuiltInMetricsTracer(
4446
MethodName methodName, BuiltInMetricsRecorder builtInOpenTelemetryMetricsRecorder) {
@@ -54,10 +56,9 @@ class BuiltInMetricsTracer extends MetricsTracer implements ApiTracer {
5456
@Override
5557
public void attemptSucceeded() {
5658
super.attemptSucceeded();
57-
if (gfeLatency != null) {
58-
attributes.put(STATUS_ATTRIBUTE, StatusCode.Code.OK.toString());
59-
builtInOpenTelemetryMetricsRecorder.recordGFELatency(gfeLatency, attributes);
60-
}
59+
attributes.put(STATUS_ATTRIBUTE, StatusCode.Code.OK.toString());
60+
builtInOpenTelemetryMetricsRecorder.recordServerTimingHeaderMetrics(
61+
gfeLatency, afeLatency, gfeHeaderMissingCount, afeHeaderMissingCount, attributes);
6162
}
6263

6364
/**
@@ -67,10 +68,9 @@ public void attemptSucceeded() {
6768
@Override
6869
public void attemptCancelled() {
6970
super.attemptCancelled();
70-
if (gfeLatency != null) {
71-
attributes.put(STATUS_ATTRIBUTE, StatusCode.Code.CANCELLED.toString());
72-
builtInOpenTelemetryMetricsRecorder.recordGFELatency(gfeLatency, attributes);
73-
}
71+
attributes.put(STATUS_ATTRIBUTE, StatusCode.Code.CANCELLED.toString());
72+
builtInOpenTelemetryMetricsRecorder.recordServerTimingHeaderMetrics(
73+
gfeLatency, afeLatency, gfeHeaderMissingCount, afeHeaderMissingCount, attributes);
7474
}
7575

7676
/**
@@ -84,10 +84,9 @@ public void attemptCancelled() {
8484
@Override
8585
public void attemptFailedDuration(Throwable error, java.time.Duration delay) {
8686
super.attemptFailedDuration(error, delay);
87-
if (gfeLatency != null) {
88-
attributes.put(STATUS_ATTRIBUTE, extractStatus(error));
89-
builtInOpenTelemetryMetricsRecorder.recordGFELatency(gfeLatency, attributes);
90-
}
87+
attributes.put(STATUS_ATTRIBUTE, extractStatus(error));
88+
builtInOpenTelemetryMetricsRecorder.recordServerTimingHeaderMetrics(
89+
gfeLatency, afeLatency, gfeHeaderMissingCount, afeHeaderMissingCount, attributes);
9190
}
9291

9392
/**
@@ -100,10 +99,9 @@ public void attemptFailedDuration(Throwable error, java.time.Duration delay) {
10099
@Override
101100
public void attemptFailedRetriesExhausted(Throwable error) {
102101
super.attemptFailedRetriesExhausted(error);
103-
if (gfeLatency != null) {
104-
attributes.put(STATUS_ATTRIBUTE, extractStatus(error));
105-
builtInOpenTelemetryMetricsRecorder.recordGFELatency(gfeLatency, attributes);
106-
}
102+
attributes.put(STATUS_ATTRIBUTE, extractStatus(error));
103+
builtInOpenTelemetryMetricsRecorder.recordServerTimingHeaderMetrics(
104+
gfeLatency, afeLatency, gfeHeaderMissingCount, afeHeaderMissingCount, attributes);
107105
}
108106

109107
/**
@@ -116,16 +114,27 @@ public void attemptFailedRetriesExhausted(Throwable error) {
116114
@Override
117115
public void attemptPermanentFailure(Throwable error) {
118116
super.attemptPermanentFailure(error);
119-
if (gfeLatency != null) {
120-
attributes.put(STATUS_ATTRIBUTE, extractStatus(error));
121-
builtInOpenTelemetryMetricsRecorder.recordGFELatency(gfeLatency, attributes);
122-
}
117+
attributes.put(STATUS_ATTRIBUTE, extractStatus(error));
118+
builtInOpenTelemetryMetricsRecorder.recordServerTimingHeaderMetrics(
119+
gfeLatency, afeLatency, gfeHeaderMissingCount, afeHeaderMissingCount, attributes);
123120
}
124121

125122
void recordGFELatency(Long gfeLatency) {
126123
this.gfeLatency = gfeLatency;
127124
}
128125

126+
void recordAFELatency(Long afeLatency) {
127+
this.afeLatency = afeLatency;
128+
}
129+
130+
void recordGfeHeaderMissingCount(Long value) {
131+
this.gfeHeaderMissingCount = value;
132+
}
133+
134+
void recordAfeHeaderMissingCount(Long value) {
135+
this.afeHeaderMissingCount = value;
136+
}
137+
129138
@Override
130139
public void addAttributes(Map<String, String> attributes) {
131140
super.addAttributes(attributes);

google-cloud-spanner/src/main/java/com/google/cloud/spanner/CompositeTracer.java

+24
Original file line numberDiff line numberDiff line change
@@ -198,4 +198,28 @@ public void recordGFELatency(Long gfeLatency) {
198198
}
199199
}
200200
}
201+
202+
public void recordGfeHeaderMissingCount(Long value) {
203+
for (ApiTracer child : children) {
204+
if (child instanceof BuiltInMetricsTracer) {
205+
((BuiltInMetricsTracer) child).recordGfeHeaderMissingCount(value);
206+
}
207+
}
208+
}
209+
210+
public void recordAFELatency(Long afeLatency) {
211+
for (ApiTracer child : children) {
212+
if (child instanceof BuiltInMetricsTracer) {
213+
((BuiltInMetricsTracer) child).recordAFELatency(afeLatency);
214+
}
215+
}
216+
}
217+
218+
public void recordAfeHeaderMissingCount(Long value) {
219+
for (ApiTracer child : children) {
220+
if (child instanceof BuiltInMetricsTracer) {
221+
((BuiltInMetricsTracer) child).recordAfeHeaderMissingCount(value);
222+
}
223+
}
224+
}
201225
}

google-cloud-spanner/src/main/java/com/google/cloud/spanner/spi/v1/GapicSpannerRpc.java

+7
Original file line numberDiff line numberDiff line change
@@ -683,6 +683,10 @@ private static boolean isEmulatorEnabled(SpannerOptions options, String emulator
683683
&& options.getHost().endsWith(emulatorHost);
684684
}
685685

686+
public static boolean isEnableAFEServerTiming() {
687+
return !Boolean.parseBoolean(System.getenv("SPANNER_DISABLE_AFE_SERVER_TIMING"));
688+
}
689+
686690
private static final RetrySettings ADMIN_REQUESTS_LIMIT_EXCEEDED_RETRY_SETTINGS =
687691
RetrySettings.newBuilder()
688692
.setInitialRetryDelayDuration(Duration.ofSeconds(5L))
@@ -2030,6 +2034,9 @@ <ReqT, RespT> GrpcCallContext newCallContext(
20302034
if (endToEndTracingEnabled) {
20312035
context = context.withExtraHeaders(metadataProvider.newEndToEndTracingHeader());
20322036
}
2037+
if (isEnableAFEServerTiming()) {
2038+
context = context.withExtraHeaders(metadataProvider.newAfeServerTimingHeader());
2039+
}
20332040
if (callCredentialsProvider != null) {
20342041
CallCredentials callCredentials = callCredentialsProvider.getCallCredentials();
20352042
if (callCredentials != null) {

google-cloud-spanner/src/main/java/com/google/cloud/spanner/spi/v1/HeaderInterceptor.java

+14-1
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ class HeaderInterceptor implements ClientInterceptor {
7272
private static final Metadata.Key<String> SERVER_TIMING_HEADER_KEY =
7373
Metadata.Key.of("server-timing", Metadata.ASCII_STRING_MARSHALLER);
7474
private static final String GFE_TIMING_HEADER = "gfet4t7";
75+
private static final String AFE_TIMING_HEADER = "afet4t7";
7576
private static final Metadata.Key<String> GOOGLE_CLOUD_RESOURCE_PREFIX_KEY =
7677
Metadata.Key.of("google-cloud-resource-prefix", Metadata.ASCII_STRING_MARSHALLER);
7778
private static final Pattern SERVER_TIMING_PATTERN =
@@ -174,13 +175,25 @@ private void processHeader(
174175
if (compositeTracer != null) {
175176
compositeTracer.recordGFELatency(gfeLatency);
176177
}
177-
178178
if (span != null) {
179179
span.setAttribute("gfe_latency", String.valueOf(gfeLatency));
180180
}
181181
} else {
182182
measureMap.put(SPANNER_GFE_HEADER_MISSING_COUNT, 1L).record(tagContext);
183183
spannerRpcMetrics.recordGfeHeaderMissingCount(1L, attributes);
184+
if (compositeTracer != null) {
185+
compositeTracer.recordGfeHeaderMissingCount(1L);
186+
}
187+
}
188+
189+
// Record AFE metrics
190+
if (compositeTracer != null && GapicSpannerRpc.isEnableAFEServerTiming()) {
191+
if (serverTimingMetrics.containsKey(AFE_TIMING_HEADER)) {
192+
long afeLatency = serverTimingMetrics.get(AFE_TIMING_HEADER);
193+
compositeTracer.recordAFELatency(afeLatency);
194+
} else {
195+
compositeTracer.recordAfeHeaderMissingCount(1L);
196+
}
184197
}
185198
} catch (NumberFormatException e) {
186199
LOGGER.log(LEVEL, "Invalid server-timing object in header: {}", serverTiming);

0 commit comments

Comments
 (0)