Skip to content

fix bug on proto schema extraction #8403

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Feb 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ public SchemaExtractor(Descriptor descriptor) {
this.descriptor = descriptor;
}

/** @return false if no more properties should be extracted */
public static boolean extractProperty(
FieldDescriptor field,
String schemaName,
Expand Down Expand Up @@ -107,9 +108,7 @@ public static boolean extractProperty(
case TYPE_MESSAGE:
ref = "#/components/schemas/" + field.getMessageType().getFullName();
// Recursively add nested message schemas
if (!extractSchema(field.getMessageType(), builder, depth)) {
return false;
}
extractSchema(field.getMessageType(), builder, depth);
builder.addToHash(field.getMessageType().getFullName());
break;
case TYPE_BYTES:
Expand Down Expand Up @@ -154,22 +153,20 @@ public static boolean extractProperty(
schemaName, fieldName, array, type, description, ref, format, enumValues, extensions);
}

public static boolean extractSchema(Descriptor descriptor, SchemaBuilder builder, int depth) {
public static void extractSchema(Descriptor descriptor, SchemaBuilder builder, int depth) {
depth++;
String schemaName = descriptor.getFullName();
if (!builder.shouldExtractSchema(schemaName, depth)) {
return false;
}
// iterate fields in number order to ensure hash stability
for (FieldDescriptor field :
descriptor.getFields().stream()
.sorted(Comparator.comparingInt(FieldDescriptor::getNumber))
.collect(Collectors.toList())) {
if (!extractProperty(field, schemaName, field.getName(), builder, depth)) {
return false;
if (builder.shouldExtractSchema(schemaName, depth)) {
for (FieldDescriptor field :
descriptor.getFields().stream()
// iterate fields in number order to ensure hash stability
.sorted(Comparator.comparingInt(FieldDescriptor::getNumber))
.collect(Collectors.toList())) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the collector for?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah it was already there (this part is best viewed with "ignore whitespace changes" turned on because I changed the indentation). But indeed I think it's unnecessary, I can remove it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

well it says Foreach not applicable to type 'java. util. stream. Stream<com. google. protobuf. Descriptors. FieldDescriptor>'

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess I'd have to use it with .forEach(...) but I don't know how to break out of that.
We could also say that we don't break, and if we're looking at a descriptor, we iterate over all properties even if the limit has been reached, the gain from breaking early is probably marginal.

if (!extractProperty(field, schemaName, field.getName(), builder, depth)) {
break; // we have reached the max nb of properties to extract
}
}
}
return true;
}

public static Schema extractSchemas(Descriptor descriptor) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package com.datadog.instrumentation.protobuf

import com.datadog.instrumentation.protobuf.generated.Message.MyMessage
import com.datadog.instrumentation.protobuf.generated.Message.OtherMessage
import com.datadog.instrumentation.protobuf.generated.Message.RecursiveMessage
import com.google.protobuf.InvalidProtocolBufferException
import datadog.trace.agent.test.AgentTestRunner
import datadog.trace.api.DDTags
Expand All @@ -16,17 +17,74 @@ class AbstractMessageInstrumentationTest extends AgentTestRunner {
return true
}

String expectedSchema = "{\"components\":{\"schemas\":{\"com.datadog.instrumentation.protobuf.generated.MyMessage\":{\"properties\":{\"id\":{\"extensions\":{\"x-protobuf-number\":\"1\"},\"type\":\"string\"},\"value\":{\"extensions\":{\"x-protobuf-number\":\"2\"},\"type\":\"string\"},\"other_message\":{\"extensions\":{\"x-protobuf-number\":\"3\"},\"items\":{\"\$ref\":\"#/components/schemas/com.datadog.instrumentation.protobuf.generated.OtherMessage\"},\"type\":\"array\"}},\"type\":\"object\"},\"com.datadog.instrumentation.protobuf.generated.OtherMessage\":{\"properties\":{\"name\":{\"extensions\":{\"x-protobuf-number\":\"1\"},\"type\":\"string\"},\"age\":{\"extensions\":{\"x-protobuf-number\":\"2\"},\"format\":\"int32\",\"type\":\"integer\"}},\"type\":\"object\"}}},\"openapi\":\"3.0.0\"}"
String expectedSchemaID = "4690647329509494987"
void "test extract protobuf schema on serialize & deserialize"() {

String expectedSchema = """{
"components":{
"schemas":{
"com.datadog.instrumentation.protobuf.generated.MyMessage":{
"properties":{
"id":{
"extensions":{
"x-protobuf-number":"1"
},
"type":"string"
},
"value":{
"extensions":{
"x-protobuf-number":"2"
},
"type":"string"
},
"other_message":{
"extensions":{
"x-protobuf-number":"3"
},
"items":{
"\$ref":"#/components/schemas/com.datadog.instrumentation.protobuf.generated.OtherMessage"
},
"type":"array"
},
"nested":{
"\$ref":"#/components/schemas/com.datadog.instrumentation.protobuf.generated.OtherMessage",
"extensions":{
"x-protobuf-number":"4"
}
}
},
"type":"object"
},
"com.datadog.instrumentation.protobuf.generated.OtherMessage":{
"properties":{
"name":{
"extensions":{
"x-protobuf-number":"1"
},
"type":"string"
},
"age":{
"extensions":{
"x-protobuf-number":"2"
},
"format":"int32",
"type":"integer"
}
},
"type":"object"
}
}
},
"openapi":"3.0.0"
}"""
expectedSchema = expectedSchema.replaceAll("[ \n]", "") // the spaces are just here to make it readable
String expectedSchemaID = "2792908287829424040"

void 'test extract protobuf schema on serialize & deserialize'() {
setup:
MyMessage message = MyMessage.newBuilder()
.setId("1")
.setValue("Hello from Protobuf!")
.setNested(OtherMessage.newBuilder().setName("hello").setAge(10).build())
.build()
.setId("1")
.setValue("Hello from Protobuf!")
.setNested(OtherMessage.newBuilder().setName("hello").setAge(10).build())
.build()
when:
var bytes
runUnderTrace("parent_serialize") {
Expand Down Expand Up @@ -81,12 +139,100 @@ class AbstractMessageInstrumentationTest extends AgentTestRunner {
}
}

void 'test error when de-serializing'() {
void "test extract protobuf schema with recursive message"() {
String expectedSchema = """{
"components":{
"schemas":{
"com.datadog.instrumentation.protobuf.generated.RecursiveMessage":{
"properties":{
"value":{
"extensions":{
"x-protobuf-number":"1"
},
"format":"int32",
"type":"integer"
},
"next":{
"\$ref":"#/components/schemas/com.datadog.instrumentation.protobuf.generated.RecursiveMessage",
"extensions":{
"x-protobuf-number":"2"
}
}
},
"type":"object"
}
}
},
"openapi":"3.0.0"
}"""
expectedSchema = expectedSchema.replaceAll("[ \n]", "") // the spaces are just here to make it readable
String expectedSchemaID = "8377547842972884891"

setup:
getTEST_DATA_STREAMS_MONITORING()
RecursiveMessage message = RecursiveMessage.newBuilder()
.setValue(12)
.build()
when:
byte[] bytes
runUnderTrace("parent_serialize") {
AgentSpan span = activeSpan()
span.setTag(DDTags.MANUAL_KEEP, true)
bytes = message.toByteArray()
}
runUnderTrace("parent_deserialize") {
AgentSpan span = activeSpan()
span.setTag(DDTags.MANUAL_KEEP, true)
RecursiveMessage.parseFrom(bytes)
}
TEST_WRITER.waitForTraces(2)
then:
assertTraces(2, SORT_TRACES_BY_ID) {
trace(1) {
span {
hasServiceName()
operationName "parent_serialize"
resourceName "parent_serialize"
errored false
measured false
tags {
"$DDTags.SCHEMA_DEFINITION" expectedSchema
"$DDTags.SCHEMA_WEIGHT" 1
"$DDTags.SCHEMA_TYPE" "protobuf"
"$DDTags.SCHEMA_NAME" "com.datadog.instrumentation.protobuf.generated.RecursiveMessage"
"$DDTags.SCHEMA_OPERATION" "serialization"
"$DDTags.SCHEMA_ID" expectedSchemaID
defaultTags(false)
}
}
}
trace(1) {
span {
hasServiceName()
operationName "parent_deserialize"
resourceName "parent_deserialize"
errored false
measured false
tags {
"$DDTags.SCHEMA_DEFINITION" expectedSchema
"$DDTags.SCHEMA_WEIGHT" 1
"$DDTags.SCHEMA_TYPE" "protobuf"
"$DDTags.SCHEMA_NAME" "com.datadog.instrumentation.protobuf.generated.RecursiveMessage"
"$DDTags.SCHEMA_OPERATION" "deserialization"
"$DDTags.SCHEMA_ID" expectedSchemaID
defaultTags(false)
}
}
}
}
}

void "test error when de-serializing"() {
setup:
MyMessage message = MyMessage.newBuilder()
.setId("1")
.setValue("Hello from Protobuf!")
.build()
.setId("1")
.setValue("Hello from Protobuf!")
.build()
when:
runUnderTrace("parent_deserialize") {
AgentSpan span = activeSpan()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,66 @@ class DynamicMessageInstrumentationTest extends AgentTestRunner {
.setValue("Hello from Protobuf!")
.build()
when:
String expectedSchema = "{\"components\":{\"schemas\":{\"com.datadog.instrumentation.protobuf.generated.MyMessage\":{\"properties\":{\"id\":{\"extensions\":{\"x-protobuf-number\":\"1\"},\"type\":\"string\"},\"value\":{\"extensions\":{\"x-protobuf-number\":\"2\"},\"type\":\"string\"},\"other_message\":{\"extensions\":{\"x-protobuf-number\":\"3\"},\"items\":{\"\$ref\":\"#/components/schemas/com.datadog.instrumentation.protobuf.generated.OtherMessage\"},\"type\":\"array\"}},\"type\":\"object\"},\"com.datadog.instrumentation.protobuf.generated.OtherMessage\":{\"properties\":{\"name\":{\"extensions\":{\"x-protobuf-number\":\"1\"},\"type\":\"string\"},\"age\":{\"extensions\":{\"x-protobuf-number\":\"2\"},\"format\":\"int32\",\"type\":\"integer\"}},\"type\":\"object\"}}},\"openapi\":\"3.0.0\"}"
String expectedSchemaID = "4690647329509494987"
String expectedSchema = """{
"components":{
"schemas":{
"com.datadog.instrumentation.protobuf.generated.MyMessage":{
"properties":{
"id":{
"extensions":{
"x-protobuf-number":"1"
},
"type":"string"
},
"value":{
"extensions":{
"x-protobuf-number":"2"
},
"type":"string"
},
"other_message":{
"extensions":{
"x-protobuf-number":"3"
},
"items":{
"\$ref":"#/components/schemas/com.datadog.instrumentation.protobuf.generated.OtherMessage"
},
"type":"array"
},
"nested":{
"\$ref":"#/components/schemas/com.datadog.instrumentation.protobuf.generated.OtherMessage",
"extensions":{
"x-protobuf-number":"4"
}
}
},
"type":"object"
},
"com.datadog.instrumentation.protobuf.generated.OtherMessage":{
"properties":{
"name":{
"extensions":{
"x-protobuf-number":"1"
},
"type":"string"
},
"age":{
"extensions":{
"x-protobuf-number":"2"
},
"format":"int32",
"type":"integer"
}
},
"type":"object"
}
}
},
"openapi":"3.0.0"
}"""
expectedSchema = expectedSchema.replaceAll("[ \n]", "") // the spaces are just here to make it readable
String expectedSchemaID = "2792908287829424040"

var bytes
runUnderTrace("parent_serialize") {
AgentSpan span = activeSpan()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
// run gradle target generateTestProto when you modify this

syntax = "proto3";

package com.datadog.instrumentation.protobuf.generated;
Expand All @@ -13,3 +15,8 @@ message MyMessage {
repeated OtherMessage other_message = 3;
OtherMessage nested = 4;
}

message RecursiveMessage {
int32 value = 1;
RecursiveMessage next = 2;
}
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,7 @@ private void flush(long timestampNanos) {
@Override
public void clear() {
timeToBucket.clear();
schemaSamplers.clear();
}

void report() {
Expand Down
Loading