Skip to content

Commit d77f44e

Browse files
kalzooroblablavthib
authored
Support proto3 field presence (#281)
* Update protobuf pregenerated files * Update grpcio-tools to latest version * Implement proto3 field presence * Fix to_dict with None optional fields. * Add test with optional enum * Properly support optional enums * Add tests for 64-bit ints and floats * Support field presence for int64 types * Fix oneof serialization with proto3 field presence (#292) = Description The serialization of a oneof message that contains a message with fields with explicit presence was buggy. For example: ``` message A { oneof kind { B b = 1; C c = 2; } } message B {} message C { optional bool z = 1; } ``` Serializing `A(b=B())` would lead to this payload: ``` 0A # tag1, length delimited 00 # length: 0 12 # tag2, length delimited 00 # length: 0 ``` Which when deserialized, leads to the message `A(c=C())`. = Explanation The issue lies in the post_init method. All fields are introspected, and if different from PLACEHOLDER, the message is marked as having been "serialized_on_wire". Then, when serializing `A(b=B())`, we go through each field of the oneof: - field 'b': this is the selected field from the group, so it is serialized - field 'c': marked as 'serialized_on_wire', so it is added as well. = Fix The issue is that support for explicit presence changed the default value from PLACEHOLDER to None. This breaks the post_init method in that case, which is relatively easy to fix: if a field is optional, and set to None, this is considered as the default value (which it is). This fix however has a side-effect: the group_current for this field (the oneof trick for explicit presence) is no longer set. This changes the behavior when serializing the message in JSON: as the value is the default one (None), and the group is not set (which would force the serialization of the field), so None fields are no longer serialized in JSON. This break one test, and will be fixed in the next commit. * fix: do not serialize None fields in JSON format This is linked to the fix from the previous commit: after it, scalar None fields were not included in the JSON format, but some were still included. This is all cleaned up: None fields are not added in JSON by default, as they indicate the default value of fields with explicit presence. However, if `include_default_values is set, they are included. * Fix: use builtin annotation prefix * Remove comment Co-authored-by: roblabla <[email protected]> Co-authored-by: Vincent Thiberville <[email protected]>
1 parent 671c0ff commit d77f44e

17 files changed

+838
-602
lines changed

.github/workflows/ci.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -66,4 +66,4 @@ jobs:
6666

6767
- name: Execute test suite
6868
shell: bash
69-
run: poetry run pytest tests/
69+
run: poetry run python -m pytest tests/

poetry.lock

+114-120
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ python-dateutil = "^2.8"
2323
asv = "^0.4.2"
2424
black = "^21.11b0"
2525
bpython = "^0.19"
26-
grpcio-tools = "^1.30.0"
26+
grpcio-tools = "^1.40.0"
2727
jinja2 = "^2.11.2"
2828
mypy = "^0.770"
2929
poethepoet = ">=0.9.0"

src/betterproto/__init__.py

+101-40
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,8 @@ class FieldMetadata:
145145
group: Optional[str] = None
146146
# Describes the wrapped type (e.g. when using google.protobuf.BoolValue)
147147
wraps: Optional[str] = None
148+
# Is the field optional
149+
optional: Optional[bool] = False
148150

149151
@staticmethod
150152
def get(field: dataclasses.Field) -> "FieldMetadata":
@@ -159,12 +161,15 @@ def dataclass_field(
159161
map_types: Optional[Tuple[str, str]] = None,
160162
group: Optional[str] = None,
161163
wraps: Optional[str] = None,
164+
optional: bool = False,
162165
) -> dataclasses.Field:
163166
"""Creates a dataclass field with attached protobuf metadata."""
164167
return dataclasses.field(
165-
default=PLACEHOLDER,
168+
default=None if optional else PLACEHOLDER,
166169
metadata={
167-
"betterproto": FieldMetadata(number, proto_type, map_types, group, wraps)
170+
"betterproto": FieldMetadata(
171+
number, proto_type, map_types, group, wraps, optional
172+
)
168173
},
169174
)
170175

@@ -174,74 +179,107 @@ def dataclass_field(
174179
# out at runtime. The generated dataclass variables are still typed correctly.
175180

176181

177-
def enum_field(number: int, group: Optional[str] = None) -> Any:
178-
return dataclass_field(number, TYPE_ENUM, group=group)
182+
def enum_field(number: int, group: Optional[str] = None, optional: bool = False) -> Any:
183+
return dataclass_field(number, TYPE_ENUM, group=group, optional=optional)
179184

180185

181-
def bool_field(number: int, group: Optional[str] = None) -> Any:
182-
return dataclass_field(number, TYPE_BOOL, group=group)
186+
def bool_field(number: int, group: Optional[str] = None, optional: bool = False) -> Any:
187+
return dataclass_field(number, TYPE_BOOL, group=group, optional=optional)
183188

184189

185-
def int32_field(number: int, group: Optional[str] = None) -> Any:
186-
return dataclass_field(number, TYPE_INT32, group=group)
190+
def int32_field(
191+
number: int, group: Optional[str] = None, optional: bool = False
192+
) -> Any:
193+
return dataclass_field(number, TYPE_INT32, group=group, optional=optional)
187194

188195

189-
def int64_field(number: int, group: Optional[str] = None) -> Any:
190-
return dataclass_field(number, TYPE_INT64, group=group)
196+
def int64_field(
197+
number: int, group: Optional[str] = None, optional: bool = False
198+
) -> Any:
199+
return dataclass_field(number, TYPE_INT64, group=group, optional=optional)
191200

192201

193-
def uint32_field(number: int, group: Optional[str] = None) -> Any:
194-
return dataclass_field(number, TYPE_UINT32, group=group)
202+
def uint32_field(
203+
number: int, group: Optional[str] = None, optional: bool = False
204+
) -> Any:
205+
return dataclass_field(number, TYPE_UINT32, group=group, optional=optional)
195206

196207

197-
def uint64_field(number: int, group: Optional[str] = None) -> Any:
198-
return dataclass_field(number, TYPE_UINT64, group=group)
208+
def uint64_field(
209+
number: int, group: Optional[str] = None, optional: bool = False
210+
) -> Any:
211+
return dataclass_field(number, TYPE_UINT64, group=group, optional=optional)
199212

200213

201-
def sint32_field(number: int, group: Optional[str] = None) -> Any:
202-
return dataclass_field(number, TYPE_SINT32, group=group)
214+
def sint32_field(
215+
number: int, group: Optional[str] = None, optional: bool = False
216+
) -> Any:
217+
return dataclass_field(number, TYPE_SINT32, group=group, optional=optional)
203218

204219

205-
def sint64_field(number: int, group: Optional[str] = None) -> Any:
206-
return dataclass_field(number, TYPE_SINT64, group=group)
220+
def sint64_field(
221+
number: int, group: Optional[str] = None, optional: bool = False
222+
) -> Any:
223+
return dataclass_field(number, TYPE_SINT64, group=group, optional=optional)
207224

208225

209-
def float_field(number: int, group: Optional[str] = None) -> Any:
210-
return dataclass_field(number, TYPE_FLOAT, group=group)
226+
def float_field(
227+
number: int, group: Optional[str] = None, optional: bool = False
228+
) -> Any:
229+
return dataclass_field(number, TYPE_FLOAT, group=group, optional=optional)
211230

212231

213-
def double_field(number: int, group: Optional[str] = None) -> Any:
214-
return dataclass_field(number, TYPE_DOUBLE, group=group)
232+
def double_field(
233+
number: int, group: Optional[str] = None, optional: bool = False
234+
) -> Any:
235+
return dataclass_field(number, TYPE_DOUBLE, group=group, optional=optional)
215236

216237

217-
def fixed32_field(number: int, group: Optional[str] = None) -> Any:
218-
return dataclass_field(number, TYPE_FIXED32, group=group)
238+
def fixed32_field(
239+
number: int, group: Optional[str] = None, optional: bool = False
240+
) -> Any:
241+
return dataclass_field(number, TYPE_FIXED32, group=group, optional=optional)
219242

220243

221-
def fixed64_field(number: int, group: Optional[str] = None) -> Any:
222-
return dataclass_field(number, TYPE_FIXED64, group=group)
244+
def fixed64_field(
245+
number: int, group: Optional[str] = None, optional: bool = False
246+
) -> Any:
247+
return dataclass_field(number, TYPE_FIXED64, group=group, optional=optional)
223248

224249

225-
def sfixed32_field(number: int, group: Optional[str] = None) -> Any:
226-
return dataclass_field(number, TYPE_SFIXED32, group=group)
250+
def sfixed32_field(
251+
number: int, group: Optional[str] = None, optional: bool = False
252+
) -> Any:
253+
return dataclass_field(number, TYPE_SFIXED32, group=group, optional=optional)
227254

228255

229-
def sfixed64_field(number: int, group: Optional[str] = None) -> Any:
230-
return dataclass_field(number, TYPE_SFIXED64, group=group)
256+
def sfixed64_field(
257+
number: int, group: Optional[str] = None, optional: bool = False
258+
) -> Any:
259+
return dataclass_field(number, TYPE_SFIXED64, group=group, optional=optional)
231260

232261

233-
def string_field(number: int, group: Optional[str] = None) -> Any:
234-
return dataclass_field(number, TYPE_STRING, group=group)
262+
def string_field(
263+
number: int, group: Optional[str] = None, optional: bool = False
264+
) -> Any:
265+
return dataclass_field(number, TYPE_STRING, group=group, optional=optional)
235266

236267

237-
def bytes_field(number: int, group: Optional[str] = None) -> Any:
238-
return dataclass_field(number, TYPE_BYTES, group=group)
268+
def bytes_field(
269+
number: int, group: Optional[str] = None, optional: bool = False
270+
) -> Any:
271+
return dataclass_field(number, TYPE_BYTES, group=group, optional=optional)
239272

240273

241274
def message_field(
242-
number: int, group: Optional[str] = None, wraps: Optional[str] = None
275+
number: int,
276+
group: Optional[str] = None,
277+
wraps: Optional[str] = None,
278+
optional: bool = False,
243279
) -> Any:
244-
return dataclass_field(number, TYPE_MESSAGE, group=group, wraps=wraps)
280+
return dataclass_field(
281+
number, TYPE_MESSAGE, group=group, wraps=wraps, optional=optional
282+
)
245283

246284

247285
def map_field(
@@ -586,7 +624,8 @@ def __post_init__(self) -> None:
586624
if meta.group:
587625
group_current.setdefault(meta.group)
588626

589-
if self.__raw_get(field_name) != PLACEHOLDER:
627+
value = self.__raw_get(field_name)
628+
if value != PLACEHOLDER and not (meta.optional and value is None):
590629
# Found a non-sentinel value
591630
all_sentinel = False
592631

@@ -701,12 +740,16 @@ def __bytes__(self) -> bytes:
701740

702741
if value is None:
703742
# Optional items should be skipped. This is used for the Google
704-
# wrapper types.
743+
# wrapper types and proto3 field presence/optional fields.
705744
continue
706745

707746
# Being selected in a a group means this field is the one that is
708747
# currently set in a `oneof` group, so it must be serialized even
709748
# if the value is the default zero value.
749+
#
750+
# Note that proto3 field presence/optional fields are put in a
751+
# synthetic single-item oneof by protoc, which helps us ensure we
752+
# send the value even if the value is the default zero value.
710753
selected_in_group = (
711754
meta.group and self._group_current[meta.group] == field_name
712755
)
@@ -829,8 +872,9 @@ def _get_field_default_gen(cls, field: dataclasses.Field) -> Any:
829872
# This is some kind of list (repeated) field.
830873
return list
831874
elif t.__origin__ is Union and t.__args__[1] is type(None):
832-
# This is an optional (wrapped) field. For setting the default we
833-
# really don't care what kind of field it is.
875+
# This is an optional field (either wrapped, or using proto3
876+
# field presence). For setting the default we really don't care
877+
# what kind of field it is.
834878
return type(None)
835879
else:
836880
return t
@@ -1041,6 +1085,9 @@ def to_dict(
10411085
]
10421086
if value or include_default_values:
10431087
output[cased_name] = value
1088+
elif value is None:
1089+
if include_default_values:
1090+
output[cased_name] = value
10441091
elif (
10451092
value._serialized_on_wire
10461093
or include_default_values
@@ -1066,13 +1113,18 @@ def to_dict(
10661113
if meta.proto_type in INT_64_TYPES:
10671114
if field_is_repeated:
10681115
output[cased_name] = [str(n) for n in value]
1116+
elif value is None:
1117+
if include_default_values:
1118+
output[cased_name] = value
10691119
else:
10701120
output[cased_name] = str(value)
10711121
elif meta.proto_type == TYPE_BYTES:
10721122
if field_is_repeated:
10731123
output[cased_name] = [
10741124
b64encode(b).decode("utf8") for b in value
10751125
]
1126+
elif value is None and include_default_values:
1127+
output[cased_name] = value
10761128
else:
10771129
output[cased_name] = b64encode(value).decode("utf8")
10781130
elif meta.proto_type == TYPE_ENUM:
@@ -1085,6 +1137,12 @@ def to_dict(
10851137
else:
10861138
# transparently upgrade single value to repeated
10871139
output[cased_name] = [enum_class(value).name]
1140+
elif value is None:
1141+
if include_default_values:
1142+
output[cased_name] = value
1143+
elif meta.optional:
1144+
enum_class = field_types[field_name].__args__[0]
1145+
output[cased_name] = enum_class(value).name
10881146
else:
10891147
enum_class = field_types[field_name] # noqa
10901148
output[cased_name] = enum_class(value).name
@@ -1141,6 +1199,9 @@ def from_dict(self: T, value: Dict[str, Any]) -> T:
11411199
setattr(self, field_name, v)
11421200
elif meta.wraps:
11431201
setattr(self, field_name, value[key])
1202+
elif v is None:
1203+
cls = self._betterproto.cls_by_field[field_name]
1204+
setattr(self, field_name, cls().from_dict(value[key]))
11441205
else:
11451206
# NOTE: `from_dict` mutates the underlying message, so no
11461207
# assignment here is necessary.

0 commit comments

Comments
 (0)