
Commit a77ee4a

adrianlizarraga authored and rachguo committed
Add contrib Q/DQ ops to symbolic shape inference tool (#19340)
### Description
Adds type/shape inferencing support for MSFT domain QuantizeLinear and DequantizeLinear operators to symbolic_shape_infer.py

### Motivation and Context
Need a way to infer the types and shapes of Q/DQ ops in models that use the MSFT domain versions (e.g., int16 quantization).
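For context, a minimal usage sketch (not part of the commit): it assumes the tool is imported via the packaged module path `onnxruntime.tools.symbolic_shape_infer`, uses placeholder file names, and mirrors the `SymbolicShapeInference.infer_shapes` call exercised by the new unit tests below.

```python
# Minimal sketch: run symbolic shape inference over a model that contains
# com.microsoft QuantizeLinear/DequantizeLinear nodes (e.g., int16 Q/DQ).
# The module path and file names are illustrative assumptions.
import onnx

from onnxruntime.tools.symbolic_shape_infer import SymbolicShapeInference

model = onnx.load("quantized_model.onnx")  # placeholder path

# With this change, Q/DQ outputs get their element type from the
# zero-point/scale inputs and their shape from the first (data) input.
inferred_model = SymbolicShapeInference.infer_shapes(model, auto_merge=True)

onnx.save(inferred_model, "quantized_model_inferred.onnx")  # placeholder path
```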
1 parent ad63507 commit a77ee4a

File tree (2 files changed, +229 −0 lines):
- onnxruntime/python/tools/symbolic_shape_infer.py
- onnxruntime/test/python/onnxruntime_test_python_symbolic_shape_infer.py

onnxruntime/python/tools/symbolic_shape_infer.py (+27)
@@ -197,6 +197,7 @@ def __init__(self, int_max, auto_merge, guess_output_rank, verbose, prefix=""):
            "BiasGelu": self._infer_BiasGelu,
            "BiasSplitGelu": self._infer_BiasSplitGelu,
            "DecoderMaskedMultiHeadAttention": self._infer_DecoderMaskedMultiHeadAttention,
+           "DequantizeLinear": self._infer_DequantizeLinear,
            "EmbedLayerNormalization": self._infer_EmbedLayerNormalization,
            "FastGelu": self._infer_FastGelu,
            "GatedRelativePositionBias": self._infer_GatedRelativePositionBias,
@@ -212,6 +213,7 @@ def __init__(self, int_max, auto_merge, guess_output_rank, verbose, prefix=""):
            "PackedAttention": self._infer_PackedAttention,
            "PackedMultiHeadAttention": self._infer_PackedMultiHeadAttention,
            "PythonOp": self._infer_PythonOp,
+           "QuantizeLinear": self._infer_QuantizeLinear,
            "QuickGelu": self._infer_FastGelu,
            "RelativePositionBias": self._infer_RelativePositionBias,
            "RemovePadding": self._infer_RemovePadding,
@@ -457,6 +459,8 @@ def _onnx_infer_single_node(self, node):
            "GemmFastGelu",
            "LayerNormalization",
            "LongformerAttention",
+           "DequantizeLinear",
+           "QuantizeLinear",
            "RelativePositionBias",
            "RemovePadding",
            "RestorePadding",
@@ -979,6 +983,29 @@ def _infer_NhwcConv(self, node):  # noqa: N802
                )
            )

+    def _infer_DequantizeLinear(self, node):  # noqa: N802
+        # Get the output data type from the scale input (index 1, required).
+        output_dtype = self.known_vi_[node.input[1]].type.tensor_type.elem_type
+
+        # Get the output shape from the first input.
+        output_shape = self._get_shape(node, 0)
+
+        vi = self.known_vi_[node.output[0]]
+        vi.CopyFrom(helper.make_tensor_value_info(node.output[0], output_dtype, output_shape))
+
+    def _infer_QuantizeLinear(self, node):  # noqa: N802
+        # Get the output data type from the zero-point input (index 2, optional).
+        # Otherwise, default to uint8.
+        output_dtype = onnx.TensorProto.UINT8
+        if len(node.input) > 2 and node.input[2]:
+            output_dtype = self.known_vi_[node.input[2]].type.tensor_type.elem_type
+
+        # Get the output shape from the first input.
+        output_shape = self._get_shape(node, 0)
+
+        vi = self.known_vi_[node.output[0]]
+        vi.CopyFrom(helper.make_tensor_value_info(node.output[0], output_dtype, output_shape))
+
    def _infer_Einsum(self, node):  # noqa: N802
        # ref: https://github.com/onnx/onnx/blob/623dfaa0151b2e4ce49779c3ec31cbd78c592b80/onnx/defs/math/defs.cc#L3275
        equation = get_attribute(node, "equation")

onnxruntime/test/python/onnxruntime_test_python_symbolic_shape_infer.py (+202)
@@ -392,6 +392,208 @@ def test_div_precision(self):
        self.assertEqual(len(output_dims), 1)
        self.assertEqual(output_dims[0].dim_value, 512)

+    def test_quantize_linear(self):
+        """
+        Test ONNX QuantizeLinear op.
+        Check that the output shape is propagated from the first input and that the output data
+        type comes from the zero-point input.
+        """
+        initializers = [
+            helper.make_tensor(
+                "scale",
+                TensorProto.FLOAT,
+                [],
+                [1.0],
+            ),
+            helper.make_tensor(
+                "zero_point",
+                TensorProto.INT8,
+                [],
+                [16],
+            ),
+        ]
+
+        nodes = [
+            helper.make_node(
+                "QuantizeLinear",
+                inputs=[
+                    "input_f32",
+                    "scale",
+                    "zero_point",
+                ],
+                outputs=["output_s8"],
+            ),
+        ]
+
+        inputs = [
+            helper.make_tensor_value_info("input_f32", TensorProto.FLOAT, ["b", 2, 3, 4]),
+        ]
+
+        outputs = [
+            helper.make_tensor_value_info("output_s8", TensorProto.UNDEFINED, None),
+        ]
+
+        graph = helper.make_graph(nodes, "QuantizeLinear_Test", inputs, outputs, initializers)
+        model = helper.make_model(graph)
+
+        inferred = SymbolicShapeInference.infer_shapes(model, auto_merge=True)
+
+        expected_shapes = [
+            helper.make_tensor_value_info("output_s8", TensorProto.INT8, ["b", 2, 3, 4]),
+        ]
+        self._check_shapes(graph, inferred.graph, expected_shapes)
+
+    def test_quantize_linear_ms_domain(self):
+        """
+        Test QuantizeLinear op ('com.microsoft' domain).
+        Check that the output shape is propagated from the first input and that the output data
+        type comes from the zero-point input.
+        """
+        initializers = [
+            helper.make_tensor(
+                "scale",
+                TensorProto.FLOAT,
+                [],
+                [1.0],
+            ),
+            helper.make_tensor(
+                "zero_point",
+                TensorProto.UINT16,
+                [],
+                [16],
+            ),
+        ]
+
+        nodes = [
+            helper.make_node(
+                "QuantizeLinear",
+                inputs=[
+                    "input_f32",
+                    "scale",
+                    "zero_point",
+                ],
+                outputs=["output_u16"],
+                domain="com.microsoft",
+            ),
+        ]
+
+        inputs = [
+            helper.make_tensor_value_info("input_f32", TensorProto.FLOAT, ["b", 2, 3, 4]),
+        ]
+
+        outputs = [
+            helper.make_tensor_value_info("output_u16", TensorProto.UNDEFINED, None),
+        ]
+
+        graph = helper.make_graph(nodes, "QuantizeLinear_MSDomain_Test", inputs, outputs, initializers)
+        model = helper.make_model(graph)
+
+        inferred = SymbolicShapeInference.infer_shapes(model, auto_merge=True)
+
+        expected_shapes = [
+            helper.make_tensor_value_info("output_u16", TensorProto.UINT16, ["b", 2, 3, 4]),
+        ]
+        self._check_shapes(graph, inferred.graph, expected_shapes)
+
+    def test_quantize_linear_no_zp_input(self):
+        """
+        Test QuantizeLinear op ('com.microsoft' domain).
+        Check that the output shape is propagated from the first input.
+        The zero-point input is missing, so the output data type should default to uint8.
+        """
+        initializers = [
+            helper.make_tensor(
+                "scale",
+                TensorProto.FLOAT,
+                [],
+                [1.0],
+            ),
+        ]
+
+        nodes = [
+            helper.make_node(
+                "QuantizeLinear",
+                inputs=[
+                    "input_f32",
+                    "scale",
+                ],
+                outputs=["output_u8"],
+                domain="com.microsoft",
+            ),
+        ]
+
+        inputs = [
+            helper.make_tensor_value_info("input_f32", TensorProto.FLOAT, ["b", 2, 3, 4]),
+        ]
+
+        outputs = [
+            helper.make_tensor_value_info("output_u8", TensorProto.UNDEFINED, None),
+        ]
+
+        graph = helper.make_graph(nodes, "QuantizeLinear_NoZP_Test", inputs, outputs, initializers)
+        model = helper.make_model(graph)
+
+        inferred = SymbolicShapeInference.infer_shapes(model, auto_merge=True)
+
+        # Check that the output shape is propagated from the first input and that the
+        # output data type defaults to uint8 when the zero-point input is omitted.
+        expected_shapes = [
+            helper.make_tensor_value_info("output_u8", TensorProto.UINT8, ["b", 2, 3, 4]),
+        ]
+        self._check_shapes(graph, inferred.graph, expected_shapes)
+
+    def test_dequantize_linear_ms_domain(self):
+        """
+        Test DequantizeLinear operator ('com.microsoft' domain).
+        Check that the output shape is propagated from the first input and that the output data
+        type comes from the scale input.
+        """
+        initializers = [
+            helper.make_tensor(
+                "scale",
+                TensorProto.FLOAT,
+                [],
+                [1.0],
+            ),
+            helper.make_tensor(
+                "zero_point",
+                TensorProto.UINT16,
+                [],
+                [16],
+            ),
+        ]
+
+        nodes = [
+            helper.make_node(
+                "DequantizeLinear",
+                inputs=[
+                    "input_u16",
+                    "scale",
+                    "zero_point",
+                ],
+                outputs=["output_f32"],
+                domain="com.microsoft",
+            ),
+        ]
+
+        inputs = [
+            helper.make_tensor_value_info("input_u16", TensorProto.UINT16, ["b", 2, 3, 4]),
+        ]
+
+        outputs = [
+            helper.make_tensor_value_info("output_f32", TensorProto.UNDEFINED, None),
+        ]
+
+        graph = helper.make_graph(nodes, "DequantizeLinear_MSDomain_Test", inputs, outputs, initializers)
+        model = helper.make_model(graph)
+
+        inferred = SymbolicShapeInference.infer_shapes(model, auto_merge=True)
+
+        expected_shapes = [
+            helper.make_tensor_value_info("output_f32", TensorProto.FLOAT, ["b", 2, 3, 4]),
+        ]
+        self._check_shapes(graph, inferred.graph, expected_shapes)
+
class TestSymbolicShapeInferenceForSlice(unittest.TestCase):
    def check_slice_of_concat(self, input_dims, start, end, step, expected_output_dim):
