
Commit 35e4296

adrianlizarragayf711 authored and committed
[Quant Tool] Add reduce_range option to get_qdq_config() (#22782)
### Description
Adds a `reduce_range` option to `get_qdq_config()`.

### Motivation and Context
Makes it easier to set this option when calling `get_qdq_config()`. Otherwise, the user has to set it on the returned configuration manually.
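A minimal sketch of what this changes for callers, assuming a placeholder model path and calibration data reader (neither is part of this commit):

```python
from onnxruntime.quantization.quantize import get_qdq_config

# Without the new argument: set the flag on the returned config by hand.
qdq_config = get_qdq_config("model.onnx", data_reader)  # placeholder model path and reader
qdq_config.reduce_range = True

# With this commit: pass it directly.
qdq_config = get_qdq_config("model.onnx", data_reader, reduce_range=True)
```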
1 parent: d1ada5e

2 files changed: +7 -1 lines

onnxruntime/python/tools/quantization/quantize.py (+5 -1)
@@ -231,6 +231,7 @@ def get_qdq_config(
     activation_symmetric: bool = False,
     weight_symmetric: bool | None = None,
     per_channel: bool = False,
+    reduce_range: bool = False,
     keep_removable_activations: bool = False,
     min_real_range: float | None = None,
     tensor_quant_overrides: dict[str, list[dict[str, Any]]] | None = None,
@@ -245,7 +246,7 @@ def get_qdq_config(
         calibration_data_reader: Calibration data reader.
         calibrate_methode: The calibration method. Defaults to MinMax.
         activation_type: The default activation quantization type. Defaults to QUInt8.
-        weight_type: The default weight quantization type. Defaults to QUInt8.
+        weight_type: The default weight quantization type. Defaults to QInt8.
         activation_symmetric: True if activations should be quantized symmetrically (i.e, rmax == -rmin) by default.
             Defaults to false. For int8 and int16, this results in zero-point values of 0. For uint8 and uint16,
             the zero-point values are 127 and 32,767, respectively.
@@ -254,6 +255,8 @@ def get_qdq_config(
         per_channel: Global option that determines if a fixed set of operator types should be quantized per-channel.
             Defaults to false. Alternatively, use the tensor-level `tensor_quant_overrides` to select individual operators
             and their quantization axes.
+        reduce_range: quantize weights with 1 less bit of precision (e.g., 7 bits for QInt8). Defaults to false.
+            May improve the accuracy for some models running on non-VNNI machine, especially for per-channel mode.
         keep_removable_activations: Defaults to false. If true, "removable" activations (e.g., Clip or Relu) will not
             be removed, and will be explicitly represented in the QDQ model. If false, these activations
             are automatically removed if activations are asymmetrically quantized. Keeping these activations
@@ -373,6 +376,7 @@ def get_qdq_config(
         op_types_to_quantize=list(op_types.difference(op_types_to_exclude)),
         nodes_to_exclude=final_nodes_to_exclude,
         per_channel=per_channel,
+        reduce_range=reduce_range,
         use_external_data_format=(model_has_external_data or model.ByteSize() >= MODEL_SIZE_THRESHOLD),
         extra_options=final_extra_options,
     )
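For context, a hedged end-to-end sketch of how the new argument reaches quantization; the model path, output path, and `my_data_reader` are stand-ins rather than anything from this commit:

```python
from onnxruntime.quantization import QuantType
from onnxruntime.quantization.quantize import get_qdq_config, quantize

# "model.onnx", "model.qdq.onnx", and my_data_reader are placeholders for a real
# float model, an output path, and a CalibrationDataReader implementation.
qdq_config = get_qdq_config(
    "model.onnx",
    my_data_reader,
    weight_type=QuantType.QInt8,
    per_channel=True,
    reduce_range=True,  # the option added here: 7-bit weight range for QInt8
)

# The flag is stored on the returned config and picked up when quantizing.
quantize("model.onnx", "model.qdq.onnx", qdq_config)
```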

onnxruntime/test/python/quantization/test_get_qdq_config.py (+2 -0)
@@ -93,6 +93,7 @@ def test_basic_args(self):
             activation_type=QuantType.QUInt16,
             weight_type=QuantType.QInt16,
             per_channel=True,
+            reduce_range=True,
             nodes_to_exclude=["Mul"],
             # Other options converted to extra_options:
             min_real_range=0.0001,
@@ -104,6 +105,7 @@ def test_basic_args(self):
         self.assertEqual(qdq_config.activation_type, QuantType.QUInt16)
         self.assertEqual(qdq_config.weight_type, QuantType.QInt16)
         self.assertTrue(qdq_config.per_channel)
+        self.assertTrue(qdq_config.reduce_range)
         self.assertEqual(set(qdq_config.nodes_to_exclude), {"Mul"})
         self.assertEqual(set(qdq_config.op_types_to_quantize), {"Add"})
