@@ -231,6 +231,7 @@ def get_qdq_config(
231
231
activation_symmetric : bool = False ,
232
232
weight_symmetric : bool | None = None ,
233
233
per_channel : bool = False ,
234
+ reduce_range : bool = False ,
234
235
keep_removable_activations : bool = False ,
235
236
min_real_range : float | None = None ,
236
237
tensor_quant_overrides : dict [str , list [dict [str , Any ]]] | None = None ,
@@ -245,7 +246,7 @@ def get_qdq_config(
245
246
calibration_data_reader: Calibration data reader.
246
247
calibrate_method: The calibration method. Defaults to MinMax.
247
248
activation_type: The default activation quantization type. Defaults to QUInt8.
248
- weight_type: The default weight quantization type. Defaults to QUInt8 .
249
+ weight_type: The default weight quantization type. Defaults to QInt8.
249
250
activation_symmetric: True if activations should be quantized symmetrically (i.e., rmax == -rmin) by default.
250
251
Defaults to false. For int8 and int16, this results in zero-point values of 0. For uint8 and uint16,
251
252
the zero-point values are 127 and 32,767, respectively.
@@ -254,6 +255,8 @@ def get_qdq_config(
254
255
per_channel: Global option that determines if a fixed set of operator types should be quantized per-channel.
255
256
Defaults to false. Alternatively, use the tensor-level `tensor_quant_overrides` to select individual operators
256
257
and their quantization axes.
258
+ reduce_range: Quantize weights with 1 less bit of precision (e.g., 7 bits for QInt8). Defaults to false.
259
+ May improve the accuracy for some models running on non-VNNI machines, especially for per-channel mode.
257
260
keep_removable_activations: Defaults to false. If true, "removable" activations (e.g., Clip or Relu) will not
258
261
be removed, and will be explicitly represented in the QDQ model. If false, these activations
259
262
are automatically removed if activations are asymmetrically quantized. Keeping these activations
@@ -373,6 +376,7 @@ def get_qdq_config(
373
376
op_types_to_quantize = list (op_types .difference (op_types_to_exclude )),
374
377
nodes_to_exclude = final_nodes_to_exclude ,
375
378
per_channel = per_channel ,
379
+ reduce_range = reduce_range ,
376
380
use_external_data_format = (model_has_external_data or model .ByteSize () >= MODEL_SIZE_THRESHOLD ),
377
381
extra_options = final_extra_options ,
378
382
)
0 commit comments