
Commit 2cb61a6

adrianlizarraga authored and ankitm3k committed
[QDQ Optimizer] Update WeightBiasQuantization to skip Conv/Gemm if downstream node is not QuantizeLinear (microsoft#24537)
### Description

Updates the WeightBiasQuantization optimizer to skip processing on Conv/Gemm nodes if the downstream child node is not a QuantizeLinear.

#### Before this PR

Original graph:
```
input_0 -> DQ -> Conv -> graph_output (or non-Q node)
                  ^  ^
                  |  |
weights_f32-------+  |
bias_f32-------------+
```
Becomes:
```
input_0 -> DQ ------> Conv -> graph_output (or non-Q node)
                       ^  ^
                       |  |
weights_quant -> DQ ---+  |
bias_quant -> DQ ---------+
```
The above is **NOT** a valid QDQ node unit for Conv because the Conv's output is not consumed by a QuantizeLinear node.

#### With this PR

The above example graph remains unchanged after L1 optimizations:
```
input_0 -> DQ -> Conv -> graph_output (or non-Q node)
                  ^  ^
                  |  |
weights_f32-------+  |
bias_f32-------------+
```

### Motivation and Context

Caused inaccuracy for a customer model. Automatically quantizing the weights and biases of a Conv/Gemm is detrimental if the output of the Conv/Gemm is not consumed by a QuantizeLinear node. In this scenario, the whole node group is not considered a valid QDQ node unit, and so the EP has to run the Conv/Gemm as float32/float16 anyway. If the Conv/Gemm is running as float32/float16, then quantizing the weights and biases introduces inaccuracy for no gain.

PR that originally added this optimizer: microsoft#22969
1 parent 9bc471c commit 2cb61a6
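
At its core, the change adds an early-exit guard in `WeightBiasQuantization::ApplyImpl`. Condensed, the new check looks like this (see the first diff below for full context):

```cpp
// Skip this Conv/Gemm unless its output feeds exactly one QuantizeLinear node;
// otherwise the node group could never form a valid QDQ node unit, so quantizing
// only the weights/bias would add error with no benefit.
std::vector<const Node*> children_nodes = graph.GetConsumerNodes(node.OutputDefs()[0]->Name());
if (children_nodes.size() != 1 || children_nodes[0]->OpType() != QDQ::QOpName) {
  continue;
}
```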

File tree

2 files changed: +64 −0 lines changed


onnxruntime/core/optimizer/qdq_transformer/weight_bias_quantization.cc

Lines changed: 8 additions & 0 deletions
```
@@ -43,6 +43,14 @@ Status WeightBiasQuantization::ApplyImpl(Graph& graph, bool& modified, int graph
       continue;
     }
 
+    // Require that the node's output is consumed by a single QuantizeLinear node.
+    // Otherwise, if only the inputs are quantized, but not the output, then this node group would not
+    // be considered a QDQ node unit anyway.
+    std::vector<const Node*> children_nodes = graph.GetConsumerNodes(node.OutputDefs()[0]->Name());
+    if (children_nodes.size() != 1 || children_nodes[0]->OpType() != QDQ::QOpName) {
+      continue;
+    }
+
     Node& dq_0 = *graph.GetNode(parent_node_0->Index());
     Node* dq_1 = nullptr;
     const ONNX_NAMESPACE::TensorProto* weight_proto = nullptr;
```
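
For contrast, here is a minimal sketch (not part of this commit) of the pattern the optimizer does still process: the Conv output is consumed by a single QuantizeLinear, so DQ -> Conv -> Q can form a valid QDQ node unit. It reuses the `ModelTestBuilder` helpers that appear in the test below; `AddQuantizeLinearNode` and the shapes/scales here are illustrative assumptions.

```cpp
// Sketch: Conv output consumed by a QuantizeLinear, so the guard above does NOT
// skip the node and WeightBiasQuantization may still quantize the float weights.
auto build_valid_qdq_case = [](ModelTestBuilder& builder) {
  NodeArg* input_arg = builder.MakeInput<uint8_t>({1, 3, 8, 8}, std::numeric_limits<uint8_t>::min(),
                                                  std::numeric_limits<uint8_t>::max());
  NodeArg* weight_arg = builder.MakeInitializer<float>({4, 3, 3, 3}, -0.1f, 0.1f);
  NodeArg* input_dq_arg = builder.MakeIntermediate();
  NodeArg* conv_output_arg = builder.MakeIntermediate();
  NodeArg* output_arg = builder.MakeOutput();

  builder.AddDequantizeLinearNode<uint8_t>(input_arg, 0.014f, static_cast<uint8_t>(127), input_dq_arg);
  builder.AddNode("Conv", {input_dq_arg, weight_arg}, {conv_output_arg});
  // The Q node below is what the new guard looks for on the Conv output.
  builder.AddQuantizeLinearNode<uint8_t>(conv_output_arg, 0.05f, static_cast<uint8_t>(127), output_arg);
};
```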

onnxruntime/test/optimizer/qdq_transformer_test.cc

Lines changed: 56 additions & 0 deletions
```
@@ -5349,6 +5349,62 @@ TEST(QDQTransformerTests, WeightBiasQuantization_Conv_Weight_Bias) {
 #endif
 }
 
+// Tests that the WeightBiasQuantization optimizer does not process nodes that do not
+// already have an output that is consumed by a single QuantizeLinear node.
+TEST(QDQTransformerTests, WeightBiasQuantization_SkipIfOutputNotQuantized) {
+  auto test_case = [](bool add_final_reshape) {
+    auto build_test_case = [&](ModelTestBuilder& builder) {
+      NodeArg* input_arg = builder.MakeInput<uint8_t>({1, 24, 67, 67}, std::numeric_limits<uint8_t>::min(),
+                                                      std::numeric_limits<uint8_t>::max());
+      NodeArg* weight_arg = builder.MakeInitializer<float>({24, 1, 5, 5}, -0.1f, 0.1f);
+      NodeArg* bias_arg = builder.MakeInitializer<float>({24}, -0.1f, 0.1f);
+      NodeArg* input_dq_arg = builder.MakeIntermediate();
+      NodeArg* conv_output_arg = add_final_reshape ? builder.MakeIntermediate() : builder.MakeOutput();
+
+      builder.AddDequantizeLinearNode<uint8_t>(input_arg, 0.014f, static_cast<uint8_t>(127), input_dq_arg);
+      auto& conv_node = builder.AddNode("Conv", {input_dq_arg, weight_arg, bias_arg}, {conv_output_arg});
+      conv_node.AddAttribute("dilations", std::vector<int64_t>{1, 1});
+      conv_node.AddAttribute("kernel_shape", std::vector<int64_t>{5, 5});
+      conv_node.AddAttribute("strides", std::vector<int64_t>{2, 2});
+      conv_node.AddAttribute("group", static_cast<int64_t>(24));
+      conv_node.AddAttribute("pads", std::vector<int64_t>{0, 0, 0, 0});
+
+      // Make adding a final Reshape node configurable to test two cases:
+      //   - Conv produces a graph output
+      //   - Conv output is consumed by some node that is NOT a QuantizeLinear
+      // In either case, the WeightBiasQuantization optimizer should skip this node.
+      if (add_final_reshape) {
+        NodeArg* reshape_output_arg = builder.MakeOutput();
+        NodeArg* new_shape_arg = builder.Make1DInitializer<int64_t>({1, -1});
+        builder.AddNode("Reshape", {conv_output_arg, new_shape_arg}, {reshape_output_arg});
+      }
+    };
+
+    auto check_graph = [add_final_reshape](InferenceSessionWrapper& session) {
+      auto op_to_count = CountOpsInGraph(session.GetGraph());
+      const QDQOpKeys qdq_keys = GetQDQOpKeys(false);
+
+      // Should retain the same nodes in the original graph.
+      EXPECT_EQ(op_to_count[qdq_keys.dequantize_linear], 1);
+      EXPECT_EQ(op_to_count["Conv"], 1);
+      EXPECT_EQ(op_to_count[qdq_keys.quantize_linear], 0);
+      EXPECT_EQ(op_to_count["Reshape"], static_cast<int>(add_final_reshape));
+    };
+
+    TransformerTester(build_test_case,
+                      check_graph,
+                      TransformerLevel::Default,
+                      TransformerLevel::Level1,
+                      21,
+                      /*per_sample_tolerance*/ 0.0,
+                      /*relative_per_sample_tolerance*/ 0.0,
+                      std::make_unique<WeightBiasQuantization>());
+  };
+
+  test_case(false);  // Conv produces a graph output directly
+  test_case(true);   // Conv -> Reshape -> graph_output
+}
+
 TEST(QDQTransformerTests, WeightBiasQuantization_ConvTranspose_Weight) {
   auto test_case = [](bool use_contrib_qdq) {
     auto build_test_case = [&](ModelTestBuilder& builder) {
```