[quant] Set sparse to False for embedding_bag ops in graph mode (#45997)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/45997

The sparse field currently used in the float module controls sparse gradients, which is not applicable
to inference. The sparse field in the quantized ops denotes pruned weights.
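
For context, a minimal sketch of the float-side behavior (illustrative values; standard `torch.nn.EmbeddingBag` API): `sparse=True` only makes the weight gradient a sparse tensor during training, so it has no effect on the forward computation that quantized inference replaces.

```python
import torch

# sparse=True requests sparse *gradients* during training; it does not
# change the forward pass used at inference time.
weights = torch.randn(10, 12)
emb = torch.nn.EmbeddingBag(num_embeddings=10,
                            embedding_dim=12,
                            include_last_offset=True,
                            sparse=True,
                            _weight=weights,
                            mode='sum')

indices = torch.tensor([0, 1, 2, 3], dtype=torch.long)
offsets = torch.tensor([0, 2, 4], dtype=torch.long)
out = emb(indices, offsets)  # identical output for sparse=True or False
```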

Test Plan:
python test/test_quantization.py TestQuantizeDynamicJitOps.test_embedding_bag

Imported from OSS

Reviewed By: qizzzh

Differential Revision: D24176543

fbshipit-source-id: a05b4ff949e0375462ae411947f68076e1b460d2
diff --git a/test/quantization/test_quantize_jit.py b/test/quantization/test_quantize_jit.py
index a0fad9b..44bbf11 100644
--- a/test/quantization/test_quantize_jit.py
+++ b/test/quantization/test_quantize_jit.py
@@ -3039,14 +3039,14 @@
                 self.embedding1 = torch.nn.EmbeddingBag(num_embeddings=10,
                                                         embedding_dim=12,
                                                         include_last_offset=True,
-                                                        sparse=False,
+                                                        sparse=True,
                                                         _weight=weights,
                                                         mode='sum')
 
                 self.embedding2 = torch.nn.EmbeddingBag(num_embeddings=10,
                                                         embedding_dim=12,
                                                         include_last_offset=True,
-                                                        sparse=False,
+                                                        sparse=True,
                                                         _weight=weights,
                                                         mode='sum')
 
@@ -3077,6 +3077,7 @@
             FileCheck().check("quantized::embedding_bag_4bit_rowwise_offsets") \
                        .check_next("quantized::embedding_bag_byte_rowwise_offsets") \
                        .run(m.graph)
+            m(*dummy_inputs)
 
 
 
diff --git a/torch/csrc/jit/passes/quantization/insert_quant_dequant.cpp b/torch/csrc/jit/passes/quantization/insert_quant_dequant.cpp
index fd49cf6..0a71b5e 100644
--- a/torch/csrc/jit/passes/quantization/insert_quant_dequant.cpp
+++ b/torch/csrc/jit/passes/quantization/insert_quant_dequant.cpp
@@ -330,6 +330,8 @@
   // Create and insert quantized embedding op.
   Value* none = g->insertConstant(IValue());
   Value* zero = g->insertConstant(IValue(0));
+  bool sparse_param = false;
+  auto sparse_const = g->insertConstant(sparse_param);
 
   if (is_aten_op) {
     TORCH_CHECK(
@@ -340,6 +342,10 @@
     for (auto i = 1; i < inputs_size - 1; ++i) {
       qembedding_bag_inputs.push_back(embedding_bag_inputs[i]);
     }
+    // The sparse field in the float operator denotes sparse gradients;
+    // in the quantized op it denotes pruned weights. We currently don't
+    // support pruning in the graph mode API, so we set the field to false.
+    qembedding_bag_inputs[5] = sparse_const;
   } else {
     TORCH_CHECK(
         inputs_size == 11,
@@ -348,8 +354,8 @@
     qembedding_bag_inputs.push_back(embedding_bag_inputs[3]); // offsets
     qembedding_bag_inputs.push_back(
         embedding_bag_inputs[6]); // scale_grad_by_freq
-    qembedding_bag_inputs.push_back(zero); // zero
-    qembedding_bag_inputs.push_back(embedding_bag_inputs[8]); // sparse
+    qembedding_bag_inputs.push_back(zero); // mode
+    qembedding_bag_inputs.push_back(sparse_const); // pruned_weights
     qembedding_bag_inputs.push_back(
         embedding_bag_inputs[9]); // per_sample_weights
   }