quantized w2l (#1623)

Summary:
Pull Request resolved: https://github.com/pytorch/executorch/pull/1623

With the features added prior to this diff, quantized W2L now works end to end. This diff puts the shared Wav2Letter model in eval() mode and adds a qs8 test that quantizes the model, lowers it to the XNNPACK delegate, and verifies that the convolution/relu ops are absorbed by the delegate.
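
For context, Tester.quantize() wraps the PT2E quantization flow with the XNNPACK quantizer. Below is a minimal standalone sketch of that flow; it is not part of this diff and assumes the torch.ao XNNPACKQuantizer / capture_pre_autograd_graph APIs available around the time of this change:

    import torch
    from torch._export import capture_pre_autograd_graph
    from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
    from torch.ao.quantization.quantizer.xnnpack_quantizer import (
        XNNPACKQuantizer,
        get_symmetric_quantization_config,
    )
    from torchaudio import models

    model = models.Wav2Letter(num_classes=4096).eval()  # eval() so export traces inference behavior
    inputs = (torch.randn(10, 1, 700),)  # (batch, channels, frames), matching the test

    # Symmetric int8 ("qs8") config, the scheme the new test exercises.
    quantizer = XNNPACKQuantizer()
    quantizer.set_global(get_symmetric_quantization_config())

    # Capture, insert observers, calibrate, then convert to a quantized graph.
    captured = capture_pre_autograd_graph(model, inputs)
    prepared = prepare_pt2e(captured, quantizer)
    prepared(*inputs)  # one calibration pass on sample data
    quantized = convert_pt2e(prepared)

From there the quantized module follows the same export -> to_edge -> partition path shown in the test below.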

Reviewed By: digantdesai

Differential Revision: D52809537

fbshipit-source-id: 79e5bee58e0708a2f50008b074736c47e8028d99
diff --git a/backends/xnnpack/test/models/w2l.py b/backends/xnnpack/test/models/w2l.py
index 607ee4b..10d7ca1 100644
--- a/backends/xnnpack/test/models/w2l.py
+++ b/backends/xnnpack/test/models/w2l.py
@@ -15,7 +15,7 @@
     batch_size = 10
     input_frames = 700
     vocab_size = 4096
-    wav2letter = models.Wav2Letter(num_classes=vocab_size)
+    wav2letter = models.Wav2Letter(num_classes=vocab_size).eval()
 
     model_inputs = (torch.randn(batch_size, 1, input_frames),)
 
@@ -37,3 +37,23 @@
             .run_method()
             .compare_outputs()
         )
+
+    def test_qs8_w2l(self):
+        (
+            Tester(self.wav2letter.eval(), self.model_inputs)
+            .quantize()
+            .export()
+            .to_edge()
+            .partition()
+            .check_not(
+                [
+                    "executorch_exir_dialectes_edge__ops_aten_convolution_default",
+                    "executorch_exir_dialects_edge__ops_aten_relu_default",
+                ]
+            )
+            .check(["torch.ops.higher_order.executorch_call_delegate"])
+            .to_executorch()
+            .serialize()
+            .run_method()
+            .compare_outputs()
+        )