blob: 55caa63edd6a0e45e7ccfb7190359c07920fa3fa [file] [log] [blame]
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import json
import subprocess
import sys
import unittest
from multiprocessing.connection import Listener
import torch
from executorch.backends.qualcomm.tests.utils import TestQNN
from executorch.backends.qualcomm.utils.utils import (
generate_qnn_executorch_compiler_spec,
)
from executorch.examples.qualcomm.scripts.utils import setup_common_args_and_variables
from executorch.backends.qualcomm.tests.models import * # noqa: F403
from executorch.examples.models.deeplab_v3 import DeepLabV3ResNet101Model
from executorch.examples.models.edsr import EdsrModel
from executorch.examples.models.inception_v3 import InceptionV3Model
from executorch.examples.models.inception_v4 import InceptionV4Model
from executorch.examples.models.mobilebert import MobileBertModelExample
from executorch.examples.models.mobilenet_v2 import MV2Model
from executorch.examples.models.mobilenet_v3 import MV3Model
from executorch.examples.models.torchvision_vit.model import TorchVisionViTModel
from executorch.examples.qualcomm.scripts.edsr import annotate_forward
from executorch.exir.backend.backend_api import disable_validation
class TestQNNFloatingPointOperator(TestQNN):
    """Per-operator lowering tests for the QNN backend in fp16 mode.

    Each test instantiates a tiny module from
    executorch.backends.qualcomm.tests.models, lowers it through the QNN
    delegate and compares device output against eager execution using the
    tolerances configured in setUp.
    """

    def setUp(self):
        """Configure fp16 compiler specs and loose comparison tolerances."""
        # Loose tolerances: fp16 execution drifts from fp32 eager results.
        TestQNN.atol = 1e-1
        TestQNN.rtol = 1e-1
        TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
            is_fp16=True,
            soc_model=self.arch_table[TestQNN.model],
            debug=False,
            saver=False,
        )

    def test_qnn_backend_arange(self):
        module = Arange(5)  # noqa: F405
        sample_input = (torch.randn(5),)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_avg_pool2d(self):
        module = AvgPoolModule()  # noqa: F405
        sample_input = (torch.randn(1, 3, 2, 2),)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_bmm(self):
        module = Bmm()  # noqa: F405
        sample_input = (torch.randn([4, 8, 32]), torch.randn([4, 32, 8]))
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_cast(self):
        module = Cast()  # noqa: F405
        sample_input = (10 * torch.rand((9, 4, 5, 3)),)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_cat(self):
        # Concatenation with 2, 3 and 4 operands respectively.
        modules = [Cat2(), Cat3(), Cat4()]  # noqa: F405
        sample_input = (torch.randn(1, 1, 2, 2), torch.randn(1, 1, 4, 2))
        for i, module in enumerate(modules):
            with self.subTest(i=i):
                self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_clamp(self):
        module = Clamp()  # noqa: F405
        sample_input = (torch.randn((9, 4, 5, 3)),)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_conv2d(self):
        module = ConvSequential()  # noqa: F405
        sample_input = (torch.randn([1, 1, 3, 3]),)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_element_wise_add(self):
        # Each entry pairs module variants with the sample inputs they all
        # run against (including a broadcasting case).
        test_comb = [
            {
                "module": [Add()],  # noqa: F405
                "sample_inputs": [
                    (torch.randn(2, 5, 1, 3), torch.randn(2, 5, 1, 3)),
                    (torch.randn([2, 5, 1, 3]), torch.randn([4, 1])),
                ],
            },
            {
                "module": [AddConstantFloat()],  # noqa: F405
                "sample_inputs": [(torch.randn(2, 5, 1, 3),)],
            },
        ]
        index = 0
        for comb in test_comb:
            for module in comb["module"]:
                for sample_input in comb["sample_inputs"]:
                    with self.subTest(i=index):
                        self.lower_module_and_test_output(module, sample_input)
                        index += 1

    def test_qnn_backend_element_wise_ceil(self):
        module = Ceil()  # noqa: F405
        sample_input = (torch.randn([2, 5, 1, 3]),)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_element_wise_div(self):
        test_comb = [
            {
                "module": [Div()],  # noqa: F405
                "sample_inputs": [
                    (torch.randn(2, 5, 1, 3), torch.randn(2, 5, 1, 3)),
                    (torch.randn([2, 5, 1, 3]), torch.randn([4, 1])),
                ],
            },
            {
                "module": [DivConstantFloat()],  # noqa: F405
                "sample_inputs": [(torch.randn(2, 5, 1, 3),)],
            },
        ]
        index = 0
        for comb in test_comb:
            for module in comb["module"]:
                for sample_input in comb["sample_inputs"]:
                    with self.subTest(i=index):
                        self.lower_module_and_test_output(module, sample_input)
                        index += 1

    def test_qnn_backend_element_wise_mul(self):
        test_comb = [
            {
                "module": [Mul()],  # noqa: F405
                "sample_inputs": [
                    (torch.randn(2, 5, 1, 3), torch.randn(2, 5, 1, 3)),
                    (torch.randn([2, 5, 1, 3]), torch.randn([4, 1])),
                ],
            },
            {
                "module": [MulConstantFloat()],  # noqa: F405
                "sample_inputs": [(torch.randn(2, 5, 1, 3),)],
            },
            {
                "module": [MulScalar()],  # noqa: F405
                "sample_inputs": [(torch.randn(2, 5, 1, 3),)],
            },
        ]
        index = 0
        for comb in test_comb:
            for module in comb["module"]:
                for sample_input in comb["sample_inputs"]:
                    with self.subTest(i=index):
                        self.lower_module_and_test_output(module, sample_input)
                        index += 1

    @unittest.skip("not yet implemented")
    def test_qnn_backend_element_wise_sqrt(self):
        modules = [Sqrt(), SqrtConstant()]  # noqa: F405
        sample_input = (torch.randn([3, 1]),)
        for i, module in enumerate(modules):
            with self.subTest(i=i):
                self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_element_wise_sub(self):
        test_comb = [
            {
                "module": [Sub()],  # noqa: F405
                "sample_inputs": [
                    (torch.randn(2, 5, 1, 3), torch.randn(2, 5, 1, 3)),
                    (torch.randn([2, 5, 1, 3]), torch.randn([4, 1])),
                ],
            },
            {
                "module": [SubConstantFloat()],  # noqa: F405
                "sample_inputs": [(torch.randn(2, 5, 1, 3),)],
            },
        ]
        index = 0
        for comb in test_comb:
            for module in comb["module"]:
                for sample_input in comb["sample_inputs"]:
                    with self.subTest(i=index):
                        self.lower_module_and_test_output(module, sample_input)
                        index += 1

    @unittest.expectedFailure
    def test_qnn_backend_embedding(self):
        module = Embedding()  # noqa: F405
        # QNN does not support int64 datatype
        sample_input = (torch.LongTensor([[1, 2, 4, 5], [4, 3, 2, 9]]),)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_expand_copy(self):
        module = ExpandCopy()  # noqa: F405
        sample_input = (torch.randn([3, 1]),)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_gelu(self):
        module = Gelu()  # noqa: F405
        sample_input = (torch.randn(2, 5, 1, 3),)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_hardsigmoid(self):
        module = HardSigmoid()  # noqa: F405
        sample_input = (torch.randn(2, 5, 1, 3),)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_hardswish(self):
        module = HardSwish()  # noqa: F405
        sample_input = (torch.randn(2, 5, 1, 3),)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_hardtanh(self):
        module = HardTanh()  # noqa: F405
        sample_input = (torch.randn([2, 5, 1, 3]),)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_interpolate(self):
        module = StaticResizeBilinear2DSizeModule()  # noqa: F405
        sample_input = (torch.randn(2, 3, 4, 5),)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_layer_norm(self):
        module = LayerNorm()  # noqa: F405
        sample_input = (torch.randn(196, 768),)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_linear(self):
        module = Linear()  # noqa: F405
        sample_input = (torch.randn([3, 4]),)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_max_pool2d(self):
        module = MaxPool2d()  # noqa: F405
        sample_input = (torch.randn(4, 3, 24, 24),)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_mean_dim(self):
        # mean() with and without keepdim.
        modules = [MeanWKeppDim(), MeanWOKeppDim()]  # noqa: F405
        sample_input = (torch.randn([2, 5, 1, 3]),)
        for i, module in enumerate(modules):
            with self.subTest(i=i):
                self.lower_module_and_test_output(module, sample_input)

    @unittest.skip("it will hang in runtime")
    def test_qnn_backend_mha(self):
        module = MultiheadAttention()  # noqa: F405
        sample_input = (torch.randn(1, 197, 96),)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_pad(self):
        module = Pad()  # noqa: F405
        sample_input = (torch.randn([1, 8, 128]),)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_pixel_shuffle(self):
        module = PixelShuffle()  # noqa: F405
        sample_input = (torch.ones([2, 4, 3, 3]),)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_relu(self):
        module = Relu()  # noqa: F405
        sample_input = (torch.randn([2, 5, 1, 3]),)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_reshape(self):
        module = Reshape()  # noqa: F405
        sample_input = (torch.randn([3, 4]),)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_sdpa(self):
        module = ScaledDotProductAttention()  # noqa: F405
        sample_input = (
            torch.randn(1, 4, 100, 64),
            torch.randn(1, 4, 100, 64),
            torch.randn(1, 4, 100, 64),
        )
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_select_copy(self):
        module = SelectCopy()  # noqa: F405
        sample_input = (torch.randn([1, 3, 3, 3]),)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_slice_copy(self):
        module = SliceCopy()  # noqa: F405
        sample_input = (
            torch.randn([1, 512]),
            torch.randn([1, 8]),
        )
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_softmax(self):
        module = Softmax()  # noqa: F405
        sample_input = (torch.randn([1, 4, 8, 8]),)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_tanh(self):
        module = Tanh()  # noqa: F405
        sample_input = (torch.randn(2, 5, 1, 3),)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_unsqueeze(self):
        module = Unsqueeze()  # noqa: F405
        sample_input = (torch.randn([1, 3, 3]),)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_view(self):
        module = View()  # noqa: F405
        sample_input = (torch.randn([1, 8, 512]), torch.randn([1, 2, 8, 256]))
        self.lower_module_and_test_output(module, sample_input)
class TestQNNFloatingPointModel(TestQNN):
    """Compound-model lowering tests for the QNN backend in fp16 mode."""

    def setUp(self):
        """Configure fp16 compiler specs and loose comparison tolerances."""
        TestQNN.rtol = 1e-1
        TestQNN.atol = 1e-1
        TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
            is_fp16=True,
            soc_model=self.arch_table[TestQNN.model],
            debug=False,
            saver=False,
        )

    def test_qnn_backend_conv2d_avg_pool2d(self):
        model = Conv2dAvgPool2d()  # noqa: F405
        inputs = (torch.randn(16, 3, 16, 16),)
        self.lower_module_and_test_output(model, inputs)

    def test_qnn_backend_conv2d_bn_hardtanh_mean(self):
        model = Conv2dBnHardtanhMean()  # noqa: F405
        inputs = (torch.randn(1, 1, 6, 6),)
        self.lower_module_and_test_output(model, inputs)

    def test_qnn_backend_conv2d_cat(self):
        model = Conv2dCat()  # noqa: F405
        inputs = (torch.randn(1, 3, 5, 5), torch.randn(1, 3, 5, 5))
        self.lower_module_and_test_output(model, inputs)

    def test_qnn_backend_conv2d_max_pool2d(self):
        model = Conv2dMaxPool2d()  # noqa: F405
        inputs = (torch.rand(1, 2, 14, 14),)
        self.lower_module_and_test_output(model, inputs)

    def test_qnn_backend_residual_block(self):
        model = ResidualBlockModule()  # noqa: F405
        inputs = (torch.randn(1, 32, 28, 28),)
        self.lower_module_and_test_output(model, inputs)

    def test_qnn_backend_simple_model(self):
        model = SimpleModel()  # noqa: F405
        inputs = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
        self.lower_module_and_test_output(model, inputs)

    def test_qnn_backend_view_permute_matmul(self):
        model = ViewPermuteMatMul()  # noqa: F405
        inputs = (torch.randn([1, 8, 512]), torch.randn([1, 2, 8, 256]))
        self.lower_module_and_test_output(model, inputs)

    def test_qnn_backend_example_models(self):
        instances = [
            DeepLabV3ResNet101Model(),
            EdsrModel(),
            InceptionV3Model(),
            InceptionV4Model(),
            MV2Model(),
            MV3Model(),
            MobileBertModelExample(),
            TorchVisionViTModel(),
        ]
        # Every example model is expected to be delegated as one partition.
        expected_partitions = [1] * len(instances)
        # TODO: Due to trigger maximum recursion depth exceeded, need to check it.
        disable_validation()
        for idx, instance in enumerate(instances):
            with self.subTest(i=idx):
                eager = instance.get_eager_model().eval()
                inputs = instance.get_example_inputs()
                self.lower_module_and_test_output(
                    eager,
                    inputs,
                    expected_partitions=expected_partitions[idx],
                    assert_output_equal=False,
                )
class TestQNNQuantizedOperator(TestQNN):
    """Per-operator lowering tests for the QNN backend in quantized mode.

    Same coverage as TestQNNFloatingPointOperator, but each module is first
    converted to a quantize/dequantize (QDQ) form via get_qdq_module before
    being lowered with fp16 disabled.
    """

    def setUp(self):
        """Configure quantized compiler specs and comparison tolerances."""
        # rtol of 1 accommodates quantization error on small magnitudes.
        TestQNN.atol = 1e-1
        TestQNN.rtol = 1
        TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
            is_fp16=False,
            soc_model=self.arch_table[TestQNN.model],
            debug=False,
            saver=False,
        )

    def test_qnn_backend_arange(self):
        module = Arange(5)  # noqa: F405
        sample_input = (torch.randn(5),)
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_avg_pool2d(self):
        module = AvgPoolModule()  # noqa: F405
        sample_input = (torch.randn(1, 3, 2, 2),)
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_bmm(self):
        module = Bmm()  # noqa: F405
        sample_input = (torch.randn([4, 8, 32]), torch.randn([4, 32, 8]))
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    @unittest.skip("not applicable")
    def test_qnn_backend_cast(self):
        module = Cast()  # noqa: F405
        sample_input = (10 * torch.rand((9, 4, 5, 3)),)
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_cat(self):
        # Concatenation with 2, 3 and 4 operands respectively.
        modules = [Cat2(), Cat3(), Cat4()]  # noqa: F405
        sample_input = (torch.randn(1, 1, 2, 2), torch.randn(1, 1, 4, 2))
        for i, module in enumerate(modules):
            with self.subTest(i=i):
                module = self.get_qdq_module(module, sample_input)
                self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_clamp(self):
        module = Clamp()  # noqa: F405
        sample_input = (torch.randn((9, 4, 5, 3)),)
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_conv2d(self):
        module = ConvSequential()  # noqa: F405
        sample_input = (torch.randn([1, 1, 3, 3]),)
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_element_wise_add(self):
        # Each entry pairs module variants with the sample inputs they all
        # run against (including a broadcasting case).
        test_comb = [
            {
                "module": [Add()],  # noqa: F405
                "sample_inputs": [
                    (torch.randn(2, 5, 1, 3), torch.randn(2, 5, 1, 3)),
                    (torch.randn([2, 5, 1, 3]), torch.randn([4, 1])),
                ],
            },
            {
                "module": [AddConstantFloat(), AddConstantLong()],  # noqa: F405
                "sample_inputs": [(torch.randn(2, 5, 1, 3),)],
            },
        ]
        index = 0
        for comb in test_comb:
            for module in comb["module"]:
                for sample_input in comb["sample_inputs"]:
                    with self.subTest(i=index):
                        module = self.get_qdq_module(module, sample_input)
                        self.lower_module_and_test_output(module, sample_input)
                        index += 1

    def test_qnn_backend_element_wise_ceil(self):
        module = Ceil()  # noqa: F405
        sample_input = (torch.randn([2, 5, 1, 3]),)
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_element_wise_div(self):
        test_comb = [
            {
                "module": [Div()],  # noqa: F405
                "sample_inputs": [
                    (torch.randn(2, 5, 1, 3), torch.randn(2, 5, 1, 3)),
                    (torch.randn([2, 5, 1, 3]), torch.randn([4, 1])),
                ],
            },
            {
                "module": [DivConstantFloat(), DivConstantLong()],  # noqa: F405
                "sample_inputs": [(torch.randn(2, 5, 1, 3),)],
            },
        ]
        index = 0
        for comb in test_comb:
            for module in comb["module"]:
                for sample_input in comb["sample_inputs"]:
                    with self.subTest(i=index):
                        module = self.get_qdq_module(module, sample_input)
                        self.lower_module_and_test_output(module, sample_input)
                        index += 1

    def test_qnn_backend_element_wise_mul(self):
        test_comb = [
            {
                "module": [Mul()],  # noqa: F405
                "sample_inputs": [
                    (torch.randn(2, 5, 1, 3), torch.randn(2, 5, 1, 3)),
                    (torch.randn([2, 5, 1, 3]), torch.randn([4, 1])),
                ],
            },
            {
                "module": [MulConstantFloat(), MulConstantLong()],  # noqa: F405
                "sample_inputs": [(torch.randn(2, 5, 1, 3),)],
            },
            {
                "module": [MulScalar()],  # noqa: F405
                "sample_inputs": [(torch.randn(2, 5, 1, 3),)],
            },
        ]
        index = 0
        for comb in test_comb:
            for module in comb["module"]:
                for sample_input in comb["sample_inputs"]:
                    with self.subTest(i=index):
                        module = self.get_qdq_module(module, sample_input)
                        self.lower_module_and_test_output(module, sample_input)
                        index += 1

    @unittest.skip("not yet implemented")
    def test_qnn_backend_element_wise_sqrt(self):
        modules = [Sqrt(), SqrtConstant()]  # noqa: F405
        sample_input = (torch.randn([3, 1]),)
        for i, module in enumerate(modules):
            with self.subTest(i=i):
                module = self.get_qdq_module(module, sample_input)
                self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_element_wise_sub(self):
        test_comb = [
            {
                "module": [Sub()],  # noqa: F405
                "sample_inputs": [
                    (torch.randn(2, 5, 1, 3), torch.randn(2, 5, 1, 3)),
                    (torch.randn([2, 5, 1, 3]), torch.randn([4, 1])),
                ],
            },
            {
                "module": [SubConstantFloat(), SubConstantLong()],  # noqa: F405
                "sample_inputs": [(torch.randn(2, 5, 1, 3),)],
            },
        ]
        index = 0
        for comb in test_comb:
            for module in comb["module"]:
                for sample_input in comb["sample_inputs"]:
                    with self.subTest(i=index):
                        module = self.get_qdq_module(module, sample_input)
                        self.lower_module_and_test_output(module, sample_input)
                        index += 1

    @unittest.expectedFailure
    def test_qnn_backend_embedding(self):
        module = Embedding()  # noqa: F405
        # QNN does not support int64 datatype
        sample_input = (torch.LongTensor([[1, 2, 4, 5], [4, 3, 2, 9]]),)
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_expand_copy(self):
        module = ExpandCopy()  # noqa: F405
        sample_input = (torch.randn([3, 1]),)
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_gelu(self):
        module = Gelu()  # noqa: F405
        sample_input = (torch.randn(2, 5, 1, 3),)
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_hardsigmoid(self):
        module = HardSigmoid()  # noqa: F405
        sample_input = (torch.randn(2, 5, 1, 3),)
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_hardswish(self):
        module = HardSwish()  # noqa: F405
        sample_input = (torch.randn(2, 5, 1, 3),)
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_hardtanh(self):
        module = HardTanh()  # noqa: F405
        sample_input = (torch.randn([2, 5, 1, 3]),)
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_interpolate(self):
        module = StaticResizeBilinear2DSizeModule()  # noqa: F405
        sample_input = (torch.randn(2, 3, 4, 5),)
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_layer_norm(self):
        module = LayerNorm()  # noqa: F405
        sample_input = (torch.randn(196, 768),)
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_linear(self):
        module = Linear()  # noqa: F405
        sample_input = (torch.randn([3, 4]),)
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_max_pool2d(self):
        module = MaxPool2d()  # noqa: F405
        sample_input = (torch.randn(4, 3, 24, 24),)
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_mean_dim(self):
        # mean() with and without keepdim.
        modules = [MeanWKeppDim(), MeanWOKeppDim()]  # noqa: F405
        sample_input = (torch.randn([2, 5, 1, 3]),)
        for i, module in enumerate(modules):
            with self.subTest(i=i):
                module = self.get_qdq_module(module, sample_input)
                self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_mha(self):
        module = MultiheadAttention()  # noqa: F405
        sample_input = (torch.randn(1, 197, 96),)
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_pad(self):
        module = Pad()  # noqa: F405
        sample_input = (torch.randn([1, 8, 128]),)
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_pixel_shuffle(self):
        module = PixelShuffle()  # noqa: F405
        sample_input = (torch.ones([2, 4, 3, 3]),)
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_relu(self):
        module = Relu()  # noqa: F405
        sample_input = (torch.randn([2, 5, 1, 3]),)
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_reshape(self):
        module = Reshape()  # noqa: F405
        sample_input = (torch.randn([3, 4]),)
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_sdpa(self):
        module = ScaledDotProductAttention()  # noqa: F405
        sample_input = (
            torch.randn(1, 4, 100, 64),
            torch.randn(1, 4, 100, 64),
            torch.randn(1, 4, 100, 64),
        )
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_select_copy(self):
        module = SelectCopy()  # noqa: F405
        sample_input = (torch.randn([1, 3, 3, 3]),)
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_slice_copy(self):
        module = SliceCopy()  # noqa: F405
        sample_input = (
            torch.randn([1, 512]),
            torch.randn([1, 8]),
        )
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_softmax(self):
        module = Softmax()  # noqa: F405
        sample_input = (torch.randn([1, 4, 8, 8]),)
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_tanh(self):
        module = Tanh()  # noqa: F405
        sample_input = (torch.randn(2, 5, 1, 3),)
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_unsqueeze(self):
        module = Unsqueeze()  # noqa: F405
        sample_input = (torch.randn([1, 3, 3]),)
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)

    def test_qnn_backend_view(self):
        module = View()  # noqa: F405
        sample_input = (torch.randn([1, 8, 512]), torch.randn([1, 2, 8, 256]))
        module = self.get_qdq_module(module, sample_input)
        self.lower_module_and_test_output(module, sample_input)
class TestQNNQuantizedModel(TestQNN):
    """Compound-model lowering tests for the QNN backend in quantized mode."""

    def setUp(self):
        """Configure quantized compiler specs and comparison tolerances."""
        TestQNN.rtol = 1
        TestQNN.atol = 1e-1
        TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
            is_fp16=False,
            soc_model=self.arch_table[TestQNN.model],
            debug=False,
            saver=False,
        )

    def test_qnn_backend_conv2d_avg_pool2d(self):
        model = Conv2dAvgPool2d()  # noqa: F405
        inputs = (torch.randn(16, 3, 16, 16),)
        model = self.get_qdq_module(model, inputs)
        self.lower_module_and_test_output(model, inputs)

    def test_qnn_backend_conv2d_bn_hardtanh_mean(self):
        model = Conv2dBnHardtanhMean()  # noqa: F405
        inputs = (torch.randn(1, 1, 6, 6),)
        model = self.get_qdq_module(model, inputs)
        self.lower_module_and_test_output(model, inputs)

    def test_qnn_backend_conv2d_cat(self):
        model = Conv2dCat()  # noqa: F405
        inputs = (torch.randn(1, 3, 5, 5), torch.randn(1, 3, 5, 5))
        model = self.get_qdq_module(model, inputs)
        self.lower_module_and_test_output(model, inputs)

    def test_qnn_backend_conv2d_max_pool2d(self):
        model = Conv2dMaxPool2d()  # noqa: F405
        inputs = (torch.rand(1, 2, 14, 14),)
        model = self.get_qdq_module(model, inputs)
        self.lower_module_and_test_output(model, inputs)

    def test_qnn_backend_residual_block(self):
        model = ResidualBlockModule()  # noqa: F405
        inputs = (torch.randn(1, 32, 28, 28),)
        model = self.get_qdq_module(model, inputs)
        self.lower_module_and_test_output(model, inputs)

    def test_qnn_backend_simple_model(self):
        model = SimpleModel()  # noqa: F405
        inputs = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
        model = self.get_qdq_module(model, inputs)
        self.lower_module_and_test_output(model, inputs)

    def test_qnn_backend_view_permute_matmul(self):
        model = ViewPermuteMatMul()  # noqa: F405
        inputs = (torch.randn([1, 8, 512]), torch.randn([1, 2, 8, 256]))
        model = self.get_qdq_module(model, inputs)
        self.lower_module_and_test_output(model, inputs)

    def test_qnn_backend_example_models(self):
        # Each entry supplies the model plus any custom quantization
        # annotations its script needs.
        instances = [
            {"module": DeepLabV3ResNet101Model(), "annotation": ()},
            {"module": EdsrModel(), "annotation": (annotate_forward,)},
            {"module": InceptionV3Model(), "annotation": ()},
            {"module": InceptionV4Model(), "annotation": ()},
            {"module": MV2Model(), "annotation": ()},
            {"module": MV3Model(), "annotation": ()},
            # only works on QNN 2.12 so far
            # { 'module': MobileBertModelExample(), 'annotation': () },
            {"module": TorchVisionViTModel(), "annotation": ()},
        ]
        # Every example model is expected to be delegated as one partition.
        expected_partitions = [1] * len(instances)
        # TODO: Due to trigger maximum recursion depth exceeded, need to check it.
        disable_validation()
        for idx, instance in enumerate(instances):
            with self.subTest(i=idx):
                eager = instance["module"].get_eager_model().eval()
                inputs = instance["module"].get_example_inputs()
                qdq = self.get_qdq_module(
                    eager,
                    inputs,
                    custom_quant_annotations=instance["annotation"],
                )
                self.lower_module_and_test_output(
                    qdq,
                    inputs,
                    expected_partitions=expected_partitions[idx],
                    assert_output_equal=False,
                )
class TestQNNFloatingPointUtils(TestQNN):
    """Partitioner-control (node skipping) tests in fp16 mode."""

    def setUp(self):
        """Configure fp16 compiler specs and loose comparison tolerances."""
        TestQNN.rtol = 1e-1
        TestQNN.atol = 1e-1
        TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
            is_fp16=True,
            soc_model=self.arch_table[TestQNN.model],
            debug=False,
            saver=False,
        )

    def test_qnn_backend_skip_node_id(self):
        # Excluding two nodes by id is expected to split the delegated
        # graph into three partitions.
        model = SimpleModel()  # noqa: F405
        inputs = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
        self.lower_module_and_test_output(
            model,
            inputs,
            expected_partitions=3,
            skip_node_id_set={"aten_add_tensor", "aten_mean_dim"},
        )

    def test_qnn_backend_skip_node_op(self):
        # Excluding every add op is expected to split the delegated graph
        # into two partitions.
        model = SimpleModel()  # noqa: F405
        inputs = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
        self.lower_module_and_test_output(
            model,
            inputs,
            expected_partitions=2,
            skip_node_op_set={"aten.add.Tensor"},
        )
class TestQNNQuantizedUtils(TestQNN):
    """Partitioner-control (node skipping) tests in quantized mode."""

    def setUp(self):
        """Configure quantized compiler specs and comparison tolerances."""
        TestQNN.rtol = 1
        TestQNN.atol = 1e-1
        TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
            is_fp16=False,
            soc_model=self.arch_table[TestQNN.model],
            debug=False,
            saver=False,
        )

    def test_qnn_backend_skip_node_id(self):
        # Excluding two nodes by id is expected to split the delegated
        # graph into three partitions.
        model = SimpleModel()  # noqa: F405
        inputs = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
        model = self.get_qdq_module(model, inputs)
        self.lower_module_and_test_output(
            model,
            inputs,
            expected_partitions=3,
            skip_node_id_set={"aten_add_tensor", "aten_mean_dim"},
        )

    def test_qnn_backend_skip_node_op(self):
        # Excluding every add op is expected to split the delegated graph
        # into two partitions.
        model = SimpleModel()  # noqa: F405
        inputs = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
        model = self.get_qdq_module(model, inputs)
        self.lower_module_and_test_output(
            model,
            inputs,
            expected_partitions=2,
            skip_node_op_set={"aten.add.Tensor"},
        )
class TestExampleScript(TestQNN):
    """End-to-end accuracy tests that drive the example scripts on-device.

    Each test launches the matching script under examples/qualcomm/scripts
    as a subprocess; the script runs the model on the attached device and
    reports its metrics back as JSON over a multiprocessing Listener
    socket, which the test then checks against minimum thresholds.
    """

    def required_envs(self, conditions=None) -> bool:
        """Return True when executorch_root, artifact_dir and every extra
        per-test condition are all set (truthy)."""
        conditions = [] if conditions is None else conditions
        return all(
            [
                self.executorch_root,
                self.artifact_dir,
                *conditions,
            ]
        )

    def _run_example_script(self, script, extra_args=None):
        """Run one example script and return its reported metrics.

        Args:
            script: file name under examples/qualcomm/scripts
                (e.g. "mobilenet_v2.py").
            extra_args: additional CLI flags specific to that script.

        Returns:
            The dict parsed from the JSON message the script sends back
            over the (ip, port) Listener connection.
        """
        # argparse is order-insensitive, so the shared flags can follow the
        # script-specific ones.
        cmds = [
            "python",
            f"{self.executorch_root}/examples/qualcomm/scripts/{script}",
            *(extra_args or []),
            "--artifact",
            self.artifact_dir,
            "--build_folder",
            self.build_folder,
            "--device",
            self.device,
            "--model",
            self.model,
            "--ip",
            self.ip,
            "--port",
            str(self.port),
        ]
        if self.host:
            cmds.extend(["--host", self.host])
        # NOTE(review): as in the original flow, the subprocess is started
        # before the Listener binds; the scripts spend a while exporting
        # before connecting back — confirm there is no connect race.
        p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL)
        with Listener((self.ip, self.port)) as listener:
            conn = listener.accept()
            p.communicate()  # wait for the script to finish
            return json.loads(conn.recv())

    def test_mobilenet_v2(self):
        if not self.required_envs([self.image_dataset]):
            self.skipTest("missing required envs")
        msg = self._run_example_script(
            "mobilenet_v2.py", ["--dataset", self.image_dataset]
        )
        self.assertGreaterEqual(msg["top_1"], 60)
        self.assertGreaterEqual(msg["top_5"], 80)

    def test_inception_v3(self):
        if not self.required_envs([self.image_dataset]):
            self.skipTest("missing required envs")
        msg = self._run_example_script(
            "inception_v3.py", ["--dataset", self.image_dataset]
        )
        self.assertGreaterEqual(msg["top_1"], 60)
        self.assertGreaterEqual(msg["top_5"], 80)

    def test_inception_v4(self):
        if not self.required_envs([self.image_dataset]):
            self.skipTest("missing required envs")
        msg = self._run_example_script(
            "inception_v4.py", ["--dataset", self.image_dataset]
        )
        self.assertGreaterEqual(msg["top_1"], 60)
        self.assertGreaterEqual(msg["top_5"], 80)

    def test_vit(self):
        if not self.required_envs([self.image_dataset]):
            self.skipTest("missing required envs")
        msg = self._run_example_script(
            "torchvision_vit.py", ["--dataset", self.image_dataset]
        )
        self.assertGreaterEqual(msg["top_1"], 70)
        self.assertGreaterEqual(msg["top_5"], 90)

    def test_edsr(self):
        if not self.required_envs():
            self.skipTest("missing required envs")
        msg = self._run_example_script("edsr.py", ["--default_dataset"])
        # "PNSR" is the key as emitted by the script (sic).
        self.assertGreaterEqual(msg["PNSR"], 25)
        self.assertGreaterEqual(msg["SSIM"], 0.8)

    def test_deeplab_v3(self):
        if not self.required_envs():
            self.skipTest("missing required envs")
        msg = self._run_example_script("deeplab_v3.py", ["--download"])
        self.assertGreaterEqual(msg["PA"], 0.85)
        self.assertGreaterEqual(msg["MPA"], 0.70)
        self.assertGreaterEqual(msg["MIoU"], 0.55)

    def test_mobilebert(self):
        if not self.required_envs([self.pretrained_weight]):
            self.skipTest("missing required envs")
        msg = self._run_example_script(
            "mobilebert_fine_tune.py",
            ["--pretrained_weight", self.pretrained_weight],
        )
        # CPU vs HTP results must agree within 1 for every reported metric.
        cpu, htp = msg["CPU"], msg["HTP"]
        for k, v in cpu.items():
            self.assertLessEqual(abs(v[0] - htp[k][0]), 1)

    def test_ptq_mobilebert(self):
        if not self.required_envs([self.pretrained_weight]):
            self.skipTest("missing required envs")
        msg = self._run_example_script(
            "mobilebert_fine_tune.py",
            ["--pretrained_weight", self.pretrained_weight, "--ptq"],
        )
        # PTQ run tolerates a larger CPU/HTP gap than the fp run.
        cpu, htp = msg["CPU"], msg["HTP"]
        for k, v in cpu.items():
            self.assertLessEqual(abs(v[0] - htp[k][0]), 5)
def setup_environment():
    """Parse harness-level CLI flags and publish them on TestQNN.

    Returns:
        The argv list (program name plus all unrecognized arguments) to be
        handed to unittest.main for its own CLI parsing.
    """
    parser = setup_common_args_and_variables()
    parser.add_argument(
        "-r",
        "--executorch_root",
        help="Root location of current repo",
        type=str,
    )
    parser.add_argument(
        "-a",
        "--artifact_dir",
        help="Location for putting generated artifacts",
        type=str,
    )
    parser.add_argument(
        "-i",
        "--image_dataset",
        help="Location for imagenet dataset",
        type=str,
    )
    parser.add_argument(
        "-p",
        "--pretrained_weight",
        help="Location for pretrained weighting",
        type=str,
    )
    parser.add_argument(
        "-e",
        "--error_only",
        help="Emit log only when error happened",
        action="store_true",
    )
    # NOTE: passing namespace=unittest makes argparse setattr the parsed
    # values onto the unittest module object itself, in addition to the
    # explicit TestQNN assignments below.
    args, ns_args = parser.parse_known_args(namespace=unittest)
    for attr in (
        "host",
        "device",
        "model",
        "build_folder",
        "executorch_root",
        "artifact_dir",
        "image_dataset",
        "pretrained_weight",
        "error_only",
    ):
        setattr(TestQNN, attr, getattr(args, attr))
    return sys.argv[:1] + ns_args
# Consume the harness-specific flags first, then hand the remaining argv
# (program name + leftovers) to unittest's own runner.
if __name__ == "__main__":
    ut_args = setup_environment()
    unittest.main(argv=ut_args)