[BE] [3/3] Rewrite `super()` calls in test (#94592)

Rewrite calls to the Python built-in `super()` to the zero-argument form. Only non-semantic changes should be applied.

- #94587
- #94588
- #94592

Methods whose body consists only of a `super()` call are also removed:

```diff
 class MyModule(nn.Module):
-    def __init__(self):
-        super().__init__()
-
     def forward(self, ...):
         ...
```
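
A minimal sketch of why this removal is behavior-preserving (the `WithInit`/`WithoutInit` names are illustrative, not from the patch): when a subclass defines no `__init__`, Python uses the inherited one, so an `__init__` that only forwards to `super().__init__()` adds nothing.

```python
import torch.nn as nn

class WithInit(nn.Module):
    def __init__(self):
        super().__init__()  # only forwards to the parent; adds nothing

class WithoutInit(nn.Module):
    pass  # inherits nn.Module.__init__ directly

# Both modules are initialized identically.
assert isinstance(WithInit(), nn.Module)
assert isinstance(WithoutInit(), nn.Module)
```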

Cases where the rewrite would change semantics are left unchanged, e.g.:

https://github.com/pytorch/pytorch/blob/f152a79be9612b824e1672b8f8cb88a414ce4c12/caffe2/python/net_printer.py#L184-L190

https://github.com/pytorch/pytorch/blob/f152a79be9612b824e1672b8f8cb88a414ce4c12/test/test_jit_fuser_te.py#L2628-L2635
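
For illustration only (a hedged sketch, not the exact code at the links above): the two-argument form is kept where it deliberately names a class other than the enclosing one to skip a level of the MRO, since the zero-argument `super()` would resolve against the enclosing class and change behavior.

```python
class Base:
    def greet(self):
        return "base"

class Middle(Base):
    def greet(self):
        return "middle"

class Leaf(Middle):
    def greet(self):
        # Starts the MRO lookup *after* Middle, bypassing Middle.greet().
        # Rewriting this to `super().greet()` would return "middle" instead.
        return super(Middle, self).greet()

assert Leaf().greet() == "base"
```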

Pull Request resolved: https://github.com/pytorch/pytorch/pull/94592
Approved by: https://github.com/ezyang, https://github.com/seemethere
diff --git a/test/bottleneck_test/test_cuda.py b/test/bottleneck_test/test_cuda.py
index 1cf018f..65bbcac 100644
--- a/test/bottleneck_test/test_cuda.py
+++ b/test/bottleneck_test/test_cuda.py
@@ -6,7 +6,7 @@
 
 class Model(nn.Module):
     def __init__(self):
-        super(Model, self).__init__()
+        super().__init__()
         self.linear = nn.Linear(20, 20)
 
     def forward(self, input):
diff --git a/test/cpp/jit/test_exception.cpp b/test/cpp/jit/test_exception.cpp
index 7f57bc5..2f495c4 100644
--- a/test/cpp/jit/test_exception.cpp
+++ b/test/cpp/jit/test_exception.cpp
@@ -113,7 +113,7 @@
   py::exec(R"PY(
   class SimpleValueError(ValueError):
     def __init__(self, message):
-      super(SimpleValueError, self).__init__(message)
+      super().__init__(message)
   )PY");
 
   std::string pythonCode = R"PY(
diff --git a/test/cpp/jit/test_lite_interpreter.cpp b/test/cpp/jit/test_lite_interpreter.cpp
index c45ca96..212d642 100644
--- a/test/cpp/jit/test_lite_interpreter.cpp
+++ b/test/cpp/jit/test_lite_interpreter.cpp
@@ -1157,7 +1157,7 @@
 
   // class Add(torch.nn.Module):
   //     def __init__(self):
-  //         super(Add, self).__init__()
+  //         super().__init__()
 
   //     def forward(self, a, b):
   //         return a + b
diff --git a/test/cpp/jit/tests_setup.py b/test/cpp/jit/tests_setup.py
index 8a9be71..b464392 100644
--- a/test/cpp/jit/tests_setup.py
+++ b/test/cpp/jit/tests_setup.py
@@ -26,7 +26,7 @@
     def setup(self):
         class Model(torch.jit.ScriptModule):
             def __init__(self):
-                super(Model, self).__init__()
+                super().__init__()
                 self.dropout = torch.nn.Dropout(0.1)
 
             @torch.jit.script_method
diff --git a/test/cpp/lite_interpreter_runtime/test_lite_interpreter_runtime.cpp b/test/cpp/lite_interpreter_runtime/test_lite_interpreter_runtime.cpp
index 1648b1e..e176e6b 100644
--- a/test/cpp/lite_interpreter_runtime/test_lite_interpreter_runtime.cpp
+++ b/test/cpp/lite_interpreter_runtime/test_lite_interpreter_runtime.cpp
@@ -21,21 +21,21 @@
   //  sequence.ptl source code:
   //  class A(torch.nn.Module):
   //    def __init__(self):
-  //      super(A, self).__init__()
+  //      super().__init__()
   //
   //    def forward(self, x):
   //      return x + 1
   //
   //  class B(torch.nn.Module):
   //    def __init__(self):
-  //      super(B, self).__init__()
+  //      super().__init__()
   //
   //    def forward(self, x):
   //      return x + 2
   //
   //  class C(torch.nn.Module):
   //    def __init__(self):
-  //      super(C, self).__init__()
+  //      super().__init__()
   //      self.A0 = A()
   //      self.B0 = B()
   //
diff --git a/test/cpp_api_parity/sample_module.py b/test/cpp_api_parity/sample_module.py
index 082df0a..e126bbd 100644
--- a/test/cpp_api_parity/sample_module.py
+++ b/test/cpp_api_parity/sample_module.py
@@ -13,7 +13,7 @@
 
 class SampleModule(torch.nn.Module):
     def __init__(self, has_parity, has_submodule):
-        super(SampleModule, self).__init__()
+        super().__init__()
         self.has_parity = has_parity
         if has_submodule:
             self.submodule = SampleModule(self.has_parity, False)
diff --git a/test/create_dummy_torchscript_model.py b/test/create_dummy_torchscript_model.py
index ffd869e..ba9f661 100644
--- a/test/create_dummy_torchscript_model.py
+++ b/test/create_dummy_torchscript_model.py
@@ -7,7 +7,7 @@
 class NeuralNetwork(nn.Module):
 
     def __init__(self):
-        super(NeuralNetwork, self).__init__()
+        super().__init__()
         self.flatten = nn.Flatten()
         self.linear_relu_stack = nn.Sequential(
             nn.Linear(28 * 28, 512),
diff --git a/test/custom_backend/backend.py b/test/custom_backend/backend.py
index 8b48ed0..7c81142 100644
--- a/test/custom_backend/backend.py
+++ b/test/custom_backend/backend.py
@@ -43,9 +43,6 @@
     and executing in C++.
     """
 
-    def __init__(self):
-        super(Model, self).__init__()
-
     def forward(self, a, b):
         return (a + b, a - b)
 
diff --git a/test/custom_operator/model.py b/test/custom_operator/model.py
index 5131b4a..ff9e310 100644
--- a/test/custom_operator/model.py
+++ b/test/custom_operator/model.py
@@ -19,7 +19,7 @@
 
 class Model(torch.jit.ScriptModule):
     def __init__(self):
-        super(Model, self).__init__()
+        super().__init__()
         self.p = torch.nn.Parameter(torch.eye(5))
 
     @torch.jit.script_method
diff --git a/test/distributed/_composable/test_replicate.py b/test/distributed/_composable/test_replicate.py
index 10a64cf..e5c9f0f 100644
--- a/test/distributed/_composable/test_replicate.py
+++ b/test/distributed/_composable/test_replicate.py
@@ -14,7 +14,7 @@
 
 class Net(nn.Module):
     def __init__(self):
-        super(Net, self).__init__()
+        super().__init__()
         self.fc1 = nn.Linear(2, 10, bias=False)
         self.fc2 = nn.Linear(10, 50, bias=False)
         self.fc3 = nn.Linear(50, 4, bias=False)
diff --git a/test/distributed/_shard/sharded_optim/test_sharded_optim.py b/test/distributed/_shard/sharded_optim/test_sharded_optim.py
index a884d64..24d99e2 100644
--- a/test/distributed/_shard/sharded_optim/test_sharded_optim.py
+++ b/test/distributed/_shard/sharded_optim/test_sharded_optim.py
@@ -29,7 +29,7 @@
 
 class MyShardedModel(torch.nn.Module):
     def __init__(self, spec=None, group=None):
-        super(MyShardedModel, self).__init__()
+        super().__init__()
         # Use same seed.
         torch.manual_seed(0)
         self.param = torch.nn.Parameter(torch.rand(5, 10))
@@ -47,7 +47,7 @@
 
 class MyShardedLinear(torch.nn.Module):
     def __init__(self, rank=None):
-        super(MyShardedLinear, self).__init__()
+        super().__init__()
         # Use same seed.
         torch.manual_seed(0)
         self.linear1 = torch.nn.Linear(17, 12)
diff --git a/test/distributed/_tensor/test_dtensor.py b/test/distributed/_tensor/test_dtensor.py
index d39c3a7..a58e781 100644
--- a/test/distributed/_tensor/test_dtensor.py
+++ b/test/distributed/_tensor/test_dtensor.py
@@ -18,7 +18,7 @@
 
 class DummyMLP(torch.nn.Module):
     def __init__(self, device):
-        super(DummyMLP, self).__init__()
+        super().__init__()
         self.net1 = torch.nn.Linear(5, 1024, device=device)
         self.relu = torch.nn.ReLU()
         self.net2 = torch.nn.Linear(1024, 4, device=device)
diff --git a/test/distributed/algorithms/ddp_comm_hooks/test_ddp_hooks.py b/test/distributed/algorithms/ddp_comm_hooks/test_ddp_hooks.py
index a685fb6..2d6a17b 100644
--- a/test/distributed/algorithms/ddp_comm_hooks/test_ddp_hooks.py
+++ b/test/distributed/algorithms/ddp_comm_hooks/test_ddp_hooks.py
@@ -43,7 +43,7 @@
 
 class Task(nn.Module):
     def __init__(self):
-        super(Task, self).__init__()
+        super().__init__()
         torch.manual_seed(0)
         self.p = nn.Parameter(torch.randn(40, 20))
 
@@ -62,7 +62,7 @@
 
 class DistributedDataParallelCommHookTest(MultiProcessTestCase):
     def setUp(self):
-        super(DistributedDataParallelCommHookTest, self).setUp()
+        super().setUp()
         self._spawn_processes()
 
     def tearDown(self):
diff --git a/test/distributed/algorithms/quantization/test_quantization.py b/test/distributed/algorithms/quantization/test_quantization.py
index a3b505d..368671a 100644
--- a/test/distributed/algorithms/quantization/test_quantization.py
+++ b/test/distributed/algorithms/quantization/test_quantization.py
@@ -43,12 +43,12 @@
     class DistQuantizationTests(MultiProcessTestCase):
 
         def setUp(self):
-            super(DistQuantizationTests, self).setUp()
+            super().setUp()
             self._spawn_processes()
             torch.backends.cudnn.flags(enabled=True, allow_tf32=False).__enter__()
 
         def tearDown(self):
-            super(DistQuantizationTests, self).tearDown()
+            super().tearDown()
             try:
                 os.remove(self.file_name)
             except OSError:
diff --git a/test/distributed/algorithms/test_join.py b/test/distributed/algorithms/test_join.py
index 2b8a376..66ec049 100644
--- a/test/distributed/algorithms/test_join.py
+++ b/test/distributed/algorithms/test_join.py
@@ -83,7 +83,7 @@
     per-iteration collective communication.
     """
     def __init__(self, device, process_group):
-        super(AllReducer, self).__init__()
+        super().__init__()
         self.device = device
         self.process_group = process_group
         self.post_hook_tensor = torch.tensor([BEFORE_CONSTANT], device=self.device)
@@ -139,7 +139,7 @@
 class TestJoin(MultiProcessTestCase):
     r"""Test cases for the generic join context."""
     def setUp(self):
-        super(TestJoin, self).setUp()
+        super().setUp()
         os.environ["WORLD_SIZE"] = str(self.world_size)
         os.environ["BACKEND"] = BACKEND
         self._spawn_processes()
diff --git a/test/distributed/checkpoint/test_2d_fsdp_dt_checkpoint.py b/test/distributed/checkpoint/test_2d_fsdp_dt_checkpoint.py
index 67096d2..7a815c3 100644
--- a/test/distributed/checkpoint/test_2d_fsdp_dt_checkpoint.py
+++ b/test/distributed/checkpoint/test_2d_fsdp_dt_checkpoint.py
@@ -39,7 +39,7 @@
 
 class SimpleModel(torch.nn.Module):
     def __init__(self):
-        super(SimpleModel, self).__init__()
+        super().__init__()
         self.net1 = torch.nn.Linear(5, 8)
         self.relu = torch.nn.ReLU()
         self.net2 = torch.nn.Linear(8, 4)
diff --git a/test/distributed/checkpoint/test_checkpoint.py b/test/distributed/checkpoint/test_checkpoint.py
index 6d0111a..a0002c3 100644
--- a/test/distributed/checkpoint/test_checkpoint.py
+++ b/test/distributed/checkpoint/test_checkpoint.py
@@ -185,7 +185,7 @@
 
 class FaultyStorageWriter(TestStorageBase, StorageWriter):
     def __init__(self, fail_conf):
-        super(FaultyStorageWriter, self).__init__(fail_conf)
+        super().__init__(fail_conf)
 
     def set_up_storage_writer(self, is_coordinator: bool) -> None:
         self._fail_rank("fail_set_up_storage_writer")
@@ -212,7 +212,7 @@
 
 class FaultyStorageReader(TestStorageBase, StorageReader):
     def __init__(self, metadata, fail_conf):
-        super(FaultyStorageReader, self).__init__(fail_conf)
+        super().__init__(fail_conf)
         self.metadata = metadata
 
     def set_up_storage_reader(self, metadata: Metadata, is_coordinator: bool) -> None:
diff --git a/test/distributed/checkpoint/test_file_system_checkpoint.py b/test/distributed/checkpoint/test_file_system_checkpoint.py
index c847c06..3d92e79 100644
--- a/test/distributed/checkpoint/test_file_system_checkpoint.py
+++ b/test/distributed/checkpoint/test_file_system_checkpoint.py
@@ -101,7 +101,7 @@
         self,
         spec: ShardingSpec,
     ) -> None:
-        super(MyShardedModel3, self).__init__()
+        super().__init__()
         self.sharded_tensor: ShardedTensor = sharded_tensor.rand(
             spec, 10, 20, init_rrefs=False
         )
diff --git a/test/distributed/checkpoint/test_file_system_checkpoint_cpu.py b/test/distributed/checkpoint/test_file_system_checkpoint_cpu.py
index 3fe2850..559f86b 100644
--- a/test/distributed/checkpoint/test_file_system_checkpoint_cpu.py
+++ b/test/distributed/checkpoint/test_file_system_checkpoint_cpu.py
@@ -100,7 +100,7 @@
         self,
         spec: ShardingSpec,
     ) -> None:
-        super(MyShardedModel3, self).__init__()
+        super().__init__()
         self.sharded_tensor: ShardedTensor = sharded_tensor.rand(
             spec, 10, 20, init_rrefs=False
         )
diff --git a/test/distributed/fsdp/test_checkpoint_wrapper.py b/test/distributed/fsdp/test_checkpoint_wrapper.py
index d8e005f..c6c5d54 100644
--- a/test/distributed/fsdp/test_checkpoint_wrapper.py
+++ b/test/distributed/fsdp/test_checkpoint_wrapper.py
@@ -22,9 +22,6 @@
 
 
 class CheckpointWrapperTest(TestCase):
-    def setUp(self):
-        super().setUp()
-
     def test_load_activation_checkpointed_module(self):
         lin = nn.Linear(10, 10, bias=False)
         lin = checkpoint_wrapper(
diff --git a/test/distributed/fsdp/test_fsdp_mixed_precision.py b/test/distributed/fsdp/test_fsdp_mixed_precision.py
index 35b80d4..70eb006 100644
--- a/test/distributed/fsdp/test_fsdp_mixed_precision.py
+++ b/test/distributed/fsdp/test_fsdp_mixed_precision.py
@@ -664,7 +664,7 @@
     def test_mp_batchnorm(self, convert_sync_bn):
         class BatchNormNet(nn.Module):
             def __init__(self, affine=True):
-                super(BatchNormNet, self).__init__()
+                super().__init__()
                 self.fc1 = nn.Linear(2, 40, bias=False)
                 self.bn = nn.BatchNorm1d(4, affine=affine)
                 self.fc2 = nn.Linear(40, 4, bias=False)
diff --git a/test/distributed/fsdp/test_fsdp_optim_state.py b/test/distributed/fsdp/test_fsdp_optim_state.py
index 35faead..0cd93b1 100644
--- a/test/distributed/fsdp/test_fsdp_optim_state.py
+++ b/test/distributed/fsdp/test_fsdp_optim_state.py
@@ -286,7 +286,7 @@
 
 class TestFSDPOptimState(FSDPTest):
     def __init__(self, *args, **kwargs):
-        super(TestFSDPOptimState, self).__init__(*args, **kwargs)
+        super().__init__(*args, **kwargs)
         self._model_class = {
             _ModelClass.NESTED: self._init_nested_model,
             _ModelClass.TRANSFORMER: self._init_transformer_model,
@@ -1655,7 +1655,7 @@
     def test_with_empty_optimizer_state(self):
         class TestDummyModel(torch.nn.Module):
             def __init__(self):
-                super(TestDummyModel, self).__init__()
+                super().__init__()
                 torch.manual_seed(0)
                 self.net1 = nn.Sequential(nn.Linear(8, 16), nn.ReLU())
                 self.net2 = nn.Sequential(nn.Linear(16, 32), nn.ReLU())
diff --git a/test/distributed/optim/test_named_optimizer.py b/test/distributed/optim/test_named_optimizer.py
index 2289fd2..7d30f6d 100644
--- a/test/distributed/optim/test_named_optimizer.py
+++ b/test/distributed/optim/test_named_optimizer.py
@@ -28,7 +28,7 @@
 
 class TestDummyModel(torch.nn.Module):
     def __init__(self):
-        super(TestDummyModel, self).__init__()
+        super().__init__()
         torch.manual_seed(0)
         self.net1 = nn.Sequential(nn.Linear(8, 16), nn.ReLU())
         self.net2 = nn.Sequential(nn.Linear(16, 32), nn.ReLU())
diff --git a/test/distributed/optim/test_zero_redundancy_optimizer.py b/test/distributed/optim/test_zero_redundancy_optimizer.py
index e67ba92..a125abe 100644
--- a/test/distributed/optim/test_zero_redundancy_optimizer.py
+++ b/test/distributed/optim/test_zero_redundancy_optimizer.py
@@ -64,7 +64,7 @@
 @unittest.skipIf(TEST_WITH_ASAN or TEST_WITH_DEV_DBG_ASAN, "CUDA + ASAN does not work.")
 class TestZeroRedundancyOptimizer(common_distributed.MultiProcessTestCase):
     def setUp(self):
-        super(TestZeroRedundancyOptimizer, self).setUp()
+        super().setUp()
         os.environ["WORLD_SIZE"] = str(self.world_size)
         self._spawn_processes()
 
diff --git a/test/distributed/rpc/test_share_memory.py b/test/distributed/rpc/test_share_memory.py
index 067233b..bdfddaa 100644
--- a/test/distributed/rpc/test_share_memory.py
+++ b/test/distributed/rpc/test_share_memory.py
@@ -53,9 +53,6 @@
     pass
 
 class TestRPCPickler(TestCase):
-    def setUp(self):
-        super().setUp()
-
     def test_case(self):
         os.environ['MASTER_ADDR'] = 'localhost'
         os.environ['MASTER_PORT'] = '29500'
diff --git a/test/distributed/tensor/parallel/test_2d_parallel.py b/test/distributed/tensor/parallel/test_2d_parallel.py
index e71be70..50ec700 100644
--- a/test/distributed/tensor/parallel/test_2d_parallel.py
+++ b/test/distributed/tensor/parallel/test_2d_parallel.py
@@ -29,7 +29,7 @@
 
 class SimpleModel(torch.nn.Module):
     def __init__(self):
-        super(SimpleModel, self).__init__()
+        super().__init__()
         self.net1 = torch.nn.Linear(5, 8)
         self.relu = torch.nn.ReLU()
         self.net2 = torch.nn.Linear(8, 4)
diff --git a/test/distributed/tensor/parallel/test_parallelize_api.py b/test/distributed/tensor/parallel/test_parallelize_api.py
index 780c53d..a7b3717 100644
--- a/test/distributed/tensor/parallel/test_parallelize_api.py
+++ b/test/distributed/tensor/parallel/test_parallelize_api.py
@@ -26,7 +26,7 @@
 
 class MLPModule(torch.nn.Module):
     def __init__(self, device):
-        super(MLPModule, self).__init__()
+        super().__init__()
         torch.manual_seed(5)
         self.net1 = torch.nn.Linear(10, 16, device=device)
         self.relu = torch.nn.ReLU()
diff --git a/test/distributed/tensor/parallel/test_tp_examples.py b/test/distributed/tensor/parallel/test_tp_examples.py
index 12ee9b0..59de182 100644
--- a/test/distributed/tensor/parallel/test_tp_examples.py
+++ b/test/distributed/tensor/parallel/test_tp_examples.py
@@ -20,7 +20,7 @@
 
 class MLPModule(torch.nn.Module):
     def __init__(self, device):
-        super(MLPModule, self).__init__()
+        super().__init__()
         torch.manual_seed(5)
         self.net1 = torch.nn.Linear(10, 16, device=device)
         self.relu = torch.nn.ReLU()
diff --git a/test/distributed/test_c10d_common.py b/test/distributed/test_c10d_common.py
index 87c804a..6c16401 100644
--- a/test/distributed/test_c10d_common.py
+++ b/test/distributed/test_c10d_common.py
@@ -131,7 +131,7 @@
 
 class Net(nn.Module):
     def __init__(self):
-        super(Net, self).__init__()
+        super().__init__()
         self.fc1 = nn.Linear(2, 10, bias=False)
         self.fc2 = nn.Linear(10, 50, bias=False)
         self.fc3 = nn.Linear(50, 4, bias=False)
@@ -146,7 +146,7 @@
 
 class DoubleGpuNet(nn.Module):
     def __init__(self, gpus):
-        super(DoubleGpuNet, self).__init__()
+        super().__init__()
         self.fc1 = nn.Linear(2, 10, bias=False).to(gpus[0])
         self.fc2 = nn.Linear(10, 50, bias=False).to(gpus[1])
         self.fc3 = nn.Linear(50, 4, bias=False).to(gpus[1])
@@ -166,7 +166,7 @@
 
 class QuadraGpuNet(nn.Module):
     def __init__(self, gpus):
-        super(QuadraGpuNet, self).__init__()
+        super().__init__()
         self.fc1 = nn.Linear(2, 10, bias=False).to(gpus[0])
         self.fc2 = nn.Linear(10, 50, bias=False).to(gpus[1])
         self.fc3 = nn.Linear(50, 4, bias=False).to(gpus[2])
@@ -190,7 +190,7 @@
 
 class ConvNet(nn.Module):
     def __init__(self, gpus, layouts, dtypes):
-        super(ConvNet, self).__init__()
+        super().__init__()
         self.dtypes = dtypes
         if isinstance(gpus, list):
             self.layer_gpus = gpus
@@ -242,7 +242,7 @@
 
 class SparseGradientModule(nn.Module):
     def __init__(self):
-        super(SparseGradientModule, self).__init__()
+        super().__init__()
         self.embedding = nn.EmbeddingBag(10, 10, sparse=True)
 
     def forward(self, x):
@@ -1300,11 +1300,11 @@
 
 class CommTest(AbstractCommTest, MultiProcessTestCase):
     def setUp(self):
-        super(CommTest, self).setUp()
+        super().setUp()
         self._spawn_processes()
 
     def tearDown(self):
-        super(CommTest, self).tearDown()
+        super().tearDown()
         try:
             os.remove(self.file_name)
         except OSError:
@@ -1419,11 +1419,11 @@
 
 class PythonProcessGroupExtensionTest(MultiProcessTestCase):
     def setUp(self):
-        super(PythonProcessGroupExtensionTest, self).setUp()
+        super().setUp()
         self._spawn_processes()
 
     def tearDown(self):
-        super(PythonProcessGroupExtensionTest, self).tearDown()
+        super().tearDown()
         try:
             os.remove(self.file_name)
         except OSError:
@@ -1522,11 +1522,11 @@
         return 1
 
     def setUp(self):
-        super(ProcessGroupWithDispatchedCollectivesTests, self).setUp()
+        super().setUp()
         self._spawn_processes()
 
     def tearDown(self):
-        super(ProcessGroupWithDispatchedCollectivesTests, self).tearDown()
+        super().tearDown()
         try:
             os.remove(self.file_name)
         except OSError:
@@ -1639,11 +1639,11 @@
 
 class CompilerTest(MultiProcessTestCase):
     def setUp(self):
-        super(CompilerTest, self).setUp()
+        super().setUp()
         self._spawn_processes()
 
     def tearDown(self):
-        super(CompilerTest, self).tearDown()
+        super().tearDown()
         try:
             os.remove(self.file_name)
         except OSError:
diff --git a/test/distributed/test_c10d_error_logger.py b/test/distributed/test_c10d_error_logger.py
index 7c8a624..868d449 100644
--- a/test/distributed/test_c10d_error_logger.py
+++ b/test/distributed/test_c10d_error_logger.py
@@ -50,7 +50,7 @@
 
 class C10dErrorLoggerTest(MultiProcessTestCase):
     def setUp(self):
-        super(C10dErrorLoggerTest, self).setUp()
+        super().setUp()
         os.environ["WORLD_SIZE"] = str(self.world_size)
         os.environ["BACKEND"] = BACKEND
         self._spawn_processes()
diff --git a/test/distributed/test_c10d_gloo.py b/test/distributed/test_c10d_gloo.py
index dfdfe44..5da1a85 100644
--- a/test/distributed/test_c10d_gloo.py
+++ b/test/distributed/test_c10d_gloo.py
@@ -217,7 +217,7 @@
         return pg
 
     def setUp(self):
-        super(ProcessGroupGlooTest, self).setUp()
+        super().setUp()
         self._spawn_processes()
 
     def opts(self, threads=2):
@@ -1458,7 +1458,7 @@
     test_c10d_common.CommonDistributedDataParallelTest, MultiProcessTestCase
 ):
     def setUp(self):
-        super(DistributedDataParallelTest, self).setUp()
+        super().setUp()
         self._spawn_processes()
 
     def _get_process_group(self):
@@ -1528,7 +1528,7 @@
 
         class GlobalLocalUnusedParamModule(nn.Module):
             def __init__(self):
-                super(GlobalLocalUnusedParamModule, self).__init__()
+                super().__init__()
                 self.t0 = Task()
                 self.t1 = Task()
                 self.task_unused = Task()
@@ -1610,7 +1610,7 @@
 
         class FindUnusedParamModule(nn.Module):
             def __init__(self):
-                super(FindUnusedParamModule, self).__init__()
+                super().__init__()
                 self.t0 = Task()
                 self.t1 = Task()
 
@@ -1663,7 +1663,7 @@
 
         class IgnoredOutput(nn.Module):
             def __init__(self):
-                super(IgnoredOutput, self).__init__()
+                super().__init__()
                 self.fc1 = nn.Linear(2, 10, bias=False)
                 self.fc2 = nn.Linear(10, 4, bias=False)
                 self.relu = nn.ReLU()
@@ -1705,7 +1705,7 @@
 
         class IgnoredOutputWithUnusedParameters(nn.Module):
             def __init__(self):
-                super(IgnoredOutputWithUnusedParameters, self).__init__()
+                super().__init__()
                 self.fc1 = nn.Linear(2, 10, bias=False)
                 self.fc2 = nn.Linear(10, 4, bias=False)
                 self.fc3 = nn.Linear(4, 4, bias=False)
@@ -1813,7 +1813,7 @@
 
         class TestModel(nn.Module):
             def __init__(self):
-                super(TestModel, self).__init__()
+                super().__init__()
                 self.fc1 = nn.Linear(2, 10, bias=False)
                 self.fc2 = nn.Linear(10, 4, bias=False)
                 self.relu = nn.ReLU()
@@ -2113,7 +2113,7 @@
 
 class ReducerModule(nn.Module):
     def __init__(self):
-        super(ReducerModule, self).__init__()
+        super().__init__()
         self.fc1 = nn.Linear(2, 10, bias=False)
         self.fc2 = nn.Linear(10, 4, bias=False)
         self.fc3 = nn.Linear(4, 4, bias=False)
@@ -2269,11 +2269,11 @@
 
 
     def setUp(self):
-        super(CommTest, self).setUp()
+        super().setUp()
         self._spawn_processes()
 
     def tearDown(self):
-        super(CommTest, self).tearDown()
+        super().tearDown()
         try:
             os.remove(self.file_name)
         except OSError:
diff --git a/test/distributed/test_c10d_nccl.py b/test/distributed/test_c10d_nccl.py
index a1c7ad2..1a1de0a 100644
--- a/test/distributed/test_c10d_nccl.py
+++ b/test/distributed/test_c10d_nccl.py
@@ -221,7 +221,7 @@
         return opts
 
     def setUp(self):
-        super(ProcessGroupNCCLTest, self).setUp()
+        super().setUp()
         # NCCL_BLOCKING_WAIT overrides NCCL_ASYNC_ERROR_HANDLING hence tests
         # that use NCCL_BLOCKING_WAIT will test it as expected.
         os.environ["NCCL_ASYNC_ERROR_HANDLING"] = "1"
@@ -229,7 +229,7 @@
         self._spawn_processes()
 
     def tearDown(self):
-        super(ProcessGroupNCCLTest, self).tearDown()
+        super().tearDown()
         try:
             os.remove(self.file_name)
         except OSError:
@@ -1033,7 +1033,7 @@
     test_c10d_common.CommonDistributedDataParallelTest, MultiProcessTestCase
 ):
     def setUp(self):
-        super(DistributedDataParallelTest, self).setUp()
+        super().setUp()
         # NCCL_BLOCKING_WAIT overrides NCCL_ASYNC_ERROR_HANDLING hence tests
         # that use NCCL_BLOCKING_WAIT will test it as expected.
         os.environ["NCCL_ASYNC_ERROR_HANDLING"] = "1"
@@ -1240,7 +1240,7 @@
 
         class ForwardReturnValueModule(nn.Module):
             def __init__(self):
-                super(ForwardReturnValueModule, self).__init__()
+                super().__init__()
                 self.fc1 = nn.Linear(2, 10, bias=False)
                 self.fc2 = nn.Linear(10, 4, bias=False)
                 self.fc3 = nn.Linear(4, 4, bias=False)
@@ -1358,7 +1358,7 @@
 
         class FindUnusedParametersModule(nn.Module):
             def __init__(self):
-                super(FindUnusedParametersModule, self).__init__()
+                super().__init__()
                 self.fc1 = nn.Linear(2, 10, bias=False)
                 self.fc2 = nn.Linear(10, 4, bias=False)
                 self.fc3 = nn.Linear(4, 4, bias=False)
@@ -1504,7 +1504,7 @@
 
         class MultipleOutputModule(nn.Module):
             def __init__(self):
-                super(MultipleOutputModule, self).__init__()
+                super().__init__()
 
                 def define_module():
                     return nn.Sequential(
@@ -1566,7 +1566,7 @@
 
         class NoGradModule(nn.Module):
             def __init__(self):
-                super(NoGradModule, self).__init__()
+                super().__init__()
                 self.fc1 = nn.Linear(2, 10, bias=False)
                 self.fc2 = nn.Linear(10, 4, bias=False)
                 self.relu = nn.ReLU()
@@ -1681,7 +1681,7 @@
 
         class TestModel(nn.Module):
             def __init__(self):
-                super(TestModel, self).__init__()
+                super().__init__()
                 self.fc1 = nn.Linear(2, 10, bias=False)
                 self.fc2 = nn.Linear(10, 4, bias=False)
                 self.relu = nn.ReLU()
@@ -2350,7 +2350,7 @@
 
 class NcclErrorHandlingTest(MultiProcessTestCase):
     def setUp(self):
-        super(NcclErrorHandlingTest, self).setUp()
+        super().setUp()
         # Need to skip return code checking for these tests since the child
         # processes don't exit cleanly.
         self.skip_return_code_checks = [
@@ -2365,7 +2365,7 @@
         self._spawn_processes()
 
     def tearDown(self):
-        super(NcclErrorHandlingTest, self).tearDown()
+        super().tearDown()
         try:
             os.remove(self.file_name)
         except OSError:
@@ -2593,14 +2593,14 @@
 
 
     def setUp(self):
-        super(CommTest, self).setUp()
+        super().setUp()
         # NCCL_BLOCKING_WAIT overrides NCCL_ASYNC_ERROR_HANDLING hence tests
         # that use NCCL_BLOCKING_WAIT will test it as expected.
         os.environ["NCCL_ASYNC_ERROR_HANDLING"] = "1"
         self._spawn_processes()
 
     def tearDown(self):
-        super(CommTest, self).tearDown()
+        super().tearDown()
         try:
             os.remove(self.file_name)
         except OSError:
diff --git a/test/distributed/test_c10d_object_collectives.py b/test/distributed/test_c10d_object_collectives.py
index eed8570..a132e69 100644
--- a/test/distributed/test_c10d_object_collectives.py
+++ b/test/distributed/test_c10d_object_collectives.py
@@ -41,7 +41,7 @@
 
 class TestObjectCollectives(MultiProcessTestCase):
     def setUp(self):
-        super(TestObjectCollectives, self).setUp()
+        super().setUp()
         os.environ["WORLD_SIZE"] = str(self.world_size)
         os.environ["BACKEND"] = BACKEND
         self._spawn_processes()
diff --git a/test/distributed/test_c10d_pypg.py b/test/distributed/test_c10d_pypg.py
index 9c9e0c4..32f3359 100644
--- a/test/distributed/test_c10d_pypg.py
+++ b/test/distributed/test_c10d_pypg.py
@@ -43,7 +43,7 @@
     This PG only supports world_size of 1
     """
     def __init__(self, rank, world, use_wrapper):
-        super(LonelyRankProcessGroup, self).__init__(rank, world)
+        super().__init__(rank, world)
         assert rank == 0
         assert world == 1
 
@@ -91,7 +91,7 @@
 # We cannot use parametrize as some tests are defined on the base class and use _get_process_group
 class AbstractDDPSingleRank(test_c10d_common.CommonDistributedDataParallelTest):
     def setUp(self):
-        super(AbstractDDPSingleRank, self).setUp()
+        super().setUp()
         self._spawn_processes()
 
     @property
@@ -99,7 +99,7 @@
         return 1
 
     def tearDown(self):
-        super(AbstractDDPSingleRank, self).tearDown()
+        super().tearDown()
         try:
             os.remove(self.file_name)
         except OSError:
diff --git a/test/distributed/test_c10d_spawn.py b/test/distributed/test_c10d_spawn.py
index 8e813b2..8ac496e 100644
--- a/test/distributed/test_c10d_spawn.py
+++ b/test/distributed/test_c10d_spawn.py
@@ -103,11 +103,11 @@
 
 class TestDistributedNNFunctions(MultiProcessTestCase):
     def setUp(self):
-        super(TestDistributedNNFunctions, self).setUp()
+        super().setUp()
         self._spawn_processes()
 
     def tearDown(self):
-        super(TestDistributedNNFunctions, self).tearDown()
+        super().tearDown()
         try:
             os.remove(self.file_name)
         except OSError:
diff --git a/test/distributed/test_c10d_spawn_gloo.py b/test/distributed/test_c10d_spawn_gloo.py
index fbff4cc..0be3fc2 100644
--- a/test/distributed/test_c10d_spawn_gloo.py
+++ b/test/distributed/test_c10d_spawn_gloo.py
@@ -155,7 +155,7 @@
 
         class Net(nn.Module):
             def __init__(self, input_dim, hidden_dim, output_dim, hidden_layers):
-                super(Net, self).__init__()
+                super().__init__()
                 self.input_dim = input_dim
                 self.hidden_dim = hidden_dim
                 self.output_dim = output_dim
diff --git a/test/distributed/test_data_parallel.py b/test/distributed/test_data_parallel.py
index c172034..3a062b8 100644
--- a/test/distributed/test_data_parallel.py
+++ b/test/distributed/test_data_parallel.py
@@ -33,7 +33,7 @@
     def test_data_parallel_buffers_requiring_grad(self):
         class TestModule(nn.Module):
             def __init__(self, t):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.register_buffer('t_rg', t)
                 self.register_buffer('t_not_rg', t.clone().detach())
 
@@ -57,7 +57,7 @@
         class TestModule(torch.nn.Module):
 
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.rnn = torch.nn.LSTM(300, 1024, 1, batch_first=True, bidirectional=True)
 
             def forward(self, x):
@@ -305,7 +305,7 @@
 
         class Model(nn.Module):
             def __init__(self):
-                super(Model, self).__init__()
+                super().__init__()
                 self.linear = nn.Linear(1, 1)
 
             def forward(self, x):
@@ -630,7 +630,7 @@
 
         class Net(torch.nn.Module):
             def __init__(self, testcase):
-                super(Net, self).__init__()
+                super().__init__()
                 self._testcase = testcase
 
             def forward(self, x):
@@ -648,11 +648,11 @@
     def test_autocast(self):
         class Model(torch.nn.Linear):
             def __init__(self):
-                super(Model, self).__init__(8, 8)
+                super().__init__(8, 8)
 
             @torch.cuda.amp.autocast()
             def forward(self, input):
-                return super(Model, self).forward(input)
+                return super().forward(input)
 
         model = dp.DataParallel(Model().cuda().to(dtype=torch.float32))
         input = torch.randn((8, 8), dtype=torch.float32, device="cuda")
@@ -672,7 +672,7 @@
     def test_strided_grad_layout(self):
         class ConvNet(nn.Module):
             def __init__(self, layouts, dtype_list):
-                super(ConvNet, self).__init__()
+                super().__init__()
                 self.dtypes = dtype_list
                 self.conv0 = torch.nn.Conv2d(8, 16, (2, 2)).to(memory_format=layouts[0], dtype=dtype_list[0])
                 self.conv1 = torch.nn.Conv2d(16, 32, (2, 2)).to(memory_format=layouts[1], dtype=dtype_list[1])
@@ -742,7 +742,7 @@
     def test_parameter_list_dict_replica(self):
         class MyMod(torch.nn.Module):
             def __init__(self, data, check_fn):
-                super(MyMod, self).__init__()
+                super().__init__()
                 self.data = data
                 self.check_fn = check_fn
 
@@ -800,7 +800,7 @@
     def test_data_parallel_module_kwargs_only(self, device, dtype):
         class Net(nn.Module):
             def __init__(self):
-                super(Net, self).__init__()
+                super().__init__()
                 self.l = l
 
             def forward(self, input):
@@ -820,7 +820,7 @@
     def test_data_parallel_module_kwargs_only_empty_list(self, device, dtype):
         class Net(nn.Module):
             def __init__(self):
-                super(Net, self).__init__()
+                super().__init__()
                 self.l = l
 
             def forward(self, input):
@@ -840,7 +840,7 @@
     def test_data_parallel_module_kwargs_only_empty_dict(self, device, dtype):
         class Net(nn.Module):
             def __init__(self):
-                super(Net, self).__init__()
+                super().__init__()
                 self.l = l
 
             def forward(self, input):
@@ -860,7 +860,7 @@
     def test_data_parallel_module_kwargs_only_empty_tuple(self, device, dtype):
         class Net(nn.Module):
             def __init__(self):
-                super(Net, self).__init__()
+                super().__init__()
                 self.l = l
 
             def forward(self, input):
diff --git a/test/distributed/test_dynamo_distributed.py b/test/distributed/test_dynamo_distributed.py
index cbea661..77fee11 100644
--- a/test/distributed/test_dynamo_distributed.py
+++ b/test/distributed/test_dynamo_distributed.py
@@ -62,7 +62,7 @@
 def get_custom_model(device):
     class MyCustomLinear(torch.nn.Module):
         def __init__(self):
-            super(MyCustomLinear, self).__init__()
+            super().__init__()
             self.weight = nn.Parameter(torch.randn(512, 512))
 
         def forward(self, x):
@@ -73,7 +73,7 @@
 
     class MyLinear(torch.nn.Module):
         def __init__(self):
-            super(MyLinear, self).__init__()
+            super().__init__()
             self.linear = torch.nn.Linear(512, 512)
 
         def forward(self, x):
@@ -81,7 +81,7 @@
 
     class MyModule(torch.nn.Module):
         def __init__(self):
-            super(MyModule, self).__init__()
+            super().__init__()
             mods = [
                 (MyLinear(), torch.nn.ReLU()),
                 # sandwich the custom in the middle so it comes before and after
diff --git a/test/distributed/test_pg_wrapper.py b/test/distributed/test_pg_wrapper.py
index c9bafe0..8bb176d 100644
--- a/test/distributed/test_pg_wrapper.py
+++ b/test/distributed/test_pg_wrapper.py
@@ -28,7 +28,7 @@
 
 class AbstractProcessGroupWrapperTest(MultiProcessTestCase):
     def setUp(self):
-        super(AbstractProcessGroupWrapperTest, self).setUp()
+        super().setUp()
         self._spawn_processes()
 
     def _validate_error(self, exception, op_type, rank, tensor):
@@ -335,9 +335,6 @@
 
 @requires_gloo()
 class ProcessGroupGlooWrapperTest(AbstractProcessGroupWrapperTest):
-    def setUp(self):
-        super(ProcessGroupGlooWrapperTest, self).setUp()
-
     def opts(self, threads=2, timeout=10.0):
         opts = c10d.ProcessGroupGloo._Options()
         opts._timeout = timeout
diff --git a/test/distributed/test_store.py b/test/distributed/test_store.py
index a479527..eb7afae 100644
--- a/test/distributed/test_store.py
+++ b/test/distributed/test_store.py
@@ -122,7 +122,7 @@
 
 class FileStoreTest(TestCase, StoreTestBase):
     def setUp(self):
-        super(FileStoreTest, self).setUp()
+        super().setUp()
         self.file = tempfile.NamedTemporaryFile(delete=False)
 
     def _create_store(self):
@@ -162,9 +162,6 @@
 
 @skip_if_win32()
 class HashStoreTest(TestCase, StoreTestBase):
-    def setUp(self):
-        super(HashStoreTest, self).setUp()
-
     def _create_store(self):
         store = dist.HashStore()
         store.set_timeout(timedelta(seconds=300))
@@ -186,7 +183,7 @@
 
 class PrefixFileStoreTest(TestCase, StoreTestBase):
     def setUp(self):
-        super(PrefixFileStoreTest, self).setUp()
+        super().setUp()
         self.file = tempfile.NamedTemporaryFile(delete=False)
         self.filestore = dist.FileStore(self.file.name, 1)
         self.prefix = "test_prefix"
@@ -317,7 +314,7 @@
 
 class PrefixTCPStoreTest(TestCase, StoreTestBase):
     def setUp(self):
-        super(PrefixTCPStoreTest, self).setUp()
+        super().setUp()
         self.tcpstore = create_tcp_store()
         self.prefix = "test_prefix"
         self.tcpstore.set_timeout(timedelta(seconds=300))
@@ -335,7 +332,7 @@
 
 class MyPythonStore(dist.Store):
     def __init__(self):
-        super(MyPythonStore, self).__init__()
+        super().__init__()
         self.store = {}
 
     def set(self, key, value):
@@ -358,9 +355,6 @@
 
 
 class PythonStoreTest(TestCase):
-    def setUp(self):
-        super(PythonStoreTest, self).setUp()
-
     def test_set_get(self):
         # If we were to inherit from StoreTestBase and try to use
         # its test_set_get function, we would exercise the Python
diff --git a/test/distributions/test_distributions.py b/test/distributions/test_distributions.py
index af3c706..836b595 100644
--- a/test/distributions/test_distributions.py
+++ b/test/distributions/test_distributions.py
@@ -797,7 +797,7 @@
     def setUp(self):
         """The tests assume that the validation flag is set."""
         torch.distributions.Distribution.set_default_validate_args(True)
-        super(DistributionsTestCase, self).setUp()
+        super().setUp()
 
 
 @skipIfTorchDynamo("Not a TorchDynamo suitable test")
@@ -3466,14 +3466,11 @@
 
 class TestDistributionShapes(DistributionsTestCase):
     def setUp(self):
-        super(TestDistributionShapes, self).setUp()
+        super().setUp()
         self.scalar_sample = 1
         self.tensor_sample_1 = torch.ones(3, 2)
         self.tensor_sample_2 = torch.ones(3, 2, 3)
 
-    def tearDown(self):
-        super(TestDistributionShapes, self).tearDown()
-
     def test_entropy_shape(self):
         for Dist, params in EXAMPLES:
             for i, param in enumerate(params):
@@ -3930,11 +3927,11 @@
 class TestKL(DistributionsTestCase):
 
     def setUp(self):
-        super(TestKL, self).setUp()
+        super().setUp()
 
         class Binomial30(Binomial):
             def __init__(self, probs):
-                super(Binomial30, self).__init__(30, probs)
+                super().__init__(30, probs)
 
         # These are pairs of distributions with 4 x 4 parameters as specified.
         # The first of the pair e.g. bernoulli[0] varies column-wise and the second
@@ -4593,7 +4590,7 @@
 # TODO: make this a pytest parameterized test
 class TestLazyLogitsInitialization(DistributionsTestCase):
     def setUp(self):
-        super(TestLazyLogitsInitialization, self).setUp()
+        super().setUp()
         # ContinuousBernoulli is not tested because log_prob is not computed simply
         # from 'logits', but 'probs' is also needed
         self.examples = [e for e in EXAMPLES if e.Dist in
@@ -4640,7 +4637,7 @@
 @unittest.skipIf(not TEST_NUMPY, "NumPy not found")
 class TestAgainstScipy(DistributionsTestCase):
     def setUp(self):
-        super(TestAgainstScipy, self).setUp()
+        super().setUp()
         positive_var = torch.randn(20).exp()
         positive_var2 = torch.randn(20).exp()
         random_var = torch.randn(20)
@@ -4931,9 +4928,6 @@
 
 
 class TestValidation(DistributionsTestCase):
-    def setUp(self):
-        super(TestValidation, self).setUp()
-
     def test_valid(self):
         for Dist, params in EXAMPLES:
             for param in params:
@@ -5021,9 +5015,6 @@
         with self.assertWarns(UserWarning):
             d.log_prob(sample)
 
-    def tearDown(self):
-        super(TestValidation, self).tearDown()
-
 
 class TestJit(DistributionsTestCase):
     def _examples(self):
diff --git a/test/dynamo/test_aot_autograd.py b/test/dynamo/test_aot_autograd.py
index a59df7c..f5476f1 100644
--- a/test/dynamo/test_aot_autograd.py
+++ b/test/dynamo/test_aot_autograd.py
@@ -122,7 +122,7 @@
     def test_call_fn_with_non_const_inputs_aot_safe(self):
         class ModuleSpecialFwd(torch.nn.Module):
             def __init__(self):
-                super(ModuleSpecialFwd, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(
                     in_channels=3, out_channels=20, kernel_size=(5, 5)
                 )
@@ -151,9 +151,6 @@
 
     def test_call_fn_with_non_const_inputs_aot_unsafe(self):
         class ModuleSpecialFwd(torch.nn.Module):
-            def __init__(self):
-                super(ModuleSpecialFwd, self).__init__()
-
             def _some_bad_fwd(self, param, y):
                 prev_grad = torch.is_grad_enabled()
                 try:
@@ -190,9 +187,6 @@
 
     def test_call_fn_with_non_const_inputs_aot_unsafe_control_flow(self):
         class ModuleSpecialFwd(torch.nn.Module):
-            def __init__(self):
-                super(ModuleSpecialFwd, self).__init__()
-
             def _some_bad_fwd(self, param, y):
                 if y[0][0] < 3:
                     return y + param
diff --git a/test/dynamo/test_backends.py b/test/dynamo/test_backends.py
index 82c30f4..0749bac 100644
--- a/test/dynamo/test_backends.py
+++ b/test/dynamo/test_backends.py
@@ -32,7 +32,7 @@
 
 class Conv_Bn_Relu(torch.nn.Module):
     def __init__(self, in_channels, out_channels, **kwargs):
-        super(Conv_Bn_Relu, self).__init__()
+        super().__init__()
         self.conv = torch.nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
         self.bn = torch.nn.BatchNorm2d(out_channels, eps=0.001)
         self.relu = torch.nn.ReLU()
diff --git a/test/dynamo/test_export.py b/test/dynamo/test_export.py
index 4069148..5a51399 100644
--- a/test/dynamo/test_export.py
+++ b/test/dynamo/test_export.py
@@ -897,9 +897,6 @@
         inp = torch.randn(4, 4)
 
         class MyBlock(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 x = torch.nn.functional.linear(x, torch.randn(4, 4))
                 return torch.cos(x).relu() + 1
@@ -1117,9 +1114,6 @@
             return torch.nonzero(x)
 
         class MyModule(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x, z):
                 y = helper_fn(x) + helper_fn(z)
                 return y
@@ -1488,9 +1482,6 @@
         from functorch.experimental.control_flow import cond
 
         class Module(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 def true_fn(x):
                     return x + x
@@ -1511,9 +1502,6 @@
         from functorch.experimental.control_flow import cond, map
 
         class Module(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def inner(self, x, pred):
                 def true_fn(x):
                     return x + x
@@ -1545,9 +1533,6 @@
         from functorch.experimental.control_flow import map
 
         class Module(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, xs):
                 def body(x):
                     return x + 1
@@ -1673,9 +1658,6 @@
     @patch.object(torch._dynamo.config, "capture_scalar_outputs", True)
     def test_export_cond_in_aten_symbolic(self):
         class ConditionOp(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def true_fn(self, x, y):
                 return x * y
 
diff --git a/test/dynamo/test_export_mutations.py b/test/dynamo/test_export_mutations.py
index 218935d..1bc5280 100644
--- a/test/dynamo/test_export_mutations.py
+++ b/test/dynamo/test_export_mutations.py
@@ -57,9 +57,6 @@
     def test_module_attribute_mutation_violation_positive_4(self):
         # Mutating attribute with an inline function
         class Foo(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def add(self, a, b):
                 return a + b
 
diff --git a/test/dynamo/test_functions.py b/test/dynamo/test_functions.py
index 0575415..811fbb4 100644
--- a/test/dynamo/test_functions.py
+++ b/test/dynamo/test_functions.py
@@ -782,9 +782,6 @@
 
 
 class ModuleWithDefaultTensorArgsMethod(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-
     def forward(self, x=torch.zeros((2, 2)), *, kw_x=torch.zeros((1, 2))):
         x.add_(1)
         kw_x.add_(1)
diff --git a/test/dynamo/test_misc.py b/test/dynamo/test_misc.py
index 7f94e89..2f1c083 100644
--- a/test/dynamo/test_misc.py
+++ b/test/dynamo/test_misc.py
@@ -3488,7 +3488,7 @@
     def test_if_cond_nn_mod(self):
         class MockModule(torch.nn.Module):
             def __init__(self, output_relu=True):
-                super(MockModule, self).__init__()
+                super().__init__()
                 self.relu = torch.nn.ReLU() if output_relu else None
 
             def forward(self, x):
@@ -4167,9 +4167,6 @@
 
 
 class Module1(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-
     def forward(self, foo):
         return CustomFunc1().apply(foo)
 
@@ -4184,9 +4181,6 @@
 
 
 class Module3(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-
     def forward(self, foo):
         return CustomFunc2().apply(foo)
 
@@ -4201,9 +4195,6 @@
 
 
 class Module5(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-
     def forward(self, foo):
         return CustomFunc3().apply(foo)
 
@@ -4222,7 +4213,7 @@
         def fn():
             class Foo(torch.nn.Module):
                 def __init__(self):
-                    super(Foo, self).__init__()
+                    super().__init__()
                     self.a = 3
 
                 @torch.jit.export
diff --git a/test/dynamo/test_modules.py b/test/dynamo/test_modules.py
index 822b9fb..b43d036 100644
--- a/test/dynamo/test_modules.py
+++ b/test/dynamo/test_modules.py
@@ -604,9 +604,6 @@
 
 
 class ModuleAttributePrecedenceBase(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-
     def linear(self, x):
         return x * 2.0
 
@@ -1001,7 +998,7 @@
     def test_call_fn_with_non_const_inputs_safe(self):
         class ModuleSpecialFwd(torch.nn.Module):
             def __init__(self):
-                super(ModuleSpecialFwd, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(
                     in_channels=3, out_channels=20, kernel_size=(5, 5)
                 )
diff --git a/test/dynamo/test_optimizers.py b/test/dynamo/test_optimizers.py
index b8b5f99..62c3334 100644
--- a/test/dynamo/test_optimizers.py
+++ b/test/dynamo/test_optimizers.py
@@ -80,9 +80,6 @@
     # https://github.com/pytorch/torchdynamo/issues/1604
     def test_optimizing_over_tensor_with_requires_grad(self):
         class Net(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x, y):
                 z = torch.bmm(x, y)
                 z = torch.flatten(z, 1)
diff --git a/test/dynamo/test_repros.py b/test/dynamo/test_repros.py
index d203055..c8003ee 100644
--- a/test/dynamo/test_repros.py
+++ b/test/dynamo/test_repros.py
@@ -352,7 +352,7 @@
 class PartialT5(torch.nn.Module):
     # Highly simplified T5Attention prefix
     def __init__(self):
-        super(PartialT5, self).__init__()
+        super().__init__()
         self.q = torch.nn.Linear(512, 512)
         self.k = torch.nn.Linear(512, 512)
         self.v = torch.nn.Linear(512, 512)
@@ -461,7 +461,7 @@
 
 class FakeMamlInner(torch.nn.Module):
     def __init__(self):
-        super(FakeMamlInner, self).__init__()
+        super().__init__()
         self.linear = torch.nn.Linear(784, 5)
 
     def forward(self, x, ignored=None, bn_training=False):
@@ -471,7 +471,7 @@
 class PartialMaml(torch.nn.Module):
     # Highly simplified version of maml.meta.Meta.finetuning
     def __init__(self):
-        super(PartialMaml, self).__init__()
+        super().__init__()
         self.net = FakeMamlInner()
         self.update_step_test = 10
         self.update_lr = 0.4
@@ -571,9 +571,6 @@
 class SequentialAppendList(torch.nn.Sequential):
     """from timm/models/vovnet.py"""
 
-    def __init__(self, *args):
-        super(SequentialAppendList, self).__init__(*args)
-
     def forward(self, x: torch.Tensor, concat_list: List[torch.Tensor]) -> torch.Tensor:
         for i, module in enumerate(self):
             if i == 0:
@@ -597,7 +594,7 @@
         act_layer=torch.nn.ReLU,
         inplace=True,
     ):
-        super(BatchNormAct2d, self).__init__(
+        super().__init__(
             num_features,
             eps=eps,
             momentum=momentum,
@@ -693,7 +690,7 @@
 
 class FeedForwardLayer(nn.Module):
     def __init__(self, d_model, dim_feedforward, activation, dropout) -> None:
-        super(FeedForwardLayer, self).__init__()
+        super().__init__()
         self.linear1 = nn.Linear(d_model, dim_feedforward)
         self.activation = activation
         self.dropout1 = nn.Dropout(dropout)
@@ -716,7 +713,7 @@
         activation=nn.ReLU(),
         layer_norm_eps=1e-5,
     ):
-        super(TransformerEncoderLayer, self).__init__()
+        super().__init__()
         self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
         self.norm1 = nn.LayerNorm(d_model, eps=layer_norm_eps)
         self.norm2 = nn.LayerNorm(d_model, eps=layer_norm_eps)
diff --git a/test/dynamo/test_verify_correctness.py b/test/dynamo/test_verify_correctness.py
index e05eb3f..f3b3044 100644
--- a/test/dynamo/test_verify_correctness.py
+++ b/test/dynamo/test_verify_correctness.py
@@ -28,7 +28,7 @@
 
 class Conv_Bn_Relu(torch.nn.Module):
     def __init__(self, in_channels, out_channels, **kwargs):
-        super(Conv_Bn_Relu, self).__init__()
+        super().__init__()
         self.conv = torch.nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
         self.bn = torch.nn.BatchNorm2d(out_channels, eps=0.001)
         self.relu = torch.nn.ReLU()
diff --git a/test/functorch/test_aotdispatch.py b/test/functorch/test_aotdispatch.py
index ebf8358..56f59c8 100644
--- a/test/functorch/test_aotdispatch.py
+++ b/test/functorch/test_aotdispatch.py
@@ -2183,9 +2183,6 @@
         fake_z = fake_mode.from_tensor(real_z)
 
         class MockModule(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 # Accessing a free variable fake tensor will look like a
                 # constant to make_fx, and result in the tensor being traced
diff --git a/test/functorch/test_eager_transforms.py b/test/functorch/test_eager_transforms.py
index bb6eafb..5ee4653 100644
--- a/test/functorch/test_eager_transforms.py
+++ b/test/functorch/test_eager_transforms.py
@@ -3414,7 +3414,7 @@
     def test_correctness_mnist(self, mechanism):
         class Net(nn.Module):
             def __init__(self):
-                super(Net, self).__init__()
+                super().__init__()
                 self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
                 self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
                 self.conv2_drop = nn.Dropout2d()
@@ -3573,7 +3573,7 @@
     def test_maml_regression(self, device, mechanism):
         class ThreeLayerNet(nn.Module):
             def __init__(self):
-                super(ThreeLayerNet, self).__init__()
+                super().__init__()
                 self.fc1 = nn.Linear(1, 40)
                 self.relu1 = nn.ReLU()
                 self.fc2 = nn.Linear(40, 40)
diff --git a/test/fx/test_dce_pass.py b/test/fx/test_dce_pass.py
index 4f46b99..b807404 100644
--- a/test/fx/test_dce_pass.py
+++ b/test/fx/test_dce_pass.py
@@ -119,9 +119,6 @@
         """
 
         class TestModule(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x, y):
                 return x + 7
 
@@ -136,9 +133,6 @@
         """
 
         class TestModule(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x, y):
                 a = y + 2
                 return x + 7
@@ -172,9 +166,6 @@
         """
 
         class TestModule(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, a: torch.Tensor) -> torch.Tensor:
                 torch._assert(torch.equal(a, a), "a must equal a")
                 return a * 2
diff --git a/test/fx/test_fx_const_fold.py b/test/fx/test_fx_const_fold.py
index 5b50930..9641a1f 100644
--- a/test/fx/test_fx_const_fold.py
+++ b/test/fx/test_fx_const_fold.py
@@ -133,9 +133,6 @@
         """
 
         class ConstFoldTestModule(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x, y):
                 return x * 2 + y
 
diff --git a/test/fx/test_gradual_type.py b/test/fx/test_gradual_type.py
index 131debf..1e678de 100644
--- a/test/fx/test_gradual_type.py
+++ b/test/fx/test_gradual_type.py
@@ -278,7 +278,7 @@
         class BasicBlock(torch.nn.Module):
 
             def __init__(self, inplanes, planes):
-                super(BasicBlock, self).__init__()
+                super().__init__()
                 norm_layer = torch.nn.BatchNorm2d
                 self.bn1 = norm_layer(planes)
 
@@ -309,7 +309,7 @@
         class BasicBlock(torch.nn.Module):
 
             def __init__(self, inplanes, planes):
-                super(BasicBlock, self).__init__()
+                super().__init__()
                 norm_layer = torch.nn.BatchNorm2d
                 self.bn1 = norm_layer(planes)
 
@@ -331,7 +331,7 @@
         class BasicBlock(torch.nn.Module):
 
             def __init__(self, inplanes, planes):
-                super(BasicBlock, self).__init__()
+                super().__init__()
                 norm_layer = torch.nn.BatchNorm2d
                 self.bn1 = norm_layer(planes)
 
@@ -368,7 +368,7 @@
     def test_type_check_conv2D(self):
         class BasicBlock(torch.nn.Module):
             def __init__(self, inplanes, planes, stride=1):
-                super(BasicBlock, self).__init__()
+                super().__init__()
                 norm_layer = torch.nn.BatchNorm2d
                 self.conv1 = conv3x3(inplanes, planes, stride)
                 self.bn1 = norm_layer(planes)
@@ -398,7 +398,7 @@
     def test_type_check_conv2D_2(self):
         class BasicBlock(torch.nn.Module):
             def __init__(self, inplanes, planes, stride=1):
-                super(BasicBlock, self).__init__()
+                super().__init__()
                 norm_layer = torch.nn.BatchNorm2d
                 self.conv1 = conv3x3(inplanes, planes, stride)
                 self.bn1 = norm_layer(planes)
@@ -466,7 +466,7 @@
 
             class BasicBlock(torch.nn.Module):
                 def __init__(self, in_planes, out_planes, kernel_size, stride, padding, groups, dilation):
-                    super(BasicBlock, self).__init__()
+                    super().__init__()
                     self.conv1 = torch.nn.Conv2d(in_channels=in_planes, out_channels=out_planes,
                                                  kernel_size=kernel_size, stride=stride,
                                                  padding=padding, groups=groups, bias=False, dilation=dilation)
@@ -496,7 +496,7 @@
             # test with intermediate annotations
             class BasicBlock(torch.nn.Module):
                 def __init__(self, in_planes, out_planes, kernel_size, stride, padding, groups, dilation):
-                    super(BasicBlock, self).__init__()
+                    super().__init__()
                     self.conv1 = torch.nn.Conv2d(in_channels=in_planes, out_channels=out_planes,
                                                  kernel_size=kernel_size, stride=stride,
                                                  padding=padding, groups=groups, bias=False, dilation=dilation)
@@ -529,7 +529,7 @@
 
             def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                          base_width=64, dilation=1):
-                super(BasicBlock, self).__init__()
+                super().__init__()
                 norm_layer = torch.nn.BatchNorm2d
                 if groups != 1 or base_width != 64:
                     raise ValueError('BasicBlock only supports groups=1 and base_width=64')
@@ -580,7 +580,7 @@
 
         class BasicBlock(torch.nn.Module):
             def __init__(self):
-                super(BasicBlock, self).__init__()
+                super().__init__()
 
                 self.conv1 = torch.nn.Conv2d(3, 6, 5)
                 self.pool = torch.nn.MaxPool2d(2, 2)
@@ -664,7 +664,7 @@
 
         class BasicBlock(torch.nn.Module):
             def __init__(self):
-                super(BasicBlock, self).__init__()
+                super().__init__()
                 self.pool = torch.nn.MaxPool2d(5, 8)
 
             def forward(self, x : TensorType((64, 8, 8))):
@@ -706,7 +706,7 @@
 
             class BasicBlock(torch.nn.Module):
                 def __init__(self, kernel_size, stride, padding, dilation):
-                    super(BasicBlock, self).__init__()
+                    super().__init__()
                     self.pool = torch.nn.MaxPool2d(kernel_size, stride=stride,
                                                    padding=padding, dilation=dilation,
                                                    return_indices=False, ceil_mode=False)
@@ -736,7 +736,7 @@
             # test with intermediate annotations
             class BasicBlock(torch.nn.Module):
                 def __init__(self, kernel_size, stride, padding, dilation):
-                    super(BasicBlock, self).__init__()
+                    super().__init__()
                     self.pool = torch.nn.MaxPool2d(kernel_size, stride=stride,
                                                    padding=padding, dilation=dilation,
                                                    return_indices=False, ceil_mode=False)
@@ -787,7 +787,7 @@
 
             class BasicBlock(torch.nn.Module):
                 def __init__(self, start, end):
-                    super(BasicBlock, self).__init__()
+                    super().__init__()
                     self.start = start
                     self.end = end
 
@@ -865,7 +865,7 @@
         class BasicBlock(torch.nn.Module):
 
             def __init__(self, inplanes, planes):
-                super(BasicBlock, self).__init__()
+                super().__init__()
                 norm_layer = torch.nn.BatchNorm2d
                 self.bn1 = norm_layer(planes)
 
@@ -947,7 +947,7 @@
     def test_type_check_conv2D_types(self):
         class BasicBlock(torch.nn.Module):
             def __init__(self, inplanes, planes, stride=1):
-                super(BasicBlock, self).__init__()
+                super().__init__()
                 norm_layer = torch.nn.BatchNorm2d
                 self.conv1 = conv3x3(inplanes, planes, stride)
                 self.bn1 = norm_layer(planes)
@@ -976,7 +976,7 @@
 
         class BasicBlock(torch.nn.Module):
             def __init__(self):
-                super(BasicBlock, self).__init__()
+                super().__init__()
 
                 self.conv1 = torch.nn.Conv2d(3, 6, 5)
                 self.pool = torch.nn.MaxPool2d(2, 2)
diff --git a/test/fx/test_pass_infra.py b/test/fx/test_pass_infra.py
index b14eddb..9cb6dc3 100644
--- a/test/fx/test_pass_infra.py
+++ b/test/fx/test_pass_infra.py
@@ -52,9 +52,6 @@
 
 
 class AddModule(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-
     def forward(self, x):
         y = torch.add(x, x)
         z = torch.add(y, x)
diff --git a/test/fx/test_subgraph_rewriter.py b/test/fx/test_subgraph_rewriter.py
index 77c081f..da9e4c6 100644
--- a/test/fx/test_subgraph_rewriter.py
+++ b/test/fx/test_subgraph_rewriter.py
@@ -775,9 +775,6 @@
 
     def test_replace_pattern_with_filters(self):
         class M(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x, scale, zero_point):
                 # Match, second input to add is a scalar
                 x = x.dequantize()
diff --git a/test/fx/test_z3_gradual_types.py b/test/fx/test_z3_gradual_types.py
index d6fa610..f9f2e8e 100644
--- a/test/fx/test_z3_gradual_types.py
+++ b/test/fx/test_z3_gradual_types.py
@@ -33,9 +33,6 @@
 
     def test_dim(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x: TensorType([1, 2])):
                 y = x.dim()
                 return y
@@ -56,9 +53,6 @@
         """
 
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x: Dyn):
                 y = x.view(100)
                 tmp = y.size()[0]
@@ -82,9 +76,6 @@
         test dimensions and equalities
         """
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([32, 4, 4])):
                 eq = x.dim() == 3
                 return eq
@@ -111,9 +102,6 @@
 
         """
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([32, 4, 4]), y: TensorType([32, 4, 4])):
                 size_5 = x.size()
                 getitem_7 = size_5[0]
@@ -138,9 +126,6 @@
 
     def test_bmm(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([Dyn, 2, 3]), y: TensorType([1, 3, 2])):
                 bmm = torch.bmm(x, y)
                 return bmm
@@ -161,9 +146,6 @@
 
     def test_bmm2(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: Dyn, y: TensorType([1, 3, 2])):
                 bmm = torch.bmm(x, y)
                 return bmm
@@ -183,9 +165,6 @@
 
     def test_bmm3(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([2, 3, 3]), y: TensorType([1, 3, 2])):
                 bmm = torch.bmm(x, y)
                 return bmm
@@ -200,9 +179,6 @@
 
     def test_transpose(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([1, 2, 3, 4])):
                 transpose = x.transpose(0, 1)
                 return transpose
@@ -235,9 +211,6 @@
 
     def test_index_select(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([2050, 1024]), y: Dyn):
                 index_select = x.index_select(0, y)
                 return index_select
@@ -269,9 +242,6 @@
 
     def test_get_attr(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([1, 2, 3])):
                 getattr = x.device
                 to = x.to(getattr)
@@ -291,9 +261,6 @@
 
     def test_expand(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([1, 4])):
                 size = x.size()
                 getitem = size[-1]
@@ -328,9 +295,6 @@
 
     def test_getitem_tensor(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([4, 4])):
                 getitem = x[(None, None, slice(None, None, None), slice(None, None, None))]
                 return getitem
@@ -366,9 +330,6 @@
 
     def test_getitem_tensor2(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([4, 4])):
                 getitem = x[(None, None)]
                 return getitem
@@ -390,9 +351,6 @@
 
     def test_getitem_tensor_3(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([4, 4])):
                 getitem = x[(None, slice(None, None, None), None, slice(None, None, None))]
                 return getitem
@@ -416,7 +374,7 @@
 
         class BasicBlock(torch.nn.Module):
             def __init__(self):
-                super(BasicBlock, self).__init__()
+                super().__init__()
                 self.l = torch.nn.LayerNorm((1024,))
 
             def forward(self, x: Dyn):
@@ -472,9 +430,6 @@
     def test_layer_norm_functional(self):
 
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: Dyn):
                 return torch.nn.functional.layer_norm(x, (1024,))
 
@@ -502,9 +457,6 @@
     def test_ne_int_long_type_as(self):
 
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([Dyn, Dyn]), y: TensorType([Dyn, Dyn])):
                 ne_int = torch.ne(x, y).int()
                 type_as = ne_int.type_as(y)
@@ -539,9 +491,6 @@
         d1, d2 = D(s11, s1), D(0, s2)
 
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: Dyn, y: Dyn):
                 return torch.ne(x, y)
 
@@ -580,9 +529,6 @@
 
     def test_cumsum(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([Dyn, 4, 3])):
                 t = torch.cumsum(x, 3)
                 return t
@@ -634,9 +580,6 @@
 
     def test_cumsum_kwargs(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([Dyn, 4, 3])):
                 t = torch.cumsum(x, dim=3)
                 return t
@@ -662,9 +605,6 @@
 
     def test_arange(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([2, 4])):
                 size = x.size()
                 getitem = size[-1]
@@ -703,9 +643,6 @@
 
     def test_scalar_add(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([2, 4])):
                 size = x.size()
                 getitem = size[-1]
@@ -726,9 +663,6 @@
 
     def test_regular_add_2(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([2, 4])):
                 to = x.to()
                 size = to.size()
@@ -749,9 +683,6 @@
 
     def test_regular_add_3(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([2, 4])):
                 to = x.to()
                 size = to.size()
@@ -772,7 +703,7 @@
     def test_embedding(self):
         class BasicBlock(torch.nn.Module):
             def __init__(self):
-                super(BasicBlock, self).__init__()
+                super().__init__()
                 self.embedding = torch.nn.Embedding(256008, 1024, padding_idx=1)
 
             def forward(self, x: TensorType([2, 4])):
@@ -820,9 +751,6 @@
 
     def test_embedding_2(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([2, 4]), y: TensorType([Dyn, 1024])):
                 return torch.nn.functional.embedding(x, y)
 
@@ -842,9 +770,6 @@
 
     def test_size_two_args(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([Dyn, 2, Dyn])):
                 size = x.size(-1)
                 return size
@@ -874,9 +799,6 @@
 
     def test_size_getitem(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: Dyn):
                 size = x.size()
                 getitem = size[-1]
@@ -912,9 +834,6 @@
 
         # invalid index but should still be SAT because input will be Dyn
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: Dyn):
                 size = x.size()
                 getitem = size[-10]
@@ -935,7 +854,7 @@
     def test_view_mul(self):
         class BasicBlock(torch.nn.Module):
             def __init__(self):
-                super(BasicBlock, self).__init__()
+                super().__init__()
                 self.embed_tokens = torch.nn.Embedding(256008, 1024, padding_idx=1)
 
             def forward(self, x: TensorType([2, 4])):
@@ -974,9 +893,6 @@
 
     def test_gt(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([Dyn, 4])):
                 size = x.size()
                 getitem_1 = size[-1]
@@ -996,9 +912,6 @@
 
     def test_view(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([2, 4])):
                 view = x.view(-1, 8)
                 return view
@@ -1014,9 +927,6 @@
 
     def test_lt_tensor(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([2, 4]), y: Dyn):
                 lt = x > y
                 return lt
@@ -1036,9 +946,6 @@
         Test condition after making the wrong assumption about the input
         """
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: Dyn):
                 gt = x > 1
                 return gt
@@ -1067,7 +974,7 @@
         """
         class BasicBlock(torch.nn.Module):
             def __init__(self):
-                super(BasicBlock, self).__init__()
+                super().__init__()
                 self.embed_tokens = torch.nn.Embedding(256008, 1024, padding_idx=1)
 
             def forward(self, x: TensorType([Dyn, 4])):
@@ -1127,7 +1034,7 @@
         """
         class BasicBlock(torch.nn.Module):
             def __init__(self):
-                super(BasicBlock, self).__init__()
+                super().__init__()
                 self.embed_tokens = torch.nn.Embedding(256008, 1024, padding_idx=1)
 
             def forward(self, x: TensorType([Dyn, 4])):
@@ -1157,9 +1064,6 @@
 
     def test_masked_fill(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([2, 4])):
                 size = x.size()
                 getitem = size[-1]
@@ -1203,9 +1107,6 @@
 
     def test_add_reshape_1(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: Dyn, y: Dyn):
                 return torch.add(torch.reshape(x, (1, 2)), torch.reshape(y, (2, 2)))
 
@@ -1221,9 +1122,6 @@
 
     def test_add_reshape_2(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: Dyn, y: Dyn):
                 return torch.add(torch.reshape(x, (-1, 2)), torch.reshape(y, (2, 2, 2)))
 
@@ -1239,7 +1137,7 @@
     def test_conv_reshape_add_0(self):
         class BasicBlock(torch.nn.Module):
             def __init__(self, in_planes, out_planes, kernel_size, stride, padding, groups, dilation):
-                super(BasicBlock, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(in_channels=in_planes, out_channels=out_planes,
                                              kernel_size=kernel_size, stride=stride,
                                              padding=padding, groups=groups, bias=False, dilation=dilation)
@@ -1260,7 +1158,7 @@
     def test_conv_reshape_add_0_2(self):
         class BasicBlock(torch.nn.Module):
             def __init__(self, in_planes, out_planes, kernel_size, stride, padding, groups, dilation):
-                super(BasicBlock, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(in_channels=in_planes, out_channels=out_planes,
                                              kernel_size=kernel_size, stride=stride,
                                              padding=padding, groups=groups, bias=False, dilation=dilation)
@@ -1312,7 +1210,7 @@
     def test_conv_reshape_add_0_3(self):
         class BasicBlock(torch.nn.Module):
             def __init__(self, in_planes, out_planes, kernel_size, stride, padding, groups, dilation):
-                super(BasicBlock, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(in_channels=in_planes, out_channels=out_planes,
                                              kernel_size=kernel_size, stride=stride,
                                              padding=padding, groups=groups, bias=False, dilation=dilation)
@@ -1333,7 +1231,7 @@
     def test_conv_reshape_add_1(self):
         class BasicBlock(torch.nn.Module):
             def __init__(self, in_planes, out_planes, kernel_size, stride, padding, groups, dilation):
-                super(BasicBlock, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(in_channels=in_planes, out_channels=out_planes,
                                              kernel_size=kernel_size, stride=stride,
                                              padding=padding, groups=groups, bias=False, dilation=dilation)
@@ -1356,7 +1254,7 @@
 
         class BasicBlock(torch.nn.Module):
             def __init__(self, in_planes, out_planes, kernel_size, stride, padding, groups, dilation):
-                super(BasicBlock, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(in_channels=in_planes, out_channels=out_planes,
                                              kernel_size=kernel_size, stride=stride,
                                              padding=padding, groups=groups, bias=False, dilation=dilation)
@@ -1376,7 +1274,7 @@
     def test_conv_reshape0(self):
         class BasicBlock(torch.nn.Module):
             def __init__(self, in_planes, out_planes, kernel_size, stride, padding, groups, dilation):
-                super(BasicBlock, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(in_channels=in_planes, out_channels=out_planes,
                                              kernel_size=kernel_size, stride=stride,
                                              padding=padding, groups=groups, bias=False, dilation=dilation)
@@ -1429,7 +1327,7 @@
     def test_conv_reshape1(self):
         class BasicBlock(torch.nn.Module):
             def __init__(self, in_planes, out_planes, kernel_size, stride, padding, groups, dilation):
-                super(BasicBlock, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(in_channels=in_planes, out_channels=out_planes,
                                              kernel_size=kernel_size, stride=stride,
                                              padding=padding, groups=groups, bias=False, dilation=dilation)
@@ -1467,7 +1365,7 @@
     def test_conv_wrong_example(self):
         class BasicBlock(torch.nn.Module):
             def __init__(self):
-                super(BasicBlock, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(in_channels=2, out_channels=2,
                                              kernel_size=2, stride=2,
                                              padding=2, groups=2, bias=False, dilation=2)
@@ -1515,7 +1413,7 @@
 
         class BasicBlock(torch.nn.Module):
             def __init__(self, in_planes, out_planes, kernel_size, stride, padding, groups, dilation):
-                super(BasicBlock, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(in_channels=in_planes, out_channels=out_planes,
                                              kernel_size=kernel_size, stride=stride,
                                              padding=padding, groups=groups, bias=False, dilation=dilation)
@@ -1565,9 +1463,6 @@
         d1, d2, d3, d4 = D(s11, s1), D(s22, s2), D(s33, s3), D(s44, s4),
 
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: Dyn, y: Dyn):
                 return torch.add(x, y)
 
@@ -1595,9 +1490,6 @@
         self.assertEqual(s.check(), z3.sat)
 
         class BasicBlock2(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock2, self).__init__()
-
             def forward(self, x: TensorType((Dyn,)), y: Dyn):
                 return torch.add(x, y)
 
@@ -1621,9 +1513,6 @@
         self.assertEqual(s.check(), z3.unsat)
 
         class BasicBlock3(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock3, self).__init__()
-
             def forward(self, x: TensorType((Dyn,)), y: Dyn):
                 return torch.add(x, y)
 
@@ -1642,9 +1531,6 @@
         s1, s2, s3, s4 = z3.Ints('s1 s2 s3 s4')
 
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType((Dyn,)), y: TensorType((Dyn, Dyn))):
                 return torch.add(x, y)
 
@@ -1669,9 +1555,6 @@
         s1, s2, s3, s4 = z3.Ints('s1 s2 s3 s4')
 
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([Dyn, Dyn]), y: TensorType([Dyn])):
                 return torch.add(x, y)
 
@@ -1720,9 +1603,6 @@
         s1, s2, s3, s4 = z3.Ints('s1 s2 s3 s4')
 
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([Dyn, 1]), y: TensorType([Dyn])):
                 return torch.add(x, y)
 
@@ -1755,9 +1635,6 @@
 
     def test_add_padding_4(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([2, 1]), y: TensorType([3])):
                 return torch.add(x, y)
 
@@ -1777,9 +1654,6 @@
 
     def test_add_padding_5(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([2, 2]), y: TensorType([3])):
                 return torch.add(x, y)
 
@@ -1796,9 +1670,6 @@
     def test_add_size_3(self):
 
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([Dyn, Dyn, Dyn]), y: TensorType([Dyn, Dyn, Dyn])):
                 return torch.add(x, y)
 
@@ -1829,9 +1700,6 @@
     def test_add_padding_6(self):
 
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([Dyn]), y: TensorType([Dyn, Dyn, Dyn])):
                 return torch.add(x, y)
 
@@ -1862,9 +1730,6 @@
     def test_add_padding_7(self):
 
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([Dyn]), y: TensorType([Dyn, Dyn, Dyn, Dyn])):
                 return torch.add(x, y)
 
@@ -1885,9 +1750,6 @@
     def test_add_padding_8(self):
 
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([Dyn]), y: TensorType([Dyn, Dyn, Dyn, Dyn])):
                 return torch.add(x, y)
 
@@ -1914,9 +1776,6 @@
     def test_add_padding_9(self):
 
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: Dyn, y: TensorType([Dyn, Dyn, Dyn, Dyn])):
                 return torch.add(x, y)
 
@@ -1958,7 +1817,7 @@
 
         class BasicBlock(torch.nn.Module):
             def __init__(self, in_planes, out_planes, kernel_size, stride, padding, groups, dilation):
-                super(BasicBlock, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(in_channels=in_planes, out_channels=out_planes,
                                              kernel_size=kernel_size, stride=stride,
                                              padding=padding, dilation=dilation)
@@ -2008,9 +1867,6 @@
         s11, s22, s33, s44 = z3.Ints('s11 s22 s33 s44')
 
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: Dyn):
                 return torch.reshape(x, (2, -1))
 
@@ -2037,9 +1893,6 @@
         d1, d2, d3, d4 = D(s11, s1), D(s22, s2), D(s33, s3), D(s44, s4),
 
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([Dyn])):
                 return torch.reshape(x, (2, -1))
 
@@ -2058,9 +1911,6 @@
         s11, s22, s33, s44 = z3.Ints('s11 s22 s33 s44')
 
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: TensorType([Dyn])):
                 return torch.reshape(x, (2, 3))
 
@@ -2083,9 +1933,6 @@
         s11, s22, s33, s44 = z3.Ints('s11 s22 s33 s44')
 
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: Dyn):
                 return torch.reshape(x, (2, 3, 1, 1))
 
@@ -2107,7 +1954,7 @@
     def test_conv2D_maxpool2d_flatten(self):
         class BasicBlock(torch.nn.Module):
             def __init__(self):
-                super(BasicBlock, self).__init__()
+                super().__init__()
 
                 self.conv1 = torch.nn.Conv2d(3, 6, 5)
                 self.pool = torch.nn.MaxPool2d(2, 2)
@@ -2144,7 +1991,7 @@
     def test_conv2D_maxpool2d_flatten_unsat(self):
         class BasicBlock(torch.nn.Module):
             def __init__(self):
-                super(BasicBlock, self).__init__()
+                super().__init__()
 
                 self.conv1 = torch.nn.Conv2d(3, 6, 5)
                 self.pool = torch.nn.MaxPool2d(2, 2)
@@ -2177,7 +2024,7 @@
     def test_conv2D_maxpool2d_flatten_dyn(self):
         class BasicBlock(torch.nn.Module):
             def __init__(self):
-                super(BasicBlock, self).__init__()
+                super().__init__()
 
                 self.conv1 = torch.nn.Conv2d(3, 6, 5)
                 self.pool = torch.nn.MaxPool2d(2, 2)
@@ -2257,9 +2104,6 @@
 
     def test_add_reshape(self):
         class BasicBlock(torch.nn.Module):
-            def __init__(self):
-                super(BasicBlock, self).__init__()
-
             def forward(self, x: Dyn, y: Dyn):
                 return torch.add(torch.reshape(x, (1, 2)), torch.reshape(y, (2, 2)))
 
@@ -2275,7 +2119,7 @@
     def test_conv_reshape_add(self):
         class BasicBlock(torch.nn.Module):
             def __init__(self, in_planes, out_planes, kernel_size, stride, padding, groups, dilation):
-                super(BasicBlock, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(in_channels=in_planes, out_channels=out_planes,
                                              kernel_size=kernel_size, stride=stride,
                                              padding=padding, groups=groups, bias=False, dilation=dilation)
diff --git a/test/inductor/test_smoke.py b/test/inductor/test_smoke.py
index 8907972..9f23e12 100644
--- a/test/inductor/test_smoke.py
+++ b/test/inductor/test_smoke.py
@@ -9,7 +9,7 @@
 
 class MLP(torch.nn.Module):
     def __init__(self):
-        super(MLP, self).__init__()
+        super().__init__()
         self.l1 = torch.nn.Linear(1, 6)
         self.l2 = torch.nn.Linear(6, 1)
 
diff --git a/test/inductor/test_torchinductor.py b/test/inductor/test_torchinductor.py
index 7455bd3..23f2139 100644
--- a/test/inductor/test_torchinductor.py
+++ b/test/inductor/test_torchinductor.py
@@ -1661,9 +1661,6 @@
 
     def test_shape_prop_torch_ones(self):
         class Model(torch.nn.Module):
-            def __init__(self):
-                super(Model, self).__init__()
-
             def forward(self, attention_scores):
                 extended_attention_mask = torch.ones(
                     8, 1, 1, 512, device=attention_scores.device
@@ -1756,7 +1753,7 @@
                 dtype=None,
             ):
                 factory_kwargs = {"device": device, "dtype": dtype}
-                super(BatchNorm, self).__init__(
+                super().__init__(
                     num_features,
                     eps=eps,
                     momentum=momentum,
@@ -1831,7 +1828,7 @@
                 self,
                 **kwargs,
             ):
-                super(M, self).__init__()
+                super().__init__()
                 self.upsample = torch.nn.UpsamplingNearest2d(scale_factor=2)
                 self.conv = torch.nn.Conv2d(
                     8,
@@ -1889,7 +1886,7 @@
                 out_channels,
                 **kwargs,
             ):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(
                     in_channels,
                     out_channels,
@@ -1970,7 +1967,7 @@
                 bias,
                 **kwargs,
             ):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(
                     in_channels,
                     out_channels,
@@ -2072,7 +2069,7 @@
                 bias,
                 **kwargs,
             ):
-                super(M, self).__init__()
+                super().__init__()
                 self.linear = torch.nn.Linear(
                     in_features,
                     out_features,
@@ -2102,7 +2099,7 @@
     def test_linear_binary(self):
         class M(torch.nn.Module):
             def __init__(self, eltwise_fn, in_channels, out_channels, bias, **kwargs):
-                super(M, self).__init__()
+                super().__init__()
                 self.linear = torch.nn.Linear(
                     in_channels, out_channels, bias=bias, **kwargs
                 )
@@ -2152,7 +2149,7 @@
                 out_channels,
                 **kwargs,
             ):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv_transpose2d = torch.nn.ConvTranspose2d(
                     in_channels,
                     out_channels,
@@ -3184,7 +3181,7 @@
                 self,
                 **kwargs,
             ):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(
                     64,
                     5,
@@ -6338,7 +6335,7 @@
 
             class Model(torch.nn.Module):
                 def __init__(self):
-                    super(Model, self).__init__()
+                    super().__init__()
                     self.linear = torch.nn.Linear(
                         in_features=256, out_features=1536, bias=True
                     )
@@ -6499,9 +6496,6 @@
         @config.patch(permute_fusion=True)
         def test_permute_fusion(self):
             class Repro(torch.nn.Module):
-                def __init__(self):
-                    super().__init__()
-
                 def forward(self, view, reshape_2):
                     permute = view.permute(0, 2, 1)
                     view = None
@@ -6679,9 +6673,6 @@
         @requires_cuda()
         def test_unspec_inputs_interop(self):
             class Repro(torch.nn.Module):
-                def __init__(self):
-                    super().__init__()
-
                 def forward(self, x, y):
                     unsqueeze = torch.ops.aten.unsqueeze.default(x, 4)
                     permute = torch.ops.aten.permute.default(unsqueeze, [0, 1, 2, 4, 3])
@@ -6774,9 +6765,6 @@
         @config.patch(tune_layout=True)
         def test_tune_layout(self):
             class Repro(torch.nn.Module):
-                def __init__(self):
-                    super().__init__()
-
                 def forward(self, arg1_1, unsqueeze, unsqueeze_1):
                     convolution_1 = torch.ops.aten.convolution.default(
                         unsqueeze,
@@ -6809,7 +6797,7 @@
         def test_inplace_updates_cudagraphs(self):
             class Repro(torch.nn.Module):
                 def __init__(self):
-                    super(Repro, self).__init__()
+                    super().__init__()
                     self.weight1 = torch.nn.Parameter(
                         torch.randn(10, 20, requires_grad=True)
                     )
diff --git a/test/jit/fixtures_srcs/fixtures_src.py b/test/jit/fixtures_srcs/fixtures_src.py
index 52b9bf0..afba178 100644
--- a/test/jit/fixtures_srcs/fixtures_src.py
+++ b/test/jit/fixtures_srcs/fixtures_src.py
@@ -2,9 +2,6 @@
 from typing import Union
 
 class TestVersionedDivTensorExampleV7(torch.nn.Module):
-    def __init__(self):
-        super(TestVersionedDivTensorExampleV7, self).__init__()
-
     def forward(self, a, b):
         result_0 = a / b
         result_1 = torch.div(a, b)
@@ -12,74 +9,47 @@
         return result_0, result_1, result_2
 
 class TestVersionedLinspaceV7(torch.nn.Module):
-    def __init__(self):
-        super(TestVersionedLinspaceV7, self).__init__()
-
     def forward(self, a: Union[int, float, complex], b: Union[int, float, complex]):
         c = torch.linspace(a, b, steps=5)
         d = torch.linspace(a, b)
         return c, d
 
 class TestVersionedLinspaceOutV7(torch.nn.Module):
-    def __init__(self):
-        super(TestVersionedLinspaceOutV7, self).__init__()
-
     def forward(self, a: Union[int, float, complex], b: Union[int, float, complex], out: torch.Tensor):
         return torch.linspace(a, b, out=out)
 
 class TestVersionedLogspaceV8(torch.nn.Module):
-    def __init__(self):
-        super(TestVersionedLogspaceV8, self).__init__()
-
     def forward(self, a: Union[int, float, complex], b: Union[int, float, complex]):
         c = torch.logspace(a, b, steps=5)
         d = torch.logspace(a, b)
         return c, d
 
 class TestVersionedLogspaceOutV8(torch.nn.Module):
-    def __init__(self):
-        super(TestVersionedLogspaceOutV8, self).__init__()
-
     def forward(self, a: Union[int, float, complex], b: Union[int, float, complex], out: torch.Tensor):
         return torch.logspace(a, b, out=out)
 
 class TestVersionedGeluV9(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-
     def forward(self, x):
         return torch._C._nn.gelu(x)
 
 class TestVersionedGeluOutV9(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-
     def forward(self, x):
         out = torch.zeros_like(x)
         return torch._C._nn.gelu(x, out=out)
 
 class TestVersionedRandomV10(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-
     def forward(self, x):
         out = torch.zeros_like(x)
         return out.random_(0, 10)
 
 
 class TestVersionedRandomFuncV10(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-
     def forward(self, x):
         out = torch.zeros_like(x)
         return out.random(0, 10)
 
 
 class TestVersionedRandomOutV10(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-
     def forward(self, x):
         x = torch.zeros_like(x)
         out = torch.zeros_like(x)
diff --git a/test/jit/myexception.py b/test/jit/myexception.py
index 5937bd3..e60d30b 100644
--- a/test/jit/myexception.py
+++ b/test/jit/myexception.py
@@ -4,5 +4,4 @@
 is captured correctly in suce cases.
 """
 class MyKeyError(KeyError):
-    def __init__(self, msg):
-        super(KeyError, self).__init__(msg)
+    pass
diff --git a/test/jit/test_async.py b/test/jit/test_async.py
index f8a1bae..36fdc01 100644
--- a/test/jit/test_async.py
+++ b/test/jit/test_async.py
@@ -87,7 +87,7 @@
             __constants__ = ['const']
 
             def __init__(self):
-                super(Mod, self).__init__()
+                super().__init__()
                 self.const = 42
                 self.param = nn.Parameter(torch.randn(2, 2))
 
@@ -244,15 +244,12 @@
     @_inline_everything
     def test_async_script_trace(self):
         class Traced(nn.Module):
-            def __init__(self):
-                super(Traced, self).__init__()
-
             def forward(self, x):
                 return (torch.neg(x), x)
 
         class Mod(torch.jit.ScriptModule):
             def __init__(self):
-                super(Mod, self).__init__()
+                super().__init__()
                 x = torch.rand(3, 3)
                 self.traced = torch.jit.trace(Traced(), (x), _force_outplace=True)
 
@@ -273,7 +270,7 @@
 
         class TupleCl(nn.Module):
             def __init__(self):
-                super(TupleCl, self).__init__()
+                super().__init__()
                 self.module = Mod()
 
             def forward(self, x):
@@ -424,9 +421,6 @@
             return input + torch.ones(input.size())
 
         class TestListFutureModule(nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, input):
                 input_list = []
                 for i in range(3):
@@ -458,9 +452,6 @@
             return input + torch.ones(input.size())
 
         class DifferentOutputModule(nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, input):
                 fut_res = torch.jit._fork(add_one, (input))
 
diff --git a/test/jit/test_attr.py b/test/jit/test_attr.py
index 55f0638..1fd85be 100644
--- a/test/jit/test_attr.py
+++ b/test/jit/test_attr.py
@@ -16,7 +16,7 @@
 
         class A(torch.nn.Module):
             def __init__(self):
-                super(A, self).__init__()
+                super().__init__()
                 self.init_attr_val = 1.0
 
             def forward(self, x):
diff --git a/test/jit/test_autodiff_subgraph_slicing.py b/test/jit/test_autodiff_subgraph_slicing.py
index f643061..fbdcc19 100644
--- a/test/jit/test_autodiff_subgraph_slicing.py
+++ b/test/jit/test_autodiff_subgraph_slicing.py
@@ -86,7 +86,7 @@
         with enable_profiling_mode_for_profiling_tests():
             class M(torch.nn.Module):
                 def __init__(self, has_bias):
-                    super(M, self).__init__()
+                    super().__init__()
                     self.ll = torch.nn.Linear(10, 10, has_bias)
 
                 def forward(self, x, y):
diff --git a/test/jit/test_backends.py b/test/jit/test_backends.py
index 1a34fca..e114a54 100644
--- a/test/jit/test_backends.py
+++ b/test/jit/test_backends.py
@@ -52,9 +52,6 @@
     A simple Module used to test to_backend lowering machinery.
     """
 
-    def __init__(self):
-        super().__init__()
-
     def forward(self, x, h):
         return self.accum(x, h), self.sub_accum(x, h)
 
@@ -476,9 +473,6 @@
     A simple add Module used to test to_backend lowering machinery.
     """
 
-    def __init__(self):
-        super().__init__()
-
     def forward(self, x, h):
         return x + h
 
@@ -568,16 +562,10 @@
         """
         A module with an operator that is not supported.
         """
-        def __init__(self):
-            super().__init__()
-
         def forward(self, x, h):
             return x * h
             self._loweredmodule.forward()
 
-    def setUp(self):
-        super().setUp()
-
     def test_errors(self):
         scripted_module_n = torch.jit.script(ErrorMessagesWithCompiler.ModuleNotSupported())
         # Test exception is thrown when lowering a module with an unsupported operator
@@ -600,9 +588,6 @@
         """
         A simple subtraction Module to be used in CompModule.
         """
-        def __init__(self):
-            super().__init__()
-
         def forward(self, x, h):
             return x - h
 
@@ -694,9 +679,6 @@
         A simple Module used to test to_backend lowering machinery.
         """
 
-        def __init__(self):
-            super().__init__()
-
         def forward(self, x, h):
             return x + h
 
diff --git a/test/jit/test_builtins.py b/test/jit/test_builtins.py
index aa78a97..0009e4b 100644
--- a/test/jit/test_builtins.py
+++ b/test/jit/test_builtins.py
@@ -28,17 +28,17 @@
     def test_has_attr(self):
         class HasA(torch.nn.Module):
             def __init__(self):
-                super(HasA, self).__init__()
+                super().__init__()
                 self.a = 0
 
         class HasB(torch.nn.Module):
             def __init__(self):
-                super(HasB, self).__init__()
+                super().__init__()
                 self.b = 1
 
         class Mod(torch.nn.Module):
             def __init__(self):
-                super(Mod, self).__init__()
+                super().__init__()
                 self.mods = torch.nn.ModuleList([HasA(), HasB()])
 
             def forward(self):
@@ -59,7 +59,7 @@
     def test_has_attr_invalid_args(self):
         class Mod(torch.nn.Module):
             def __init__(self):
-                super(Mod, self).__init__()
+                super().__init__()
                 self.mod = torch.nn.Linear(1, 1)
 
             def forward(self, name):
@@ -70,9 +70,6 @@
             torch.jit.script(Mod())
 
         class Mod(torch.nn.Module):
-            def __init__(self):
-                super(Mod, self).__init__()
-
             def forward(self, name):
                 # not allowed, `torch.rand` is not a class type
                 return hasattr(torch.rand(2, 3), name)
diff --git a/test/jit/test_class_type.py b/test/jit/test_class_type.py
index 4d6e89b..8082979 100644
--- a/test/jit/test_class_type.py
+++ b/test/jit/test_class_type.py
@@ -650,7 +650,7 @@
         # Test interface/class python assignment
         class TestPyAssign(nn.Module):
             def __init__(self):
-                super(TestPyAssign, self).__init__()
+                super().__init__()
                 self.proxy_mod = Foo()
 
             def forward(self, x):
@@ -665,7 +665,7 @@
 
         class TestPyAssignError(nn.Module):
             def __init__(self, obj):
-                super(TestPyAssignError, self).__init__()
+                super().__init__()
                 self.proxy_mod = obj
 
             def forward(self, x):
@@ -931,7 +931,7 @@
             __constants__ = ["w"]
 
             def __init__(self, w):
-                super(M, self).__init__()
+                super().__init__()
                 self.w = w
 
             def forward(self, x):
@@ -1431,7 +1431,7 @@
 
         class Mod(nn.Module):
             def __init__(self):
-                super(Mod, self).__init__()
+                super().__init__()
                 self.mod1 = ValHolder("1")
                 self.mod2 = ValHolder("2")
 
diff --git a/test/jit/test_complexity.py b/test/jit/test_complexity.py
index d0d24c2..569a330 100644
--- a/test/jit/test_complexity.py
+++ b/test/jit/test_complexity.py
@@ -44,12 +44,12 @@
 
 class TestComplexity(JitTestCase):
     def setUp(self):
-        super(TestComplexity, self).setUp()
+        super().setUp()
         self.grad_enabled = torch.is_grad_enabled()
         torch.set_grad_enabled(False)
 
     def tearDown(self):
-        super(TestComplexity, self).tearDown()
+        super().tearDown()
         torch.set_grad_enabled(self.grad_enabled)
 
     @suppress_warnings
diff --git a/test/jit/test_convert_activation.py b/test/jit/test_convert_activation.py
index 0c06fb6..f414459 100644
--- a/test/jit/test_convert_activation.py
+++ b/test/jit/test_convert_activation.py
@@ -109,7 +109,7 @@
         # at the global scope
         class Test3(nn.Module):
             def __init__(self, x):
-                super(Test3, self).__init__()
+                super().__init__()
                 self.x = x
 
             def forward(self):
diff --git a/test/jit/test_cuda.py b/test/jit/test_cuda.py
index a151756..6937af9 100644
--- a/test/jit/test_cuda.py
+++ b/test/jit/test_cuda.py
@@ -44,13 +44,10 @@
     """
     A suite of tests for the CUDA API in TorchScript.
     """
-    def setUp(self):
-        super(TestCUDA, self).setUp()
-
     def tearDown(self):
         gc.collect()
         torch.cuda.empty_cache()
-        super(TestCUDA, self).tearDown()
+        super().tearDown()
 
     @skipIfRocm
     @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
diff --git a/test/jit/test_enum.py b/test/jit/test_enum.py
index 3221a35..5198688 100644
--- a/test/jit/test_enum.py
+++ b/test/jit/test_enum.py
@@ -244,7 +244,7 @@
 
         class TestModule(torch.nn.Module):
             def __init__(self, e: Color):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.e = e
 
             def forward(self):
@@ -270,7 +270,7 @@
 
         class TestModule(torch.nn.Module):
             def __init__(self, e: Color):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.e = e
 
             def forward(self):
@@ -306,7 +306,7 @@
 
         class TestModule(torch.nn.Module):
             def __init__(self, e: Color):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.e = e
 
             def forward(self):
diff --git a/test/jit/test_exception.py b/test/jit/test_exception.py
index dce38e3..2cc0001 100644
--- a/test/jit/test_exception.py
+++ b/test/jit/test_exception.py
@@ -10,7 +10,7 @@
     def test_pyop_exception_message(self):
         class Foo(torch.jit.ScriptModule):
             def __init__(self):
-                super(Foo, self).__init__()
+                super().__init__()
                 self.conv = nn.Conv2d(1, 10, kernel_size=5)
 
             @torch.jit.script_method
@@ -156,8 +156,7 @@
 
     def test_custom_python_exception(self):
         class MyValueError(ValueError):
-            def __init__(self, msg):
-                super(MyValueError, self).__init__(msg)
+            pass
 
         @torch.jit.script
         def fn():
diff --git a/test/jit/test_freezing.py b/test/jit/test_freezing.py
index c04811e..966cc30 100644
--- a/test/jit/test_freezing.py
+++ b/test/jit/test_freezing.py
@@ -43,7 +43,7 @@
     def test_freeze_module(self):
         class M(nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.a = 1                      # folded
                 self.b = 1.2                    # folded
                 self.c = "hello"                # folded
@@ -101,7 +101,7 @@
     def test_freeze_module_with_submodule(self):
         class SubModule(nn.Module):
             def __init__(self):
-                super(SubModule, self).__init__()
+                super().__init__()
                 self.a = 11
                 self.b = 2
 
@@ -110,7 +110,7 @@
 
         class SubModule2(nn.Module):
             def __init__(self):
-                super(SubModule2, self).__init__()
+                super().__init__()
                 self.a = 12
                 self.b = 2
 
@@ -120,7 +120,7 @@
 
         class TestModule(nn.Module):
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.sub1 = SubModule()
                 self.sub2 = SubModule2()
                 self.a = 3
@@ -166,7 +166,7 @@
     def test_freeze_module_with_fork(self):
         class SubModule(nn.Module):
             def __init__(self):
-                super(SubModule, self).__init__()
+                super().__init__()
                 self.a = torch.ones(20, 20)
                 self.b = torch.ones(20, 20)
 
@@ -175,7 +175,7 @@
 
         class TestModule(nn.Module):
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.sub = SubModule()
 
             def forward(self, x):
@@ -206,7 +206,7 @@
     def test_freeze_module_with_nested_fork(self):
         class SubModule(nn.Module):
             def __init__(self):
-                super(SubModule, self).__init__()
+                super().__init__()
                 self.a = torch.ones(20, 20)
                 self.b = torch.ones(20, 20)
 
@@ -215,7 +215,7 @@
 
         class SubModule2(nn.Module):
             def __init__(self):
-                super(SubModule2, self).__init__()
+                super().__init__()
                 self.sub = SubModule()
                 self.c = torch.ones(20, 20)
 
@@ -227,7 +227,7 @@
 
         class TestModule(nn.Module):
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.sub = SubModule2()
                 self.d = 1
 
@@ -266,7 +266,7 @@
 
         class TestModule(nn.Module):
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.a = torch.ones(20, 20)
                 self.b = torch.ones(20, 20)
 
@@ -307,7 +307,7 @@
 
         class TestModule(nn.Module):
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.a = torch.ones(20, 20)
                 self.b = torch.ones(20, 20)
 
@@ -347,7 +347,7 @@
     def test_freeze_module_with_sharedclasstype(self):
         class SubModule(nn.Module):
             def __init__(self):
-                super(SubModule, self).__init__()
+                super().__init__()
                 self.a = torch.tensor([1.1])
                 self.b = torch.tensor([2.2])
 
@@ -366,7 +366,7 @@
 
         class SubModule2(nn.Module):
             def __init__(self):
-                super(SubModule2, self).__init__()
+                super().__init__()
                 self.sub = SubModule()
                 self.b = torch.tensor([3.3])
 
@@ -376,7 +376,7 @@
 
         class TestModule(nn.Module):
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.sub1 = SubModule()  # sub1 and sub2.sub shared same class type.
                 self.sub2 = SubModule2()
                 self.a = torch.tensor([4.4])
@@ -439,7 +439,7 @@
     def test_freeze_module_with_nestedaliasing(self):
         class SubModule(nn.Module):
             def __init__(self):
-                super(SubModule, self).__init__()
+                super().__init__()
                 self.a = torch.tensor([1.1])
                 self.b = torch.tensor([2.2])
 
@@ -459,7 +459,7 @@
 
         class SubModule2(nn.Module):
             def __init__(self):
-                super(SubModule2, self).__init__()
+                super().__init__()
                 self.sub = Sub  # aliasing
 
             def forward(self, x):
@@ -467,7 +467,7 @@
 
         class TestModule(nn.Module):
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.sub1 = Sub  # aliasing
                 self.sub2 = SubModule2()
 
@@ -495,7 +495,7 @@
     def test_freeze_module_with_nestedaliasingscalar(self):
         class SubModule(nn.Module):
             def __init__(self):
-                super(SubModule, self).__init__()
+                super().__init__()
                 self.a = 1.1
                 self.b = 2.2
 
@@ -515,7 +515,7 @@
 
         class SubModule2(nn.Module):
             def __init__(self):
-                super(SubModule2, self).__init__()
+                super().__init__()
                 self.sub = Sub  # aliasing
 
             def forward(self, x):
@@ -523,7 +523,7 @@
 
         class TestModule(nn.Module):
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.sub1 = Sub  # aliasing
                 self.sub2 = SubModule2()
 
@@ -551,7 +551,7 @@
     def test_freeze_module_with_preserve_sub_module(self):
         class SubModule(nn.Module):
             def __init__(self):
-                super(SubModule, self).__init__()
+                super().__init__()
                 self.a = torch.tensor([1.1])
                 self.b = 2.2
 
@@ -560,7 +560,7 @@
 
         class TestModule(nn.Module):
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.sub1 = SubModule()  # aliasing
                 self.sub2 = SubModule()
 
@@ -584,7 +584,7 @@
     def test_freeze_module_with_preserve_sub_module_and_mutation(self):
         class SubModule(nn.Module):
             def __init__(self):
-                super(SubModule, self).__init__()
+                super().__init__()
                 self.a = torch.tensor([1.1])
                 self.b = 2.2
 
@@ -594,7 +594,7 @@
 
         class TestModule(nn.Module):
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.sub1 = SubModule()  # aliasing
                 self.sub2 = SubModule()
 
@@ -622,7 +622,7 @@
     def test_freeze_module_with_helperfunction(self):
         class SubModule(nn.Module):
             def __init__(self):
-                super(SubModule, self).__init__()
+                super().__init__()
                 self.a = 11
                 self.b = 2
 
@@ -631,7 +631,7 @@
 
         class TestModule(nn.Module):
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.sub = SubModule()
                 self.a = 3
                 self.b = 4
@@ -655,7 +655,7 @@
     def test_freeze_module_with_inplace_mutable(self):
         class FreezeMe(torch.jit.ScriptModule):
             def __init__(self):
-                super(FreezeMe, self).__init__()
+                super().__init__()
                 self.a = [11, 22]
 
             @torch.jit.script_method
@@ -677,7 +677,7 @@
     def test_freeze_module_with_mutable_list(self):
         class FreezeMe(nn.Module):
             def __init__(self):
-                super(FreezeMe, self).__init__()
+                super().__init__()
                 self.a = [1, 2]
 
             def forward(self, x):
@@ -704,7 +704,7 @@
     def test_freeze_module_with_mutable_dict(self):
         class FreezeMe(nn.Module):
             def __init__(self):
-                super(FreezeMe, self).__init__()
+                super().__init__()
                 self.a = {"layer" : "4"}
 
             def forward(self, x):
@@ -733,7 +733,7 @@
     def test_freeze_module_with_mutable_tensor(self):
         class FreezeMe(nn.Module):
             def __init__(self):
-                super(FreezeMe, self).__init__()
+                super().__init__()
                 self.a = torch.tensor([1., 2., 3.])
 
             def forward(self, x):
@@ -755,7 +755,7 @@
     def test_freeze_module_with_tuple(self):
         class FreezeMe(nn.Module):
             def __init__(self):
-                super(FreezeMe, self).__init__()
+                super().__init__()
                 self.a = (torch.tensor([1, 2, 3, 4, 5, 6]), "hi")
 
             def forward(self, x):
@@ -777,7 +777,7 @@
     def test_freeze_module_with_tensor(self):
         class FreezeMe(nn.Module):
             def __init__(self):
-                super(FreezeMe, self).__init__()
+                super().__init__()
                 self.a = torch.tensor([1, 2, 3, 4, 5, 6])
 
             def forward(self, x):
@@ -799,7 +799,7 @@
     def test_freeze_module_with_list(self):
         class FreezeMe(nn.Module):
             def __init__(self):
-                super(FreezeMe, self).__init__()
+                super().__init__()
                 self.a = [torch.tensor([1, 2, 3, 4, 5, 6])]
 
             def forward(self, x):
@@ -820,7 +820,7 @@
     def test_freeze_module_with_aliased_tensor_attr(self):
         class FreezeMe(nn.Module):
             def __init__(self):
-                super(FreezeMe, self).__init__()
+                super().__init__()
                 self.a = torch.tensor([1, 2, 3, 4, 5, 6])
                 self.b = self.a.view(2, 3)
 
@@ -841,7 +841,7 @@
     def test_freeze_module_with_aliased_tensor_attr2(self):
         class FreezeMe(nn.Module):
             def __init__(self):
-                super(FreezeMe, self).__init__()
+                super().__init__()
                 self.a = torch.tensor([1, 2, 3, 4, 5, 6])
                 self.b = {"layer" : ([self.a.view(2, 3), torch.tensor([10])], 20)}
                 self.c = ([self.a.view(2, 3), torch.tensor([10])], 20)
@@ -862,7 +862,7 @@
     def test_freeze_module_with_aliased_tensor_attr3(self):
         class FreezeMe(nn.Module):
             def __init__(self):
-                super(FreezeMe, self).__init__()
+                super().__init__()
                 self.a = torch.tensor([1, 2, 3, 4, 5, 6])
                 self.b = [self.a, torch.tensor([10])]
 
@@ -885,7 +885,7 @@
     def test_freeze_module_with_aliased_tensor_attr4(self):
         class FreezeMe(nn.Module):
             def __init__(self):
-                super(FreezeMe, self).__init__()
+                super().__init__()
                 self.a = torch.tensor([1, 2, 3, 4, 5, 6])
                 self.b = [self.a, torch.tensor([10])]
 
@@ -907,7 +907,7 @@
 
         class FreezeMe(nn.Module):
             def __init__(self):
-                super(FreezeMe, self).__init__()
+                super().__init__()
                 self.b = [a.view(3, 2), torch.tensor([10])]
                 self.c = (20, a.view(2, 3))
 
@@ -927,7 +927,7 @@
     def test_freeze_module_with_aliased_attr(self):
         class FreezeMe(nn.Module):
             def __init__(self):
-                super(FreezeMe, self).__init__()
+                super().__init__()
                 self.a = [1, 2, 3, 4, 5, 6]
                 self.b = self.a
                 self.c = (self.a, 10)
@@ -954,7 +954,7 @@
     def test_freeze_module_with_aliased_attr2(self):
         class FreezeMe(nn.Module):
             def __init__(self):
-                super(FreezeMe, self).__init__()
+                super().__init__()
                 self.a = [1, 2, 3, 4, 5, 6]
                 self.b = ([11], [10])
 
@@ -978,7 +978,7 @@
     def test_freeze_module_with_aliased_attr3(self):
         class FreezeMe(nn.Module):
             def __init__(self):
-                super(FreezeMe, self).__init__()
+                super().__init__()
                 self.a = [1, 2, 3, 4, 5, 6]
                 self.b = ([11], [10])
 
@@ -1002,7 +1002,7 @@
     def test_freeze_module_return_self(self):
         class FreezeMe(nn.Module):
             def __init__(self):
-                super(FreezeMe, self).__init__()
+                super().__init__()
                 self.a = torch.tensor([1., 2., 3.])
 
             def forward(self, x):
@@ -1023,7 +1023,7 @@
 
         class Mod(nn.Module):
             def __init__(self):
-                super(Mod, self).__init__()
+                super().__init__()
                 self.obj = Obj(2, 3)
 
             def forward(self, i: int):
@@ -1046,7 +1046,7 @@
 
         class FreezeMe(nn.Module):
             def __init__(self):
-                super(FreezeMe, self).__init__()
+                super().__init__()
                 self.conv1 = nn.Conv2d(1, 32, 3, 1)
 
             def forward(self, x):
@@ -1062,7 +1062,7 @@
 
         class FreezeMe(nn.Module):
             def __init__(self):
-                super(FreezeMe, self).__init__()
+                super().__init__()
                 self.lin = nn.Linear(10, 1)
 
             @torch.jit.export
@@ -1081,7 +1081,7 @@
 
         class FreezeMe(nn.Module):
             def __init__(self):
-                super(FreezeMe, self).__init__()
+                super().__init__()
                 self.lin = nn.Linear(10, 1)
 
             @torch.jit.export
@@ -1099,7 +1099,7 @@
     def test_freeze_module_in_training_mode(self):
         class Net(nn.Module):
             def __init__(self):
-                super(Net, self).__init__()
+                super().__init__()
                 self.conv1 = nn.Conv2d(1, 32, 3, 1)
                 self.conv2 = nn.Conv2d(32, 64, 3, 1)
                 self.dropout1 = nn.Dropout2d(0.25)
@@ -1243,7 +1243,7 @@
     def test_freeze_module_with_user_preserved_attr(self):
         class Module(nn.Module):
             def __init__(self):
-                super(Module, self).__init__()
+                super().__init__()
                 self.a = torch.tensor([1.1])
                 self.b = torch.tensor([2.2])
 
@@ -1260,7 +1260,7 @@
     def test_freeze_module_with_user_preserved_method(self):
         class Module(nn.Module):
             def __init__(self):
-                super(Module, self).__init__()
+                super().__init__()
                 self.a = torch.tensor([1.1])
                 self.b = torch.tensor([2.2])
 
@@ -1291,7 +1291,7 @@
     def test_freeze_module_with_user_preserved_method2(self):
         class Module(nn.Module):
             def __init__(self):
-                super(Module, self).__init__()
+                super().__init__()
                 self.a = torch.tensor([1.1])
                 self.b = torch.tensor([2.2])
 
@@ -1313,7 +1313,7 @@
     def test_freeze_module_with_user_preserved_attribute_on_submodule(self):
         class SubModule(nn.Module):
             def __init__(self):
-                super(SubModule, self).__init__()
+                super().__init__()
                 self.a = 1
                 self.b = 2
 
@@ -1322,7 +1322,7 @@
 
         class Module(nn.Module):
             def __init__(self):
-                super(Module, self).__init__()
+                super().__init__()
                 self.sub1 = SubModule()
                 self.sub2 = SubModule()
 
@@ -1347,7 +1347,7 @@
     def test_freeze_module_with_user_preserved_attribute_on_unused_submodule(self):
         class SubModule(nn.Module):
             def __init__(self):
-                super(SubModule, self).__init__()
+                super().__init__()
                 self.a = 1
                 self.b = 2
 
@@ -1360,7 +1360,7 @@
 
         class Module(nn.Module):
             def __init__(self):
-                super(Module, self).__init__()
+                super().__init__()
                 self.sub = SubModule()
 
             def forward(self):
@@ -1377,9 +1377,6 @@
 
     def test_freeze_module_with_user_preserved_method_on_submodule(self):
         class SubModule(nn.Module):
-            def __init__(self):
-                super(SubModule, self).__init__()
-
             def forward(self, x):
                 return self.method_a(x) + self.method_b(x)
 
@@ -1391,7 +1388,7 @@
 
         class Module(nn.Module):
             def __init__(self):
-                super(Module, self).__init__()
+                super().__init__()
                 self.sub = SubModule()
 
             def forward(self, x):
@@ -1409,7 +1406,7 @@
     def test_module_with_shared_type_instances(self):
         class Child(nn.Module):
             def __init__(self):
-                super(Child, self).__init__()
+                super().__init__()
                 self.conv1 = nn.Conv2d(1, 1, 1).to(dtype=torch.float32)
 
             def forward(self, x):
@@ -1418,7 +1415,7 @@
 
         class Parent(nn.Module):
             def __init__(self):
-                super(Parent, self).__init__()
+                super().__init__()
                 self.quant = torch.ao.quantization.QuantStub()
                 self.conv1 = nn.Conv2d(1, 1, 1).to(dtype=torch.float32)
                 self.child = Child()
@@ -1465,7 +1462,7 @@
 
         class Mod(nn.Module):
             def __init__(self):
-                super(Mod, self).__init__()
+                super().__init__()
                 self.mod1 = ValHolder(1)
                 self.mod2 = ValHolder(2)
 
@@ -1536,7 +1533,7 @@
 
         class ImplementsInterface(torch.nn.Module):
             def __init__(self):
-                super(ImplementsInterface, self).__init__()
+                super().__init__()
                 self.sum = torch.zeros((2, 2))
 
             def forward(self, inp: torch.Tensor) -> torch.Tensor:
@@ -1612,7 +1609,7 @@
 
         class InnerImpl(torch.nn.Module):
             def __init__(self):
-                super(InnerImpl, self).__init__()
+                super().__init__()
                 self.x = torch.ones((2, 2))
 
             def forward(self, inp):
@@ -1622,7 +1619,7 @@
             inner_impl: InnerInterface
 
             def __init__(self):
-                super(OuterImpl, self).__init__()
+                super().__init__()
                 self.inner_impl = InnerImpl()
 
             def forward(self, inp):
@@ -1632,7 +1629,7 @@
             outer_impl: OuterInterface
 
             def __init__(self):
-                super(WrapperModule, self).__init__()
+                super().__init__()
                 self.outer_impl = OuterImpl()
 
             def forward(self, inp):
@@ -1662,7 +1659,7 @@
 
         class InnerImpl1(torch.nn.Module):
             def __init__(self):
-                super(InnerImpl1, self).__init__()
+                super().__init__()
                 self.x = torch.ones((2, 2))
 
             def forward(self, inp):
@@ -1671,7 +1668,7 @@
 
         class InnerImpl2(torch.nn.Module):
             def __init__(self):
-                super(InnerImpl2, self).__init__()
+                super().__init__()
                 self.x = torch.ones((2, 2)) * 2
 
             def forward(self, inp):
@@ -1681,7 +1678,7 @@
             inner_impl: InnerInterface
 
             def __init__(self):
-                super(OuterImpl, self).__init__()
+                super().__init__()
                 self.inner_impl = InnerImpl1()
                 self.impl1 = InnerImpl1()
                 self.impl2 = InnerImpl1()
@@ -1699,7 +1696,7 @@
             outer_impl: OuterInterface
 
             def __init__(self):
-                super(WrapperModule, self).__init__()
+                super().__init__()
                 self.outer_impl = OuterImpl()
 
             def forward(self, inp):
@@ -1730,7 +1727,7 @@
             interface_impl: MyInterface
 
             def __init__(self):
-                super(WrapperModule1, self).__init__()
+                super().__init__()
                 self.interface_impl = Impl1()
                 self.impl1 = Impl1()
                 self.impl2 = Impl2()
@@ -1752,7 +1749,7 @@
             interface_impl: MyInterface
 
             def __init__(self):
-                super(WrapperModule2, self).__init__()
+                super().__init__()
                 self.interface_impl = Impl1()
                 self.impl1 = Impl1()
                 self.impl2 = Impl2()
@@ -1795,7 +1792,7 @@
 
         class InnerImpl(torch.nn.Module):
             def __init__(self):
-                super(InnerImpl, self).__init__()
+                super().__init__()
                 self.x = torch.ones((2, 2))
 
             def forward(self, inp):
@@ -1805,7 +1802,7 @@
             impl: InnerInterface
 
             def __init__(self):
-                super(OuterImpl, self).__init__()
+                super().__init__()
                 self.impl = InnerImpl()
                 self.x = torch.ones((2, 2)) * 5
 
@@ -1819,7 +1816,7 @@
             impl: OuterInterface
 
             def __init__(self):
-                super(WrapperModule, self).__init__()
+                super().__init__()
                 self.impl = OuterImpl()
 
             def forward(self, inp):
@@ -1839,7 +1836,7 @@
     def test_freeze_non_interface_module_swap(self):
         class InnerModule(torch.nn.Module):
             def __init__(self, x):
-                super(InnerModule, self).__init__()
+                super().__init__()
                 self.x = x
 
             def forward(self, inp: torch.Tensor) -> torch.Tensor:
@@ -1928,7 +1925,7 @@
             }
 
             def __init__(self):
-                super(MyModule, self).__init__()
+                super().__init__()
                 self.box_coder = BoxCoder(50.)
 
             def forward(self, input):
@@ -1944,9 +1941,6 @@
 
     def test_freeze_module_with_tupleoutput_submodule(self):
         class SubModule(nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 return (x + 1, x + 2)
 
@@ -2015,7 +2009,7 @@
         for use_bias, modules, tracing, track_stats in product(conv_bias, module_pairs, use_tracing, bn_running_stats):
             class ConvBN(torch.nn.Module):
                 def __init__(self, in_channels, out_channels, **kwargs):
-                    super(ConvBN, self).__init__()
+                    super().__init__()
                     self.conv = modules[0](in_channels, out_channels, bias=use_bias, **kwargs)
                     self.bn = modules[1](out_channels, eps=0.001, track_running_stats=track_stats)
 
@@ -2060,7 +2054,7 @@
     def test_conv_bn_folding_not_forward(self):
         class ConvBN(torch.nn.Module):
             def __init__(self, in_channels, out_channels, **kwargs):
-                super(ConvBN, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(in_channels, out_channels, bias=True, **kwargs)
                 self.bn = torch.nn.BatchNorm2d(out_channels, eps=0.001)
                 self.amt = 3.2
@@ -2092,7 +2086,7 @@
 
         class ConvBN(torch.nn.Module):
             def __init__(self, in_channels, out_channels, **kwargs):
-                super(ConvBN, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(in_channels, out_channels, bias=False, dtype=torch.half, **kwargs)
                 self.bn = torch.nn.BatchNorm2d(out_channels, eps=0.001, dtype=torch.float)
 
@@ -2123,7 +2117,7 @@
                 __constants__ = ['use_scalar']
 
                 def __init__(self, in_channels, out_channels, tensor=None, **kwargs):
-                    super(ConvOp, self).__init__()
+                    super().__init__()
                     self.conv = module(in_channels, out_channels, bias=use_bias, **kwargs)
                     self.conv2 = module(in_channels, out_channels, bias=use_bias, **kwargs)
                     self.use_scalar = scalar
@@ -2202,7 +2196,7 @@
         class Conv_Mul_Add_Bn(nn.Module):
 
             def __init__(self, in_channels, out_channels, **kwargs):
-                super(Conv_Mul_Add_Bn, self).__init__()
+                super().__init__()
                 self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
                 self.bn = nn.BatchNorm2d(out_channels, eps=0.001)
                 self.tensor1 = torch.tensor(2.2)
@@ -2231,7 +2225,7 @@
         for modules, tracing, track_stats in product(module_pairs, use_tracing, bn_running_stats):
             class LinearBN(torch.nn.Module):
                 def __init__(self, in_features, out_features):
-                    super(LinearBN, self).__init__()
+                    super().__init__()
                     self.linear = modules[0](in_features, out_features)
                     self.bn = modules[1](out_features, eps=0.001, track_running_stats=track_stats)
 
@@ -2286,7 +2280,7 @@
         for modules, tracing, track_stats in product(module_pairs, use_tracing, bn_running_stats):
             class LinearBN(torch.nn.Module):
                 def __init__(self, in_features, out_features):
-                    super(LinearBN, self).__init__()
+                    super().__init__()
                     self.linear = modules[0](in_features, out_features, bias=False, dtype=torch.half)
                     self.bn = modules[1](out_features, eps=0.001, dtype=torch.float)
 
@@ -2331,7 +2325,7 @@
         for w1_dim, w2_dim in out_dimms:
             class ModMultLinear(nn.Module):
                 def __init__(self, w1_dim, w2_dim):
-                    super(ModMultLinear, self).__init__()
+                    super().__init__()
                     self.w1 = nn.Parameter(torch.rand([w1_dim, 5]))
                     self.b1 = nn.Parameter(torch.rand([w1_dim]))
                     self.w2 = nn.Parameter(torch.rand([w2_dim, 5]))
@@ -2355,7 +2349,7 @@
         """
         class ModMultLinear(nn.Module):
             def __init__(self):
-                super(ModMultLinear, self).__init__()
+                super().__init__()
                 w1_dim = 5
                 w2_dim = 10
                 self.w1 = nn.Parameter(torch.rand([w1_dim, 5]))
@@ -2384,7 +2378,7 @@
         # Freezing requires that the graph be a module
         class ModMultLinear(nn.Module):
             def __init__(self, w1_dim, w2_dim):
-                super(ModMultLinear, self).__init__()
+                super().__init__()
                 self.w1 = nn.Parameter(torch.rand([w1_dim, 5]))
                 self.b1 = nn.Parameter(torch.rand([w1_dim]))
                 self.w2 = nn.Parameter(torch.rand([w2_dim, 5]))
@@ -2404,7 +2398,7 @@
     def test_linear_multiple_blocks(self):
         class ModMultLinear(nn.Module):
             def __init__(self, w1_dim, w2_dim):
-                super(ModMultLinear, self).__init__()
+                super().__init__()
                 self.w1 = nn.Parameter(torch.rand([w1_dim, 5]))
                 self.b1 = nn.Parameter(torch.rand([w1_dim]))
                 self.w2 = nn.Parameter(torch.rand([w2_dim, 5]))
@@ -2472,7 +2466,7 @@
     def test_freeze_remove_dropout(self):
         class Net(nn.Module):
             def __init__(self):
-                super(Net, self).__init__()
+                super().__init__()
                 self.dropout = nn.Dropout(0.5)
 
             def forward(self, x):
@@ -2493,7 +2487,7 @@
     def test_freeze_remove_feature_dropout(self):
         class Net(nn.Module):
             def __init__(self):
-                super(Net, self).__init__()
+                super().__init__()
                 self.dropout = nn.Dropout2d(0.5)
 
             def forward(self, x):
@@ -2554,7 +2548,7 @@
     def test_linear_transpose(self):
         class ModLinear(torch.nn.Module):
             def __init__(self):
-                super(ModLinear, self).__init__()
+                super().__init__()
                 self.bias = torch.nn.Parameter(torch.rand(30))
                 self.weight = torch.nn.Parameter(torch.rand([30, 20]))
 
@@ -2568,7 +2562,7 @@
     def test_linear_non_constant_weight(self):
         class ModLinear(torch.nn.Module):
             def __init__(self):
-                super(ModLinear, self).__init__()
+                super().__init__()
                 self.bias = torch.nn.Parameter(torch.rand(30))
 
             def forward(self, x, weight):
@@ -2704,7 +2698,7 @@
             for use_bias, conv, add_z, tracing in product(conv_bias, conv_ops, add_z, use_tracing):
                 class Net(nn.Module):
                     def __init__(self, in_channels, out_channels, **kwargs):
-                        super(Net, self).__init__()
+                        super().__init__()
                         self.conv = conv(in_channels, out_channels, bias=use_bias, **kwargs)
                         self.relu = nn.ReLU(inplace=True)
                         self.add_z = add_z
@@ -2748,7 +2742,7 @@
         with set_default_dtype(torch.float):
             class Net(nn.Module):
                 def __init__(self, in_channels, out_channels, **kwargs):
-                    super(Net, self).__init__()
+                    super().__init__()
                     self.conv = nn.Conv2d(in_channels, out_channels, bias=None, **kwargs)
                     self.relu = nn.ReLU(inplace=True)
 
@@ -2883,7 +2877,7 @@
         with set_default_dtype(torch.float):
             class Clamp(torch.nn.Module):
                 def __init__(self, min_val, max_val, **kwargs):
-                    super(Clamp, self).__init__()
+                    super().__init__()
                     self.min_val = min_val
                     self.max_val = max_val
 
@@ -2965,9 +2959,6 @@
 
     def test_remove_detach(self):
         class Mod(nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 y = x.detach()
                 return y * y
@@ -2980,9 +2971,6 @@
 
     def test_remove_detach_not_applied(self):
         class Mod(nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 y = x.detach()
                 return x is y
diff --git a/test/jit/test_graph_rewrite_passes.py b/test/jit/test_graph_rewrite_passes.py
index 95bb564..3ecdba6 100644
--- a/test/jit/test_graph_rewrite_passes.py
+++ b/test/jit/test_graph_rewrite_passes.py
@@ -10,7 +10,7 @@
     def test_fuse_linear(self):
         class FunctionalLinear(torch.nn.Module):
             def __init__(self, weight, bias):
-                super(FunctionalLinear, self).__init__()
+                super().__init__()
                 self.weight = weight
                 self.bias = bias
 
@@ -44,7 +44,7 @@
         # check matmuls are not fused
         class Matmul(torch.nn.Module):
             def __init__(self, weight):
-                super(Matmul, self).__init__()
+                super().__init__()
                 self.weight = weight
 
             def forward(self, x):
diff --git a/test/jit/test_ignore_context_manager.py b/test/jit/test_ignore_context_manager.py
index c58c6c5..4d0660e 100644
--- a/test/jit/test_ignore_context_manager.py
+++ b/test/jit/test_ignore_context_manager.py
@@ -21,9 +21,6 @@
     @unittest.skipUnless(_IS_ASTUNPARSE_INSTALLED, "astunparse package is required")
     def test_with_ignore_context_manager_with_inp_out(self):
         class A(torch.nn.Module):
-            def __init__(self):
-                super(A, self).__init__()
-
             def forward(self):
                 a: int = 4
                 b: int = 5
@@ -40,9 +37,6 @@
         self.assertEqual(s(), 20)
 
         class B(torch.nn.Module):
-            def __init__(self):
-                super(B, self).__init__()
-
             def forward(self):
                 a: int = 4
                 b: int = 5
@@ -57,9 +51,6 @@
         self.assertEqual(s(), model())
 
         class C(torch.nn.Module):
-            def __init__(self):
-                super(C, self).__init__()
-
             def forward(self):
                 a: int = 4
                 b: int = 5
@@ -75,9 +66,6 @@
     @unittest.skipUnless(_IS_ASTUNPARSE_INSTALLED, "astunparse package is required")
     def test_with_ignore_context_manager_with_just_inp(self):
         class A(torch.nn.Module):
-            def __init__(self):
-                super(A, self).__init__()
-
             def forward(self):
                 a: int = 4
                 b: int = 5
@@ -92,9 +80,6 @@
     @unittest.skipUnless(_IS_ASTUNPARSE_INSTALLED, "astunparse package is required")
     def test_with_ignore_context_manager_with_just_out(self):
         class A(torch.nn.Module):
-            def __init__(self):
-                super(A, self).__init__()
-
             def forward(self):
                 with torch.jit._IgnoreContextManager(c="out:List[int]"):
                     c = [2 for i in range(7) if i > 2]
diff --git a/test/jit/test_list_dict.py b/test/jit/test_list_dict.py
index 3fdce7e..f30d7f3 100644
--- a/test/jit/test_list_dict.py
+++ b/test/jit/test_list_dict.py
@@ -1976,7 +1976,7 @@
 
         class MyModule(types.ModuleType):
             def __init__(self):
-                super(MyModule, self).__init__('MyModule')
+                super().__init__('MyModule')
 
             def __getattr__(self, attr):
                 return TheType
diff --git a/test/jit/test_misc.py b/test/jit/test_misc.py
index d4bca3d..16e4d56 100644
--- a/test/jit/test_misc.py
+++ b/test/jit/test_misc.py
@@ -210,7 +210,7 @@
             sub : OneTwoModule
 
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.sub = BarMod()
 
             def forward(self, x: torch.Tensor) -> torch.Tensor:
diff --git a/test/jit/test_models.py b/test/jit/test_models.py
index 2f67e27..bc4b9d6 100644
--- a/test/jit/test_models.py
+++ b/test/jit/test_models.py
@@ -31,7 +31,7 @@
 
 class MnistNet(nn.Module):
     def __init__(self):
-        super(MnistNet, self).__init__()
+        super().__init__()
         self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
         self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
         self.conv2_drop = nn.Dropout2d()
@@ -52,7 +52,7 @@
     def _test_dcgan_models(self, device, check_export_import=True):
         class DCGANGenerator(nn.Module):
             def __init__(self, nz, ngf, nc):
-                super(DCGANGenerator, self).__init__()
+                super().__init__()
                 self.main = nn.Sequential(
                     # input is Z, going into a convolution
                     nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False),
@@ -81,7 +81,7 @@
 
         class DCGANDiscriminator(nn.Module):
             def __init__(self, nc, ndf):
-                super(DCGANDiscriminator, self).__init__()
+                super().__init__()
                 self.main = nn.Sequential(
                     # input is (nc) x 64 x 64
                     nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
@@ -126,7 +126,7 @@
     def _test_neural_style(self, device, check_export_import=True):
         class TransformerNet(torch.nn.Module):
             def __init__(self):
-                super(TransformerNet, self).__init__()
+                super().__init__()
                 # Initial convolution layers
                 self.conv1 = ConvLayer(3, 32, kernel_size=9, stride=1)
                 self.in1 = torch.nn.InstanceNorm2d(32, affine=True)
@@ -165,7 +165,7 @@
 
         class ConvLayer(torch.nn.Module):
             def __init__(self, in_channels, out_channels, kernel_size, stride):
-                super(ConvLayer, self).__init__()
+                super().__init__()
                 reflection_padding = kernel_size // 2
                 self.reflection_pad = torch.nn.ReflectionPad2d(reflection_padding)
                 self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride)
@@ -182,7 +182,7 @@
             """
 
             def __init__(self, channels):
-                super(ResidualBlock, self).__init__()
+                super().__init__()
                 self.conv1 = ConvLayer(channels, channels, kernel_size=3, stride=1)
                 self.in1 = torch.nn.InstanceNorm2d(channels, affine=True)
                 self.conv2 = ConvLayer(channels, channels, kernel_size=3, stride=1)
@@ -204,7 +204,7 @@
             """
 
             def __init__(self, in_channels, out_channels, kernel_size, stride, upsample=None):
-                super(UpsampleConvLayer, self).__init__()
+                super().__init__()
                 self.upsample = upsample
                 if upsample:
                     self.upsample_layer = torch.nn.Upsample(mode='nearest', scale_factor=upsample)
@@ -276,7 +276,7 @@
     def _test_reinforcement_learning(self, device, test_export_import=True):
         class Policy(nn.Module):
             def __init__(self):
-                super(Policy, self).__init__()
+                super().__init__()
                 self.affine1 = nn.Linear(4, 128)
                 self.affine2 = nn.Linear(128, 2)
 
@@ -303,9 +303,9 @@
 
             def forward(self, input):
                 if len(input.size()) <= 2:
-                    return super(Bottle, self).forward(input)
+                    return super().forward(input)
                 size = input.size()[:2]
-                out = super(Bottle, self).forward(input.view(size[0] * size[1], -1))
+                out = super().forward(input.view(size[0] * size[1], -1))
                 return out.view(size[0], size[1], -1)
 
         class Linear(Bottle, nn.Linear):
@@ -314,7 +314,7 @@
         class Encoder(nn.Module):
 
             def __init__(self, config):
-                super(Encoder, self).__init__()
+                super().__init__()
                 self.config = config
                 input_size = config.d_proj if config.projection else config.d_embed
                 dropout = 0 if config.n_layers == 1 else config.dp_ratio
@@ -332,7 +332,7 @@
         class SNLIClassifier(nn.Module):
 
             def __init__(self, config):
-                super(SNLIClassifier, self).__init__()
+                super().__init__()
                 self.config = config
                 self.embed = nn.Embedding(config.n_embed, config.d_embed)
                 self.projection = Linear(config.d_embed, config.d_proj)
@@ -416,7 +416,7 @@
         class Net(nn.Module):
 
             def __init__(self, upscale_factor):
-                super(Net, self).__init__()
+                super().__init__()
 
                 self.relu = nn.ReLU()
                 self.conv1 = nn.Conv2d(1, 64, (5, 5), (1, 1), (2, 2))
@@ -449,7 +449,7 @@
     def test_time_sequence_prediction(self):
         class Sequence(torch.jit.ScriptModule):
             def __init__(self):
-                super(Sequence, self).__init__()
+                super().__init__()
                 self.lstm1 = nn.LSTMCell(1, 51)
                 self.lstm2 = nn.LSTMCell(51, 51)
                 self.linear = nn.Linear(51, 1)
@@ -484,7 +484,7 @@
 
         class Traced(nn.Module):
             def __init__(self):
-                super(Traced, self).__init__()
+                super().__init__()
                 self.seq = Sequence()
 
             def forward(self, input):
@@ -500,7 +500,7 @@
     def _test_vae(self, device, check_export_import=True, quantized=False):
         class VAE(nn.Module):
             def __init__(self):
-                super(VAE, self).__init__()
+                super().__init__()
 
                 self.fc1 = nn.Linear(784, 400)
                 self.fc21 = nn.Linear(400, 20)
@@ -594,7 +594,7 @@
             __constants__ = ['downsample']
 
             def __init__(self, inplanes, planes, stride=1, downsample=None):
-                super(BasicBlock, self).__init__()
+                super().__init__()
                 self.conv1 = conv3x3(inplanes, planes, stride)
                 self.bn1 = nn.BatchNorm2d(planes)
                 self.relu = nn.ReLU(inplace=True)
@@ -626,7 +626,7 @@
             __constants__ = ['layer1', 'layer2', 'layer3', 'layer4']
 
             def __init__(self, block, layers, num_classes=1000):
-                super(ResNet, self).__init__()
+                super().__init__()
                 self.inplanes = 64
                 self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                                        bias=False)
diff --git a/test/jit/test_module_containers.py b/test/jit/test_module_containers.py
index f253c24..31b6030c9 100644
--- a/test/jit/test_module_containers.py
+++ b/test/jit/test_module_containers.py
@@ -21,22 +21,16 @@
 class TestModuleContainers(JitTestCase):
     def test_sequential_intermediary_types(self):
         class A(torch.nn.Module):
-            def __init__(self):
-                super(A, self).__init__()
-
             def forward(self, x):
                 return x + 3
 
         class B(torch.nn.Module):
-            def __init__(self):
-                super(B, self).__init__()
-
             def forward(self, x):
                 return {"1": x}
 
         class C(torch.nn.Module):
             def __init__(self):
-                super(C, self).__init__()
+                super().__init__()
                 self.foo = torch.nn.Sequential(A(), B())
 
             def forward(self, x):
@@ -59,7 +53,7 @@
 
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 modules = OrderedDict([
                     ('one', Inner()),
                     ('two', Inner2()),
@@ -90,9 +84,6 @@
                 return x, names
 
         class M2(M):
-            def __init__(self):
-                super(M2, self).__init__()
-
             def forward(self, x, skip_name):
                 # type: (Tensor, str)
                 names = torch.jit.annotate(List[str], [])
@@ -137,8 +128,7 @@
 
         class CustomSequential(nn.Sequential):
             def __init__(self):
-                super(CustomSequential, self).__init__(
-                    nn.ReLU(), Inner())
+                super().__init__(nn.ReLU(), Inner())
 
             def forward(self, x):
                 x = x + 3
@@ -150,8 +140,7 @@
 
         class CustomModuleList(nn.ModuleList):
             def __init__(self):
-                super(CustomModuleList, self).__init__(
-                    [nn.ReLU(), Inner()])
+                super().__init__([nn.ReLU(), Inner()])
 
             def forward(self, x):
                 x = x + 3
@@ -163,7 +152,7 @@
 
         class CustomModuleDict(nn.ModuleDict):
             def __init__(self):
-                super(CustomModuleDict, self).__init__(
+                super().__init__(
                     OrderedDict([
                         ('one', Inner()),
                         ('two', nn.ReLU()),
@@ -183,7 +172,7 @@
     def test_script_module_list_sequential(self):
         class M(torch.jit.ScriptModule):
             def __init__(self, mod_list):
-                super(M, self).__init__()
+                super().__init__()
                 self.mods = mod_list
 
             @torch.jit.script_method
@@ -199,7 +188,7 @@
     def test_script_modulelist_index(self):
         class Sub(torch.nn.Module):
             def __init__(self, i):
-                super(Sub, self).__init__()
+                super().__init__()
                 self.i = i
 
             def forward(self, thing):
@@ -207,7 +196,7 @@
 
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.mods = nn.ModuleList([Sub(i) for i in range(10)])
 
             def forward(self, v):
@@ -221,7 +210,7 @@
 
         class MForward(torch.nn.Module):
             def __init__(self):
-                super(MForward, self).__init__()
+                super().__init__()
                 self.mods = nn.ModuleList([Sub(i) for i in range(10)])
 
             def forward(self, v):
@@ -233,9 +222,6 @@
         self.checkModule(MForward(), (torch.tensor(1),))
 
         class M2(M):
-            def __init__(self):
-                super(M2, self).__init__()
-
             def forward(self, v):
                 return self.mods[-11].forward(v)
 
@@ -243,9 +229,6 @@
             torch.jit.script(M2())
 
         class M3(M):
-            def __init__(self):
-                super(M3, self).__init__()
-
             def forward(self, v):
                 i = 3
                 return self.mods[i].forward(v)
@@ -255,8 +238,7 @@
 
     def test_module_interface_special_methods(self):
         class CustomModuleInterface(torch.nn.Module):
-            def __init__(self):
-                super(CustomModuleInterface, self).__init__()
+            pass
 
         class CustomModuleList(CustomModuleInterface, torch.nn.ModuleList):
             def __init__(self, modules=None):
@@ -275,7 +257,7 @@
 
         class MyModule(torch.nn.Module):
             def __init__(self):
-                super(MyModule, self).__init__()
+                super().__init__()
                 # work around aliasing issue for 'is' operator by scripting ReLU up front
                 self.submod = torch.jit.script(torch.nn.ReLU())
                 self.modulelist = CustomModuleList([self.submod])
@@ -321,8 +303,7 @@
 
     def test_special_method_with_override(self):
         class CustomModuleInterface(torch.nn.Module):
-            def __init__(self):
-                super(CustomModuleInterface, self).__init__()
+            pass
 
         class CustomModuleList(CustomModuleInterface, torch.nn.ModuleList):
             def __init__(self, modules=None):
@@ -337,7 +318,7 @@
 
         class MyModule(torch.nn.Module):
             def __init__(self):
-                super(MyModule, self).__init__()
+                super().__init__()
                 # work around aliasing issue for 'is' operator by scripting ReLU up front
                 self.submod = torch.jit.script(torch.nn.ReLU())
                 self.modulelist = CustomModuleList([self.submod])
@@ -353,7 +334,7 @@
     def test_moduledict_getitem(self):
         class MyModule(torch.nn.Module):
             def __init__(self):
-                super(MyModule, self).__init__()
+                super().__init__()
                 self.relu = torch.jit.script(torch.nn.ReLU())
                 self.tanh = torch.jit.script(torch.nn.Tanh())
                 self.moduledict = torch.nn.ModuleDict({"relu": self.relu,
@@ -370,7 +351,7 @@
     def test_moduledict_keyerror(self):
         class BadModule(torch.nn.Module):
             def __init__(self):
-                super(BadModule, self).__init__()
+                super().__init__()
                 self.moduledict = torch.nn.ModuleDict({"foo": None,
                                                        "bar": None})
 
@@ -383,7 +364,7 @@
 
         class AnotherBadModule(torch.nn.Module):
             def __init__(self):
-                super(AnotherBadModule, self).__init__()
+                super().__init__()
                 self.moduledict = torch.nn.ModuleDict({"foo": None,
                                                        "bar": None})
 
@@ -416,8 +397,7 @@
 
     def test_empty_dict_override_contains(self):
         class CustomModuleInterface(torch.nn.Module):
-            def __init__(self):
-                super(CustomModuleInterface, self).__init__()
+            pass
 
         class CustomModuleDict(CustomModuleInterface, torch.nn.ModuleDict):
             def __init__(self, modules=None):
@@ -426,7 +406,7 @@
 
         class MyModule(torch.nn.Module):
             def __init__(self):
-                super(MyModule, self).__init__()
+                super().__init__()
                 # work around aliasing issue for 'is' operator by scripting ReLU up front
                 self.submod = torch.jit.script(torch.nn.ReLU())
                 self.moduledict = CustomModuleDict()
diff --git a/test/jit/test_module_interface.py b/test/jit/test_module_interface.py
index fdfe262..f9e9aea 100644
--- a/test/jit/test_module_interface.py
+++ b/test/jit/test_module_interface.py
@@ -18,9 +18,6 @@
                        "instead.")
 
 class OrigModule(nn.Module):
-    def __init__(self):
-        super(OrigModule, self).__init__()
-
     def one(self, inp1: Tensor, inp2: Tensor) -> Tensor:
         return inp1 + inp2 + 1
 
@@ -31,9 +28,6 @@
         return input + self.one(input, input) + 1
 
 class NewModule(nn.Module):
-    def __init__(self):
-        super(NewModule, self).__init__()
-
     def one(self, inp1: Tensor, inp2: Tensor) -> Tensor:
         return inp1 * inp2 + 1
 
@@ -51,7 +45,7 @@
             proxy_mod : ModuleInterface
 
             def __init__(self):
-                super(TestNotModuleInterfaceCall, self).__init__()
+                super().__init__()
                 self.proxy_mod = OrigModule()
 
             def forward(self, input: Tensor) -> Tensor:
@@ -144,7 +138,7 @@
             proxy_mod : TestInterface
 
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.proxy_mod = OrigModule()
 
             def forward(self, input):
@@ -260,7 +254,7 @@
             proxy_mod : ModuleInterface
 
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.proxy_mod = OrigModule()
 
             def forward(self, input: Tensor) -> Tensor:
@@ -288,9 +282,6 @@
                 pass
 
         class NewModuleWrong(nn.Module):
-            def __init__(self):
-                super(NewModuleWrong, self).__init__()
-
             def forward(self, input: int) -> int:
                 return input + 1
 
@@ -298,7 +289,7 @@
             proxy_mod : ModuleInterface
 
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.proxy_mod = OrigModule()
 
             def forward(self, input: Tensor) -> Tensor:
@@ -322,16 +313,13 @@
             proxy_mod : ModuleInterface
 
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.proxy_mod = OrigModule()
 
             def forward(self, input: Tensor) -> Tensor:
                 return self.proxy_mod.forward(input)
 
         class NewModuleMethodNotLazyCompile(nn.Module):
-            def __init__(self):
-                super(NewModuleMethodNotLazyCompile, self).__init__()
-
             def one(self, inp1: Tensor, inp2: Tensor) -> Tensor:
                 return inp1 * inp2 + 1
 
@@ -345,9 +333,6 @@
             scripted_mod.proxy_mod = torch.jit.script(NewModuleMethodNotLazyCompile())
 
         class NewModuleMethodManualExport(nn.Module):
-            def __init__(self):
-                super(NewModuleMethodManualExport, self).__init__()
-
             @torch.jit.export
             def one(self, inp1: Tensor, inp2: Tensor) -> Tensor:
                 return inp1 * inp2 + 1
@@ -363,7 +348,7 @@
         # test module swapping with no module interface
         class TestNoModuleInterface(nn.Module):
             def __init__(self):
-                super(TestNoModuleInterface, self).__init__()
+                super().__init__()
                 self.proxy_mod = OrigModule()
 
             def forward(self, input: Tensor) -> Tensor:
@@ -388,9 +373,6 @@
                 pass
 
         class OrigScriptModule(torch.jit.ScriptModule):
-            def __init__(self):
-                super(OrigScriptModule, self).__init__()
-
             @torch.jit.script_method
             def one(self, inp1: Tensor, inp2: Tensor) -> Tensor:
                 return inp1 + inp2 + 1
@@ -400,9 +382,6 @@
                 return input + self.one(input, input) + 1
 
         class NewScriptModule(torch.jit.ScriptModule):
-            def __init__(self):
-                super(NewScriptModule, self).__init__()
-
             @torch.jit.script_method
             def one(self, inp1: Tensor, inp2: Tensor) -> Tensor:
                 return inp1 * inp2 + 1
@@ -415,7 +394,7 @@
             proxy_mod : ModuleInterface
 
             def __init__(self):
-                super(TestNNModuleWithScriptModule, self).__init__()
+                super().__init__()
                 self.proxy_mod = OrigScriptModule()
 
             def forward(self, input: Tensor) -> Tensor:
@@ -433,7 +412,7 @@
     def test_freeze_module_with_interface(self):
         class SubModule(torch.nn.Module):
             def __init__(self):
-                super(SubModule, self).__init__()
+                super().__init__()
                 self.b = 20
 
             def forward(self, x):
@@ -441,7 +420,7 @@
 
         class OrigMod(torch.nn.Module):
             def __init__(self):
-                super(OrigMod, self).__init__()
+                super().__init__()
                 self.a = 0
 
             def forward(self, x):
@@ -456,7 +435,7 @@
             proxy_mod : ModInterface
 
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.proxy_mod = OrigMod()
                 self.sub = SubModule()  # folded
 
@@ -476,7 +455,7 @@
     def test_freeze_module_with_setattr_in_interface(self):
         class SubModule(torch.nn.Module):
             def __init__(self):
-                super(SubModule, self).__init__()
+                super().__init__()
                 self.b = 20
 
             def forward(self, x):
@@ -489,7 +468,7 @@
 
         class OrigMod(torch.nn.Module):
             def __init__(self):
-                super(OrigMod, self).__init__()
+                super().__init__()
                 self.a = 0
 
             def forward(self, x):
@@ -504,7 +483,7 @@
             proxy_mod : ModInterface
 
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.proxy_mod = OrigMod()
                 self.sub = SubModule()
 
@@ -519,7 +498,7 @@
     def test_freeze_module_with_inplace_mutation_in_interface(self):
         class SubModule(torch.nn.Module):
             def __init__(self):
-                super(SubModule, self).__init__()
+                super().__init__()
                 self.b = torch.tensor([1.5])
 
             def forward(self, x):
@@ -532,7 +511,7 @@
 
         class OrigMod(torch.nn.Module):
             def __init__(self):
-                super(OrigMod, self).__init__()
+                super().__init__()
                 self.a = torch.tensor([0.5])
 
             def forward(self, x):
@@ -547,7 +526,7 @@
             proxy_mod : ModInterface
 
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.proxy_mod = OrigMod()
                 self.sub = SubModule()
 
@@ -565,7 +544,7 @@
     def test_freeze_module_with_mutated_interface(self):
         class SubModule(torch.nn.Module):
             def __init__(self):
-                super(SubModule, self).__init__()
+                super().__init__()
                 self.b = torch.tensor([1.5])
 
             def forward(self, x):
@@ -577,7 +556,7 @@
 
         class OrigMod(torch.nn.Module):
             def __init__(self):
-                super(OrigMod, self).__init__()
+                super().__init__()
                 self.a = torch.tensor([0.5])
 
             def forward(self, x):
@@ -592,7 +571,7 @@
             proxy_mod : ModInterface
 
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.proxy_mod = OrigMod()
                 self.sub = SubModule()
 
@@ -610,7 +589,7 @@
     def test_freeze_module_with_interface_and_fork(self):
         class SubModule(torch.nn.Module):
             def __init__(self):
-                super(SubModule, self).__init__()
+                super().__init__()
                 self.b = torch.tensor([1.5])
 
             def forward(self, x):
@@ -619,7 +598,7 @@
 
         class OrigMod(torch.nn.Module):
             def __init__(self):
-                super(OrigMod, self).__init__()
+                super().__init__()
                 self.a = torch.tensor([0.5])
 
             def forward(self, x):
@@ -634,7 +613,7 @@
             proxy_mod : ModInterface
 
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.proxy_mod = OrigMod()
                 self.sub = SubModule()
 
@@ -645,7 +624,7 @@
 
         class MainModule(torch.nn.Module):
             def __init__(self):
-                super(MainModule, self).__init__()
+                super().__init__()
                 self.test = TestModule()
 
             def forward(self, x):
@@ -668,7 +647,7 @@
             proxy_mod : ModuleInterface
 
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.proxy_mod = OrigModule()
 
             def forward(self, input):
diff --git a/test/jit/test_optimize_for_mobile_preserve_debug_info.py b/test/jit/test_optimize_for_mobile_preserve_debug_info.py
index a6527a3..78d3fae 100644
--- a/test/jit/test_optimize_for_mobile_preserve_debug_info.py
+++ b/test/jit/test_optimize_for_mobile_preserve_debug_info.py
@@ -40,7 +40,7 @@
     def test_replace_conv1d_with_conv2d(self):
         class TestConv1d(torch.nn.Module):
             def __init__(self, weight, bias):
-                super(TestConv1d, self).__init__()
+                super().__init__()
                 self.weight = weight
                 self.bias = bias
 
@@ -167,7 +167,7 @@
                 conv2d_weight,
                 conv2d_bias,
             ):
-                super(TestFuseActivationLinearConv2d, self).__init__()
+                super().__init__()
                 self.linear_weight = linear_weight
                 self.linear_bias = linear_bias
                 self.conv2d_weight = conv2d_weight
diff --git a/test/jit/test_pdt.py b/test/jit/test_pdt.py
index baab4c8..dd8c006 100644
--- a/test/jit/test_pdt.py
+++ b/test/jit/test_pdt.py
@@ -28,9 +28,6 @@
     """
     def test_nn_module(self):
         class TestPDTModel(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x) -> Any:
                 if isinstance(x, int):
                     return x + 1
@@ -49,9 +46,6 @@
 
     def test_nested_nn_module_class(self):
         class NestedPDTInner(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 if isinstance(x, int):
                     return x * 10
@@ -76,9 +70,6 @@
 
     def test_nested_nn_module_class_with_args(self):
         class NestedModulePDTInner(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x, y):
                 if isinstance(x, int):
                     return x * 10 + y
@@ -105,9 +96,6 @@
 
     def test_nested_function_in_forward(self):
         class NestedFunctionInForward(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 return self.fun(x) + 10
 
@@ -127,9 +115,6 @@
 
     def test_nn_module_with_export_function(self):
         class TestModelWithExport(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             @torch.jit.export
             def fn(self, x, y) -> Any:
                 assert not (isinstance(x, bool) and isinstance(y, bool))
diff --git a/test/jit/test_peephole.py b/test/jit/test_peephole.py
index 12f7a1f..e79fbf6 100644
--- a/test/jit/test_peephole.py
+++ b/test/jit/test_peephole.py
@@ -194,7 +194,7 @@
         for mod in modules:
             class ConvDim(torch.nn.Module):
                 def __init__(self):
-                    super(ConvDim, self).__init__()
+                    super().__init__()
                     self.conv = mod(3, 32, kernel_size=3, stride=2, bias=False)
 
                 def forward(self, x):
@@ -208,7 +208,7 @@
 
             class ConvDimMutate(torch.nn.Module):
                 def __init__(self):
-                    super(ConvDimMutate, self).__init__()
+                    super().__init__()
                     self.conv = mod(3, 32, kernel_size=3, stride=2, bias=False)
 
                 def forward(self, x):
diff --git a/test/jit/test_recursive_script.py b/test/jit/test_recursive_script.py
index 8d74250..fe2a202 100644
--- a/test/jit/test_recursive_script.py
+++ b/test/jit/test_recursive_script.py
@@ -27,7 +27,7 @@
     def test_inferred_nonetype(self):
         class M(nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.x = None
 
             def forward(self):
@@ -47,7 +47,7 @@
 
         class M(torch.nn.Module):
             def __init__(self, fn):
-                super(M, self).__init__()
+                super().__init__()
                 self.fn = fn
 
             def forward(self, x):
@@ -62,7 +62,7 @@
     def test_python_function_attribute(self):
         class M(torch.nn.Module):
             def __init__(self, fn):
-                super(M, self).__init__()
+                super().__init__()
                 self.fn = fn
 
             def forward(self, x):
@@ -78,7 +78,7 @@
 
         class M(torch.nn.Module):
             def __init__(self, fn):
-                super(M, self).__init__()
+                super().__init__()
                 self.fn = fn
 
             def forward(self, x):
@@ -128,7 +128,7 @@
     def test_module_name(self):
         class MyModule(torch.nn.Module):
             def __init__(self):
-                super(MyModule, self).__init__()
+                super().__init__()
                 self.x = 2
 
             def forward(self, t):
@@ -206,9 +206,6 @@
 
 
         class TestModule(torch.nn.Module):
-            def __init__(self):
-                super(TestModule, self).__init__()
-
             def forward(self, x):
                 return MyScriptClass()
 
@@ -233,7 +230,7 @@
 
         class MyModule(nn.Module):
             def __init__(self):
-                super(MyModule, self).__init__()
+                super().__init__()
                 self.conv = nn.Conv2d(10, 10, 3)
                 self.lin = nn.Linear(10, 10)
                 self.sub = Submodule()
@@ -270,7 +267,7 @@
 
         class MyModule(nn.Module):
             def __init__(self):
-                super(MyModule, self).__init__()
+                super().__init__()
                 self.conv = nn.Conv2d(10, 10, 3)
                 self.lin = nn.Linear(10, 10)
 
@@ -299,9 +296,6 @@
 
 
         class N(torch.nn.Module):
-            def __init__(self):
-                super(N, self).__init__()
-
             def forward(self, x):
                 b = B(x)
                 return b.helper(x)
@@ -342,15 +336,12 @@
             return c(x)
 
         class Submodule(torch.nn.Module):
-            def __init__(self):
-                super(Submodule, self).__init__()
-
             def forward(self, x):
                 return b(x)
 
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.submodule = Submodule()
 
             def some_method(self, y):
@@ -421,7 +412,7 @@
             __constants__ = ['x']
 
             def __init__(self, x):
-                super(Other, self).__init__()
+                super().__init__()
                 self.x = x
                 self.param = torch.nn.Parameter(torch.ones(2, 2))
 
@@ -436,7 +427,7 @@
 
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.other = Other(200)
 
             def forward(self, t):
@@ -449,7 +440,7 @@
             __constants__ = ['x']
 
             def __init__(self, x):
-                super(Other, self).__init__()
+                super().__init__()
                 self.x = x
                 self.param = torch.nn.Parameter(torch.ones(2, 2))
 
@@ -463,7 +454,7 @@
 
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.other = Other(200)
 
             def forward(self, t):
@@ -478,7 +469,7 @@
 
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.sequential = nn.Sequential(
                     Inner(),
                     Inner(),
@@ -513,7 +504,7 @@
 
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 shared = SeluButReluWhenScripted()
                 self.sequential = nn.Sequential(
                     SeluButReluWhenScripted(),
@@ -603,9 +594,6 @@
             # my_empty_dict : Dict[str, int]
             # my_none : Optional[int]
 
-            def __init__(self):
-                super(M, self).__init__()
-
             def forward(self, x):
                 return (
                     self.my_dict,
@@ -653,7 +641,7 @@
     def test_function_attribute_in_submodule(self):
         class N(nn.Module):
             def __init__(self, norm):
-                super(N, self).__init__()
+                super().__init__()
                 self.activation = torch.nn.functional.relu
                 self.norm = norm
 
@@ -664,7 +652,7 @@
 
         class M(nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 encoder_norm = nn.ReLU()
                 self.encoder = N(encoder_norm)
 
@@ -681,7 +669,7 @@
 
         class Model(nn.Module):
             def __init__(self, dummies):
-                super(Model, self).__init__()
+                super().__init__()
                 self._dummies = dummies
 
             def forward(self, x):
@@ -708,7 +696,7 @@
 
         class ContainsLoaded(torch.nn.Module):
             def __init__(self):
-                super(ContainsLoaded, self).__init__()
+                super().__init__()
                 self.encoder = dummy
 
             def forward(self, input):
@@ -719,7 +707,7 @@
     def test_optional_module(self):
         class Dummy(nn.Module):
             def __init__(self):
-                super(Dummy, self).__init__()
+                super().__init__()
                 self.foo = nn.Linear(2, 2)
 
             def forward(self, x):
diff --git a/test/jit/test_remove_mutation.py b/test/jit/test_remove_mutation.py
index 4c393a7..2f7559f 100644
--- a/test/jit/test_remove_mutation.py
+++ b/test/jit/test_remove_mutation.py
@@ -268,7 +268,7 @@
         for op in ["cat", "stack", "vstack", "hstack", "dstack"]:
             class OpMod(torch.nn.Module):
                 def __init__(self, op):
-                    super(OpMod, self).__init__()
+                    super().__init__()
                     self.op = torch_op
 
                 def forward(self):
diff --git a/test/jit/test_save_load.py b/test/jit/test_save_load.py
index 6f32bc9..a21c3dc 100644
--- a/test/jit/test_save_load.py
+++ b/test/jit/test_save_load.py
@@ -35,7 +35,7 @@
 
         class Foo(torch.nn.Module):
             def __init__(self):
-                super(Foo, self).__init__()
+                super().__init__()
                 self.foo = torch.nn.Linear(2, 2)
                 self.bar = torch.nn.Linear(2, 2)
 
@@ -53,7 +53,7 @@
 
         class Foo(torch.nn.Module):
             def __init__(self):
-                super(Foo, self).__init__()
+                super().__init__()
                 self.foo = torch.nn.Linear(2, 2)
 
             def forward(self, x):
@@ -457,8 +457,7 @@
         """
 
         class Submodule(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
+            pass
 
         class TestModule(torch.nn.Module):
             def __init__(self):
@@ -508,7 +507,7 @@
 
         class Foo(torch.nn.Module):
             def __init__(self):
-                super(Foo, self).__init__()
+                super().__init__()
                 self.foo = torch.nn.Linear(2, 3, device="meta")
                 self.bar = torch.nn.Linear(3, 4)
                 self.register_buffer("buffer", torch.randn(4, device="meta"))
@@ -670,7 +669,7 @@
 
         class Foo(torch.nn.Module):
             def __init__(self):
-                super(Foo, self).__init__()
+                super().__init__()
                 self.foo = torch.nn.Linear(2, 2)
                 self.bar = torch.nn.Linear(2, 2)
 
@@ -686,7 +685,7 @@
 
         class Foo(torch.nn.Module):
             def __init__(self):
-                super(Foo, self).__init__()
+                super().__init__()
                 self.foo = torch.nn.Linear(2, 2)
 
             def forward(self, x):
@@ -1020,7 +1019,7 @@
     def test_module_info_flatbuffer(self):
         class Foo(torch.nn.Module):
             def __init__(self):
-                super(Foo, self).__init__()
+                super().__init__()
                 self.foo = torch.nn.Linear(2, 2)
                 self.bar = torch.nn.Linear(2, 2)
 
@@ -1051,8 +1050,7 @@
         """
 
         class Submodule(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
+            pass
 
         class TestModule(torch.nn.Module):
             def __init__(self):
@@ -1101,9 +1099,6 @@
         """
 
         class Module(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x: Tensor):
                 return x
 
diff --git a/test/jit/test_save_load_for_op_version.py b/test/jit/test_save_load_for_op_version.py
index b5e38b3..328f656 100644
--- a/test/jit/test_save_load_for_op_version.py
+++ b/test/jit/test_save_load_for_op_version.py
@@ -75,9 +75,6 @@
 
         # Tensor x Tensor
         class MyModule(torch.nn.Module):
-            def __init__(self):
-                super(MyModule, self).__init__()
-
             def forward(self, a, b):
                 result_0 = a / b
                 result_1 = torch.div(a, b)
@@ -123,9 +120,6 @@
             return self.divide_(other, rounding_mode='trunc')
 
         class MyModule(torch.nn.Module):
-            def __init__(self):
-                super(MyModule, self).__init__()
-
             def forward(self, a, b):
                 a /= b
                 return a
@@ -169,9 +163,6 @@
             return torch.divide(self, other, out=out, rounding_mode='trunc')
 
         class MyModule(torch.nn.Module):
-            def __init__(self):
-                super(MyModule, self).__init__()
-
             def forward(self, a, b, out):
                 return a.div(b, out=out)
 
@@ -220,16 +211,10 @@
             return torch.divide(self, other, rounding_mode='trunc')
 
         class MyModuleFloat(torch.nn.Module):
-            def __init__(self):
-                super(MyModuleFloat, self).__init__()
-
             def forward(self, a, b: float):
                 return a / b
 
         class MyModuleInt(torch.nn.Module):
-            def __init__(self):
-                super(MyModuleInt, self).__init__()
-
             def forward(self, a, b: int):
                 return a / b
 
@@ -279,16 +264,10 @@
             return torch.divide(other, self, rounding_mode='trunc')
 
         class MyModuleFloat(torch.nn.Module):
-            def __init__(self):
-                super(MyModuleFloat, self).__init__()
-
             def forward(self, a, b: float):
                 return b / a
 
         class MyModuleInt(torch.nn.Module):
-            def __init__(self):
-                super(MyModuleInt, self).__init__()
-
             def forward(self, a, b: int):
                 return b / a
 
@@ -348,17 +327,11 @@
             return self.divide_(other, rounding_mode='trunc')
 
         class MyModuleFloat(torch.nn.Module):
-            def __init__(self):
-                super(MyModuleFloat, self).__init__()
-
             def forward(self, a, b: float):
                 a /= b
                 return a
 
         class MyModuleInt(torch.nn.Module):
-            def __init__(self):
-                super(MyModuleInt, self).__init__()
-
             def forward(self, a, b: int):
                 a /= b
                 return a
@@ -396,9 +369,6 @@
     #   so this test verifies the behavior is unchanged.
     def test_versioned_div_scalar_scalar(self):
         class MyModule(torch.nn.Module):
-            def __init__(self):
-                super(MyModule, self).__init__()
-
             def forward(self, a: float, b: int, c: float, d: int):
                 result_0 = a / b
                 result_1 = a / c
@@ -425,9 +395,6 @@
 
     def test_versioned_linspace(self):
         class Module(torch.nn.Module):
-            def __init__(self):
-                super(Module, self).__init__()
-
             def forward(self, a: Union[int, float, complex], b: Union[int, float, complex]):
                 c = torch.linspace(a, b, steps=5)
                 d = torch.linspace(a, b, steps=100)
@@ -455,9 +422,6 @@
 
     def test_versioned_linspace_out(self):
         class Module(torch.nn.Module):
-            def __init__(self):
-                super(Module, self).__init__()
-
             def forward(self, a: Union[int, float, complex], b: Union[int, float, complex], out: torch.Tensor):
                 return torch.linspace(a, b, steps=100, out=out)
 
@@ -484,9 +448,6 @@
 
     def test_versioned_logspace(self):
         class Module(torch.nn.Module):
-            def __init__(self):
-                super(Module, self).__init__()
-
             def forward(self, a: Union[int, float, complex], b: Union[int, float, complex]):
                 c = torch.logspace(a, b, steps=5)
                 d = torch.logspace(a, b, steps=100)
@@ -514,9 +475,6 @@
 
     def test_versioned_logspace_out(self):
         class Module(torch.nn.Module):
-            def __init__(self):
-                super(Module, self).__init__()
-
             def forward(self, a: Union[int, float, complex], b: Union[int, float, complex], out: torch.Tensor):
                 return torch.logspace(a, b, steps=100, out=out)
 
diff --git a/test/jit/test_script_profile.py b/test/jit/test_script_profile.py
index f350a49..438994b 100644
--- a/test/jit/test_script_profile.py
+++ b/test/jit/test_script_profile.py
@@ -18,7 +18,7 @@
 
 class Sequence(nn.Module):
     def __init__(self):
-        super(Sequence, self).__init__()
+        super().__init__()
         self.lstm1 = nn.LSTMCell(1, 51)
         self.lstm2 = nn.LSTMCell(51, 51)
         self.linear = nn.Linear(51, 1)
diff --git a/test/jit/test_scriptmod_ann.py b/test/jit/test_scriptmod_ann.py
index 5d256ba..47e010e 100644
--- a/test/jit/test_scriptmod_ann.py
+++ b/test/jit/test_scriptmod_ann.py
@@ -54,7 +54,7 @@
     def test_annotated_empty_tensor(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.x: torch.Tensor = torch.empty(0)
 
             def forward(self, x: torch.Tensor):
@@ -68,7 +68,7 @@
     def test_annotated_with_jit_attribute(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.x = torch.jit.Attribute([], List[int])
 
             def forward(self, x: List[int]):
diff --git a/test/jit/test_symbolic_shape_analysis.py b/test/jit/test_symbolic_shape_analysis.py
index 3e3cb3f..73a55e5 100644
--- a/test/jit/test_symbolic_shape_analysis.py
+++ b/test/jit/test_symbolic_shape_analysis.py
@@ -309,7 +309,7 @@
             __constants__ = ['dim']
 
             def __init__(self, dim=0):
-                super(CatMod, self).__init__()
+                super().__init__()
                 self.dim = dim
 
             def forward(self, x, y):
@@ -442,7 +442,7 @@
     def test_refinement_through_graph_stitching(self):
         class TwoConvs(torch.nn.Module):
             def __init__(self):
-                super(TwoConvs, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
                 self.conv2 = torch.nn.Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
 
diff --git a/test/jit/test_torchbind.py b/test/jit/test_torchbind.py
index 2a073dd..b92793e 100644
--- a/test/jit/test_torchbind.py
+++ b/test/jit/test_torchbind.py
@@ -75,7 +75,7 @@
 
         class CustomWrapper(torch.nn.Module):
             def __init__(self, foo):
-                super(CustomWrapper, self).__init__()
+                super().__init__()
                 self.foo = foo
 
             def forward(self) -> None:
@@ -239,7 +239,7 @@
     def test_torchbind_class_attr_recursive(self):
         class FooBar(torch.nn.Module):
             def __init__(self, foo_model):
-                super(FooBar, self).__init__()
+                super().__init__()
                 self.foo_mod = foo_model
 
             def forward(self) -> int:
@@ -256,7 +256,7 @@
     def test_torchbind_class_attribute(self):
         class FooBar1234(torch.nn.Module):
             def __init__(self):
-                super(FooBar1234, self).__init__()
+                super().__init__()
                 self.f = torch.classes._TorchScriptTesting._StackString(["3", "4"])
 
             def forward(self):
@@ -272,7 +272,7 @@
     def test_torchbind_getstate(self):
         class FooBar4321(torch.nn.Module):
             def __init__(self):
-                super(FooBar4321, self).__init__()
+                super().__init__()
                 self.f = torch.classes._TorchScriptTesting._PickleTester([3, 4])
 
             def forward(self):
@@ -293,7 +293,7 @@
     def test_torchbind_deepcopy(self):
         class FooBar4321(torch.nn.Module):
             def __init__(self):
-                super(FooBar4321, self).__init__()
+                super().__init__()
                 self.f = torch.classes._TorchScriptTesting._PickleTester([3, 4])
 
             def forward(self):
@@ -309,7 +309,7 @@
     def test_torchbind_python_deepcopy(self):
         class FooBar4321(torch.nn.Module):
             def __init__(self):
-                super(FooBar4321, self).__init__()
+                super().__init__()
                 self.f = torch.classes._TorchScriptTesting._PickleTester([3, 4])
 
             def forward(self):
@@ -324,7 +324,7 @@
     def test_torchbind_tracing(self):
         class TryTracing(torch.nn.Module):
             def __init__(self):
-                super(TryTracing, self).__init__()
+                super().__init__()
                 self.f = torch.classes._TorchScriptTesting._PickleTester([3, 4])
 
             def forward(self):
@@ -340,12 +340,12 @@
     def test_torchbind_tracing_nested(self):
         class TryTracingNest(torch.nn.Module):
             def __init__(self):
-                super(TryTracingNest, self).__init__()
+                super().__init__()
                 self.f = torch.classes._TorchScriptTesting._PickleTester([3, 4])
 
         class TryTracing123(torch.nn.Module):
             def __init__(self):
-                super(TryTracing123, self).__init__()
+                super().__init__()
                 self.nest = TryTracingNest()
 
             def forward(self):
diff --git a/test/jit/test_tracer.py b/test/jit/test_tracer.py
index b36003a..b16a086 100644
--- a/test/jit/test_tracer.py
+++ b/test/jit/test_tracer.py
@@ -40,7 +40,7 @@
     def test_large_nbr_kernel_args(self):
         class Recurrence(nn.Module):
             def __init__(self, seq_len):
-                super(Recurrence, self).__init__()
+                super().__init__()
                 self.seq_len = seq_len
 
             def forward(self, input):
@@ -87,9 +87,6 @@
 
     def test_trace_checking_with_global_name(self):
         class MyClass(torch.nn.Module):
-            def __init__(self):
-                super(MyClass, self).__init__()
-
             def forward(self, xs: List[Tensor]):
                 y = torch.cat(xs, dim=0)
                 return y
@@ -105,7 +102,7 @@
     def test_trace_aliased_parameter(self):
         class M(nn.Module):
             def __init__(self, x):
-                super(M, self).__init__()
+                super().__init__()
                 self.x = nn.Parameter(x)
 
             def forward(self, y):
@@ -622,9 +619,6 @@
     def test_input_dict_remembers_keys(self):
         """Check that the trace remembers which keys were in a dict input"""
         class TestModule(torch.nn.Module):
-            def __init__(self):
-                super(TestModule, self).__init__()
-
             def forward(self, dict_input):
                 return dict_input['x']
 
@@ -649,9 +643,6 @@
     def test_input_dict_insertion_order(self):
         """Check that dictionary access doesn't care about insertion order"""
         class TestModule(torch.nn.Module):
-            def __init__(self):
-                super(TestModule, self).__init__()
-
             def forward(self, dict_input):
                 return dict_input['x'], dict_input['y']
         input_x_then_y = {}
@@ -671,9 +662,6 @@
 
     def test_input_dict_recursive(self):
         class TestModule(torch.nn.Module):
-            def __init__(self):
-                super(TestModule, self).__init__()
-
             def forward(self, dict_input):
                 return dict_input['x'][1]
 
@@ -833,7 +821,7 @@
     def test_shared_param(self):
         class MyModule(torch.nn.Module):
             def __init__(self):
-                super(MyModule, self).__init__()
+                super().__init__()
                 self.b = self.a = nn.Parameter(torch.randn(2, 2))
 
             def forward(self, x):
@@ -852,9 +840,6 @@
             self.skipTest("Skip the test since c2 ops are not registered.")
 
         class MyModel(torch.nn.Module):
-            def __init__(self):
-                super(MyModel, self).__init__()
-
             def forward(self, scores, bbox_deltas, im_info, anchors):
                 a, b = torch.ops._caffe2.GenerateProposals(
                     (scores), (bbox_deltas), (im_info), (anchors),
@@ -955,7 +940,7 @@
     def test_traced_module_cuda(self):
         class Model(nn.Module):
             def __init__(self, num_features, num_layers):
-                super(Model, self).__init__()
+                super().__init__()
                 self.num_layers = num_layers
                 layers = [[nn.Linear(num_features, num_features), nn.Sigmoid()]
                           for _ in range(num_layers)]
@@ -1135,7 +1120,7 @@
     def test_trace_dict_input(self):
         class Bar(torch.nn.Module):
             def __init__(self):
-                super(Bar, self).__init__()
+                super().__init__()
                 self.foo = Foo()
 
             def forward(self, a, b):
@@ -1267,7 +1252,7 @@
     def test_trace_save_load_copy(self):
         class Test(torch.nn.Module):
             def __init__(self):
-                super(Test, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(3, 3, 3)
 
             def forward(self, x):
@@ -1285,7 +1270,7 @@
     def test_trace_export_fns(self):
         class Foo(torch.nn.Module):
             def __init__(self):
-                super(Foo, self).__init__()
+                super().__init__()
                 self.a = 3
 
             @torch.jit.export
@@ -1316,7 +1301,7 @@
     def test_trace_export_fns_recursive(self):
         class Foo(torch.nn.Module):
             def __init__(self):
-                super(Foo, self).__init__()
+                super().__init__()
                 self.a = 3
 
             @torch.jit.export
@@ -1333,7 +1318,7 @@
 
         class Wrapper(torch.nn.Module):
             def __init__(self):
-                super(Wrapper, self).__init__()
+                super().__init__()
                 self.foo = Foo()
 
             def forward(self, x):
@@ -1354,9 +1339,6 @@
 
         # Note that Bar's forward can only be traced, but not scripted
         class Bar(nn.Module):
-            def __init__(self):
-                super().__init__()
-
             @torch.jit.export
             def addTwo(self, x):
                 return x + 2
@@ -1369,7 +1351,7 @@
         # being traced.
         class WrapperExports(torch.nn.Module):
             def __init__(self):
-                super(WrapperExports, self).__init__()
+                super().__init__()
                 self.bar = Bar()
 
             @torch.jit.export
@@ -1403,7 +1385,7 @@
 
         class Wrapper(torch.nn.Module):
             def __init__(self):
-                super(Wrapper, self).__init__()
+                super().__init__()
                 self.tm = TracedModule()
 
             def forward(self, x):
@@ -1455,7 +1437,7 @@
     def test_interpolate_trace(self):
         class test(nn.Module):
             def __init__(self):
-                super(test, self).__init__()
+                super().__init__()
                 self.conv = nn.Conv2d(1, 32, kernel_size=3, padding=1)
 
             def forward(self, x):
@@ -1515,7 +1497,7 @@
 
         class TracedModule(torch.nn.Module):
             def __init__(self):
-                super(TracedModule, self).__init__()
+                super().__init__()
                 self.param = torch.nn.Parameter(torch.rand(4, 5))
 
             def forward(self, x):
@@ -1533,7 +1515,7 @@
     def test_call_traced_module_from_traced_module(self):
         class TracedModule1(torch.nn.Module):
             def __init__(self):
-                super(TracedModule1, self).__init__()
+                super().__init__()
                 self.param = torch.nn.Parameter(torch.rand(5, 7))
 
             def forward(self, x):
@@ -1541,7 +1523,7 @@
 
         class TracedModule(torch.nn.Module):
             def __init__(self):
-                super(TracedModule, self).__init__()
+                super().__init__()
                 self.param = torch.nn.Parameter(torch.rand(4, 5))
                 self.mod = torch.jit.trace(TracedModule1(), torch.rand(3, 5))
 
@@ -1697,7 +1679,7 @@
     def test_trace_modulelist(self):
         class MySubmod(torch.nn.Module):
             def __init__(self):
-                super(MySubmod, self).__init__()
+                super().__init__()
                 self.relu = torch.nn.ReLU()
 
             def forward(self, x):
@@ -1705,7 +1687,7 @@
 
         class MyMod(torch.nn.Module):
             def __init__(self):
-                super(MyMod, self).__init__()
+                super().__init__()
                 self.ml = torch.nn.ModuleList([
                     MySubmod(),
                     MySubmod()
@@ -1721,7 +1703,7 @@
     def test_trace_fork_join_and_module(self):
         class MySubmod(torch.nn.Module):
             def __init__(self):
-                super(MySubmod, self).__init__()
+                super().__init__()
                 self.relu = torch.nn.ReLU()
 
             def forward(self, x):
@@ -1729,7 +1711,7 @@
 
         class Mod(torch.nn.Module):
             def __init__(self):
-                super(Mod, self).__init__()
+                super().__init__()
                 self.ml = torch.nn.ModuleList([
                     MySubmod() for i in range(2)
                 ])
@@ -1751,7 +1733,7 @@
     def test_trace_invert_module_hierarchy(self):
         class MySubmod(torch.nn.Module):
             def __init__(self):
-                super(MySubmod, self).__init__()
+                super().__init__()
                 self.relu = torch.nn.ReLU()
 
             def forward(self, x):
@@ -1763,7 +1745,7 @@
 
         class Mod(torch.nn.Module):
             def __init__(self):
-                super(Mod, self).__init__()
+                super().__init__()
                 self.sm = MySubmod()
                 self.fm = MyFunctionalMod()
 
@@ -1790,9 +1772,6 @@
     @skipIfTorchDynamo("Not a suitable test for TorchDynamo")
     def test_tracing_hooks(self):
         class Net(nn.Module):
-            def __init__(self):
-                super(Net, self).__init__()
-
             def forward(self, x):
                 return x + x
 
@@ -1851,9 +1830,6 @@
 
     def test_tracing_backward_hook_error(self):
         class Net(nn.Module):
-            def __init__(self):
-                super(Net, self).__init__()
-
             def forward(self, x):
                 return x + x
 
@@ -1869,7 +1845,7 @@
     def test_tracing_multiple_methods(self):
         class Net(nn.Module):
             def __init__(self):
-                super(Net, self).__init__()
+                super().__init__()
                 self.conv = nn.Conv2d(1, 1, 3)
 
             def forward(self, x):
@@ -1930,7 +1906,7 @@
     def test_trace_with_conditional_property(self):
         class Net(nn.Module):
             def __init__(self, attr=None):
-                super(Net, self).__init__()
+                super().__init__()
                 if attr is not None:
                     self._attr = attr
                 self.attr_name = '_attr'
@@ -1964,7 +1940,7 @@
     def test_trace_module_argument_names_captured(self):
         class TestModule(nn.Module):
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.conv = nn.Conv2d(1, 1, 3)
 
             def forward(self, first_arg: torch.Tensor, second_arg: torch.Tensor):
@@ -2105,7 +2081,7 @@
 
         class AnotherScriptMod(torch.jit.ScriptModule):
             def __init__(self):
-                super(AnotherScriptMod, self).__init__()
+                super().__init__()
                 self.param = torch.nn.Parameter(torch.rand(1, 2, 3))
 
             @torch.jit.script_method
@@ -2114,7 +2090,7 @@
 
         class SomeScriptMod(torch.jit.ScriptModule):
             def __init__(self):
-                super(SomeScriptMod, self).__init__()
+                super().__init__()
                 self.asm = AnotherScriptMod()
 
             @torch.jit.script_method
@@ -2127,7 +2103,7 @@
 
         class TraceMe(torch.nn.Module):
             def __init__(self):
-                super(TraceMe, self).__init__()
+                super().__init__()
                 self.ssm = SomeScriptMod()
 
             def forward(self, x):
@@ -2154,7 +2130,7 @@
     def test_trace_parameter(self):
         class Param(nn.Module):
             def __init__(self):
-                super(Param, self).__init__()
+                super().__init__()
                 self.register_parameter("bias", nn.Parameter(torch.empty(4, 4)))
 
             def forward(self, x):
@@ -2162,7 +2138,7 @@
 
         class M3(torch.jit.ScriptModule):
             def __init__(self, model):
-                super(M3, self).__init__()
+                super().__init__()
                 self.traced = torch.jit.trace(model, (torch.rand(3, 3)))
 
             @torch.jit.script_method
@@ -2171,7 +2147,7 @@
 
         class M2(nn.Module):
             def __init__(self, model):
-                super(M2, self).__init__()
+                super().__init__()
                 self.module = M3(model)
 
             def forward(self, x):
@@ -2179,7 +2155,7 @@
 
         class M1(torch.jit.ScriptModule):
             def __init__(self, model):
-                super(M1, self).__init__()
+                super().__init__()
                 self.traced = torch.jit.trace(M2(model), (torch.rand(3, 3)))
 
             @torch.jit.script_method
@@ -2199,7 +2175,7 @@
 
         class TracedModule(torch.nn.Module):
             def __init__(self):
-                super(TracedModule, self).__init__()
+                super().__init__()
                 self.param = torch.nn.Parameter(torch.rand(4, 5))
 
             def forward(self, x):
@@ -2212,7 +2188,7 @@
     def test_call_script_module_from_traced_module(self):
         class ScriptMod(torch.jit.ScriptModule):
             def __init__(self):
-                super(ScriptMod, self).__init__()
+                super().__init__()
                 self.param_foo = torch.nn.Parameter(torch.rand(5, 7))
 
             @torch.jit.script_method
@@ -2221,7 +2197,7 @@
 
         class TracedModule(torch.nn.Module):
             def __init__(self):
-                super(TracedModule, self).__init__()
+                super().__init__()
                 self.param = torch.nn.Parameter(torch.rand(4, 5))
                 self.mod = ScriptMod()
 
@@ -2247,9 +2223,6 @@
     def test_call_traced_mod_from_script_fn(self):
         with self.assertRaisesRegex(RuntimeError, "Cannot call a ScriptModule that is not a submodule of the caller"):
             class TracedModule(torch.nn.Module):
-                def __init__(self):
-                    super(TracedModule, self).__init__()
-
                 def forward(self, x):
                     return torch.mm(x, torch.zeros(4, 3))
 
@@ -2267,7 +2240,7 @@
 
         class ScriptMod(torch.jit.ScriptModule):
             def __init__(self):
-                super(ScriptMod, self).__init__()
+                super().__init__()
                 self.param = torch.nn.Parameter(torch.rand(4, 3))
 
             @torch.jit.script_method
@@ -2281,7 +2254,7 @@
     def test_call_tracing_mod_from_script_module(self):
         class TracedMod(torch.nn.Module):
             def __init__(self):
-                super(TracedMod, self).__init__()
+                super().__init__()
                 self.param = torch.nn.Parameter(torch.rand(3, 5))
 
             def forward(self, x):
@@ -2289,7 +2262,7 @@
 
         class ScriptMod(torch.jit.ScriptModule):
             def __init__(self):
-                super(ScriptMod, self).__init__()
+                super().__init__()
                 self.param = torch.nn.Parameter(torch.rand(4, 3))
                 self.tm = torch.jit.trace(TracedMod(), torch.rand(3, 3))
 
@@ -2302,15 +2275,12 @@
 
     def test_script_inline_trace_multiple_args(self):
         class M(torch.nn.Module):
-            def __init__(self):
-                super(M, self).__init__()
-
             def forward(self, input, input2):
                 return input + input2
 
         class M2(torch.jit.ScriptModule):
             def __init__(self):
-                super(M2, self).__init__()
+                super().__init__()
                 self.m = torch.jit.trace(M(), (torch.zeros(4, 3), torch.zeros(4, 3)))
 
             @torch.jit.script_method
@@ -2324,7 +2294,7 @@
     def test_trace_dict_mix_script(self):
         class testB(torch.nn.Module):
             def __init__(self):
-                super(testB, self).__init__()
+                super().__init__()
                 self.linear = torch.nn.Linear(2, 2)
 
             def forward(self, feature_map: Dict[str, List[Tensor]]) -> Tensor:
@@ -2336,7 +2306,7 @@
 
         class testA(torch.nn.Module):
             def __init__(self):
-                super(testA, self).__init__()
+                super().__init__()
                 self.b = torch.jit.script(testB())
 
             def forward(self, input_map: Dict[str, List[Tensor]]) -> Tensor:
@@ -2357,9 +2327,6 @@
         The dictionary should be able to contain other containers (like a tuple) recursively.
         """
         class ReturnsDict(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(
                 self, id_score_list: Dict[str, Tuple[torch.Tensor, torch.Tensor, torch.Tensor]]
             ) -> Dict[str, Tuple[torch.Tensor, torch.Tensor, torch.Tensor]]:
@@ -2373,9 +2340,6 @@
                 return result
 
         class ChecksDict(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, input: Dict[str, Tuple[torch.Tensor, torch.Tensor, torch.Tensor]]):
                 v = input["1000"]
                 return v[1] + 1
@@ -2418,9 +2382,6 @@
         should work.
         """
         class ReturnsDict(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(
                 self, k: torch.Tensor, v: torch.Tensor
             ) -> Dict[str, Tuple[torch.Tensor, torch.Tensor]]:
@@ -2432,9 +2393,6 @@
                 return result
 
         class ReturnsBadDict(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(
                 self, k: torch.Tensor, v: torch.Tensor
             ) -> Dict[str, Tuple[torch.Tensor, float]]:
@@ -2473,7 +2431,7 @@
 
         class TestModule(nn.Module):
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.conv = nn.Conv2d(1, 1, 3)
 
             def forward(self, first_arg: torch.Tensor, second_arg: torch.Tensor) -> torch.Tensor:
diff --git a/test/jit/test_type_sharing.py b/test/jit/test_type_sharing.py
index 17b6138..c2b84fc 100644
--- a/test/jit/test_type_sharing.py
+++ b/test/jit/test_type_sharing.py
@@ -35,7 +35,7 @@
     def test_basic(self):
         class M(torch.nn.Module):
             def __init__(self, a, b, c):
-                super(M, self).__init__()
+                super().__init__()
                 self.a = a
                 self.b = b
                 self.c = c
@@ -55,7 +55,7 @@
         """
         class M(torch.nn.Module):
             def __init__(self, a, b, c):
-                super(M, self).__init__()
+                super().__init__()
                 self.a = a
                 self.b = b
                 self.c = c
@@ -77,7 +77,7 @@
             __constants__ = ["const"]
 
             def __init__(self, attr, const):
-                super(M, self).__init__()
+                super().__init__()
                 self.attr = attr
                 self.const = const
 
@@ -113,7 +113,7 @@
         """
         class M(torch.nn.Module):
             def __init__(self, in1, out1, in2, out2):
-                super(M, self).__init__()
+                super().__init__()
                 self.submod1 = torch.nn.Linear(in1, out1)
                 self.submod2 = torch.nn.Linear(in2, out2)
 
@@ -139,7 +139,7 @@
         """
         class M(torch.nn.Module):
             def __init__(self, foo):
-                super(M, self).__init__()
+                super().__init__()
                 self.foo = foo
 
             def forward(self, x):
@@ -160,7 +160,7 @@
             __constants__ = ["const"]
 
             def __init__(self, in1, out1, in2, out2):
-                super(A, self).__init__()
+                super().__init__()
                 self.submod1 = torch.nn.Linear(in1, out1)
                 self.submod2 = torch.nn.Linear(in2, out2)
                 self.const = 5
@@ -174,7 +174,7 @@
             __constants__ = ["const"]
 
             def __init__(self, in1, out1, in2, out2):
-                super(B, self).__init__()
+                super().__init__()
                 self.submod1 = torch.nn.Linear(in1, out1)
                 self.submod2 = torch.nn.Linear(in2, out2)
                 self.const = 5
@@ -194,7 +194,7 @@
         """
         class M(torch.nn.Module):
             def __init__(self, in1, out1, in2, out2):
-                super(M, self).__init__()
+                super().__init__()
                 self.submod1 = torch.nn.Linear(in1, out1)
                 self.submod2 = torch.nn.Linear(in2, out2)
                 self.foo = torch.ones(in1, in1)
@@ -216,7 +216,7 @@
         """
         class M(torch.nn.Module):
             def __init__(self, in1, out1, in2, out2):
-                super(M, self).__init__()
+                super().__init__()
                 self.submod1 = torch.nn.Linear(in1, out1)
                 self.submod2 = torch.nn.Linear(in2, out2)
                 self.foo = torch.ones(in1, in1)
@@ -246,7 +246,7 @@
         """
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 # assign a type we know can't be converted to TorchScript
                 self.foo = object
 
@@ -274,7 +274,7 @@
 
         class M(torch.nn.Module):
             def __init__(self, fn):
-                super(M, self).__init__()
+                super().__init__()
                 self.fn = fn
 
             def forward(self, x):
@@ -288,7 +288,7 @@
     def test_builtin_function_same(self):
         class Caller(torch.nn.Module):
             def __init__(self, fn):
-                super(Caller, self).__init__()
+                super().__init__()
                 self.fn = fn
 
             def forward(self, input):
@@ -302,7 +302,7 @@
     def test_builtin_function_different(self):
         class Caller(torch.nn.Module):
             def __init__(self, fn):
-                super(Caller, self).__init__()
+                super().__init__()
                 self.fn = fn
 
             def forward(self, input):
@@ -323,7 +323,7 @@
 
         class M(torch.nn.Module):
             def __init__(self, fn):
-                super(M, self).__init__()
+                super().__init__()
                 self.fn = fn
 
             def forward(self, x):
@@ -346,7 +346,7 @@
 
         class M(torch.nn.Module):
             def __init__(self, fn):
-                super(M, self).__init__()
+                super().__init__()
                 self.fn = fn
 
             def forward(self, x):
@@ -366,7 +366,7 @@
 
         class M(torch.nn.Module):
             def __init__(self, fn):
-                super(M, self).__init__()
+                super().__init__()
                 self.fn = fn
 
             def forward(self, x):
@@ -384,9 +384,6 @@
         trace runs, tracing must always generate a unique type.
         """
         class M(torch.nn.Module):
-            def __init__(self):
-                super(M, self).__init__()
-
             def forward(self, x, y):
                 if x.sum() > y.sum():
                     return x
@@ -400,7 +397,7 @@
     def test_ignored_fns(self):
         class M(torch.nn.Module):
             def __init__(self, foo):
-                super(M, self).__init__()
+                super().__init__()
                 self.foo = foo
 
             @torch.jit.ignore
@@ -418,9 +415,6 @@
     @suppress_warnings
     def test_script_module_containing_traced_module(self):
         class Traced(torch.nn.Module):
-            def __init__(self):
-                super(Traced, self).__init__()
-
             def forward(self, x):
                 if x.sum() > 0:
                     return x
@@ -429,7 +423,7 @@
 
         class M(torch.nn.Module):
             def __init__(self, input):
-                super(M, self).__init__()
+                super().__init__()
                 self.traced = torch.jit.trace(Traced(), input)
 
             def forward(self, x):
@@ -442,7 +436,7 @@
     def test_loaded_modules_work(self):
         class AB(torch.nn.Module):
             def __init__(self):
-                super(AB, self).__init__()
+                super().__init__()
                 self.a = 1
                 self.b = 1
 
@@ -451,7 +445,7 @@
 
         class A(torch.nn.Module):
             def __init__(self):
-                super(A, self).__init__()
+                super().__init__()
                 self.a = 1
 
             def forward(self):
@@ -459,7 +453,7 @@
 
         class Wrapper(torch.nn.Module):
             def __init__(self, sub):
-                super(Wrapper, self).__init__()
+                super().__init__()
                 self.sub = sub
 
             def forward(self):
@@ -483,15 +477,12 @@
         that have different keys but the same value types.
         """
         class A(torch.nn.Module):
-            def __init__(self):
-                super(A, self).__init__()
-
             def forward(self, x):
                 return x
 
         class Foo(torch.nn.Module):
             def __init__(self, s):
-                super(Foo, self).__init__()
+                super().__init__()
                 self.dict = torch.nn.ModuleDict(s)
 
             def forward(self, x):
@@ -536,9 +527,6 @@
                 return x
 
         class B(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 return x
 
diff --git a/test/jit/test_types.py b/test/jit/test_types.py
index 2502c2c..8374afc 100644
--- a/test/jit/test_types.py
+++ b/test/jit/test_types.py
@@ -50,9 +50,6 @@
         GG = namedtuple('GG', ['f', 'g'])
 
         class Foo(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             @torch.jit.ignore
             def foo(self, x: torch.Tensor, z: torch.Tensor) -> Tuple[GG, GG]:
                 return GG(x, z), GG(x, z)
@@ -64,9 +61,6 @@
         y = foo(torch.randn(2, 2), torch.randn(2, 2))
 
         class Foo(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             @torch.jit.ignore
             def foo(self, x, z) -> Tuple[GG, GG]:
                 return GG(x, z)
@@ -83,9 +77,6 @@
             return x + 10
 
         class M(torch.nn.Module):
-            def __init__(self):
-                super(M, self).__init__()
-
             def forward(self, in_batch: Dict[str, Optional[torch.Tensor]]) -> torch.Tensor:
                 self.dropout_modality(in_batch)
                 fn(in_batch)
@@ -200,9 +191,6 @@
         Test that module attributes can be ignored.
         """
         class Sub(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, a: int) -> int:
                 return sum([a])
 
diff --git a/test/jit/test_with.py b/test/jit/test_with.py
index 0302a07..03638ed 100644
--- a/test/jit/test_with.py
+++ b/test/jit/test_with.py
@@ -581,9 +581,6 @@
         # Check that @torch.jit.ignored functions respect no_grad when it is
         # called in JIT mode.
         class NoGradModule(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             @torch.jit.ignore
             def adder(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
                 w = x + y
diff --git a/test/jit/xnnpack/test_xnnpack_delegate.py b/test/jit/xnnpack/test_xnnpack_delegate.py
index c54d9ba..4c7bc4a 100644
--- a/test/jit/xnnpack/test_xnnpack_delegate.py
+++ b/test/jit/xnnpack/test_xnnpack_delegate.py
@@ -38,9 +38,6 @@
 
     def test_xnnpack_lowering(self):
         class Module(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 return x + x
 
@@ -98,9 +95,6 @@
 
     def test_xnnpack_backend_add(self):
         class AddModule(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x, y):
                 z = x + y
                 z = z + x
@@ -130,9 +124,6 @@
 
     def test_xnnpack_broadcasting(self):
         class AddModule(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x, y):
                 return x + y
 
@@ -159,9 +150,6 @@
 
     def test_xnnpack_unsupported(self):
         class AddSpliceModule(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x, y):
                 z = x + y[:, :, 1, :]
                 return z
diff --git a/test/lazy/test_extract_compiled_graph.py b/test/lazy/test_extract_compiled_graph.py
index 0d91695..bde68ae 100644
--- a/test/lazy/test_extract_compiled_graph.py
+++ b/test/lazy/test_extract_compiled_graph.py
@@ -16,16 +16,10 @@
 import copy
 
 class ModuleConstScale(nn.Module):
-    def __init__(self):
-        super(ModuleConstScale, self).__init__()
-
     def forward(self, a):
         return a * 2
 
 class ModuleSub(nn.Module):
-    def __init__(self):
-        super(ModuleSub, self).__init__()
-
     def forward(self, a, b):
         return a - b
 
@@ -33,16 +27,10 @@
     """
     addcmul function takes an at::Scalar which results in a special TSData containing a Scalar rather than a Tensor.
     """
-    def __init__(self):
-        super(ModuleAddcmul, self).__init__()
-
     def forward(self, a, b, c):
         return torch.addcmul(a, b, c, value=5)
 
 class ModuleReturnMulti(nn.Module):
-    def __init__(self):
-        super(ModuleReturnMulti, self).__init__()
-
     def forward(self, a, b):
         return (b + 1, a - 1)
 
@@ -50,7 +38,7 @@
 # a custom tracer.
 # class ModuleEagerTensor(nn.Module):
 #     def __init__(self):
-#         super(ModuleEagerTensor, self).__init__()
+#         super().__init__()
 #
 #     def forward(self, a):
 #         b = torch.randn(2, 3, device="cpu") # eager device
@@ -65,7 +53,7 @@
 # method to a constant.. Comment out for now
 # class ModuleReturnEagerTensorOnDefaultDevice(nn.Module):
 #     def __init__(self):
-#         super(ModuleReturnEagerTensorOnDefaultDevice, self).__init__()
+#         super().__init__()
 #
 #     def forward(self):
 #         return torch.tensor((2, 3), dtype=torch.float32)
@@ -76,17 +64,11 @@
     returned tuple. torchbench like drq will hit this corner case when running
     thru torchdynamo..
     """
-    def __init__(self):
-        super(ModuleReturnDupTensor, self).__init__()
-
     def forward(self, a, b):
         c = a + b
         return a - b, c, a + 1, c
 
 class ModuleInplaceUpdate(nn.Module):
-    def __init__(self):
-        super(ModuleInplaceUpdate, self).__init__()
-
     def forward(self, a, b):
         a.sub_(b)
         return b - 1, b + 1
diff --git a/test/mkldnn_verbose.py b/test/mkldnn_verbose.py
index 804eb9a..60fe87b 100644
--- a/test/mkldnn_verbose.py
+++ b/test/mkldnn_verbose.py
@@ -3,7 +3,7 @@
 
 class Module(torch.nn.Module):
     def __init__(self):
-        super(Module, self).__init__()
+        super().__init__()
         self.conv = torch.nn.Conv2d(1, 10, 5, 1)
 
     def forward(self, x):
diff --git a/test/mobile/lightweight_dispatch/test_codegen_unboxing.cpp b/test/mobile/lightweight_dispatch/test_codegen_unboxing.cpp
index 80f26e6..1b87911 100644
--- a/test/mobile/lightweight_dispatch/test_codegen_unboxing.cpp
+++ b/test/mobile/lightweight_dispatch/test_codegen_unboxing.cpp
@@ -197,15 +197,16 @@
   auto testModelFile = "ModelWithMultipleOps.ptl";
 
   // class ModelWithMultipleOps(torch.nn.Module):
-  //           def __init__(self):
-  //               super(Model, self).__init__()
-  //               self.ops = torch.nn.Sequential(
-  //                   torch.nn.ReLU(),
-  //                   torch.nn.Flatten(),
-  //               )
-  //           def forward(self, x):
-  //               x[1] = -2
-  //               return self.ops(x)
+  //     def __init__(self):
+  //         super().__init__()
+  //         self.ops = torch.nn.Sequential(
+  //             torch.nn.ReLU(),
+  //             torch.nn.Flatten(),
+  //         )
+  //
+  //     def forward(self, x):
+  //         x[1] = -2
+  //         return self.ops(x)
 
   Module bc = _load_for_mobile(testModelFile);
   auto b = at::ones({2, 2, 2, 2});
diff --git a/test/mobile/model_test/android_api_module.py b/test/mobile/model_test/android_api_module.py
index 109e3aa..acada05 100644
--- a/test/mobile/model_test/android_api_module.py
+++ b/test/mobile/model_test/android_api_module.py
@@ -5,9 +5,6 @@
 
 
 class AndroidAPIModule(torch.jit.ScriptModule):
-    def __init__(self):
-        super(AndroidAPIModule, self).__init__()
-
     @torch.jit.script_method
     def forward(self, input):
         return None
diff --git a/test/mobile/model_test/builtin_ops.py b/test/mobile/model_test/builtin_ops.py
index 75b57f7..b315c4f 100644
--- a/test/mobile/model_test/builtin_ops.py
+++ b/test/mobile/model_test/builtin_ops.py
@@ -5,9 +5,6 @@
 
 
 class TSBuiltinOpsModule(torch.nn.Module):
-    def __init__(self):
-        super(TSBuiltinOpsModule, self).__init__()
-
     def forward(self):
         x = torch.tensor(1)
         y = torch.tensor(0.5)
@@ -90,9 +87,6 @@
 
 
 class TSCollectionOpsModule(torch.nn.Module):
-    def __init__(self):
-        super(TSCollectionOpsModule, self).__init__()
-
     def forward(self):
         s = "abcde"
         # list
diff --git a/test/mobile/model_test/math_ops.py b/test/mobile/model_test/math_ops.py
index 551c712..009ec2e 100644
--- a/test/mobile/model_test/math_ops.py
+++ b/test/mobile/model_test/math_ops.py
@@ -6,9 +6,6 @@
 
 
 class PointwiseOpsModule(torch.nn.Module):
-    def __init__(self):
-        super(PointwiseOpsModule, self).__init__()
-
     def forward(self):
         return self.pointwise_ops()
 
@@ -212,9 +209,6 @@
 
 
 class ReductionOpsModule(torch.nn.Module):
-    def __init__(self):
-        super(ReductionOpsModule, self).__init__()
-
     def forward(self):
         return self.reduction_ops()
 
@@ -265,9 +259,6 @@
 
 
 class ComparisonOpsModule(torch.nn.Module):
-    def __init__(self):
-        super(ComparisonOpsModule, self).__init__()
-
     def forward(self):
         a = torch.tensor(0)
         b = torch.tensor(1)
@@ -313,9 +304,6 @@
 
 
 class OtherMathOpsModule(torch.nn.Module):
-    def __init__(self):
-        super(OtherMathOpsModule, self).__init__()
-
     def forward(self):
         return self.other_ops()
 
@@ -387,9 +375,6 @@
 
 
 class SpectralOpsModule(torch.nn.Module):
-    def __init__(self):
-        super(SpectralOpsModule, self).__init__()
-
     def forward(self):
         return self.spectral_ops()
 
@@ -409,9 +394,6 @@
 
 
 class BlasLapackOpsModule(torch.nn.Module):
-    def __init__(self):
-        super(BlasLapackOpsModule, self).__init__()
-
     def forward(self):
         return self.blas_lapack_ops()
 
diff --git a/test/mobile/model_test/nn_ops.py b/test/mobile/model_test/nn_ops.py
index 338359c..6389a00 100644
--- a/test/mobile/model_test/nn_ops.py
+++ b/test/mobile/model_test/nn_ops.py
@@ -5,7 +5,7 @@
 # https://pytorch.org/docs/stable/nn.html
 class NNConvolutionModule(torch.nn.Module):
     def __init__(self):
-        super(NNConvolutionModule, self).__init__()
+        super().__init__()
         self.input1d = torch.randn(1, 4, 36)
         self.input2d = torch.randn(1, 4, 30, 10)
         self.input3d = torch.randn(1, 4, 10, 4, 4)
@@ -40,7 +40,7 @@
 
 class NNPoolingModule(torch.nn.Module):
     def __init__(self):
-        super(NNPoolingModule, self).__init__()
+        super().__init__()
         self.input1d = torch.randn(1, 16, 50)
         self.module1d = nn.ModuleList(
             [
@@ -86,7 +86,7 @@
 
 class NNPaddingModule(torch.nn.Module):
     def __init__(self):
-        super(NNPaddingModule, self).__init__()
+        super().__init__()
         self.input1d = torch.randn(1, 4, 50)
         self.module1d = nn.ModuleList(
             [
@@ -125,7 +125,7 @@
 
 class NNNormalizationModule(torch.nn.Module):
     def __init__(self):
-        super(NNNormalizationModule, self).__init__()
+        super().__init__()
         self.input1d = torch.randn(1, 4, 50)
         self.module1d = nn.ModuleList(
             [
@@ -164,7 +164,7 @@
 
 class NNActivationModule(torch.nn.Module):
     def __init__(self):
-        super(NNActivationModule, self).__init__()
+        super().__init__()
         self.activations = nn.ModuleList(
             [
                 nn.ELU(),
@@ -209,7 +209,7 @@
 
 class NNRecurrentModule(torch.nn.Module):
     def __init__(self):
-        super(NNRecurrentModule, self).__init__()
+        super().__init__()
         self.rnn = nn.ModuleList(
             [
                 nn.RNN(4, 8, 2),
@@ -239,7 +239,7 @@
 
 class NNTransformerModule(torch.nn.Module):
     def __init__(self):
-        super(NNTransformerModule, self).__init__()
+        super().__init__()
         self.transformers = nn.ModuleList(
             [
                 nn.Transformer(
@@ -265,7 +265,7 @@
 
 class NNLinearModule(torch.nn.Module):
     def __init__(self):
-        super(NNLinearModule, self).__init__()
+        super().__init__()
         self.linears = nn.ModuleList(
             [
                 nn.Identity(54),
@@ -284,9 +284,6 @@
 
 
 class NNDropoutModule(torch.nn.Module):
-    def __init__(self):
-        super(NNDropoutModule, self).__init__()
-
     def forward(self):
         a = torch.randn(8, 4)
         b = torch.randn(8, 4, 4, 4)
@@ -301,9 +298,6 @@
 
 
 class NNSparseModule(torch.nn.Module):
-    def __init__(self):
-        super(NNSparseModule, self).__init__()
-
     def forward(self):
         input = torch.tensor([[1, 2, 4, 5], [4, 3, 2, 9]])
         input2 = torch.tensor([1, 2, 4, 5, 4, 3, 2, 9])
@@ -317,9 +311,6 @@
 
 
 class NNDistanceModule(torch.nn.Module):
-    def __init__(self):
-        super(NNDistanceModule, self).__init__()
-
     def forward(self):
         a = torch.randn(8, 4)
         b = torch.randn(8, 4)
@@ -332,7 +323,7 @@
 
 class NNLossFunctionModule(torch.nn.Module):
     def __init__(self):
-        super(NNLossFunctionModule, self).__init__()
+        super().__init__()
         self.x = torch.FloatTensor([[0.1, 0.2, 0.4, 0.8]])
         self.y = torch.LongTensor([[3, 0, -1, 1]])
 
@@ -371,7 +362,7 @@
 
 class NNVisionModule(torch.nn.Module):
     def __init__(self):
-        super(NNVisionModule, self).__init__()
+        super().__init__()
         self.input = torch.randn(1, 4, 9, 9)
         self.vision_modules = nn.ModuleList(
             [
@@ -401,7 +392,7 @@
 
 class NNShuffleModule(torch.nn.Module):
     def __init__(self):
-        super(NNShuffleModule, self).__init__()
+        super().__init__()
         self.shuffle = nn.ChannelShuffle(2)
 
     def forward(self):
@@ -410,7 +401,7 @@
 
 class NNUtilsModule(torch.nn.Module):
     def __init__(self):
-        super(NNUtilsModule, self).__init__()
+        super().__init__()
         self.flatten = nn.Sequential(
             nn.Linear(50, 50),
             nn.Unflatten(1, (2, 5, 5))
diff --git a/test/mobile/model_test/quantization_ops.py b/test/mobile/model_test/quantization_ops.py
index 00ccb97..dd34137 100644
--- a/test/mobile/model_test/quantization_ops.py
+++ b/test/mobile/model_test/quantization_ops.py
@@ -4,7 +4,7 @@
 
 class GeneralQuantModule(torch.nn.Module):
     def __init__(self):
-        super(GeneralQuantModule, self).__init__()
+        super().__init__()
         self.embedding = torch.ao.nn.quantized.Embedding(
             num_embeddings=10, embedding_dim=12
         )
@@ -48,7 +48,7 @@
 
 class DynamicQuantModule:
     def __init__(self):
-        super(DynamicQuantModule, self).__init__()
+        super().__init__()
         self.module = self.M()
 
     def getModule(self):
@@ -111,9 +111,6 @@
 
 
 class StaticQuantModule:
-    def __init__(self):
-        super(StaticQuantModule, self).__init__()
-
     def getModule(self):
         model_fp32 = self.M()
         model_fp32.eval()
@@ -165,9 +162,6 @@
 
 
 class FusedQuantModule:
-    def __init__(self):
-        super(FusedQuantModule, self).__init__()
-
     def getModule(self):
         model_fp32 = self.M()
         model_fp32.eval()
diff --git a/test/mobile/model_test/sampling_ops.py b/test/mobile/model_test/sampling_ops.py
index a1ac71a..50e6d91 100644
--- a/test/mobile/model_test/sampling_ops.py
+++ b/test/mobile/model_test/sampling_ops.py
@@ -4,9 +4,6 @@
 # https://pytorch.org/docs/stable/torch.html#random-sampling
 
 class SamplingOpsModule(torch.nn.Module):
-    def __init__(self):
-        super(SamplingOpsModule, self).__init__()
-
     def forward(self):
         a = torch.empty(3, 3).uniform_(0.0, 1.0)
         size = (1, 4)
diff --git a/test/mobile/model_test/tensor_ops.py b/test/mobile/model_test/tensor_ops.py
index 9e04c67..089cf10 100644
--- a/test/mobile/model_test/tensor_ops.py
+++ b/test/mobile/model_test/tensor_ops.py
@@ -2,9 +2,6 @@
 
 
 class TensorOpsModule(torch.nn.Module):
-    def __init__(self):
-        super(TensorOpsModule, self).__init__()
-
     def forward(self):
         return self.tensor_general_ops()
 
@@ -102,9 +99,6 @@
 
 
 class TensorCreationOpsModule(torch.nn.Module):
-    def __init__(self):
-        super(TensorCreationOpsModule, self).__init__()
-
     def forward(self):
         return self.tensor_creation_ops()
 
@@ -161,9 +155,6 @@
 
 
 class TensorIndexingOpsModule(torch.nn.Module):
-    def __init__(self):
-        super(TensorIndexingOpsModule, self).__init__()
-
     def forward(self):
         return self.tensor_indexing_ops()
 
@@ -227,9 +218,6 @@
 
 
 class TensorTypingOpsModule(torch.nn.Module):
-    def __init__(self):
-        super(TensorTypingOpsModule, self).__init__()
-
     def forward(self):
         return self.tensor_typing_ops()
 
@@ -255,9 +243,6 @@
 
 
 class TensorViewOpsModule(torch.nn.Module):
-    def __init__(self):
-        super(TensorViewOpsModule, self).__init__()
-
     def forward(self):
         return self.tensor_view_ops()
 
diff --git a/test/mobile/model_test/torchvision_models.py b/test/mobile/model_test/torchvision_models.py
index 232afbc..8684724 100644
--- a/test/mobile/model_test/torchvision_models.py
+++ b/test/mobile/model_test/torchvision_models.py
@@ -5,9 +5,6 @@
 
 
 class MobileNetV2Module:
-    def __init__(self):
-        super(MobileNetV2Module, self).__init__()
-
     def getModule(self):
         model = torchvision.models.mobilenet_v2(pretrained=True)
         model.eval()
diff --git a/test/mobile/nnc/aot_test_model.py b/test/mobile/nnc/aot_test_model.py
index c5e123b..834b731 100644
--- a/test/mobile/nnc/aot_test_model.py
+++ b/test/mobile/nnc/aot_test_model.py
@@ -3,9 +3,6 @@
 
 
 class NeuralNetwork(nn.Module):
-    def __init__(self):
-        super(NeuralNetwork, self).__init__()
-
     def forward(self, x):
         return torch.add(x, 10)
 
diff --git a/test/mobile/test_bytecode.py b/test/mobile/test_bytecode.py
index 50a4c2f..b5a493e 100644
--- a/test/mobile/test_bytecode.py
+++ b/test/mobile/test_bytecode.py
@@ -311,9 +311,6 @@
 
     def test_get_mobile_model_contained_types(self):
         class MyTestModule(torch.nn.Module):
-            def __init__(self):
-                super(MyTestModule, self).__init__()
-
             def forward(self, x):
                 return x + 10
 
diff --git a/test/mobile/test_lite_script_module.py b/test/mobile/test_lite_script_module.py
index 9089977..f75a02b 100644
--- a/test/mobile/test_lite_script_module.py
+++ b/test/mobile/test_lite_script_module.py
@@ -34,9 +34,6 @@
 
     def test_load_mobile_module(self):
         class MyTestModule(torch.nn.Module):
-            def __init__(self):
-                super(MyTestModule, self).__init__()
-
             def forward(self, x):
                 return x + 10
 
@@ -60,15 +57,12 @@
 
     def test_save_mobile_module_with_debug_info_with_trace(self):
         class A(torch.nn.Module):
-            def __init__(self):
-                super(A, self).__init__()
-
             def forward(self, x, y):
                 return x * y
 
         class B(torch.nn.Module):
             def __init__(self):
-                super(B, self).__init__()
+                super().__init__()
                 self.A0 = A()
                 self.A1 = A()
 
@@ -103,9 +97,6 @@
 
     def test_load_mobile_module_with_debug_info(self):
         class MyTestModule(torch.nn.Module):
-            def __init__(self):
-                super(MyTestModule, self).__init__()
-
             def forward(self, x):
                 return x + 5
 
@@ -161,7 +152,7 @@
     def test_method_calls_with_optional_arg(self):
         class A(torch.nn.Module):
             def __init__(self):
-                super(A, self).__init__()
+                super().__init__()
 
             # opt arg in script-to-script invocation
             def forward(self, x, two: int = 2):
@@ -169,7 +160,7 @@
 
         class B(torch.nn.Module):
             def __init__(self):
-                super(B, self).__init__()
+                super().__init__()
                 self.A0 = A()
 
             # opt arg in Python-to-script invocation
@@ -227,12 +218,11 @@
 
     def test_unsupported_return_list_with_module_class(self):
         class Foo(torch.nn.Module):
-            def __init__(self):
-                super(Foo, self).__init__()
+            pass
 
         class MyTestModuleForListWithModuleClass(torch.nn.Module):
             def __init__(self):
-                super(MyTestModuleForListWithModuleClass, self).__init__()
+                super().__init__()
                 self.foo = Foo()
 
             def forward(self):
@@ -250,12 +240,11 @@
 
     def test_unsupported_return_dict_with_module_class(self):
         class Foo(torch.nn.Module):
-            def __init__(self):
-                super(Foo, self).__init__()
+            pass
 
         class MyTestModuleForDictWithModuleClass(torch.nn.Module):
             def __init__(self):
-                super(MyTestModuleForDictWithModuleClass, self).__init__()
+                super().__init__()
                 self.foo = Foo()
 
             def forward(self):
@@ -274,7 +263,7 @@
     def test_module_export_operator_list(self):
         class Foo(torch.nn.Module):
             def __init__(self):
-                super(Foo, self).__init__()
+                super().__init__()
                 self.weight = torch.ones((20, 1, 5, 5))
                 self.bias = torch.ones(20)
 
@@ -391,7 +380,7 @@
     def test_source_range_raise_exc(self):
         class FooTest5(torch.jit.ScriptModule):
             def __init__(self, val: int):
-                super(FooTest5, self).__init__()
+                super().__init__()
                 self.val = val
 
             @torch.jit.script_method
@@ -434,9 +423,6 @@
                 pass
 
         class B(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 return x
 
@@ -496,7 +482,7 @@
         # From the example in Static Quantization section of https://pytorch.org/docs/stable/quantization.html
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.quant = torch.ao.quantization.QuantStub()
                 self.conv = torch.nn.Conv2d(1, 1, 1)
                 self.relu = torch.nn.ReLU()
@@ -524,9 +510,6 @@
 
     def test_bundled_input_with_dynamic_type(self):
         class Model(torch.nn.Module):
-            def __init__(self):
-                super(Model, self).__init__()
-
             def forward(
                 self,
                 x: Dict[int, torch.Tensor],
diff --git a/test/mobile/test_lite_script_type.py b/test/mobile/test_lite_script_type.py
index 44eb6d4..913c527 100644
--- a/test/mobile/test_lite_script_type.py
+++ b/test/mobile/test_lite_script_type.py
@@ -42,7 +42,7 @@
 
         class Bar(torch.nn.Module):
             def __init__(self):
-                super(Bar, self).__init__()
+                super().__init__()
                 self.foo = Foo(torch.tensor(1))
 
             def forward(self, a: torch.Tensor):
@@ -104,7 +104,7 @@
 
         class Bar(torch.nn.Module):
             def __init__(self):
-                super(Bar, self).__init__()
+                super().__init__()
                 self.foo = Foo(torch.tensor(1))
 
             def forward(self, a: torch.Tensor):
@@ -153,7 +153,7 @@
 
         class Bar(torch.nn.Module):
             def __init__(self):
-                super(Bar, self).__init__()
+                super().__init__()
                 self.foo = Foo(torch.tensor(1), Baz(torch.tensor(1)))
 
             def forward(self, a: torch.Tensor):
diff --git a/test/mobile/test_quantize_fx_lite_script_module.py b/test/mobile/test_quantize_fx_lite_script_module.py
index ebc96d1..06562ec 100644
--- a/test/mobile/test_quantize_fx_lite_script_module.py
+++ b/test/mobile/test_quantize_fx_lite_script_module.py
@@ -58,7 +58,7 @@
     def test_conv2d(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv1 = nn.Conv2d(1, 1, 1)
                 self.conv2 = nn.Conv2d(1, 1, 1)
 
diff --git a/test/nn/test_init.py b/test/nn/test_init.py
index 9e72c10..b4d0c8d 100644
--- a/test/nn/test_init.py
+++ b/test/nn/test_init.py
@@ -16,7 +16,7 @@
 
 class TestNNInit(TestCase):
     def setUp(self):
-        super(TestNNInit, self).setUp()
+        super().setUp()
         random.seed(123)
 
     def _is_normal(self, tensor, mean, std):
diff --git a/test/nn/test_lazy_modules.py b/test/nn/test_lazy_modules.py
index c3a9dff..d3b0d58 100644
--- a/test/nn/test_lazy_modules.py
+++ b/test/nn/test_lazy_modules.py
@@ -219,9 +219,6 @@
         functions successfully.
         """
         class TestModule(torch.nn.modules.lazy.LazyModuleMixin, torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def initialize_parameters(self, input):
                 return None
 
@@ -242,9 +239,6 @@
         functions successfully.
         """
         class TestModule(torch.nn.modules.lazy.LazyModuleMixin, torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def initialize_parameters(self, input):
                 return None
 
@@ -551,7 +545,7 @@
     def test_chained_initialization(self):
         class MyNetwork(torch.nn.Module):
             def __init__(self):
-                super(MyNetwork, self).__init__()
+                super().__init__()
                 self.linear_1 = torch.nn.LazyLinear(15)
                 self.linear_2 = torch.nn.LazyLinear(10)
 
diff --git a/test/nn/test_module_hooks.py b/test/nn/test_module_hooks.py
index 2aa6481..9edabd1 100644
--- a/test/nn/test_module_hooks.py
+++ b/test/nn/test_module_hooks.py
@@ -393,9 +393,6 @@
             counter['backward'] += 1
 
         class TestModule(nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, dict):
                 inp = dict['x']
                 x = torch.nn.functional.softmax(inp, dim=0)
@@ -478,7 +475,7 @@
         # Test with module instance method as hook
         class MyModule(nn.Module):
             def __init__(self):
-                super(MyModule, self).__init__()
+                super().__init__()
                 self.foo = torch.nn.Parameter(torch.rand(10))
 
             def my_pre_load_hook(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs):
@@ -543,7 +540,7 @@
 
         class MyModule(nn.Module):
             def __init__(self):
-                super(MyModule, self).__init__()
+                super().__init__()
                 self.foo = torch.nn.Parameter(torch.rand(10))
 
             def my_post_load_hook(self, module, incompatible_keys):
diff --git a/test/nn/test_packed_sequence.py b/test/nn/test_packed_sequence.py
index 04856dc..3436212 100644
--- a/test/nn/test_packed_sequence.py
+++ b/test/nn/test_packed_sequence.py
@@ -24,7 +24,7 @@
     }
 
     def __init__(self, *args, **kwargs):
-        super(PackedSequenceTest, self).__init__(*args, **kwargs)
+        super().__init__(*args, **kwargs)
         self.batch_size = 5
         self.max_length = 6
 
diff --git a/test/onnx/model_defs/op_test.py b/test/onnx/model_defs/op_test.py
index 56a6687..195e3c8 100644
--- a/test/onnx/model_defs/op_test.py
+++ b/test/onnx/model_defs/op_test.py
@@ -19,17 +19,11 @@
 
 
 class ConcatNet(nn.Module):
-    def __init__(self):
-        super().__init__()
-
     def forward(self, inputs):
         return torch.cat(inputs, 1)
 
 
 class PermuteNet(nn.Module):
-    def __init__(self):
-        super().__init__()
-
     def forward(self, input):
         return input.permute(2, 3, 0, 1)
 
diff --git a/test/onnx/test_onnx_opset.py b/test/onnx/test_onnx_opset.py
index ef79e82..7c00862 100644
--- a/test/onnx/test_onnx_opset.py
+++ b/test/onnx/test_onnx_opset.py
@@ -170,9 +170,6 @@
 
     def test_upsample(self):
         class MyModule(Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 size = [v * 2 for v in x.size()[2:]]
                 size = [int(i) for i in size]
@@ -201,9 +198,6 @@
 
     def test_cast_constant(self):
         class MyModule(Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 return x - 1
 
diff --git a/test/onnx/test_operators.py b/test/onnx/test_operators.py
index cfb3673..7bc47e8 100644
--- a/test/onnx/test_operators.py
+++ b/test/onnx/test_operators.py
@@ -880,7 +880,7 @@
     #    def test_c2_op(self):
     #        class MyModel(torch.nn.Module):
     #            def __init__(self):
-    #                super(MyModel, self).__init__()
+    #                super().__init__()
     #
     #            def forward(self, scores, bbox_deltas, im_info, anchors):
     #                a, b = torch.ops._caffe2.GenerateProposals(
diff --git a/test/onnx/test_pytorch_onnx_no_runtime.py b/test/onnx/test_pytorch_onnx_no_runtime.py
index 15d9337..0bd78d3 100644
--- a/test/onnx/test_pytorch_onnx_no_runtime.py
+++ b/test/onnx/test_pytorch_onnx_no_runtime.py
@@ -99,7 +99,7 @@
 
         class TraceMe(torch.nn.Module):
             def __init__(self):
-                super(TraceMe, self).__init__()
+                super().__init__()
                 self.foo = Foo()
 
             def forward(self, x):
@@ -120,9 +120,6 @@
 
     def test_onnx_export_script_module(self):
         class ModuleToExport(torch.jit.ScriptModule):
-            def __init__(self):
-                super(ModuleToExport, self).__init__()
-
             @torch.jit.script_method
             def forward(self, x):
                 y = x - x
@@ -138,9 +135,6 @@
             return torch.nn.functional.sigmoid(inp)  # triggers a deprecation warning
 
         class WarningTest(torch.nn.Module):
-            def __init__(self):
-                super(WarningTest, self).__init__()
-
             def forward(self, x):
                 return func_with_warning(x)
 
@@ -151,16 +145,13 @@
 
     def test_onnx_export_script_python_fail(self):
         class PythonModule(torch.jit.ScriptModule):
-            def __init__(self):
-                super(PythonModule, self).__init__()
-
             @torch.jit.ignore
             def forward(self, x):
                 return torch.neg(x)
 
         class ModuleToExport(torch.jit.ScriptModule):
             def __init__(self):
-                super(ModuleToExport, self).__init__()
+                super().__init__()
                 self.mod = PythonModule()
 
             @torch.jit.script_method
@@ -175,15 +166,12 @@
 
     def test_onnx_export_script_inline_trace(self):
         class ModuleToInline(torch.nn.Module):
-            def __init__(self):
-                super(ModuleToInline, self).__init__()
-
             def forward(self, x):
                 return torch.neg(x)
 
         class ModuleToExport(torch.jit.ScriptModule):
             def __init__(self):
-                super(ModuleToExport, self).__init__()
+                super().__init__()
                 self.mod = torch.jit.trace(ModuleToInline(), torch.zeros(1, 2, 3))
 
             @torch.jit.script_method
@@ -196,16 +184,13 @@
 
     def test_onnx_export_script_inline_script(self):
         class ModuleToInline(torch.jit.ScriptModule):
-            def __init__(self):
-                super(ModuleToInline, self).__init__()
-
             @torch.jit.script_method
             def forward(self, x):
                 return torch.neg(x)
 
         class ModuleToExport(torch.jit.ScriptModule):
             def __init__(self):
-                super(ModuleToExport, self).__init__()
+                super().__init__()
                 self.mod = ModuleToInline()
 
             @torch.jit.script_method
@@ -218,9 +203,6 @@
 
     def test_onnx_export_script_module_loop(self):
         class ModuleToExport(torch.jit.ScriptModule):
-            def __init__(self):
-                super(ModuleToExport, self).__init__()
-
             @torch.jit.script_method
             def forward(self, x):
                 # test if we support end to end onnx export on loop and
@@ -236,9 +218,6 @@
     @common_utils.suppress_warnings
     def test_onnx_export_script_truediv(self):
         class ModuleToExport(torch.jit.ScriptModule):
-            def __init__(self):
-                super(ModuleToExport, self).__init__()
-
             @torch.jit.script_method
             def forward(self, x):
                 z = x.size(0) / 2
@@ -252,9 +231,6 @@
 
     def test_onnx_export_script_non_alpha_add_sub(self):
         class ModuleToExport(torch.jit.ScriptModule):
-            def __init__(self):
-                super(ModuleToExport, self).__init__()
-
             @torch.jit.script_method
             def forward(self, x):
                 bs = x.size(0) + 1
@@ -265,9 +241,6 @@
 
     def test_onnx_export_script_module_if(self):
         class ModuleToExport(torch.jit.ScriptModule):
-            def __init__(self):
-                super(ModuleToExport, self).__init__()
-
             @torch.jit.script_method
             def forward(self, x):
                 if bool(torch.sum(x) > 0):
@@ -280,7 +253,7 @@
     def test_onnx_export_script_inline_params(self):
         class ModuleToInline(torch.jit.ScriptModule):
             def __init__(self):
-                super(ModuleToInline, self).__init__()
+                super().__init__()
                 self.m = torch.nn.Parameter(torch.ones(3, 3))
                 self.unused = torch.nn.Parameter(torch.ones(1, 2, 3))
 
@@ -290,7 +263,7 @@
 
         class ModuleToExport(torch.jit.ScriptModule):
             def __init__(self):
-                super(ModuleToExport, self).__init__()
+                super().__init__()
                 self.mod = ModuleToInline()
                 self.param = torch.nn.Parameter(torch.ones(3, 4))
 
@@ -310,7 +283,7 @@
     def test_onnx_export_speculate(self):
         class Foo(torch.jit.ScriptModule):
             def __init__(self, m):
-                super(Foo, self).__init__()
+                super().__init__()
                 self.m = m
 
             @torch.jit.script_method
@@ -693,9 +666,6 @@
 
     def test_onnx_proto_checker(self):
         class Model(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 return 2 * x
 
@@ -817,9 +787,6 @@
         T, B, C = 3, 5, 7
 
         class PadPackedWrapper(torch.nn.Module):
-            def __init__(self):
-                super(PadPackedWrapper, self).__init__()
-
             def forward(self, x, seq_lens):
                 x = pack_padded_sequence(x, seq_lens)
                 x, _ = pad_packed_sequence(x)
@@ -871,7 +838,7 @@
 
         class RNNTraceWrapper(torch.nn.Module):
             def __init__(self, cell_type):
-                super(RNNTraceWrapper, self).__init__()
+                super().__init__()
                 if cell_type == "RNN":
                     self.rnn = torch.nn.RNN(
                         input_size=C, hidden_size=C, num_layers=num_layers
@@ -930,7 +897,7 @@
 
         class LSTMTraceWrapper(torch.nn.Module):
             def __init__(self):
-                super(LSTMTraceWrapper, self).__init__()
+                super().__init__()
 
                 self.rnn = torch.nn.LSTM(
                     input_size=C, hidden_size=C, num_layers=num_layers
@@ -1101,7 +1068,7 @@
         # For BUILD_CAFFE2=0, aten fallback only when not exportable
         class ONNXExportable(torch.nn.Module):
             def __init__(self):
-                super(ONNXExportable, self).__init__()
+                super().__init__()
                 self.quant = torch.ao.quantization.QuantStub()
                 self.fc1 = torch.nn.Linear(12, 8)
                 self.fc2 = torch.nn.Linear(8, 4)
diff --git a/test/onnx/test_pytorch_onnx_onnxruntime.py b/test/onnx/test_pytorch_onnx_onnxruntime.py
index 80e530c..ad5f7a9 100644
--- a/test/onnx/test_pytorch_onnx_onnxruntime.py
+++ b/test/onnx/test_pytorch_onnx_onnxruntime.py
@@ -851,9 +851,6 @@
     @skipDtypeChecking
     def test_primitive_input_floating(self):
         class Model(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x: float, y):
                 return x + y
 
@@ -863,9 +860,6 @@
 
     def test_primitive_input_bool(self):
         class Model(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, flag: bool, x, y):
                 if flag:
                     return x
@@ -11936,9 +11930,6 @@
 
     def test_tuple_output_from_if_with_raised_exception(self):
         class M(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, t: Tensor) -> Tuple[Tensor, Tensor]:
                 if float(t) < 0:
                     raise Exception("Negative input")
diff --git a/test/onnx/test_utility_funs.py b/test/onnx/test_utility_funs.py
index 77766d1..e94c7bb 100644
--- a/test/onnx/test_utility_funs.py
+++ b/test/onnx/test_utility_funs.py
@@ -1625,9 +1625,6 @@
             return x + z
 
         class MyModule(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x, y):
                 return f(x, y)
 
diff --git a/test/onnx_caffe2/test_pytorch_onnx_caffe2.py b/test/onnx_caffe2/test_pytorch_onnx_caffe2.py
index b8df7b8..a3b0d06 100644
--- a/test/onnx_caffe2/test_pytorch_onnx_caffe2.py
+++ b/test/onnx_caffe2/test_pytorch_onnx_caffe2.py
@@ -814,9 +814,6 @@
         c = torch.randn(BATCH_SIZE, 3, 224, 224)
 
         class MyModel(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, input):
                 return input + c.type_as(input)
 
@@ -828,9 +825,6 @@
 
     def _test_index_generic(self, fn):
         class MyModel(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, input):
                 return fn(input)
 
@@ -925,9 +919,6 @@
 
     def test_chunk(self):
         class MyModel(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, input):
                 # TODO: Why index? This returns a tuple and test runner doesn't
                 # support tuple comparison.
@@ -937,9 +928,6 @@
 
     def test_sqrt(self):
         class MyModel(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, input):
                 return input.sqrt()
 
@@ -956,9 +944,6 @@
 
     def test_log(self):
         class MyModel(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, input):
                 return input.log()
 
@@ -968,9 +953,6 @@
     @skipIfUnsupportedMinOpsetVersion(9)
     def test_erf(self):
         class MyModel(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, input):
                 return input.erf()
 
@@ -980,9 +962,6 @@
     def test_trigonometry(self):
         def test_func(name):
             class MyModel(torch.nn.Module):
-                def __init__(self):
-                    super().__init__()
-
                 def forward(self, input):
                     return getattr(input, name)()
 
@@ -1000,9 +979,6 @@
 
     def test_addconstant(self):
         class MyModel(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, input):
                 # TODO: Why index? This returns a tuple and test runner doesn't
                 # support tuple comparison.
@@ -1012,9 +988,6 @@
 
     def test_subconstant(self):
         class MyModel(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, input):
                 # TODO: Why index? This returns a tuple and test runner doesn't
                 # support tuple comparison.
@@ -1169,9 +1142,6 @@
 
     def test_mm(self):
         class MyModel(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, m1, m2):
                 return torch.mm(m1, m2)
 
@@ -1183,9 +1153,6 @@
 
     def test_addmm(self):
         class MyModel(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, ma, m1, m2):
                 return torch.addmm(ma, m1, m2)
 
@@ -1259,9 +1226,6 @@
     # test for a pytorch optimization pass, see https://github.com/pytorch/pytorch/pull/7872
     def test_consecutive_transposes(self):
         class MyModel(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 return x.transpose(1, 2).transpose(2, 3)
 
@@ -1275,9 +1239,6 @@
         for params in [{}] + [{"dim": i} for i in range(len(shape))]:
 
             class MyModel(torch.nn.Module):
-                def __init__(self):
-                    super().__init__()
-
                 def forward(self, x):
                     return torch.sum(x, **params)
 
@@ -1291,9 +1252,6 @@
         for params in [{"dim": i} for i in range(len(shape))]:
 
             class MyModel(torch.nn.Module):
-                def __init__(self):
-                    super().__init__()
-
                 def forward(self, x):
                     return torch.cumsum(x, **params)
 
@@ -1412,9 +1370,6 @@
 
     def test_repeat(self):
         class MyModel(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 return x.repeat(1, 2, 3, 4)
 
@@ -1434,9 +1389,6 @@
     @skipIfUnsupportedOpsetVersion([10])
     def test_interpolate_upsample(self):
         class MyModel(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 size = [v * 2 for v in x.size()[2:]]
                 # work around for now: turn the dynamic sizes into constant
@@ -1452,9 +1404,6 @@
     @skipIfUnsupportedOpsetVersion([7, 8, 10])
     def test_interpolate_upsample_dynamic_sizes(self):
         class MyModel(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 size = [v * 2 for v in x.size()[2:]]
                 return nn.functional.interpolate(x, size=size, mode="nearest")
@@ -1467,9 +1416,6 @@
 
     def test_repeat_dim_overflow(self):
         class MyModel(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 return x.repeat(1, 2, 3, 4)
 
@@ -1480,9 +1426,6 @@
 
     def test_repeat_dynamic(self):
         class MyModel(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x, y):
                 return x.repeat(y.size()[0] // 2, y.size()[1] * 2)
 
@@ -1511,9 +1454,6 @@
         for params in [{}] + [{"dim": i} for i in range(len(shape))]:
 
             class MyModel(torch.nn.Module):
-                def __init__(self):
-                    super().__init__()
-
                 def forward(self, x):
                     return torch.mean(x, **params)
 
@@ -1598,9 +1538,6 @@
         for dim in range(-len(shape) - 1, len(shape) + 1):
 
             class MyModel(torch.nn.Module):
-                def __init__(self):
-                    super().__init__()
-
                 def forward(self, x):
                     return x.unsqueeze(dim)
 
@@ -1615,9 +1552,6 @@
         for dim in range(-len(shape), len(shape)):
 
             class MyModel(torch.nn.Module):
-                def __init__(self):
-                    super().__init__()
-
                 def forward(self, x):
                     return x.squeeze(dim)
 
@@ -1644,9 +1578,6 @@
 
     def test_dynamic_sizes(self):
         class MyModel(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 shape = torch.onnx.operators.shape_as_tensor(x)
                 new_shape = torch.cat((torch.LongTensor([-1]), shape[0].view(1)))
@@ -1659,9 +1590,6 @@
 
     def test_advanced_broadcast(self):
         class MyModel(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x, y):
                 return torch.mul(x, y)
 
@@ -2362,9 +2290,6 @@
 
     def test_c2_roi_align(self):
         class MyModel(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, feature, rois):
                 roi_feature = torch.ops._caffe2.RoIAlign(
                     feature,
@@ -2395,9 +2320,6 @@
 
     def test_c2_generate_proposals(self):
         class MyModel(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, scores, bbox_deltas, im_info, anchors):
                 a, b = torch.ops._caffe2.GenerateProposals(
                     scores,
@@ -2433,9 +2355,6 @@
 
     def test_c2_bbox_transform(self):
         class MyModel(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, rois, deltas, im_info):
                 a, b = torch.ops._caffe2.BBoxTransform(
                     rois,
@@ -2504,9 +2423,6 @@
         topk_per_image = int(sum(roi_counts) / 2)
 
         class MyModel(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, class_prob, pred_bbox, batch_splits):
                 a, b, c, d, e, f = torch.ops._caffe2.BoxWithNMSLimit(
                     class_prob,
@@ -2545,9 +2461,6 @@
         is_bidirectional = True
 
         class MyModel(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, lstm_in):
                 a, b, c = torch.ops._caffe2.InferenceLSTM(
                     lstm_in, num_layers, has_bias, batch_first, is_bidirectional
diff --git a/test/onnx_caffe2/test_verify.py b/test/onnx_caffe2/test_verify.py
index af8c29b..3a5dc27 100644
--- a/test/onnx_caffe2/test_verify.py
+++ b/test/onnx_caffe2/test_verify.py
@@ -48,9 +48,6 @@
 
     def test_jumbled_params(self):
         class MyModel(Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 y = x * x
                 self.param = Parameter(torch.tensor([2.0]))
diff --git a/test/package/package_a/fake_interface.py b/test/package/package_a/fake_interface.py
index 66802b3..02d343a 100644
--- a/test/package/package_a/fake_interface.py
+++ b/test/package/package_a/fake_interface.py
@@ -11,9 +11,6 @@
 class OrigModule(torch.nn.Module):
     """A module that implements ModuleInterface."""
 
-    def __init__(self):
-        super(OrigModule, self).__init__()
-
     def one(self, inp1: Tensor, inp2: Tensor) -> Tensor:
         return inp1 + inp2 + 1
 
@@ -27,9 +24,6 @@
 class NewModule(torch.nn.Module):
     """A *different* module that implements ModuleInterface."""
 
-    def __init__(self):
-        super(NewModule, self).__init__()
-
     def one(self, inp1: Tensor, inp2: Tensor) -> Tensor:
         return inp1 * inp2 + 1
 
diff --git a/test/package/package_a/fake_script_class.py b/test/package/package_a/fake_script_class.py
index f68b835..988a726 100644
--- a/test/package/package_a/fake_script_class.py
+++ b/test/package/package_a/fake_script_class.py
@@ -30,9 +30,6 @@
 
 
 class UsesIdListFeature(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-
     def forward(self, feature: Any):
         if isinstance(feature, IdListFeature):
             return feature.id_list
diff --git a/test/package/package_a/std_sys_module_hacks.py b/test/package/package_a/std_sys_module_hacks.py
index fa8df64..bb7435c 100644
--- a/test/package/package_a/std_sys_module_hacks.py
+++ b/test/package/package_a/std_sys_module_hacks.py
@@ -8,8 +8,5 @@
 
 
 class Module(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-
     def forward(self):
         return os.path.abspath("test")
diff --git a/test/package/package_a/test_nn_module.py b/test/package/package_a/test_nn_module.py
index 17ce630..fec5fd2 100644
--- a/test/package/package_a/test_nn_module.py
+++ b/test/package/package_a/test_nn_module.py
@@ -5,7 +5,7 @@
 
 class TestNnModule(torch.nn.Module):
     def __init__(self, nz=6, ngf=9, nc=3):
-        super(TestNnModule, self).__init__()
+        super().__init__()
         self.main = torch.nn.Sequential(
             # input is Z, going into a convolution
             torch.nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False),
diff --git a/test/package/test_package_script.py b/test/package/test_package_script.py
index 6dcaa26..04e3a5b 100644
--- a/test/package/test_package_script.py
+++ b/test/package/test_package_script.py
@@ -240,9 +240,6 @@
         """
 
         class Submod(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, input: str):
                 input = input + "_submod"
                 return input
@@ -260,9 +257,6 @@
         # redefinition is intentional, change single inner string
         # string attribute, should trigger new module type
         class Submod(torch.nn.Module):  # noqa: F811
-            def __init__(self):
-                super().__init__()
-
             def forward(self, input: str):
                 input = input + "_submod(changed)"
                 return input
diff --git a/test/profiler/test_profiler.py b/test/profiler/test_profiler.py
index d4adc7e..8e826cb 100644
--- a/test/profiler/test_profiler.py
+++ b/test/profiler/test_profiler.py
@@ -504,7 +504,7 @@
 
         class DummyModule(nn.Module):
             def __init__(self):
-                super(DummyModule, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(3, 2, kernel_size=1, stride=2, padding=3, bias=False)
 
             def forward(self, x):
@@ -967,9 +967,6 @@
     @unittest.skipIf(not kineto_available(), "Kineto is required")
     def test_module_hierarchy(self):
         class A(nn.Module):
-            def __init__(self):
-                super(A, self).__init__()
-
             def my_new_method(self, x):
                 return x * 3
 
@@ -981,15 +978,12 @@
                 return self.forward_impl_(x, y)
 
         class B(nn.Module):
-            def __init__(self):
-                super(B, self).__init__()
-
             def forward(self, x):
                 return x + 2
 
         class C(nn.Module):
             def __init__(self):
-                super(C, self).__init__()
+                super().__init__()
                 self.A0 = A()
                 self.B0 = B()
 
@@ -1045,7 +1039,7 @@
 
         class TwoLayerNet(torch.nn.Module):
             def __init__(self, D_in, H, D_out):
-                super(TwoLayerNet, self).__init__()
+                super().__init__()
                 self.linear1 = torch.nn.Linear(D_in, H)
                 self.linear2 = torch.nn.Linear(H, D_out)
 
@@ -1056,7 +1050,7 @@
 
         class CustomSGD(torch.optim.SGD):
             def __init__(self, *args, **kwargs):
-                super(CustomSGD, self).__init__(*args, **kwargs)
+                super().__init__(*args, **kwargs)
 
         def train():
             for _, data in enumerate(dataloader):
diff --git a/test/quantization/bc/test_backward_compatibility.py b/test/quantization/bc/test_backward_compatibility.py
index 987b0ea..0dbe60d 100644
--- a/test/quantization/bc/test_backward_compatibility.py
+++ b/test/quantization/bc/test_backward_compatibility.py
@@ -360,7 +360,7 @@
     def test_default_qat_qconfig(self):
         class Model(nn.Module):
             def __init__(self):
-                super(Model, self).__init__()
+                super().__init__()
                 self.linear = nn.Linear(5, 5)
                 self.relu = nn.ReLU()
 
diff --git a/test/quantization/core/test_docs.py b/test/quantization/core/test_docs.py
index ecfb1ab..ab41c51 100644
--- a/test/quantization/core/test_docs.py
+++ b/test/quantization/core/test_docs.py
@@ -25,7 +25,7 @@
 
     def run(self, result=None):
         with override_quantized_engine("qnnpack") if IS_ARM64 else contextlib.nullcontext():
-            super(TestQuantizationDocs, self).run(result)
+            super().run(result)
 
     def _get_code(
         self, path_from_pytorch, unique_identifier, offset=2, short_snippet=False
diff --git a/test/quantization/core/test_quantized_op.py b/test/quantization/core/test_quantized_op.py
index 1d38d39..58a7ed4 100644
--- a/test/quantization/core/test_quantized_op.py
+++ b/test/quantization/core/test_quantized_op.py
@@ -2861,7 +2861,7 @@
     def test_custom_module_multi_head_attention(self):
         class MultiheadAttentionModel(torch.nn.Module):
             def __init__(self, *args, **kwargs):
-                super(MultiheadAttentionModel, self).__init__()
+                super().__init__()
                 self.layer = torch.nn.MultiheadAttention(*args, **kwargs)
 
             def forward(
diff --git a/test/quantization/core/test_quantized_tensor.py b/test/quantization/core/test_quantized_tensor.py
index 5a164f8..c0d9b02 100644
--- a/test/quantization/core/test_quantized_tensor.py
+++ b/test/quantization/core/test_quantized_tensor.py
@@ -22,7 +22,7 @@
 
 class Foo(torch.nn.Module):
     def __init__(self):
-        super(Foo, self).__init__()
+        super().__init__()
         self.qscheme = torch.per_tensor_symmetric
 
 def _calculate_dynamic_qparams(X, dtype, reduce_range=False):
@@ -1404,7 +1404,7 @@
                 __constants__ = ['fname']
 
                 def __init__(self):
-                    super(M, self).__init__()
+                    super().__init__()
                     self.fname = fname
 
                 @torch.jit.script_method
@@ -1432,7 +1432,7 @@
     def test_jit_serialization(self):
         class SimpleQTensor(torch.jit.ScriptModule):
             def __init__(self, per_channel):
-                super(SimpleQTensor, self).__init__()
+                super().__init__()
                 x = torch.rand(5, 5).float()
                 if not per_channel:
                     x_q = torch.quantize_per_tensor(x, 0.2, 10, torch.quint8)
diff --git a/test/quantization/core/test_workflow_module.py b/test/quantization/core/test_workflow_module.py
index 8f8ad4d..87a8c31 100644
--- a/test/quantization/core/test_workflow_module.py
+++ b/test/quantization/core/test_workflow_module.py
@@ -909,7 +909,7 @@
             # create conv-bn
             class Model(nn.Module):
                 def __init__(self):
-                    super(Model, self).__init__()
+                    super().__init__()
                     self.conv = nn.Conv2d(4, 1, 3, padding=1)
                     self.bn = nn.BatchNorm2d(1)
 
@@ -958,7 +958,7 @@
         class Model(nn.Module):
 
             def __init__(self):
-                super(Model, self).__init__()
+                super().__init__()
                 self.conv = nn.Conv2d(1, 1, 1)
                 self.bn = nn.BatchNorm2d(1)
                 self.relu = nn.ReLU()
@@ -1189,7 +1189,7 @@
     def test_embedding_bag_qat_config(self):
         class Model(nn.Module):
             def __init__(self):
-                super(Model, self).__init__()
+                super().__init__()
                 self.emb1 = torch.nn.EmbeddingBag(num_embeddings=10, embedding_dim=12,
                                                   include_last_offset=True, scale_grad_by_freq=False, mode='sum')
                 self.emb2 = torch.nn.EmbeddingBag(num_embeddings=10, embedding_dim=12,
@@ -1269,7 +1269,7 @@
     def test_default_fused_qat_config(self):
         class Model(nn.Module):
             def __init__(self):
-                super(Model, self).__init__()
+                super().__init__()
                 self.linear = nn.Linear(2, 2)
                 self.relu = nn.ReLU()
 
diff --git a/test/quantization/core/test_workflow_ops.py b/test/quantization/core/test_workflow_ops.py
index a0687d8..a352809 100644
--- a/test/quantization/core/test_workflow_ops.py
+++ b/test/quantization/core/test_workflow_ops.py
@@ -629,7 +629,7 @@
     def test_fake_quant_preserves_qparam_shapes_for_activations(self):
         class Model(nn.Module):
             def __init__(self):
-                super(Model, self).__init__()
+                super().__init__()
                 self.linear = nn.Linear(4, 4)
 
             def forward(self, x):
diff --git a/test/quantization/eager/test_bias_correction_eager.py b/test/quantization/eager/test_bias_correction_eager.py
index 0fc8743..d29d39b 100644
--- a/test/quantization/eager/test_bias_correction_eager.py
+++ b/test/quantization/eager/test_bias_correction_eager.py
@@ -68,7 +68,7 @@
     def test_linear_chain(self):
         class LinearChain(nn.Module):
             def __init__(self):
-                super(LinearChain, self).__init__()
+                super().__init__()
                 self.linear1 = nn.Linear(3, 4)
                 self.linear2 = nn.Linear(4, 5)
                 self.linear3 = nn.Linear(5, 6)
@@ -87,7 +87,7 @@
     def test_conv_chain(self):
         class ConvChain(nn.Module):
             def __init__(self):
-                super(ConvChain, self).__init__()
+                super().__init__()
                 self.conv2d1 = nn.Conv2d(3, 4, 5, 5)
                 self.conv2d2 = nn.Conv2d(4, 5, 5, 5)
                 self.conv2d3 = nn.Conv2d(5, 6, 5, 5)
diff --git a/test/quantization/eager/test_equalize_eager.py b/test/quantization/eager/test_equalize_eager.py
index 2fd8557..f08ff2b 100644
--- a/test/quantization/eager/test_equalize_eager.py
+++ b/test/quantization/eager/test_equalize_eager.py
@@ -73,7 +73,7 @@
         '''
         class ChainModule(nn.Module):
             def __init__(self):
-                super(ChainModule, self).__init__()
+                super().__init__()
                 self.linear1 = nn.Linear(3, 4)
                 self.linear2 = nn.Linear(4, 5)
                 self.linear3 = nn.Linear(5, 6)
diff --git a/test/quantization/eager/test_numeric_suite_eager.py b/test/quantization/eager/test_numeric_suite_eager.py
index 794630e..128f7cb 100644
--- a/test/quantization/eager/test_numeric_suite_eager.py
+++ b/test/quantization/eager/test_numeric_suite_eager.py
@@ -40,7 +40,7 @@
 
 class SubModule(torch.nn.Module):
     def __init__(self):
-        super(SubModule, self).__init__()
+        super().__init__()
         self.qconfig = default_qconfig
         self.mod1 = torch.nn.Conv2d(3, 3, 3, bias=False).to(dtype=torch.float)
         self.mod2 = nn.ReLU()
@@ -57,7 +57,7 @@
 
 class ModelWithSubModules(torch.nn.Module):
     def __init__(self):
-        super(ModelWithSubModules, self).__init__()
+        super().__init__()
         self.mod1 = SubModule()
         self.conv = torch.nn.Conv2d(3, 5, 3, bias=False).to(dtype=torch.float)
 
@@ -69,7 +69,7 @@
 
 class ModelWithFunctionals(torch.nn.Module):
     def __init__(self):
-        super(ModelWithFunctionals, self).__init__()
+        super().__init__()
         self.mycat = nnq.FloatFunctional()
         self.myadd = nnq.FloatFunctional()
         self.mymul = nnq.FloatFunctional()
diff --git a/test/quantization/eager/test_quantize_eager_ptq.py b/test/quantization/eager/test_quantize_eager_ptq.py
index 3b878b7..9b3e1dd 100644
--- a/test/quantization/eager/test_quantize_eager_ptq.py
+++ b/test/quantization/eager/test_quantize_eager_ptq.py
@@ -1362,7 +1362,7 @@
 
             class ScriptWrapperPackedLSTM(torch.nn.Module):
                 def __init__(self, cell):
-                    super(ScriptWrapperPackedLSTM, self).__init__()
+                    super().__init__()
                     self.cell = cell
 
                 def forward(self, x: PackedSequence) -> Tuple[PackedSequence, Tuple[torch.Tensor, torch.Tensor]]:
@@ -1370,7 +1370,7 @@
 
             class ScriptWrapperPackedGRU(torch.nn.Module):
                 def __init__(self, cell):
-                    super(ScriptWrapperPackedGRU, self).__init__()
+                    super().__init__()
                     self.cell = cell
 
                 def forward(self, x: PackedSequence) -> Tuple[PackedSequence, torch.Tensor]:
diff --git a/test/quantization/eager/test_quantize_eager_qat.py b/test/quantization/eager/test_quantize_eager_qat.py
index b83f2e1..d51fcbb 100644
--- a/test/quantization/eager/test_quantize_eager_qat.py
+++ b/test/quantization/eager/test_quantize_eager_qat.py
@@ -120,7 +120,7 @@
             init.uniform_(self.bias, -bound, bound)
 
     def reset_parameters(self):
-        super(_ReferenceConvBnNd, self).reset_parameters()
+        super().reset_parameters()
         # A hack to avoid resetting on undefined parameters
         if hasattr(self, 'gamma'):
             self.reset_bn_parameters()
@@ -191,7 +191,7 @@
 
     def extra_repr(self):
         # TODO(jerryzh): extend
-        return super(_ReferenceConvBnNd, self).extra_repr()
+        return super().extra_repr()
 
     def forward(self, input):
         return self.activation_post_process(self._forward(input))
diff --git a/test/quantization/fx/test_model_report_fx.py b/test/quantization/fx/test_model_report_fx.py
index 24bb7c4..85f9975 100644
--- a/test/quantization/fx/test_model_report_fx.py
+++ b/test/quantization/fx/test_model_report_fx.py
@@ -434,7 +434,7 @@
         # first we want a QAT model
         class QATConvLinearReluModel(torch.nn.Module):
             def __init__(self):
-                super(QATConvLinearReluModel, self).__init__()
+                super().__init__()
                 # QuantStub converts tensors from floating point to quantized
                 self.quant = torch.ao.quantization.QuantStub()
                 self.conv = torch.nn.Conv2d(1, 1, 1)
@@ -704,7 +704,7 @@
 
         class ModifiedThreeOps(torch.nn.Module):
             def __init__(self, batch_norm_dim):
-                super(ModifiedThreeOps, self).__init__()
+                super().__init__()
                 self.obs1 = ModelReportObserver()
                 self.linear = torch.nn.Linear(7, 3, 2)
                 self.obs2 = ModelReportObserver()
@@ -728,7 +728,7 @@
 
         class HighDimensionNet(torch.nn.Module):
             def __init__(self):
-                super(HighDimensionNet, self).__init__()
+                super().__init__()
                 self.obs1 = ModelReportObserver()
                 self.fc1 = torch.nn.Linear(3, 7)
                 self.block1 = ModifiedThreeOps(3)
@@ -787,7 +787,7 @@
     def test_nested_detection_case(self):
         class SingleLinear(torch.nn.Module):
             def __init__(self):
-                super(SingleLinear, self).__init__()
+                super().__init__()
                 self.linear = torch.nn.Linear(3, 3)
 
             def forward(self, x):
@@ -796,7 +796,7 @@
 
         class TwoBlockNet(torch.nn.Module):
             def __init__(self):
-                super(TwoBlockNet, self).__init__()
+                super().__init__()
                 self.block1 = SingleLinear()
                 self.block2 = SingleLinear()
 
diff --git a/test/quantization/fx/test_numeric_suite_fx.py b/test/quantization/fx/test_numeric_suite_fx.py
index 0a65907..f84e204 100644
--- a/test/quantization/fx/test_numeric_suite_fx.py
+++ b/test/quantization/fx/test_numeric_suite_fx.py
@@ -390,9 +390,6 @@
     @skipIfNoFBGEMM
     def test_simple_tensor_ops(self):
         class M(nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x, y):
                 z = x + y
                 return z
@@ -433,9 +430,6 @@
     def test_nodes_before_cat(self):
         # verify that nodes before cat get matched
         class M(nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x0):
                 x1 = torch.add(x0, 1.0)
                 y1 = torch.add(x0, 1.0)
@@ -468,9 +462,6 @@
     def test_dict_return_type(self):
         # verify that we can traverse up nodes which return dictionaries
         class M(nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x0):
                 x1 = torch.add(x0, 1.0)
                 y1 = torch.add(x0, 1.0)
diff --git a/test/quantization/fx/test_quantize_fx.py b/test/quantization/fx/test_quantize_fx.py
index eb2f630..66180e5 100644
--- a/test/quantization/fx/test_quantize_fx.py
+++ b/test/quantization/fx/test_quantize_fx.py
@@ -1448,7 +1448,7 @@
         class Model(nn.Module):
 
             def __init__(self):
-                super(Model, self).__init__()
+                super().__init__()
                 self.conv = nn.Conv2d(1, 1, 1)
                 self.bn = nn.BatchNorm2d(1)
                 self.relu = nn.ReLU()
@@ -1700,7 +1700,7 @@
     def test_qconfig_none(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv1 = nn.Conv2d(1, 1, 1)
                 self.conv2 = nn.Conv2d(1, 1, 1)
 
@@ -1798,9 +1798,6 @@
 
     def test_qconfig_function(self):
         class M(torch.nn.Module):
-            def __init__(self):
-                super(M, self).__init__()
-
             def forward(self, x, y):
                 return x + y
 
@@ -1823,7 +1820,7 @@
     def test_qconfig_module_name_regex(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv1 = nn.Conv2d(1, 1, 1)
                 self.conv2 = nn.Conv2d(1, 1, 1)
 
@@ -1852,7 +1849,7 @@
         for device in get_supported_device_types():
             class M(torch.nn.Module):
                 def __init__(self):
-                    super(M, self).__init__()
+                    super().__init__()
                     self.linear = nn.Linear(1, 1)
                     self.conv = nn.Conv2d(1, 1, 1)
                     self.module_conv1 = nn.Conv2d(1, 1, 1)
@@ -2026,7 +2023,7 @@
     def test_qconfig_dict_with_fused_modules(self):
         class LinearReLUModel(torch.nn.Module):
             def __init__(self, relu):
-                super(LinearReLUModel, self).__init__()
+                super().__init__()
                 self.linear = torch.nn.Linear(3, 3)
                 self.relu = relu
 
@@ -2037,7 +2034,7 @@
 
         class ConvReLUModel(torch.nn.Module):
             def __init__(self, relu):
-                super(ConvReLUModel, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv1d(3, 3, 3)
                 self.relu = relu
 
@@ -2048,7 +2045,7 @@
 
         class ConvBnReLUModel(torch.nn.Module):
             def __init__(self, relu):
-                super(ConvBnReLUModel, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv1d(3, 3, 3)
                 self.bn = torch.nn.BatchNorm1d(3)
                 self.relu = relu
@@ -3120,18 +3117,12 @@
     @skipIfNoFBGEMM
     def test_non_traceable_module(self):
         class NonTraceable(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 for k in x.keys():
                     print(x[k])
                 return x
 
         class NonTraceable2(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 # data dependent control flow is not traceable
                 for i in x:
@@ -3509,9 +3500,6 @@
         pattern.
         """
         class M1(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 dims = x.ndim
                 dims_sub = dims - 1
@@ -3520,9 +3508,6 @@
                 return x
 
         class M2(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 dims = x.ndim
                 dims_sub = dims - 2
@@ -3754,9 +3739,6 @@
 
     def test_propagate_dtypes_for_known_nodes_dict_tuple_args(self):
         class reshape_module(nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x, y, z):
                 return x.reshape(y["shape"])
 
@@ -4000,9 +3982,6 @@
         """ Test quantizing a not used value"""
 
         class M(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 x = x + x
                 x.sigmoid_()
@@ -5171,9 +5150,6 @@
                 return x
 
         class M2(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 x = x.reshape()
                 return x
@@ -5311,7 +5287,7 @@
     def test_qconfig_dict_setup(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.Conv1d = torch.nn.Conv1d(1, 1, 1)
                 self.Conv2d = torch.nn.Conv2d(1, 1, 1)
                 self.Conv3d = torch.nn.Conv3d(1, 1, 1)
@@ -5417,7 +5393,7 @@
         """
         class MyModel(torch.nn.Module):
             def __init__(self):
-                super(MyModel, self).__init__()
+                super().__init__()
                 self.linear = torch.nn.Linear(30, 4).float()
 
             def forward(self, x):
@@ -5480,7 +5456,7 @@
         """
         class MyModel(torch.nn.Module):
             def __init__(self):
-                super(MyModel, self).__init__()
+                super().__init__()
                 self.linear = torch.nn.Linear(30, 4).float()
 
             def forward(self, x):
@@ -5540,7 +5516,7 @@
         """
         class MyModel(torch.nn.Module):
             def __init__(self):
-                super(MyModel, self).__init__()
+                super().__init__()
                 self.linear = torch.nn.Linear(30, 4).float()
 
             def forward(self, x):
@@ -5571,7 +5547,7 @@
 
         class MyModel(torch.nn.Module):
             def __init__(self):
-                super(MyModel, self).__init__()
+                super().__init__()
                 self.linear = torch.nn.Linear(30, 4).float()
 
             def forward(self, x):
@@ -6041,7 +6017,7 @@
         with override_quantized_engine('fbgemm'):
             class LinearModel(torch.nn.Module):
                 def __init__(self):
-                    super(LinearModel, self).__init__()
+                    super().__init__()
                     self.linear = torch.nn.Linear(30, 4).float()
 
                 def forward(self, x):
@@ -6049,7 +6025,7 @@
 
             class LinearReLUModel(torch.nn.Module):
                 def __init__(self, f_relu=False):
-                    super(LinearReLUModel, self).__init__()
+                    super().__init__()
                     self.linear = torch.nn.Linear(30, 4).float()
                     if f_relu:
                         self.relu = F.relu
@@ -6063,7 +6039,7 @@
 
             class LinearBnModel(torch.nn.Module):
                 def __init__(self):
-                    super(LinearBnModel, self).__init__()
+                    super().__init__()
                     self.linear = torch.nn.Linear(4, 4).float()
                     self.bn = torch.nn.BatchNorm1d(4)
 
@@ -6103,7 +6079,7 @@
         with override_quantized_engine('fbgemm'):
             class FuncLinear(torch.nn.Module):
                 def __init__(self, use_bias, has_relu, f_relu):
-                    super(FuncLinear, self).__init__()
+                    super().__init__()
                     self.w = torch.randn(4, 30)
                     self.b = torch.randn(4)
                     self.use_bias = use_bias
@@ -6198,7 +6174,7 @@
         with override_quantized_engine('fbgemm'):
             class FuncLinear(torch.nn.Module):
                 def __init__(self, use_bias, has_relu, f_relu):
-                    super(FuncLinear, self).__init__()
+                    super().__init__()
                     self.w = torch.randn(4, 30)
                     self.b = torch.randn(4)
                     self.use_bias = use_bias
@@ -6253,7 +6229,7 @@
     def test_linear_static_fp16(self):
         class FuncLinear(torch.nn.Module):
             def __init__(self, use_bias, has_relu, f_relu):
-                super(FuncLinear, self).__init__()
+                super().__init__()
                 self.w = torch.randn(4, 30)
                 self.b = torch.randn(4)
                 self.use_bias = use_bias
@@ -6319,7 +6295,7 @@
 
         class ConvWrapper(torch.nn.Module):
             def __init__(self, dim):
-                super(ConvWrapper, self).__init__()
+                super().__init__()
                 self.conv = conv_module[dim](3, 3, 3).float()
 
             def forward(self, x):
@@ -6452,7 +6428,7 @@
 
         class ConvNdRelu(torch.nn.Module):
             def __init__(self, dim, inplace):
-                super(ConvNdRelu, self).__init__()
+                super().__init__()
                 self.conv = conv_module[dim](3, 3, 3).float()
                 self.relu = torch.nn.ReLU(inplace)
 
@@ -6461,7 +6437,7 @@
 
         class ConvNdFunctionalRelu(torch.nn.Module):
             def __init__(self, dim):
-                super(ConvNdFunctionalRelu, self).__init__()
+                super().__init__()
                 self.conv = conv_module[dim](3, 3, 3).float()
 
             def forward(self, x):
@@ -6469,7 +6445,7 @@
 
         class ConvNdInplaceFunctionalRelu(torch.nn.Module):
             def __init__(self, dim):
-                super(ConvNdInplaceFunctionalRelu, self).__init__()
+                super().__init__()
                 self.conv = conv_module[dim](3, 3, 3).float()
 
             def forward(self, x):
@@ -6641,9 +6617,6 @@
     @unittest.skip("This is no longer needed right now, can enable later with new api")
     def test_bmm(self):
         class BMMMethod(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x, y):
                 return x.bmm(y)
 
@@ -6876,7 +6849,7 @@
 
         class M(torch.nn.Module):
             def __init__(self, dim):
-                super(M, self).__init__()
+                super().__init__()
                 self.bn = bn_module[dim](3).to(torch.float)
 
             def forward(self, x):
@@ -6905,7 +6878,7 @@
 
         class BNRelu(torch.nn.Module):
             def __init__(self, dim, inplace):
-                super(BNRelu, self).__init__()
+                super().__init__()
                 self.bn = bn_module[dim](3).to(torch.float)
                 self.relu = torch.nn.ReLU(inplace=inplace)
 
@@ -6914,7 +6887,7 @@
 
         class BNFuncRelu(torch.nn.Module):
             def __init__(self, dim):
-                super(BNFuncRelu, self).__init__()
+                super().__init__()
                 self.bn = bn_module[dim](3).to(torch.float)
 
             def forward(self, x):
@@ -6922,7 +6895,7 @@
 
         class BNFuncInplaceRelu(torch.nn.Module):
             def __init__(self, dim):
-                super(BNFuncInplaceRelu, self).__init__()
+                super().__init__()
                 self.bn = bn_module[dim](3).to(torch.float)
 
             def forward(self, x):
@@ -6953,7 +6926,7 @@
         '''
         class M(torch.nn.Module):
             def __init__(self, is_module, inplace):
-                super(M, self).__init__()
+                super().__init__()
                 self.is_module = is_module
                 self.inplace = inplace
                 if self.is_module:
@@ -6998,7 +6971,7 @@
     def test_prelu(self):
         class M(torch.nn.Module):
             def __init__(self, num_param: int):
-                super(M, self).__init__()
+                super().__init__()
                 self.op = torch.nn.PReLU(num_parameters=num_param)
 
             def forward(self, input):
@@ -7025,7 +6998,7 @@
         '''
         class M(torch.nn.Module):
             def __init__(self, is_module):
-                super(M, self).__init__()
+                super().__init__()
                 self.is_module = is_module
                 if self.is_module:
                     self.op = float_module(*op_args)
@@ -7060,7 +7033,7 @@
         '''
         class M(torch.nn.Module):
             def __init__(self, is_module):
-                super(M, self).__init__()
+                super().__init__()
                 self.is_module = is_module
                 if self.is_module:
                     self.op = float_module(*op_args)
@@ -7362,7 +7335,7 @@
     def test_clamp(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(2, 2, 2).float()
                 self.relu6 = torch.nn.ReLU6()
                 self.relu6_ = torch.nn.ReLU6(True)
@@ -7491,7 +7464,7 @@
         """
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.maxpool1d = torch.nn.MaxPool1d(kernel_size=3)
                 self.maxpool2d = torch.nn.MaxPool2d(kernel_size=3)
                 self.maxpool3d = torch.nn.MaxPool3d(kernel_size=3)
@@ -8346,7 +8319,7 @@
 
         class Net(nn.Module):
             def __init__(self):
-                super(Net, self).__init__()
+                super().__init__()
                 self.relu1 = nn.ReLU()
                 self.conv1 = nn.Conv2d(1, 6, 5)
                 self.linear1 = nn.Linear(120, 1)
@@ -8372,7 +8345,7 @@
 
         class Net(nn.Module):
             def __init__(self):
-                super(Net, self).__init__()
+                super().__init__()
                 self.relu1 = nn.ReLU()
                 self.conv1 = nn.Conv2d(1, 6, 5)
                 self.linear1 = nn.Linear(120, 1)
@@ -8399,7 +8372,7 @@
     def test_prepare_serialize_switch_device_convert(self):
         class Net(nn.Module):
             def __init__(self):
-                super(Net, self).__init__()
+                super().__init__()
                 self.conv1 = nn.Conv2d(1, 6, 5)
                 self.linear1 = nn.Linear(120, 1)
 
@@ -8699,7 +8672,7 @@
         for device in get_supported_device_types():
             class EmbeddingBagLinear(torch.nn.Module):
                 def __init__(self):
-                    super(EmbeddingBagLinear, self).__init__()
+                    super().__init__()
                     self.emb = torch.nn.EmbeddingBag(num_embeddings=10, embedding_dim=12, mode='sum')
                     self.linear = torch.nn.Linear(12, 1).to(dtype=torch.float)
 
@@ -8740,7 +8713,7 @@
         for device in get_supported_device_types():
             class EmbeddingLinear(torch.nn.Module):
                 def __init__(self):
-                    super(EmbeddingLinear, self).__init__()
+                    super().__init__()
                     self.emb = torch.nn.Embedding(num_embeddings=10, embedding_dim=12)
                     self.linear = torch.nn.Linear(12, 1).to(dtype=torch.float)
 
diff --git a/test/quantization/fx/test_quantize_pt2e.py b/test/quantization/fx/test_quantize_pt2e.py
index 150df70..1fe8714 100644
--- a/test/quantization/fx/test_quantize_pt2e.py
+++ b/test/quantization/fx/test_quantize_pt2e.py
@@ -44,7 +44,7 @@
     def test_qconfig_none(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv1 = nn.Conv2d(1, 1, 1)
                 self.conv2 = nn.Conv2d(1, 1, 1)
 
diff --git a/test/quantization/jit/test_deprecated_jit_quant.py b/test/quantization/jit/test_deprecated_jit_quant.py
index 97e361d..806cff2 100644
--- a/test/quantization/jit/test_deprecated_jit_quant.py
+++ b/test/quantization/jit/test_deprecated_jit_quant.py
@@ -73,7 +73,7 @@
             if isinstance(cell, torch.jit.quantized.QuantizedLSTMCell):
                 class ScriptWrapper(torch.jit.ScriptModule):
                     def __init__(self, cell):
-                        super(ScriptWrapper, self).__init__()
+                        super().__init__()
                         self.cell = cell
 
                     @torch.jit.script_method
@@ -85,7 +85,7 @@
 
                 class ScriptWrapper(torch.jit.ScriptModule):
                     def __init__(self, cell):
-                        super(ScriptWrapper, self).__init__()
+                        super().__init__()
                         self.cell = cell
 
                     @torch.jit.script_method
@@ -197,7 +197,7 @@
             if isinstance(cell, torch.jit.quantized.QuantizedGRU):
                 class ScriptWrapper(torch.jit.ScriptModule):
                     def __init__(self, cell):
-                        super(ScriptWrapper, self).__init__()
+                        super().__init__()
                         self.cell = cell
 
                     @torch.jit.script_method
@@ -209,7 +209,7 @@
                 for cell in [cell_int8, cell_fp16]:
                     class ScriptWrapper(torch.jit.ScriptModule):
                         def __init__(self, cell):
-                            super(ScriptWrapper, self).__init__()
+                            super().__init__()
                             self.cell = cell
 
                         @torch.jit.script_method
@@ -227,7 +227,7 @@
 
             class FooBar(torch.nn.Module):
                 def __init__(self):
-                    super(FooBar, self).__init__()
+                    super().__init__()
                     self.linear1 = torch.nn.Linear(K1, N1).float()
 
                 def forward(self, x):
@@ -261,7 +261,7 @@
     def test_erase_class_tensor_shapes(self):
         class Linear(torch.nn.Module):
             def __init__(self, in_features, out_features):
-                super(Linear, self).__init__()
+                super().__init__()
                 qweight = torch._empty_affine_quantized(
                     [out_features, in_features], scale=1, zero_point=0,
                     dtype=torch.qint8)
diff --git a/test/quantization/jit/test_fusion_passes.py b/test/quantization/jit/test_fusion_passes.py
index 1f79693..d35b341 100644
--- a/test/quantization/jit/test_fusion_passes.py
+++ b/test/quantization/jit/test_fusion_passes.py
@@ -9,9 +9,6 @@
 class TestFusionPasses(QuantizationTestCase):
     def test_quantized_add_relu_fusion(self):
         class MAdd(torch.nn.Module):
-            def __init__(self):
-                super(MAdd, self).__init__()
-
             def forward(self, x, y):
                 a = torch.ops.quantized.add(x, y, 1., 0)
                 relu_out = torch.relu(a)
@@ -44,9 +41,6 @@
         self.assertEqual(ref_output, output)
 
         class MAddOut(torch.nn.Module):
-            def __init__(self):
-                super(MAddOut, self).__init__()
-
             def forward(self, x, y, z):
                 a = torch.ops.quantized.add_out(x, y, z)
                 relu_out = torch.relu(a)
@@ -74,9 +68,6 @@
         self.assertEqual(ref_output, output)
 
         class MAddScalar(torch.nn.Module):
-            def __init__(self):
-                super(MAddScalar, self).__init__()
-
             def forward(self, x, y : float):
                 a = torch.ops.quantized.add_scalar(x, y)
                 relu_out = torch.relu(a)
@@ -96,9 +87,6 @@
         self.assertEqual(ref_output, output)
 
         class MAddScalarOut(torch.nn.Module):
-            def __init__(self):
-                super(MAddScalarOut, self).__init__()
-
             def forward(self, x, y : float, z):
                 a = torch.ops.quantized.add_scalar_out(x, y, z)
                 relu_out = torch.relu(a)
diff --git a/test/quantization/jit/test_ondevice_quantization.py b/test/quantization/jit/test_ondevice_quantization.py
index 90fb3fb..b3bd4b9 100644
--- a/test/quantization/jit/test_ondevice_quantization.py
+++ b/test/quantization/jit/test_ondevice_quantization.py
@@ -33,7 +33,7 @@
 
 class myMod(torch.nn.Module):
     def __init__(self, weight):
-        super(myMod, self).__init__()
+        super().__init__()
         self.fc1 = torch.nn.Linear(5, 5).float()
         self.fc1.weight = weight
         self.fc2 = torch.nn.Linear(5, 5).float()
@@ -44,7 +44,7 @@
 
 class MyConvLinearModule(torch.nn.Module):
     def __init__(self):
-        super(MyConvLinearModule, self).__init__()
+        super().__init__()
         self.conv = torch.nn.Conv2d(3, 5, 3)
         weight = torch.nn.Parameter(torch.ones(5, 5))
         self.weight1 = torch.nn.Parameter(torch.ones(5, 5))
diff --git a/test/quantization/jit/test_quantize_jit.py b/test/quantization/jit/test_quantize_jit.py
index 01fb7e9..2787626 100644
--- a/test/quantization/jit/test_quantize_jit.py
+++ b/test/quantization/jit/test_quantize_jit.py
@@ -89,7 +89,7 @@
     def test_skip_dequant_constant_prop(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(3, 5, 3).float()
 
             def forward(self, x):
@@ -133,7 +133,7 @@
         # Test trivial case
         class TestModule(torch.nn.Module):
             def __init__(self, dim):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.conv = conv_module[dim](1, 20, 5, 1)
                 self.bn = bn_module[dim](num_features=20)
                 self.bn.eps = 0.0023
@@ -176,7 +176,7 @@
         # Test trivial case
         class TestModule(torch.nn.Module):
             def __init__(self, dim):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.conv = conv_module[dim](1, 20, 5, 1, bias=False)
                 self.bn = bn_module[dim](num_features=20)
                 # to make sure new bias is not zero
@@ -220,7 +220,7 @@
         # Test that we find Conv-BN patterns in submodules
         class SubModule(torch.nn.Module):
             def __init__(self, dim):
-                super(SubModule, self).__init__()
+                super().__init__()
                 self.conv = conv_module[dim](1, 20, 5, 1)
                 self.bn = bn_module[dim](num_features=20)
 
@@ -231,7 +231,7 @@
 
         class TestModule(torch.nn.Module):
             def __init__(self, dim):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.sub = SubModule(dim)
 
             def forward(self, x):
@@ -262,7 +262,7 @@
 
         class TestModule(torch.nn.Module):
             def __init__(self, dim, bias=False):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.conv1 = conv_module[dim](5, 5, 3, bias=bias)
                 self.bn1 = bn_module[dim](num_features=5)
                 self.bn1.running_mean.fill_(-0.2)
@@ -296,22 +296,16 @@
         """Test that we don't fuse the cases when module type does not match"""
 
         class CustomConv(torch.nn.Module):
-            def __init__(self):
-                super(CustomConv, self).__init__()
-
             def forward(self, x):
                 return x
 
         class CustomBn(torch.nn.Module):
-            def __init__(self):
-                super(CustomBn, self).__init__()
-
             def forward(self, x):
                 return x
 
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv = CustomConv()
                 self.bn = CustomBn()
 
@@ -333,7 +327,7 @@
 
         class SubModule(torch.nn.Module):
             def __init__(self, dim, num_blocks, enable_bias, enable_affine):
-                super(SubModule, self).__init__()
+                super().__init__()
                 layers = []
                 for i in range(num_blocks):
                     layers.append(conv_module[dim](20, 20, 5, 1, bias=enable_bias))
@@ -353,7 +347,7 @@
 
         class TestModule(torch.nn.Module):
             def __init__(self, dim, num_blocks, enable_bias, enable_affine):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.sub = SubModule(dim, num_blocks, enable_bias, enable_affine)
 
             def forward(self, x):
@@ -386,7 +380,7 @@
     def test_fuse_linear(self):
         class FunctionalLinear(torch.nn.Module):
             def __init__(self, weight, bias):
-                super(FunctionalLinear, self).__init__()
+                super().__init__()
                 self.weight = weight
                 self.bias = bias
 
@@ -430,7 +424,7 @@
         # check matmuls are not fused
         class Matmul(torch.nn.Module):
             def __init__(self, weight):
-                super(Matmul, self).__init__()
+                super().__init__()
                 self.weight = weight
 
             def forward(self, x):
@@ -449,7 +443,7 @@
     def test_insert_observers(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(3, 5, 3)
 
             def forward(self, x):
@@ -471,7 +465,7 @@
 
         class Sub(torch.nn.Module):
             def __init__(self):
-                super(Sub, self).__init__()
+                super().__init__()
                 self.fc = torch.nn.Linear(5, 5)
 
             def addOne(self, inp):
@@ -482,7 +476,7 @@
 
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(3, 5, 3)
                 self.sub = Sub()
 
@@ -538,7 +532,7 @@
     def test_insert_observers_child_qconfig(self):
         class Sub(torch.nn.Module):
             def __init__(self):
-                super(Sub, self).__init__()
+                super().__init__()
                 self.fc = torch.nn.Linear(5, 5)
 
             def forward(self, x):
@@ -546,7 +540,7 @@
 
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(3, 5, 3)
                 self.sub = Sub()
 
@@ -573,7 +567,7 @@
     def test_insert_observers_skip_values(self):
         class ConvFunctionalReLU(torch.nn.Module):
             def __init__(self):
-                super(ConvFunctionalReLU, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(3, 5, 3)
 
             def forward(self, x):
@@ -581,7 +575,7 @@
 
         class ConvReLUModule(torch.nn.Module):
             def __init__(self):
-                super(ConvReLUModule, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(3, 5, 3)
                 self.relu = torch.nn.ReLU()
 
@@ -590,7 +584,7 @@
 
         class AddReLUModule(torch.nn.Module):
             def __init__(self):
-                super(AddReLUModule, self).__init__()
+                super().__init__()
                 self.relu = torch.nn.ReLU()
                 self.conv = torch.nn.Conv2d(3, 3, 3).float()
 
@@ -601,7 +595,7 @@
 
         class AddFunctionalReLU(torch.nn.Module):
             def __init__(self):
-                super(AddFunctionalReLU, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(3, 3, 3).float()
 
             def forward(self, x):
@@ -651,7 +645,7 @@
     def test_insert_observers_weight_dtype(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(3, 5, 3)
 
             def forward(self, x):
@@ -679,9 +673,6 @@
 
     def test_insert_observers_for_reused_weight(self):
         class M(torch.nn.Module):
-            def __init__(self):
-                super(M, self).__init__()
-
             def forward(self, x, y, weight):
                 x = F.conv2d(x, weight)
                 y = F.conv2d(y, weight)
@@ -695,7 +686,7 @@
     def test_insert_observers_shared_class_type(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(3, 5, 3).float()
                 self.conv2 = torch.nn.Conv2d(3, 5, 3).float()
 
@@ -722,7 +713,7 @@
 
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(3, 3, 3).float()
 
             def forward(self, x):
@@ -754,7 +745,7 @@
 
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(3, 3, 3).float()
                 self.conv2 = torch.nn.Conv2d(3, 3, 3).float()
 
@@ -792,7 +783,7 @@
 
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(3, 3, 3).float()
                 self.conv2 = torch.nn.Conv2d(3, 3, 3).float()
                 self.avgpool = torch.nn.AdaptiveAvgPool2d((1, 1))
@@ -839,7 +830,7 @@
 
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(3, 3, 1).float()
                 self.conv2 = torch.nn.Conv2d(3, 3, 1).float()
 
@@ -874,7 +865,7 @@
     def test_insert_observers_for_if(self):
         class QuantProp(torch.nn.Module):
             def __init__(self, use_skip):
-                super(QuantProp, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(3, 3, 1).float()
                 self.use_skip = use_skip
 
@@ -888,7 +879,7 @@
 
         class Res(torch.nn.Module):
             def __init__(self, use_skip):
-                super(Res, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(3, 3, 1).float()
                 self.use_skip = use_skip
 
@@ -900,7 +891,7 @@
 
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.quant_prop = QuantProp(True)
                 self.res = Res(False)
 
@@ -948,7 +939,7 @@
     def test_insert_observers_for_nested_if(self):
         class Res(torch.nn.Module):
             def __init__(self, use_skip):
-                super(Res, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(3, 3, 1).float()
                 self.cond = use_skip
                 self.use_skip = use_skip
@@ -964,7 +955,7 @@
 
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.res1 = Res(True)
                 self.res2 = Res(False)
 
@@ -990,7 +981,7 @@
 
         class M(torch.nn.Module):
             def __init__(self, cond):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(3, 3, 3).float()
                 self.cond = cond
 
@@ -1003,7 +994,7 @@
 
         class M2(torch.nn.Module):
             def __init__(self, cond):
-                super(M2, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(3, 3, 3).float()
                 self.conv2 = torch.nn.Conv2d(3, 3, 3).float()
                 self.cond = cond
@@ -1041,7 +1032,7 @@
     def test_insert_quant_dequant(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(3, 5, 3).float()
 
             def forward(self, x):
@@ -1075,7 +1066,7 @@
     def test_insert_quant_dequant_shared_class_type(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(3, 3, 3).float()
                 self.conv2 = torch.nn.Conv2d(3, 3, 3).float()
 
@@ -1141,7 +1132,7 @@
     def test_dedup_module_uses(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.relu = torch.nn.ReLU()
 
             def forward(self, x):
@@ -1166,7 +1157,7 @@
     def test_replicate_dequantize(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(3, 3, 1).float()
 
             def forward(self, x):
@@ -1188,7 +1179,7 @@
     def test_replicate_dequantize_in_block(self):
         class M(torch.nn.Module):
             def __init__(self, cond):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(3, 3, 1).float()
 
                 self.cond = cond
@@ -1224,9 +1215,6 @@
             return torch.nn.functional.linear(input, weight, bias)
 
         class M(torch.nn.Module):
-            def __init__(self):
-                super(M, self).__init__()
-
             def forward(self, x, weight, bias):
                 x = torch.dequantize(x)
                 weight = torch.dequantize(weight)
@@ -1259,7 +1247,7 @@
 
         class Res(torch.nn.Module):
             def __init__(self):
-                super(Res, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(3, 3, 1).float()
                 self.conv2 = torch.nn.Conv2d(3, 3, 1).float()
                 self.use_skip = True
@@ -1274,7 +1262,7 @@
 
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.res1 = Res()
                 self.res2 = Res()
 
@@ -1293,7 +1281,7 @@
     def test_finalize_for_linear(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.fc = torch.nn.Linear(5, 5).float()
 
             def forward(self, x):
@@ -1325,7 +1313,7 @@
     def test_finalize_debug(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(3, 3, 3).float()
                 self.avgpool = torch.nn.AvgPool2d(3)
 
@@ -1353,7 +1341,7 @@
     def test_module_list(self):
         class SimpleLinearLayer(torch.nn.Module):
             def __init__(self):
-                super(SimpleLinearLayer, self).__init__()
+                super().__init__()
                 self.fc = torch.nn.Linear(5, 5).float()
 
             def forward(self, x):
@@ -1361,7 +1349,7 @@
 
         class ComplexModel(torch.nn.Module):
             def __init__(self):
-                super(ComplexModel, self).__init__()
+                super().__init__()
                 self.layers = torch.nn.ModuleList(
                     [SimpleLinearLayer() for i in range(2)]
                 )
@@ -1387,7 +1375,7 @@
     def test_conv_trace(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv1d = torch.nn.Conv1d(3, 3, 3).float()
                 self.conv2d = torch.nn.Conv2d(3, 3, 3).float()
                 self.conv3d = torch.nn.Conv3d(3, 3, 3).float()
@@ -1419,7 +1407,7 @@
     def test_convtranspose_trace(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.convtranspose1d = torch.nn.ConvTranspose1d(3, 3, 3).float()
                 self.convtranspose2d = torch.nn.ConvTranspose2d(3, 3, 3).float()
                 self.convtranspose3d = torch.nn.ConvTranspose3d(3, 3, 3).float()
@@ -1456,7 +1444,7 @@
     def test_replicate_dequant_same_value(self):
         class Mul(torch.nn.Module):
             def __init__(self):
-                super(Mul, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(3, 3, 3).float()
 
             def forward(self, x):
@@ -1472,7 +1460,7 @@
     def test_interface_with_fork(self):
         class SubModule(torch.nn.Module):
             def __init__(self):
-                super(SubModule, self).__init__()
+                super().__init__()
                 self.embedding1 = torch.nn.EmbeddingBag(
                     num_embeddings=10,
                     embedding_dim=12,
@@ -1486,7 +1474,7 @@
 
         class OrigMod(torch.nn.Module):
             def __init__(self):
-                super(OrigMod, self).__init__()
+                super().__init__()
                 self.embedding1 = torch.nn.EmbeddingBag(
                     num_embeddings=10,
                     embedding_dim=12,
@@ -1507,7 +1495,7 @@
             proxy_mod: ModInterface
 
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.proxy_mod = OrigMod()
                 self.sub = SubModule()
 
@@ -1518,7 +1506,7 @@
 
         class MainModule(torch.nn.Module):
             def __init__(self):
-                super(MainModule, self).__init__()
+                super().__init__()
                 self.test = TestModule()
 
             def forward(self, x, y):
@@ -1586,7 +1574,7 @@
 
         class MainModule(nn.Module):
             def __init__(self):
-                super(MainModule, self).__init__()
+                super().__init__()
                 self.fork_ops = ForkModule()
 
             def init_values(self, x):
@@ -1598,9 +1586,6 @@
                 return val
 
         class TestModule(torch.nn.Module):
-            def __init__(self):
-                super(TestModule, self).__init__()
-
             def forward(self, x):
                 w = torch.ones(5, 5)
                 b = torch.zeros(5)
@@ -1608,7 +1593,7 @@
 
         class ForkModule(nn.Module):
             def __init__(self):
-                super(ForkModule, self).__init__()
+                super().__init__()
                 self.test = TestModule()
 
             def forward(self, x):
@@ -1634,7 +1619,7 @@
     def test_linear(self):
         class ModuleLinear(torch.nn.Module):
             def __init__(self, has_relu=False, f_relu=False):
-                super(ModuleLinear, self).__init__()
+                super().__init__()
                 self.linear = torch.nn.Linear(30, 4).float()
                 if has_relu:
                     if f_relu:
@@ -1649,7 +1634,7 @@
 
         class FuncLinear(torch.nn.Module):
             def __init__(self, has_relu=False, f_relu=False):
-                super(FuncLinear, self).__init__()
+                super().__init__()
                 self.w = torch.randn(4, 30)
                 self.b = torch.randn(4)
                 if has_relu:
@@ -1696,7 +1681,7 @@
 
         class Conv(torch.nn.Module):
             def __init__(self, dim):
-                super(Conv, self).__init__()
+                super().__init__()
                 self.conv = conv_module[dim](3, 3, 3).float()
 
             def forward(self, x):
@@ -1727,7 +1712,7 @@
 
         class ConvNdRelu(torch.nn.Module):
             def __init__(self, dim, inplace):
-                super(ConvNdRelu, self).__init__()
+                super().__init__()
                 self.conv = conv_module[dim](3, 3, 3).float()
                 self.relu = torch.nn.ReLU(inplace)
 
@@ -1736,7 +1721,7 @@
 
         class ConvNdFunctionalRelu(torch.nn.Module):
             def __init__(self, dim):
-                super(ConvNdFunctionalRelu, self).__init__()
+                super().__init__()
                 self.conv = conv_module[dim](3, 3, 3).float()
 
             def forward(self, x):
@@ -1744,7 +1729,7 @@
 
         class ConvNdInplaceFunctionalRelu(torch.nn.Module):
             def __init__(self, dim):
-                super(ConvNdInplaceFunctionalRelu, self).__init__()
+                super().__init__()
                 self.conv = conv_module[dim](3, 3, 3).float()
 
             def forward(self, x):
@@ -1782,7 +1767,7 @@
 
         class QuantizedAdd(torch.nn.Module):
             def __init__(self):
-                super(QuantizedAdd, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(2, 2, 2).float()
                 self.conv2 = torch.nn.Conv2d(2, 2, 2).float()
 
@@ -1812,7 +1797,7 @@
 
         class AddRelu(torch.nn.Module):
             def __init__(self, inplace):
-                super(AddRelu, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(2, 2, 2).float()
                 self.conv2 = torch.nn.Conv2d(2, 2, 2).float()
                 self.relu = torch.nn.ReLU(inplace)
@@ -1827,7 +1812,7 @@
 
         class InplaceAddRelu(torch.nn.Module):
             def __init__(self, inplace):
-                super(InplaceAddRelu, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(2, 2, 2).float()
                 self.conv2 = torch.nn.Conv2d(2, 2, 2).float()
                 self.relu = torch.nn.ReLU(inplace)
@@ -1842,7 +1827,7 @@
 
         class AddFunctionalRelu(torch.nn.Module):
             def __init__(self):
-                super(AddFunctionalRelu, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(2, 2, 2).float()
                 self.conv2 = torch.nn.Conv2d(2, 2, 2).float()
 
@@ -1856,7 +1841,7 @@
 
         class InplaceAddFunctionalRelu(torch.nn.Module):
             def __init__(self):
-                super(InplaceAddFunctionalRelu, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(2, 2, 2).float()
                 self.conv2 = torch.nn.Conv2d(2, 2, 2).float()
 
@@ -1870,7 +1855,7 @@
 
         class AddInplaceFunctionalRelu(torch.nn.Module):
             def __init__(self):
-                super(AddInplaceFunctionalRelu, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(2, 2, 2).float()
                 self.conv2 = torch.nn.Conv2d(2, 2, 2).float()
 
@@ -1884,7 +1869,7 @@
 
         class InplaceAddInplaceFunctionalRelu(torch.nn.Module):
             def __init__(self):
-                super(InplaceAddInplaceFunctionalRelu, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(2, 2, 2).float()
                 self.conv2 = torch.nn.Conv2d(2, 2, 2).float()
 
@@ -1931,7 +1916,7 @@
     def test_quantized_add(self):
         class QuantizedAdd(torch.nn.Module):
             def __init__(self):
-                super(QuantizedAdd, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(2, 2, 2).float()
                 self.conv2 = torch.nn.Conv2d(2, 2, 2).float()
 
@@ -1942,7 +1927,7 @@
 
         class QuantizedInplaceAdd(torch.nn.Module):
             def __init__(self):
-                super(QuantizedInplaceAdd, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(2, 2, 2).float()
                 self.conv2 = torch.nn.Conv2d(2, 2, 2).float()
 
@@ -1953,16 +1938,10 @@
                 return x
 
         class NonQuantizedAdd(torch.nn.Module):
-            def __init__(self):
-                super(NonQuantizedAdd, self).__init__()
-
             def forward(self, x, y):
                 return x + y
 
         class NonQuantizedInplaceAdd(torch.nn.Module):
-            def __init__(self):
-                super(NonQuantizedInplaceAdd, self).__init__()
-
             def forward(self, x, y):
                 x += y
                 return x
@@ -1994,7 +1973,7 @@
     def test_quantized_add_scalar(self):
         class QuantizedAddScalar(torch.nn.Module):
             def __init__(self):
-                super(QuantizedAddScalar, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(2, 2, 2).float()
 
             def forward(self, x):
@@ -2003,7 +1982,7 @@
 
         class QuantizedInplaceAddScalar(torch.nn.Module):
             def __init__(self):
-                super(QuantizedInplaceAddScalar, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(2, 2, 2).float()
 
             def forward(self, x):
@@ -2012,16 +1991,10 @@
                 return x
 
         class NonQuantizedAddScalar(torch.nn.Module):
-            def __init__(self):
-                super(NonQuantizedAddScalar, self).__init__()
-
             def forward(self, x):
                 return x + 3
 
         class NonQuantizedInplaceAddScalar(torch.nn.Module):
-            def __init__(self):
-                super(NonQuantizedInplaceAddScalar, self).__init__()
-
             def forward(self, x):
                 x += 3
                 return x
@@ -2050,7 +2023,7 @@
     def test_quantized_add_relu(self):
         class AddRelu(torch.nn.Module):
             def __init__(self, inplace):
-                super(AddRelu, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(2, 2, 2).float()
                 self.conv2 = torch.nn.Conv2d(2, 2, 2).float()
                 self.relu = torch.nn.ReLU(inplace)
@@ -2063,7 +2036,7 @@
 
         class InplaceAddRelu(torch.nn.Module):
             def __init__(self, inplace):
-                super(InplaceAddRelu, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(2, 2, 2).float()
                 self.conv2 = torch.nn.Conv2d(2, 2, 2).float()
                 self.relu = torch.nn.ReLU(inplace)
@@ -2076,7 +2049,7 @@
 
         class AddFunctionalRelu(torch.nn.Module):
             def __init__(self):
-                super(AddFunctionalRelu, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(2, 2, 2).float()
                 self.conv2 = torch.nn.Conv2d(2, 2, 2).float()
 
@@ -2088,7 +2061,7 @@
 
         class InplaceAddFunctionalRelu(torch.nn.Module):
             def __init__(self):
-                super(InplaceAddFunctionalRelu, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(2, 2, 2).float()
                 self.conv2 = torch.nn.Conv2d(2, 2, 2).float()
 
@@ -2100,7 +2073,7 @@
 
         class AddInplaceFunctionalRelu(torch.nn.Module):
             def __init__(self):
-                super(AddInplaceFunctionalRelu, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(2, 2, 2).float()
                 self.conv2 = torch.nn.Conv2d(2, 2, 2).float()
 
@@ -2112,7 +2085,7 @@
 
         class InplaceAddInplaceFunctionalRelu(torch.nn.Module):
             def __init__(self):
-                super(InplaceAddInplaceFunctionalRelu, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(2, 2, 2).float()
                 self.conv2 = torch.nn.Conv2d(2, 2, 2).float()
 
@@ -2152,7 +2125,7 @@
     def test_quantized_add_scalar_relu(self):
         class AddScalarRelu(torch.nn.Module):
             def __init__(self, inplace):
-                super(AddScalarRelu, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(2, 2, 2).float()
                 self.relu = torch.nn.ReLU(inplace)
 
@@ -2162,7 +2135,7 @@
 
         class InplaceAddScalarRelu(torch.nn.Module):
             def __init__(self, inplace):
-                super(InplaceAddScalarRelu, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(2, 2, 2).float()
                 self.relu = torch.nn.ReLU(inplace)
 
@@ -2173,7 +2146,7 @@
 
         class AddScalarFunctionalRelu(torch.nn.Module):
             def __init__(self):
-                super(AddScalarFunctionalRelu, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(2, 2, 2).float()
 
             def forward(self, x):
@@ -2182,7 +2155,7 @@
 
         class InplaceAddScalarFunctionalRelu(torch.nn.Module):
             def __init__(self):
-                super(InplaceAddScalarFunctionalRelu, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(2, 2, 2).float()
 
             def forward(self, x):
@@ -2192,7 +2165,7 @@
 
         class AddScalarInplaceFunctionalRelu(torch.nn.Module):
             def __init__(self):
-                super(AddScalarInplaceFunctionalRelu, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(2, 2, 2).float()
 
             def forward(self, x):
@@ -2201,7 +2174,7 @@
 
         class InplaceAddScalarInplaceFunctionalRelu(torch.nn.Module):
             def __init__(self):
-                super(InplaceAddScalarInplaceFunctionalRelu, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(2, 2, 2).float()
 
             def forward(self, x):
@@ -2244,7 +2217,7 @@
 
         class QuantizedCat(torch.nn.Module):
             def __init__(self):
-                super(QuantizedCat, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(2, 2, 2).float()
                 self.conv2 = torch.nn.Conv2d(2, 2, 2).float()
 
@@ -2254,9 +2227,6 @@
                 return torch.cat([x, y], 1)
 
         class NonQuantizedCat(torch.nn.Module):
-            def __init__(self):
-                super(NonQuantizedCat, self).__init__()
-
             def forward(self, x, y):
                 return torch.cat([x, y], 1)
 
@@ -2283,7 +2253,7 @@
 
         class M(torch.nn.Module):
             def __init__(self, dim):
-                super(M, self).__init__()
+                super().__init__()
                 self.bn = bn_module[dim](3).to(torch.float)
 
             def forward(self, x):
@@ -2303,7 +2273,7 @@
 
         class BNRelu(torch.nn.Module):
             def __init__(self, dim, inplace):
-                super(BNRelu, self).__init__()
+                super().__init__()
                 self.bn = bn_module[dim](3).to(torch.float)
                 self.relu = torch.nn.ReLU(inplace=inplace)
 
@@ -2326,7 +2296,7 @@
 
         class BNFuncRelu(torch.nn.Module):
             def __init__(self, dim):
-                super(BNFuncRelu, self).__init__()
+                super().__init__()
                 self.bn = bn_module[dim](3).to(torch.float)
 
             def forward(self, x):
@@ -2348,7 +2318,7 @@
 
         class BNFuncInplaceRelu(torch.nn.Module):
             def __init__(self, dim):
-                super(BNFuncInplaceRelu, self).__init__()
+                super().__init__()
                 self.bn = bn_module[dim](3).to(torch.float)
 
             def forward(self, x):
@@ -2368,7 +2338,7 @@
     def test_quantized_mul(self):
         class QuantizedMul(torch.nn.Module):
             def __init__(self):
-                super(QuantizedMul, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(2, 2, 2).float()
                 self.conv2 = torch.nn.Conv2d(2, 2, 2).float()
 
@@ -2379,7 +2349,7 @@
 
         class QuantizedInplaceMul(torch.nn.Module):
             def __init__(self):
-                super(QuantizedInplaceMul, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(2, 2, 2).float()
                 self.conv2 = torch.nn.Conv2d(2, 2, 2).float()
 
@@ -2390,16 +2360,10 @@
                 return x
 
         class NonQuantizedMul(torch.nn.Module):
-            def __init__(self):
-                super(NonQuantizedMul, self).__init__()
-
             def forward(self, x, y):
                 return x * y
 
         class NonQuantizedInplaceMul(torch.nn.Module):
-            def __init__(self):
-                super(NonQuantizedInplaceMul, self).__init__()
-
             def forward(self, x, y):
                 x *= y
                 return x
@@ -2431,7 +2395,7 @@
     def test_quantized_mul_scalar(self):
         class QuantizedMulScalar(torch.nn.Module):
             def __init__(self):
-                super(QuantizedMulScalar, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(2, 2, 2).float()
 
             def forward(self, x):
@@ -2440,7 +2404,7 @@
 
         class QuantizedInplaceMulScalar(torch.nn.Module):
             def __init__(self):
-                super(QuantizedInplaceMulScalar, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(2, 2, 2).float()
 
             def forward(self, x):
@@ -2449,16 +2413,10 @@
                 return x
 
         class NonQuantizedMulScalar(torch.nn.Module):
-            def __init__(self):
-                super(NonQuantizedMulScalar, self).__init__()
-
             def forward(self, x):
                 return x * 3
 
         class NonQuantizedInplaceMulScalar(torch.nn.Module):
-            def __init__(self):
-                super(NonQuantizedInplaceMulScalar, self).__init__()
-
             def forward(self, x):
                 x *= 3
                 return x
@@ -2487,7 +2445,7 @@
     def test_quantized_mul_relu(self):
         class MulRelu(torch.nn.Module):
             def __init__(self, inplace):
-                super(MulRelu, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(2, 2, 2).float()
                 self.conv2 = torch.nn.Conv2d(2, 2, 2).float()
                 self.relu = torch.nn.ReLU(inplace)
@@ -2500,7 +2458,7 @@
 
         class InplaceMulRelu(torch.nn.Module):
             def __init__(self, inplace):
-                super(InplaceMulRelu, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(2, 2, 2).float()
                 self.conv2 = torch.nn.Conv2d(2, 2, 2).float()
                 self.relu = torch.nn.ReLU(inplace)
@@ -2513,7 +2471,7 @@
 
         class MulFunctionalRelu(torch.nn.Module):
             def __init__(self):
-                super(MulFunctionalRelu, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(2, 2, 2).float()
                 self.conv2 = torch.nn.Conv2d(2, 2, 2).float()
 
@@ -2525,7 +2483,7 @@
 
         class InplaceMulFunctionalRelu(torch.nn.Module):
             def __init__(self):
-                super(InplaceMulFunctionalRelu, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(2, 2, 2).float()
                 self.conv2 = torch.nn.Conv2d(2, 2, 2).float()
 
@@ -2537,7 +2495,7 @@
 
         class MulInplaceFunctionalRelu(torch.nn.Module):
             def __init__(self):
-                super(MulInplaceFunctionalRelu, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(2, 2, 2).float()
                 self.conv2 = torch.nn.Conv2d(2, 2, 2).float()
 
@@ -2549,7 +2507,7 @@
 
         class InplaceMulInplaceFunctionalRelu(torch.nn.Module):
             def __init__(self):
-                super(InplaceMulInplaceFunctionalRelu, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(2, 2, 2).float()
                 self.conv2 = torch.nn.Conv2d(2, 2, 2).float()
 
@@ -2589,7 +2547,7 @@
     def test_quantized_mul_scalar_relu(self):
         class MulScalarRelu(torch.nn.Module):
             def __init__(self, inplace):
-                super(MulScalarRelu, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(2, 2, 2).float()
                 self.relu = torch.nn.ReLU(inplace)
 
@@ -2599,7 +2557,7 @@
 
         class InplaceMulScalarRelu(torch.nn.Module):
             def __init__(self, inplace):
-                super(InplaceMulScalarRelu, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(2, 2, 2).float()
                 self.relu = torch.nn.ReLU(inplace)
 
@@ -2610,7 +2568,7 @@
 
         class MulScalarFunctionalRelu(torch.nn.Module):
             def __init__(self):
-                super(MulScalarFunctionalRelu, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(2, 2, 2).float()
 
             def forward(self, x):
@@ -2619,7 +2577,7 @@
 
         class InplaceMulScalarFunctionalRelu(torch.nn.Module):
             def __init__(self):
-                super(InplaceMulScalarFunctionalRelu, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(2, 2, 2).float()
 
             def forward(self, x):
@@ -2629,7 +2587,7 @@
 
         class MulScalarInplaceFunctionalRelu(torch.nn.Module):
             def __init__(self):
-                super(MulScalarInplaceFunctionalRelu, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(2, 2, 2).float()
 
             def forward(self, x):
@@ -2638,7 +2596,7 @@
 
         class InplaceMulScalarInplaceFunctionalRelu(torch.nn.Module):
             def __init__(self):
-                super(InplaceMulScalarInplaceFunctionalRelu, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(2, 2, 2).float()
 
             def forward(self, x):
@@ -2676,7 +2634,7 @@
     def test_hardswish(self):
         class FunctionalHardswish(torch.nn.Module):
             def __init__(self, inplace):
-                super(FunctionalHardswish, self).__init__()
+                super().__init__()
                 self.inplace = inplace
 
             def forward(self, input):
@@ -2701,7 +2659,7 @@
     def test_elu(self):
         class FunctionalELU(torch.nn.Module):
             def __init__(self, inplace=False):
-                super(FunctionalELU, self).__init__()
+                super().__init__()
                 self.inplace = inplace
 
             def forward(self, input):
@@ -2760,7 +2718,7 @@
 
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv1 = torch.nn.Conv2d(3, 3, 3).float()
                 self.conv2 = torch.nn.Conv2d(3, 3, 3).float()
 
@@ -2776,7 +2734,7 @@
     def test_clamp(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(2, 2, 2).float()
                 self.relu6 = torch.nn.ReLU6()
                 self.relu6_ = torch.nn.ReLU6(True)
@@ -2817,7 +2775,7 @@
 
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.maxpool1d = torch.nn.MaxPool1d(kernel_size=3)
                 self.maxpool2d = torch.nn.MaxPool2d(kernel_size=3)
                 self.maxpool3d = torch.nn.MaxPool3d(kernel_size=3)
@@ -2933,7 +2891,7 @@
 
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(3, 3, 3)
                 self.avg_pool1d = torch.nn.AvgPool1d(3)
                 self.avg_pool2d = torch.nn.AvgPool2d(3)
@@ -3058,7 +3016,7 @@
     def test_cat_linear(self):
         class LinearModel(torch.nn.Module):
             def __init__(self):
-                super(LinearModel, self).__init__()
+                super().__init__()
                 self.weight = torch.randn(5, 5)
 
             def forward(self, x, y):
@@ -3082,7 +3040,7 @@
     def test_prepare_dynamic(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.fc = torch.nn.Linear(5, 5)
 
             def forward(self, x):
@@ -3109,7 +3067,7 @@
     def test_prepare_dynamic_child_qconfig(self):
         class Sub(torch.nn.Module):
             def __init__(self):
-                super(Sub, self).__init__()
+                super().__init__()
                 self.fc = torch.nn.Linear(5, 5)
 
             def forward(self, x):
@@ -3117,7 +3075,7 @@
 
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(3, 5, 3)
                 self.sub = Sub()
 
@@ -3147,7 +3105,7 @@
     def test_insert_quant_dequant_linear_dynamic(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.fc1 = torch.nn.Linear(5, 5).float()
                 self.fc2 = torch.nn.Linear(5, 5).float()
 
@@ -3198,7 +3156,7 @@
     def test_dynamic_multi_op(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.fc1 = torch.nn.Linear(5, 5).to(dtype=torch.float)
 
             def forward(self, x):
@@ -3217,7 +3175,7 @@
     def test_dynamic_quant_multi_uses(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.fc = torch.nn.Linear(5, 5).float()
 
             def forward(self, x):
@@ -3245,7 +3203,7 @@
 
         class DynamicModel(torch.nn.Module):
             def __init__(self):
-                super(DynamicModel, self).__init__()
+                super().__init__()
                 self.weight = torch.nn.Parameter(torch.ones(5, 5))
                 self.mod1 = myMod(self.weight)
 
@@ -3278,7 +3236,7 @@
     def test_dynamic_with_if(self):
         class Res(torch.nn.Module):
             def __init__(self):
-                super(Res, self).__init__()
+                super().__init__()
                 self.weight = torch.nn.Parameter(torch.ones(5, 5))
 
             def forward(self, x: torch.Tensor, cond: bool) -> torch.Tensor:
@@ -3289,7 +3247,7 @@
 
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.res1 = Res()
                 self.res2 = Res()
 
@@ -3334,7 +3292,7 @@
     def test_dynamic_weight_observer(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.fc = torch.nn.Linear(5, 5).float()
                 self.fc2 = torch.nn.Linear(5, 5).float()
 
@@ -3366,7 +3324,7 @@
     def test_convert_dynamic_fp16(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.fc = torch.nn.Linear(5, 5)
 
             def forward(self, x):
@@ -3381,7 +3339,7 @@
     def test_quantize_dynamic_fp16(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.fc = torch.nn.Linear(5, 5)
 
             def forward(self, x):
@@ -3404,7 +3362,7 @@
     def test_linear(self):
         class FunctionalLinear(torch.nn.Module):
             def __init__(self, weight, bias):
-                super(FunctionalLinear, self).__init__()
+                super().__init__()
                 self.weight = weight
                 self.bias = bias
 
@@ -3437,7 +3395,7 @@
     def test_embedding_bag(self):
         class M(torch.nn.Module):
             def __init__(self, weights):
-                super(M, self).__init__()
+                super().__init__()
                 self.embedding1 = torch.nn.EmbeddingBag(
                     num_embeddings=10,
                     embedding_dim=12,
@@ -3536,7 +3494,7 @@
     def test_embedding_bag_padding_idx_error(self):
         class M(torch.nn.Module):
             def __init__(self, weights):
-                super(M, self).__init__()
+                super().__init__()
                 self.embedding = torch.nn.EmbeddingBag(
                     num_embeddings=10,
                     embedding_dim=12,
diff --git a/test/run_test.py b/test/run_test.py
index 8037f13..9619cb2 100755
--- a/test/run_test.py
+++ b/test/run_test.py
@@ -907,7 +907,7 @@
 
 class TestChoices(list):
     def __init__(self, *args, **kwargs):
-        super(TestChoices, self).__init__(args[0])
+        super().__init__(args[0])
 
     def __contains__(self, item):
         return list.__contains__(self, parse_test_module(item))
diff --git a/test/test_autocast.py b/test/test_autocast.py
index 1a8263a..127d964 100644
--- a/test/test_autocast.py
+++ b/test/test_autocast.py
@@ -10,12 +10,12 @@
 
 class TestAutocastCPU(TestCase):
     def setUp(self):
-        super(TestAutocastCPU, self).setUp()
+        super().setUp()
         self.autocast_lists = AutocastCPUTestLists(torch.device('cpu'))
 
     def tearDown(self):
         del self.autocast_lists
-        super(TestAutocastCPU, self).tearDown()
+        super().tearDown()
 
     def _run_autocast_outofplace(self, op, args, run_as_type, out_type=None, module=torch, add_kwargs=None):
         # helper to cast args
diff --git a/test/test_autograd.py b/test/test_autograd.py
index e620bb6..efacfc0 100644
--- a/test/test_autograd.py
+++ b/test/test_autograd.py
@@ -10228,12 +10228,12 @@
             def run(self):
                 self.exception = None
                 try:
-                    self.ret = super(PropagatingThread, self).run()
+                    self.ret = super().run()
                 except Exception as e:
                     self.exception = e
 
             def join(self, timeout=None):
-                super(PropagatingThread, self).join(timeout)
+                super().join(timeout)
                 if self.exception:
                     raise self.exception from self.exception
                 return self.ret
diff --git a/test/test_cpp_extensions_aot.py b/test/test_cpp_extensions_aot.py
index ac24193..75ea8a9 100644
--- a/test/test_cpp_extensions_aot.py
+++ b/test/test_cpp_extensions_aot.py
@@ -282,7 +282,7 @@
 class TestRNGExtension(common.TestCase):
 
     def setUp(self):
-        super(TestRNGExtension, self).setUp()
+        super().setUp()
 
     @skipIfTorchDynamo("https://github.com/pytorch/torchdynamo/issues/1991")
     def test_rng(self):
diff --git a/test/test_cpp_extensions_jit.py b/test/test_cpp_extensions_jit.py
index 2add6d4..9351d5e 100644
--- a/test/test_cpp_extensions_jit.py
+++ b/test/test_cpp_extensions_jit.py
@@ -532,7 +532,7 @@
         # Create a torch.nn.Module which uses the C++ module as a submodule.
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.x = torch.nn.Parameter(torch.tensor(1.0))
                 self.net = extension.Net(3, 5)
 
diff --git a/test/test_cuda.py b/test/test_cuda.py
index 9b907b0..9bb601c 100644
--- a/test/test_cuda.py
+++ b/test/test_cuda.py
@@ -79,12 +79,12 @@
     FIFTY_MIL_CYCLES = 50000000
 
     def setUp(self):
-        super(TestCuda, self).setUp()
+        super().setUp()
         self.autocast_lists = AutocastTestLists(torch.device('cuda:0'))
 
     def tearDown(self):
         del self.autocast_lists
-        super(TestCuda, self).tearDown()
+        super().tearDown()
 
     def _check_memory_stat_consistency(self):
         snapshot = torch.cuda.memory_snapshot()
@@ -1871,7 +1871,7 @@
 
         class StreamModel(torch.nn.Module):
             def __init__(self):
-                super(StreamModel, self).__init__()
+                super().__init__()
                 self.event = torch.cuda.Event()
                 self.stream0 = torch.cuda.Stream()
                 self.stream1 = torch.cuda.Stream()
diff --git a/test/test_dataloader.py b/test/test_dataloader.py
index 0511968..39d9187 100644
--- a/test/test_dataloader.py
+++ b/test/test_dataloader.py
@@ -283,7 +283,7 @@
 
 class CUDACountingDataset(Dataset):
     def __init__(self, n):
-        super(CUDACountingDataset, self).__init__()
+        super().__init__()
         self.n = n
 
     def __getitem__(self, i):
@@ -295,7 +295,7 @@
 
 class CountingDataset(Dataset):
     def __init__(self, n):
-        super(CountingDataset, self).__init__()
+        super().__init__()
         self.n = n
 
     def __getitem__(self, i):
@@ -307,7 +307,7 @@
 
 class CountingIterableDataset(IterableDataset):
     def __init__(self, n):
-        super(CountingIterableDataset, self).__init__()
+        super().__init__()
         self.n = n
 
     def __iter__(self):
@@ -459,7 +459,7 @@
     # Setting disable_stderr=True may generate a lot of unrelated error outputs
     # but could be helpful for debugging.
     def __init__(self, disable_stderr=True, **kwargs):
-        super(ErrorTrackingProcess, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self._pconn, self._cconn = mp.Pipe()
         self._exception = None
         self.disable_stderr = disable_stderr
@@ -471,7 +471,7 @@
             with open(os.devnull, 'w') as devnull:
                 os.dup2(devnull.fileno(), sys.stderr.fileno())
         try:
-            super(ErrorTrackingProcess, self).run()
+            super().run()
             self._cconn.send(None)
         except Exception:
             self._cconn.send(ExceptionWrapper(sys.exc_info()))
@@ -940,7 +940,7 @@
 class TestDataLoader(TestCase):
 
     def setUp(self):
-        super(TestDataLoader, self).setUp()
+        super().setUp()
         self.data = torch.randn(100, 2, 3, 5)
         self.labels = torch.randperm(50).repeat(2)
         self.dataset = TensorDataset(self.data, self.labels)
@@ -2295,7 +2295,7 @@
     "fork is not supported. Dying (set die_after_fork=0 to override)")
 class TestStringDataLoader(TestCase):
     def setUp(self):
-        super(TestStringDataLoader, self).setUp()
+        super().setUp()
         self.dataset = StringDataset()
 
     @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
@@ -2325,7 +2325,7 @@
     "fork is not supported. Dying (set die_after_fork=0 to override)")
 class TestDictDataLoader(TestCase):
     def setUp(self):
-        super(TestDictDataLoader, self).setUp()
+        super().setUp()
         self.dataset = DictDataset()
 
     def test_sequential_batch(self):
@@ -2400,7 +2400,7 @@
 class TestDataLoaderPersistentWorkers(TestDataLoader):
 
     def setUp(self):
-        super(TestDataLoaderPersistentWorkers, self).setUp()
+        super().setUp()
         self.persistent_workers = True
 
     @unittest.skipIf(IS_SANDCASTLE, "subprocess doesn't work in FB internal CI")
@@ -2513,7 +2513,7 @@
     "fork is not supported. Dying (set die_after_fork=0 to override)")
 class TestNamedTupleDataLoader(TestCase):
     def setUp(self):
-        super(TestNamedTupleDataLoader, self).setUp()
+        super().setUp()
         self.dataset = NamedTupleDataset()
 
     def test_dataloader_with_namedtuple(self):
@@ -2576,7 +2576,7 @@
     "fork is not supported. Dying (set die_after_fork=0 to override)")
 class TestCustomPinFn(TestCase):
     def setUp(self):
-        super(TestCustomPinFn, self).setUp()
+        super().setUp()
         inps = torch.arange(10 * 5, dtype=torch.float32).view(10, 5)
         tgts = torch.arange(10 * 5, dtype=torch.float32).view(10, 5)
         self.dataset = TensorDataset(inps, tgts)
@@ -2634,7 +2634,7 @@
     "Flaky with ASAN, see https://github.com/pytorch/pytorch/issues/65727")
 class TestIndividualWorkerQueue(TestCase):
     def setUp(self):
-        super(TestIndividualWorkerQueue, self).setUp()
+        super().setUp()
         self.dataset = TestWorkerQueueDataset(list(range(128)))
 
     def _run_ind_worker_queue_test(self, batch_size, num_workers):
diff --git a/test/test_fake_tensor.py b/test/test_fake_tensor.py
index 450bfb6..5d52ef3 100644
--- a/test/test_fake_tensor.py
+++ b/test/test_fake_tensor.py
@@ -458,7 +458,7 @@
 
         class ModuleNew(torch.nn.Module):
             def __init__(self):
-                super(ModuleNew, self).__init__()
+                super().__init__()
                 self.a = torch.rand([10, 2])
                 self.b = self.a
                 self.c = self.a[0]
diff --git a/test/test_fx.py b/test/test_fx.py
index 2b70c58..4ec0591 100644
--- a/test/test_fx.py
+++ b/test/test_fx.py
@@ -442,7 +442,7 @@
 
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.batchnorm1d = torch.nn.BatchNorm1d(2, affine=False)
 
             def forward(self, x: torch.Tensor):
@@ -1973,9 +1973,6 @@
             yield
 
         class M(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             @do_nothing()
             def forward(self, x):
                 return torch.relu(x)
@@ -1994,9 +1991,6 @@
 
     def test_layout(self):
         class M(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 return torch.empty_like(x, layout=torch.strided, pin_memory=False).fill_(0)
 
@@ -2006,9 +2000,6 @@
 
     def test_ellipsis(self):
         class M(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x, y):
                 return x + y[:, 1:10, ...]
 
@@ -2380,9 +2371,6 @@
 
     def test_single_default_arg(self):
         class M(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, y=1):
                 return y
 
@@ -2392,9 +2380,6 @@
 
     def test_multiple_default_args(self):
         class M(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, y=1, z=2):
                 return y + z
 
@@ -2405,9 +2390,6 @@
 
     def test_regular_and_default_args(self):
         class M(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x, y=1):
                 return x + y
 
@@ -2417,9 +2399,6 @@
 
     def test_string_literal_return(self):
         class M(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self):
                 return "foo"
 
@@ -2447,7 +2426,7 @@
 
         class FooBar1234(torch.nn.Module):
             def __init__(self):
-                super(FooBar1234, self).__init__()
+                super().__init__()
                 self.f = torch.classes._TorchScriptTesting._StackString(["3", "4"])
 
             def forward(self):
@@ -2462,7 +2441,7 @@
 
         class FooBar2341(torch.nn.Module):
             def __init__(self):
-                super(FooBar2341, self).__init__()
+                super().__init__()
                 self.f = torch.classes._TorchScriptTesting._ReLUClass()
 
             def forward(self, x):
@@ -2614,7 +2593,7 @@
     def test_snake_case(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.activations = torch.nn.ModuleDict([
                     ["snake_case", torch.nn.ReLU()],
                     ["PascalCase", torch.nn.LeakyReLU()],
@@ -2680,7 +2659,7 @@
     def test_custom_traceback_raised_when_exception_source_is_graphmodule(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.W = torch.nn.Parameter(torch.randn(5))
 
             def forward(self, x):
@@ -2892,7 +2871,7 @@
     def test_ast_rewriter_wrap_with_submodule(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.batchnorm1d = torch.nn.BatchNorm1d(2, affine=False)
 
             def forward(self, x: torch.Tensor):
@@ -2911,7 +2890,7 @@
     def test_submodule_manipulation_API(self):
         class C(torch.nn.Module):
             def __init__(self):
-                super(C, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(16, 33, 3, stride=2)
                 self.param = torch.nn.Parameter(torch.rand(2, 3))
 
@@ -2920,7 +2899,7 @@
 
         class B(torch.nn.Module):
             def __init__(self):
-                super(B, self).__init__()
+                super().__init__()
                 self.linear = torch.nn.Linear(100, 200)
                 self.register_buffer("buf", torch.randn(2, 3))
                 self.net_c = C()
@@ -2930,7 +2909,7 @@
 
         class A(torch.nn.Module):
             def __init__(self):
-                super(A, self).__init__()
+                super().__init__()
                 self.net_b = B()
                 self.param = torch.nn.Parameter(torch.rand(2, 3))
 
diff --git a/test/test_fx_experimental.py b/test/test_fx_experimental.py
index 298ef8f..4283a7c 100644
--- a/test/test_fx_experimental.py
+++ b/test/test_fx_experimental.py
@@ -247,7 +247,7 @@
                 return layers
 
             def __init__(self):
-                super(MyRecommendationModule, self).__init__()
+                super().__init__()
                 layers = self.create_mlp(4, 4, 4)
                 self.bottom_layers = torch.nn.Sequential(*layers)
                 layers = self.create_mlp(3, 24, 24)
@@ -301,7 +301,7 @@
     def test_partition_latency(self):
         class TestModule(torch.nn.Module):
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.linear = torch.nn.Linear(4, 4)
 
             def forward(self, a):
@@ -420,7 +420,7 @@
     def test_aot_based_partition(self):
         class TestModule(torch.nn.Module):
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.b = torch.rand(4)
                 self.c = torch.rand(4)
 
@@ -479,7 +479,7 @@
     def test_saturate_host(self):
         class TestModule(torch.nn.Module):
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.linear = torch.nn.Linear(4, 4)
 
             def forward(self, a):
@@ -535,7 +535,7 @@
     def test_conv_bn_fusion_not_running_state(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(32, 64, 3, stride=2)
                 self.bn = torch.nn.BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=False)
 
@@ -987,9 +987,6 @@
 
     def test_normalize_args_preserve_meta(self):
         class MyModule(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, a):
                 return torch.add(a, 3)
 
@@ -1190,7 +1187,7 @@
     def test_to_folder(self):
         class Test(torch.nn.Module):
             def __init__(self):
-                super(Test, self).__init__()
+                super().__init__()
                 self.W = torch.nn.Parameter(torch.randn(2))
                 self.seq = torch.nn.Sequential(torch.nn.BatchNorm1d(2, 2))
                 self.linear = torch.nn.Linear(2, 2)
diff --git a/test/test_itt.py b/test/test_itt.py
index b43df32..99841e1 100644
--- a/test/test_itt.py
+++ b/test/test_itt.py
@@ -10,12 +10,6 @@
 
 @unittest.skipIf(not torch.profiler.itt.is_available(), "ITT is required")
 class TestItt(TestCase):
-    def setUp(self):
-        super(TestItt, self).setUp()
-
-    def tearDown(self):
-        super(TestItt, self).tearDown()
-
     def test_itt(self):
         # Just making sure we can see the symbols
         torch.profiler.itt.range_push("foo")
diff --git a/test/test_jit.py b/test/test_jit.py
index e40871e..530b448 100644
--- a/test/test_jit.py
+++ b/test/test_jit.py
@@ -339,7 +339,7 @@
 # has to be at top level or Pickle complains
 class FooToPickle(torch.nn.Module):
     def __init__(self):
-        super(FooToPickle, self).__init__()
+        super().__init__()
         self.bar = torch.jit.ScriptModule()
 
 class TestJit(JitTestCase):
@@ -396,7 +396,7 @@
     def test_restore_device(self):
         class M(torch.jit.ScriptModule):
             def __init__(self, cpu_device_str):
-                super(M, self).__init__()
+                super().__init__()
                 self.p0 = nn.Parameter(torch.tensor([0.3], dtype=torch.float,
                                                     device=cpu_device_str))
                 self.b0 = torch.tensor([0.9], dtype=torch.float,
@@ -414,7 +414,7 @@
     def test_restore_device_cuda(self):
         class MyModule(torch.jit.ScriptModule):
             def __init__(self):
-                super(MyModule, self).__init__()
+                super().__init__()
                 self.register_buffer('b0', torch.randn(1, 3))
                 self.p0 = nn.Parameter(torch.randn(2, 3))
 
@@ -468,7 +468,7 @@
     def test_restore_shared_storage_on_cuda(self):
         class Foo(torch.jit.ScriptModule):
             def __init__(self):
-                super(Foo, self).__init__()
+                super().__init__()
                 whole_tensor = torch.randn(4, 5, dtype=torch.float, device='cpu')
                 self.p0 = nn.Parameter(whole_tensor.narrow(0, 0, 1))
                 self.register_buffer('b0', whole_tensor.narrow(0, 3, 1))
@@ -486,7 +486,7 @@
     def test_add_relu_fusion(self):
         class M(torch.nn.Module):
             def __init__(self, relu_op):
-                super(M, self).__init__()
+                super().__init__()
                 self.relu_op = relu_op
 
             def forward(self, a, b, c):
@@ -533,7 +533,7 @@
 
         class Madd_(torch.nn.Module):
             def __init__(self, relu_op):
-                super(Madd_, self).__init__()
+                super().__init__()
                 self.relu_op = relu_op
 
             def forward(self, a, b):
@@ -567,7 +567,7 @@
 
         class Madd_out(torch.nn.Module):
             def __init__(self, relu_op):
-                super(Madd_out, self).__init__()
+                super().__init__()
                 self.relu_op = relu_op
 
             def forward(self, a, b):
@@ -834,9 +834,6 @@
             return x + 2
 
         class Mod(nn.Module):
-            def __init__(self):
-                super(Mod, self).__init__()
-
             def forward(self, t):
                 return t + 2
 
@@ -888,7 +885,7 @@
     def test_Sequential(self):
         class Seq(nn.Module):
             def __init__(self):
-                super(Seq, self).__init__()
+                super().__init__()
                 self.seq = nn.Sequential(nn.Linear(10, 20), nn.Linear(20, 30))
 
             @torch.jit.script_method
@@ -903,7 +900,7 @@
     def test_ModuleList(self):
         class Mod(nn.Module):
             def __init__(self):
-                super(Mod, self).__init__()
+                super().__init__()
                 self.model = nn.ModuleList([nn.Linear(10, 10) for _ in range(10)])
                 self.model += (nn.Linear(10, 20),)
                 self.model.append(nn.Linear(20, 30))
@@ -949,7 +946,7 @@
 
         class MyModule(torch.jit.ScriptModule):
             def __init__(self, module):
-                super(MyModule, self).__init__()
+                super().__init__()
                 self.module = module
 
             @torch.jit.script_method
@@ -1399,7 +1396,7 @@
         # Check match::module behavior
         class Test(torch.nn.Module):
             def __init__(self):
-                super(Test, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(1, 20, 5, 1)
                 self.bn = torch.nn.BatchNorm2d(num_features=20)
 
@@ -1423,9 +1420,6 @@
 
     def test_pattern_based_rewrite_with_source_range_preserved(self):
         class TestModule1(torch.nn.Module):
-            def __init__(self):
-                super(TestModule1, self).__init__()
-
             def forward(self, x, y, z, w):
                 x = x + y
                 x = x * z
@@ -1455,9 +1449,6 @@
         self.assertTrue(source_range_1 == source_range_2)
 
         class TestModule2(torch.nn.Module):
-            def __init__(self):
-                super(TestModule2, self).__init__()
-
             def forward(self, x, y, z, w):
                 x = x + y
                 x = x + z
@@ -1820,7 +1811,7 @@
         with enable_profiling_mode_for_profiling_tests():
             class MyModule(torch.nn.Module):
                 def __init__(self, M):
-                    super(MyModule, self).__init__()
+                    super().__init__()
                     self.dropout = torch.nn.Dropout(0.5)
                     self.linear = torch.nn.Linear(M, M)
 
@@ -2451,7 +2442,7 @@
     def test_cuda_export_restore(self):
         class Sub(torch.jit.ScriptModule):
             def __init__(self):
-                super(Sub, self).__init__()
+                super().__init__()
                 self.weight = nn.Parameter(torch.randn(3, 4))
 
             @torch.jit.script_method
@@ -2460,7 +2451,7 @@
 
         class M(torch.jit.ScriptModule):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.mod = Sub()
 
             @torch.jit.script_method
@@ -2494,7 +2485,7 @@
         for clazz in [nn.RNN(10, 20, 2), nn.GRU(10, 20, 2)]:
             class RNNTest(torch.nn.Module):
                 def __init__(self):
-                    super(RNNTest, self).__init__()
+                    super().__init__()
                     self.rnn = clazz
 
                 def forward(self, x, lengths, h0):
@@ -2516,7 +2507,7 @@
     def test_export_lstm(self):
         class LSTMTest(torch.nn.Module):
             def __init__(self):
-                super(LSTMTest, self).__init__()
+                super().__init__()
                 self.rnn = nn.LSTM(10, 20, 2)
 
             def forward(self, x, lengths, hiddens):
@@ -2539,7 +2530,7 @@
     def test_unique_state_dict(self):
         class MyModule(torch.nn.Module):
             def __init__(self):
-                super(MyModule, self).__init__()
+                super().__init__()
                 shared_param = torch.nn.Parameter(torch.ones(1))
                 self.register_parameter('w1', shared_param)
                 self.register_parameter('w2', shared_param)
@@ -2650,9 +2641,6 @@
     def test_import_method(self):
         with torch._jit_internal._disable_emit_hooks():
             class Foo(torch.jit.ScriptModule):
-                def __init__(self):
-                    super(Foo, self).__init__()
-
                 @torch.jit.script_method
                 def forward(self, x, y):
                     return 2 * x + y
@@ -2669,7 +2657,7 @@
     def test_non_ascii_string(self):
         class Foo(torch.jit.ScriptModule):
             def __init__(self):
-                super(Foo, self).__init__()
+                super().__init__()
                 self.a = "Over \u0e55\u0e57 57"
 
             @torch.jit.script_method
@@ -2749,9 +2737,6 @@
         four = torch.tensor(4)
 
         class Test(torch.jit.ScriptModule):
-            def __init__(self):
-                super(Test, self).__init__()
-
             @torch.jit.script_method
             def forward(self, input, other=four):
                 return input + other
@@ -2823,9 +2808,6 @@
     @unittest.skipIf(True, "TODO: re-enable with https://github.com/pytorch/pytorch/pull/29339")
     def test_torch_load_error(self):
         class J(torch.jit.ScriptModule):
-            def __init__(self):
-                super(J, self).__init__()
-
             @torch.jit.script_method
             def forward(self, input):
                 return input + 100
@@ -2887,9 +2869,6 @@
 
     def test_export_opnames(self):
         class Foo(torch.jit.ScriptModule):
-            def __init__(self):
-                super(Foo, self).__init__()
-
             def one(self, x, y):
                 # type: (Tensor, Tensor) -> Tensor
                 return x + y
@@ -2905,7 +2884,7 @@
 
         class Bar(torch.jit.ScriptModule):
             def __init__(self):
-                super(Bar, self).__init__()
+                super().__init__()
                 self.sub = Foo()
 
             @torch.jit.script_method
@@ -3013,9 +2992,6 @@
 
 
         class TestModule(torch.nn.Module):
-            def __init__(self):
-                super(TestModule, self).__init__()
-
             def forward(self, x):
                 return MyScriptClass()
 
@@ -3029,16 +3005,10 @@
 
     def test_dictionary_as_example_inputs_for_jit_trace(self):
         class TestModule_v1(torch.nn.Module):
-            def __init__(self):
-                super(TestModule_v1, self).__init__()
-
             def forward(self, key2=None, key3=None, key4=None, key5=None, key1=None, key6=None):
                 return key1 + key2 + key3
 
         class TestModule_v2(torch.nn.Module):
-            def __init__(self):
-                super(TestModule_v2, self).__init__()
-
             def forward(self, x, y):
                 return x + y
 
@@ -3096,16 +3066,13 @@
             return torch.add(x, x)
 
         class MyNestedMod(torch.nn.Module):
-            def __init__(self):
-                super(MyNestedMod, self).__init__()
-
             def forward(self, x):
                 return torch.sub(x, x)
 
 
         class MyMod(torch.nn.Module):
             def __init__(self):
-                super(MyMod, self).__init__()
+                super().__init__()
                 self.nested = MyNestedMod()
 
             def forward(self, x):
@@ -3124,9 +3091,6 @@
         Check that the `@staticmethod` annotation on a function on a module works.
         """
         class MyCell(torch.nn.Module):
-            def __init__(self):
-                super(MyCell, self).__init__()
-
             @staticmethod
             def do_it(x, h):
                 new_h = torch.tanh(x + h)
@@ -3153,9 +3117,6 @@
             return x
 
         class Moddy(torch.nn.Module):
-            def __init__(self):
-                super(Moddy, self).__init__()
-
             def forward(self, x):
                 return foo()
 
@@ -3174,9 +3135,6 @@
             return x
 
         class Moddy(torch.nn.Module):
-            def __init__(self):
-                super(Moddy, self).__init__()
-
             def forward(self, x):
                 return foo()
 
@@ -3387,7 +3345,7 @@
     def test_ignored_method_binding(self):
         class Bar(torch.nn.Module):
             def __init__(self):
-                super(Bar, self).__init__()
+                super().__init__()
                 self.x : int = 0
 
             @torch.jit.export
@@ -3417,7 +3375,7 @@
             __annotations__ = {"x": Optional[torch.Tensor]}
 
             def __init__(self):
-                super(A, self).__init__()
+                super().__init__()
                 self.x = None
 
             @torch.jit.ignore
@@ -3440,7 +3398,7 @@
             __constants__ = ["foo"]
 
             def __init__(self, foo):
-                super(M, self).__init__()
+                super().__init__()
                 self.foo = foo
 
         m = M(5)
@@ -3454,7 +3412,7 @@
             FOO = 0
 
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.foo = self.FOO
         m = M()
         self.assertEqual(m.foo, M.FOO)
@@ -3463,9 +3421,6 @@
         class M(torch.jit.ScriptModule):
             FOO = 0
 
-            def __init__(self):
-                super(M, self).__init__()
-
             @torch.jit.script_method
             def forward(self):
                 return self.FOO
@@ -3482,7 +3437,7 @@
     def test_attribute_in_init(self):
         class M(torch.jit.ScriptModule):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.foo = torch.jit.Attribute(0.1, float)
                 # we should be able to use self.foo as a float here
                 assert 0.0 < self.foo
@@ -3491,7 +3446,7 @@
     def test_scriptable_fn_as_attr(self):
         class M(torch.nn.Module):
             def __init__(self, fn):
-                super(M, self).__init__()
+                super().__init__()
                 self.fn = fn
 
             def forward(self, x):
@@ -3543,9 +3498,6 @@
         FileCheck().check("NamedTuple").run(fn2.graph)
 
         class MyMod(torch.nn.Module):
-            def __init__(self):
-                super(MyMod, self).__init__()
-
             @torch.jit.unused
             def fn(self):
                 # type: () -> MyTuple
@@ -3562,9 +3514,6 @@
 
     def test_unused_decorator(self):
         class MyMod(torch.nn.Module):
-            def __init__(self):
-                super(MyMod, self).__init__()
-
             @torch.jit.unused
             @torch.no_grad()
             def fn(self, x):
@@ -3743,16 +3692,10 @@
             self.assertFalse(loaded._c.getattr('training'))
 
         class M(nn.Module):
-            def __init__(self):
-                super(M, self).__init__()
-
             def forward(self, x):
                 return self.training
 
         class OldM(torch.jit.ScriptModule):
-            def __init__(self):
-                super(OldM, self).__init__()
-
             @torch.jit.script_method
             def forward(self, x):
                 return self.training
@@ -3762,17 +3705,11 @@
 
     def test_inherit_method(self):
         class A(torch.jit.ScriptModule):
-            def __init__(self):
-                super(A, self).__init__()
-
             @torch.jit.script_method
             def forward(self, x):
                 return x + self.bar(x)
 
         class B(A):
-            def __init__(self):
-                super(B, self).__init__()
-
             @torch.jit.script_method
             def bar(self, x):
                 return x * x
@@ -3785,16 +3722,13 @@
         self.assertEqual(b(v), v + v * v)
 
         class C(torch.jit.ScriptModule):
-            def __init__(self):
-                super(C, self).__init__()
-
             @torch.jit.script_method
             def bar(self, x):
                 return x
 
         class D(C, B):
             def __init__(self):
-                super(D, self).__init__()
+                super().__init__()
 
         self.assertEqual(D()(v), v + v)
 
@@ -3822,7 +3756,7 @@
     def test_first_class_module(self):
         class Foo(torch.jit.ScriptModule):
             def __init__(self):
-                super(Foo, self).__init__()
+                super().__init__()
                 self.foo = nn.Parameter(torch.rand(3, 4))
 
             @torch.jit.script_method
@@ -3857,9 +3791,6 @@
 
     def test_static_methods(self):
         class M(nn.Module):
-            def __init__(self):
-                super(M, self).__init__()
-
             @staticmethod
             def my_method(x):
                 return x + 100
@@ -3868,9 +3799,6 @@
                 return x + M.my_method(x)
 
         class N(nn.Module):
-            def __init__(self):
-                super(N, self).__init__()
-
             @staticmethod
             def my_method(x):
                 return x * 100
@@ -3907,7 +3835,7 @@
     def test_builtin_function_attributes(self):
         class Add(nn.Module):
             def __init__(self):
-                super(Add, self).__init__()
+                super().__init__()
                 self.add = torch.add
 
             def forward(self, input):
@@ -4127,16 +4055,13 @@
 
         class What(torch.jit.ScriptModule):
             def __init__(self, x):
-                super(What, self).__init__()
+                super().__init__()
                 self.foo = x
         a = What(foo)
         c = What(foo)
 
     def test_training_param(self):
         class What(torch.jit.ScriptModule):
-            def __init__(self):
-                super(What, self).__init__()
-
             @torch.jit.script_method
             def forward(self, x):
                 # type: (int) -> int
@@ -4163,7 +4088,7 @@
 
         class FooBar1234(torch.nn.Module):
             def __init__(self):
-                super(FooBar1234, self).__init__()
+                super().__init__()
                 self.f = Foo321()
 
             def forward(self, x):
@@ -4222,9 +4147,6 @@
 
         with torch._jit_internal._disable_emit_hooks():
             class Foo(torch.jit.ScriptModule):
-                def __init__(self):
-                    super(Foo, self).__init__()
-
                 @torch.jit.script_method
                 def forward(self):
                     return math.pi, 0.1, mod.inf, mod.ninf, 2.225073858507201e-308, mod.nan
@@ -4645,16 +4567,13 @@
         https://github.com/pytorch/pytorch/issues/25871
         """
         class A(torch.jit.ScriptModule):
-            def __init__(self):
-                super(A, self).__init__()
-
             @torch.jit.script_method
             def forward(self, x):
                 return x
 
         class B(torch.jit.ScriptModule):
             def __init__(self):
-                super(B, self).__init__()
+                super().__init__()
                 self.foo = torch.nn.ModuleList([A()])
 
             @torch.jit.script_method
@@ -4665,7 +4584,7 @@
 
         class C(torch.jit.ScriptModule):
             def __init__(self):
-                super(C, self).__init__()
+                super().__init__()
                 self.foo = torch.nn.Sequential(B())
 
             @torch.jit.script_method
@@ -5267,7 +5186,7 @@
     def test_module_copy_with_attributes(self):
         class Vocabulary(torch.jit.ScriptModule):
             def __init__(self, vocab_list):
-                super(Vocabulary, self).__init__()
+                super().__init__()
                 self._vocab = torch.jit.Attribute(vocab_list, List[str])
                 self.some_idx = torch.jit.Attribute(2, int)
                 self.idx = torch.jit.Attribute(
@@ -7211,7 +7130,7 @@
     def test_nested_select_assign(self):
         class SubSubModule(torch.nn.Module):
             def __init__(self):
-                super(SubSubModule, self).__init__()
+                super().__init__()
                 self.abc = 11
 
             def forward(self, x):
@@ -7219,7 +7138,7 @@
 
         class SubModule(torch.nn.Module):
             def __init__(self):
-                super(SubModule, self).__init__()
+                super().__init__()
                 self.a = 11
                 self.nested = SubSubModule()
 
@@ -7228,7 +7147,7 @@
 
         class TestModule(torch.nn.Module):
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.sub = SubModule()
                 self.hi = 1
 
@@ -7796,7 +7715,7 @@
     def test_dropout_eval(self):
         class ScriptedConv2d(torch.jit.ScriptModule):
             def __init__(self, in_channels, out_channels, **kwargs):
-                super(ScriptedConv2d, self).__init__()
+                super().__init__()
                 self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
                 self.bn = nn.BatchNorm2d(out_channels, eps=0.001)
 
@@ -7808,7 +7727,7 @@
 
         class ScriptMod(torch.jit.ScriptModule):
             def __init__(self):
-                super(ScriptMod, self).__init__()
+                super().__init__()
                 self.Conv2d_1a_3x3 = ScriptedConv2d(3, 32, kernel_size=3, stride=2)
 
             @torch.jit.script_method
@@ -7818,7 +7737,7 @@
 
         class EagerConv2d(torch.nn.Module):
             def __init__(self, in_channels, out_channels, **kwargs):
-                super(EagerConv2d, self).__init__()
+                super().__init__()
                 self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
                 self.bn = nn.BatchNorm2d(out_channels, eps=0.001)
 
@@ -7829,7 +7748,7 @@
 
         class EagerMod(torch.nn.Module):
             def __init__(self):
-                super(EagerMod, self).__init__()
+                super().__init__()
                 self.Conv2d_1a_3x3 = EagerConv2d(3, 32, kernel_size=3, stride=2)
 
             def forward(self, x):
@@ -8255,7 +8174,7 @@
     def test_script_module(self):
         class M1(torch.jit.ScriptModule):
             def __init__(self):
-                super(M1, self).__init__()
+                super().__init__()
                 self.weight = nn.Parameter(torch.randn(2))
 
             @torch.jit.script_method
@@ -8264,7 +8183,7 @@
 
         class PModule(nn.Module):
             def __init__(self):
-                super(PModule, self).__init__()
+                super().__init__()
                 self.a = nn.Parameter(torch.randn(2, 3))
 
             def forward(self, a):
@@ -8272,7 +8191,7 @@
 
         class M2(torch.jit.ScriptModule):
             def __init__(self):
-                super(M2, self).__init__()
+                super().__init__()
                 # test submodule
                 self.sub = M1()
                 self.sub2 = PModule()
@@ -8824,7 +8743,7 @@
     def test_script_module_call_noscript(self):
         class M(torch.jit.ScriptModule):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.value = 1
 
             @torch.jit.ignore
@@ -8849,7 +8768,7 @@
     def test_script_module_nochange_submodule(self):
         class M(torch.jit.ScriptModule):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.sub = nn.Linear(5, 5)
 
             @torch.jit.script_method
@@ -8865,22 +8784,16 @@
 
     def test_module_apis(self):
         class Sub(torch.nn.Module):
-            def __init__(self):
-                super(Sub, self).__init__()
-
             def forward(self, thing):
                 return thing - 2
 
         class Double(torch.nn.Module):
-            def __init__(self):
-                super(Double, self).__init__()
-
             def forward(self, thing):
                 return thing * 2
 
         class MyMod(torch.nn.Module):
             def __init__(self):
-                super(MyMod, self).__init__()
+                super().__init__()
                 self.mod = (Sub())
                 self.mod2 = (Sub())
                 self.mod3 = nn.Sequential(nn.Sequential(Sub()))
@@ -8919,7 +8832,7 @@
             __constants__ = ['b', 'i', 'c', 's']
 
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.b = False
                 self.i = 1
                 self.c = 3.5
@@ -8938,9 +8851,6 @@
 
     def test_script_module_fail_exist(self):
         class M(torch.jit.ScriptModule):
-            def __init__(self):
-                super(M, self).__init__()
-
             @torch.jit.script_method
             def forward(self, x):
                 return x + self.whatisgoingon
@@ -8951,7 +8861,7 @@
     def test_script_module_none_exist_fail(self):
         class M(torch.jit.ScriptModule):
             def __init__(self, my_optional):
-                super(M, self).__init__()
+                super().__init__()
                 self.my_optional = my_optional
 
             @torch.jit.script_method
@@ -8969,7 +8879,7 @@
             __constants__ = ['invalid']
 
             def __init__(self):
-                super(Foo, self).__init__()
+                super().__init__()
                 self.invalid = [nn.Linear(3, 4)]
 
         with self.assertRaisesRegex(
@@ -8981,7 +8891,7 @@
             __constants__ = ['invalid']
 
             def __init__(self):
-                super(Foo2, self).__init__()
+                super().__init__()
                 self.invalid = int
 
         with self.assertRaisesRegex(TypeError, "not a valid constant"):
@@ -8991,7 +8901,7 @@
             __constants__ = ['invalid']
 
             def __init__(self):
-                super(Foo3, self).__init__()
+                super().__init__()
                 self.invalid = (3, 4, {})
 
         with self.assertRaisesRegex(TypeError, "not a valid constant"):
@@ -9001,7 +8911,7 @@
             __constants__ = ['invalid']
 
             def __init__(self):
-                super(Foo4, self).__init__()
+                super().__init__()
                 self.invalid = np.int64(5)
 
         # verify that we capture human understandable class name
@@ -9012,7 +8922,7 @@
         # TODO: add param mutation test case after JIT support it
         class ModuleBufferMutate(torch.jit.ScriptModule):
             def __init__(self):
-                super(ModuleBufferMutate, self).__init__()
+                super().__init__()
                 self.register_buffer('running_var', torch.tensor(0, dtype=torch.long))
 
             @torch.jit.script_method
@@ -9032,7 +8942,7 @@
             __constants__ = ['b']
 
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.b = [1, 2, 3, 4]
 
             @torch.jit.script_method
@@ -9048,9 +8958,6 @@
 
     def test_override_magic(self):
         class OverrideMagic(nn.Module):
-            def __init__(self):
-                super(OverrideMagic, self).__init__()
-
             @torch.jit.export
             def __len__(self):
                 return 10
@@ -9059,9 +8966,6 @@
         self.assertEqual(len(mod), len(torch.jit.script(mod)))
 
         class OverrideMagicSeq(nn.Sequential):
-            def __init__(self):
-                super(OverrideMagicSeq, self).__init__()
-
             @torch.jit.export
             def __len__(self):
                 return 10
@@ -9073,7 +8977,7 @@
     def test_script_module_for2(self):
         class Sub(torch.jit.ScriptModule):
             def __init__(self):
-                super(Sub, self).__init__()
+                super().__init__()
                 self.weight = nn.Parameter(torch.randn(2))
 
             @torch.jit.script_method
@@ -9082,7 +8986,7 @@
 
         class M(torch.jit.ScriptModule):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.mods = nn.ModuleList([Sub() for i in range(10)])
 
             @torch.jit.script_method
@@ -9105,7 +9009,7 @@
     def test_attr_qscheme_script(self):
         class Foo(torch.nn.Module):
             def __init__(self):
-                super(Foo, self).__init__()
+                super().__init__()
                 self.qscheme = torch.per_tensor_affine
 
             def forward(self):
@@ -9121,7 +9025,7 @@
     def test_script_module_const_submodule_fail(self):
         class Sub(torch.jit.ScriptModule):
             def __init__(self):
-                super(Sub, self).__init__()
+                super().__init__()
                 self.weight = nn.Parameter(torch.randn(2))
 
             @torch.jit.script_method
@@ -9130,7 +9034,7 @@
 
         class M(torch.jit.ScriptModule):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.mods = [Sub() for _ in range(10)]
 
             @torch.jit.script_method
@@ -9291,7 +9195,7 @@
     def test_missing_getstate(self):
         class Foo(torch.nn.Module):
             def __init__(self):
-                super(Foo, self).__init__()
+                super().__init__()
                 self.x = 1
 
             def forward(self, x):
@@ -9321,7 +9225,7 @@
     def test_pack_unpack_nested(self):
         class SubSubMod(torch.jit.ScriptModule):
             def __init__(self):
-                super(SubSubMod, self).__init__()
+                super().__init__()
                 self.register_buffer('buf', torch.ones(3, 4) * 3)
 
             @torch.jit.script_method
@@ -9338,7 +9242,7 @@
 
         class SubMod(torch.jit.ScriptModule):
             def __init__(self):
-                super(SubMod, self).__init__()
+                super().__init__()
                 self.register_buffer('buf', torch.ones(3, 4) * 2)
                 self.ssm = SubSubMod()
 
@@ -9356,7 +9260,7 @@
 
         class Mod(torch.jit.ScriptModule):
             def __init__(self):
-                super(Mod, self).__init__()
+                super().__init__()
                 self.submod = SubMod()
                 self.register_buffer('buf', torch.ones(3, 4) * 1)
 
@@ -9429,7 +9333,7 @@
             __constants__ = ['mods']
 
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.mods = 1
 
             @torch.jit.script_method
@@ -9443,7 +9347,7 @@
     def test_attr_module_constants(self):
         class M2(torch.jit.ScriptModule):
             def __init__(self, mod_list):
-                super(M2, self).__init__()
+                super().__init__()
                 self.mods = mod_list
 
             @torch.jit.script_method
@@ -9457,7 +9361,7 @@
     def test_script_sequential_for(self):
         class Sub(torch.jit.ScriptModule):
             def __init__(self):
-                super(Sub, self).__init__()
+                super().__init__()
                 self.weight = nn.Parameter(torch.randn(2))
 
             @torch.jit.script_method
@@ -9466,7 +9370,7 @@
 
         class M(torch.jit.ScriptModule):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.mods = nn.Sequential(Sub(), Sub(), Sub())
 
             @torch.jit.script_method
@@ -9494,7 +9398,7 @@
     def test_script_sequential_sliced_iteration(self):
         class seq_mod(nn.Module):
             def __init__(self):
-                super(seq_mod, self).__init__()
+                super().__init__()
                 self.layers = [nn.ReLU(), nn.ReLU(), nn.ReLU()]
                 self.layers = nn.Sequential(*self.layers)
 
@@ -9512,7 +9416,7 @@
     def test_script_sequential_orderdict(self):
         class M(torch.jit.ScriptModule):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.mods = nn.Sequential(OrderedDict([
                     ("conv", nn.Conv2d(1, 20, 5)),
                     ("relu", nn.ReLU())
@@ -9528,7 +9432,7 @@
     def test_script_sequential_multi_output_fail(self):
         class Sub(torch.jit.ScriptModule):
             def __init__(self):
-                super(Sub, self).__init__()
+                super().__init__()
                 self.weight = nn.Parameter(torch.randn(2))
 
             @torch.jit.script_method
@@ -9536,16 +9440,13 @@
                 return self.weight + thing
 
         class ReturnMulti(torch.jit.ScriptModule):
-            def __init__(self):
-                super(ReturnMulti, self).__init__()
-
             @torch.jit.script_method
             def forward(self, x):
                 return x, x, x
 
         class HaveSequential(torch.jit.ScriptModule):
             def __init__(self):
-                super(HaveSequential, self).__init__()
+                super().__init__()
                 self.someseq = nn.Sequential(
                     Sub(),
                     ReturnMulti(),
@@ -9566,7 +9467,7 @@
     def test_script_sequential_in_mod_list(self):
         class Sub(torch.jit.ScriptModule):
             def __init__(self):
-                super(Sub, self).__init__()
+                super().__init__()
                 self.weight = nn.Parameter(torch.randn(2))
 
             @torch.jit.script_method
@@ -9575,7 +9476,7 @@
 
         class M(torch.jit.ScriptModule):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.mods = nn.ModuleList([Sub(), nn.Sequential(Sub(), nn.Sequential(Sub(), Sub()), Sub())])
 
             @torch.jit.script_method
@@ -9593,7 +9494,7 @@
     def test_script_nested_mod_list(self):
         class Sub(torch.jit.ScriptModule):
             def __init__(self):
-                super(Sub, self).__init__()
+                super().__init__()
                 self.weight = nn.Parameter(torch.randn(2))
 
             @torch.jit.script_method
@@ -9602,7 +9503,7 @@
 
         class M(torch.jit.ScriptModule):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.mods = nn.ModuleList([nn.ModuleList([Sub()]), nn.Sequential(Sub()), nn.ModuleList([Sub(), Sub()])])
 
             @torch.jit.script_method
@@ -9622,7 +9523,7 @@
             __constants__ = ['dim']
 
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.dim = 1
 
             @torch.jit.script_method
@@ -9653,7 +9554,7 @@
 
         class M2(torch.jit.ScriptModule):
             def __init__(self):
-                super(M2, self).__init__()
+                super().__init__()
                 self.m = torch.jit.trace(TestScript.StarTestSumStarred(),
                                          (torch.ones(4, 3), torch.ones(4, 3), torch.ones(4, 3)))
                 self.g = torch.jit.trace(TestScript.StarTestReturnThree(), torch.ones(4, 3))
@@ -9669,7 +9570,7 @@
     def test_script_star_expr_string(self):
         class M2(torch.jit.ScriptModule):
             def __init__(self):
-                super(M2, self).__init__()
+                super().__init__()
                 self.m = torch.jit.trace(TestScript.StarTestSumStarred(),
                                          (torch.ones(4, 3), torch.ones(4, 3), torch.ones(4, 3)))
                 self.g = torch.jit.trace(TestScript.StarTestReturnThree(), torch.ones(4, 3))
@@ -9696,7 +9597,7 @@
     def test_script_star_assign(self):
         class M2(torch.jit.ScriptModule):
             def __init__(self):
-                super(M2, self).__init__()
+                super().__init__()
                 self.g = torch.jit.trace(TestScript.StarTestSumAndReturnThree(), torch.ones(4, 3))
                 self.define('''
             def forward(self, rep):
@@ -9710,7 +9611,7 @@
     def test_script_module_star_assign2(self):
         class M2(torch.jit.ScriptModule):
             def __init__(self):
-                super(M2, self).__init__()
+                super().__init__()
                 self.g = torch.jit.trace(
                     TestScript.StarTestSumAndReturnThree(),
                     (torch.ones(4, 3), torch.ones(4, 3), torch.ones(4, 3)),
@@ -9727,7 +9628,7 @@
     def test_script_module_star_assign2_inplace(self):
         class M2(torch.jit.ScriptModule):
             def __init__(self):
-                super(M2, self).__init__()
+                super().__init__()
                 self.g = torch.jit.trace(
                     TestScript.StarTestSumAndReturnThree(),
                     (torch.ones(4, 3), torch.ones(4, 3), torch.ones(4, 3)),
@@ -9749,7 +9650,7 @@
         with self.assertRaisesRegex(RuntimeError, "cannot be used as a tuple"):
             class M2(torch.jit.ScriptModule):
                 def __init__(self):
-                    super(M2, self).__init__()
+                    super().__init__()
 
                     @torch.jit.ignore
                     def myfunc():
@@ -9768,7 +9669,7 @@
         with self.assertRaisesRegex(RuntimeError, "cannot be used as a tuple"):
             class M2(torch.jit.ScriptModule):
                 def __init__(self):
-                    super(M2, self).__init__()
+                    super().__init__()
 
                     self.define('''
                 def forward(self, rep):
@@ -9933,7 +9834,7 @@
             }
 
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.val = None
 
             def some_method(self):
@@ -9952,16 +9853,13 @@
     def test_script_forward_method_replacement(self):
         # We want to support the use case of attaching a different `forward` method
         class LowLevelModule(torch.nn.Module):
-            def __init__(self):
-                super(LowLevelModule, self).__init__()
-
             def forward(self, input: torch.Tensor):
                 # Generic forward dispatch
                 return self.forward_pytorch(input) * 2
 
         class TestModule(LowLevelModule):
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 # Replace the forward method
                 self.forward = types.MethodType(LowLevelModule.forward, self)
 
@@ -10126,7 +10024,7 @@
 
         class ScriptMod(torch.jit.ScriptModule):
             def __init__(self, mod):
-                super(ScriptMod, self).__init__()
+                super().__init__()
                 x = torch.zeros(1, 3)
                 mod_fn = lambda : mod(x)  # noqa: E731
                 self.mod = torch.jit.trace(mod_fn, tuple())
@@ -10466,7 +10364,7 @@
     def test_script_module_export_submodule(self):
         class M1(torch.jit.ScriptModule):
             def __init__(self):
-                super(M1, self).__init__()
+                super().__init__()
                 self.weight = nn.Parameter(torch.randn(2))
 
             @torch.jit.script_method
@@ -10475,7 +10373,7 @@
 
         class M2(torch.jit.ScriptModule):
             def __init__(self):
-                super(M2, self).__init__()
+                super().__init__()
                 # test submodule
                 self.sub = M1()
                 self.weight = nn.Parameter(torch.randn(2, 3))
@@ -10518,7 +10416,7 @@
     def test_compile_module_with_constant(self):
         class Double(nn.Module):
             def __init__(self, downsample=None):
-                super(Double, self).__init__()
+                super().__init__()
 
             def forward(self, input):
                 return input * 2
@@ -10527,7 +10425,7 @@
             __constants__ = ['downsample']
 
             def __init__(self, downsample=None):
-                super(Mod, self).__init__()
+                super().__init__()
                 self.downsample = downsample
 
             def forward(self, input):
@@ -10550,7 +10448,7 @@
     def test_script_module_export_tensor_type(self):
         class M(torch.jit.ScriptModule):
             def __init__(self, type):
-                super(M, self).__init__()
+                super().__init__()
                 self.param = torch.nn.Parameter(torch.zeros((5, 5), dtype=type).random_())
 
             @torch.jit.script_method
@@ -10571,7 +10469,7 @@
         class M(torch.jit.ScriptModule):
 
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.param = torch.nn.Parameter(torch.zeros((5, 5), device='cuda:0').random_())
 
             @torch.jit.script_method
@@ -10589,7 +10487,7 @@
     def test_script_module_export_blocks(self):
         class M(torch.jit.ScriptModule):
             def __init__(self, n, m):
-                super(M, self).__init__()
+                super().__init__()
                 self.weight = torch.nn.Parameter(torch.rand(n, m))
 
             @torch.jit.script_method
@@ -10610,7 +10508,7 @@
         class M(torch.jit.ScriptModule):
 
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.param1 = torch.nn.Parameter(torch.rand(5, 5))
                 self.param2 = torch.nn.Parameter(self.param1[3])
                 self.param3 = torch.nn.Parameter(torch.rand(5, 5))
@@ -10631,22 +10529,16 @@
 
     def test_sequential_intermediary_types(self):
         class A(torch.nn.Module):
-            def __init__(self):
-                super(A, self).__init__()
-
             def forward(self, x):
                 return x + 3
 
         class B(torch.nn.Module):
-            def __init__(self):
-                super(B, self).__init__()
-
             def forward(self, x):
                 return {"1": x}
 
         class C(torch.nn.Module):
             def __init__(self):
-                super(C, self).__init__()
+                super().__init__()
                 self.foo = torch.nn.Sequential(A(), B())
 
             def forward(self, x):
@@ -10896,9 +10788,6 @@
 
     def test_torch_ignore_conversion_to_none(self):
         class A(torch.nn.Module):
-            def __init__(self):
-                super(A, self).__init__()
-
             @torch.jit.ignore
             def ignored(self, a: int) -> None:
                 l: int = len([2 for i in range(a) if i > 2])
@@ -10911,9 +10800,6 @@
                 return a + b
 
         class B(torch.nn.Module):
-            def __init__(self):
-                super(B, self).__init__()
-
             @torch.jit.ignore
             def ignored(self, a: int):
                 l: int = len([2 for i in range(a) if i > 2])
@@ -10981,7 +10867,7 @@
         with enable_profiling_mode_for_profiling_tests():
             class MyBatchNorm(torch.nn.Module):
                 def __init__(self, num_features, affine, track_running_stats):
-                    super(MyBatchNorm, self).__init__()
+                    super().__init__()
                     self.bn = torch.nn.BatchNorm2d(
                         num_features, 1e-5, affine=affine, track_running_stats=track_running_stats).float()
 
@@ -11043,7 +10929,7 @@
             __constants__ = ['d']
 
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.d = torch.device('cpu')
 
             @torch.jit.script_method
@@ -11232,7 +11118,7 @@
 
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.weight_0 = torch.nn.Parameter(torch.rand(weight_0_shape))
                 self.weight_1 = torch.nn.Parameter(torch.rand(weight_1_shape))
 
@@ -11611,23 +11497,17 @@
     @skipIfTorchDynamo("TorchDynamo fails with unknown reason")
     def test_zip_enumerate_modulelist(self):
         class Sub(torch.nn.Module):
-            def __init__(self):
-                super(Sub, self).__init__()
-
             def forward(self, thing):
                 return thing - 2
 
         class Double(torch.nn.Module):
-            def __init__(self):
-                super(Double, self).__init__()
-
             def forward(self, thing):
                 return thing * 2
 
         # zipping over two
         class ZipModLists(torch.nn.Module):
             def __init__(self, mods, mods2):
-                super(ZipModLists, self).__init__()
+                super().__init__()
                 self.mods = mods
                 self.mods2 = mods2
 
@@ -11642,7 +11522,7 @@
             __constants__ = ['tup_larger', 'tup_smaller']
 
             def __init__(self, mods, mods2):
-                super(ZipWithValues, self).__init__()
+                super().__init__()
                 self.mods = mods
                 self.mods2 = mods2
                 self.tup_larger = list(range(len(mods2) + 1))
@@ -11675,7 +11555,7 @@
 
         class Mod(torch.nn.Module):
             def __init__(self):
-                super(Mod, self).__init__()
+                super().__init__()
                 self.mods = nn.ModuleList([Double(), Double()])
 
             def forward(self, x):
@@ -11776,7 +11656,7 @@
 
         class M(torch.nn.Module):
             def __init__(self, mod_list):
-                super(M, self).__init__()
+                super().__init__()
                 self.module_list = mod_list
 
             def forward(self, x):
@@ -11791,7 +11671,7 @@
 
         class M2(M):
             def __init__(self, mod_list):
-                super(M2, self).__init__(mod_list)
+                super().__init__(mod_list)
 
             def forward(self, x):
                 out = [mod(x) for mod in self.module_list]
@@ -12321,7 +12201,7 @@
     def test_call_python_mod_from_tracing_fn(self):
         class PythonMod(torch.nn.Module):
             def __init__(self):
-                super(PythonMod, self).__init__()
+                super().__init__()
                 self.param = torch.nn.Parameter(torch.rand(4, 3), requires_grad=False)
 
             def forward(self, x):
@@ -12355,7 +12235,7 @@
     def test_call_traced_mod_from_tracing_fn(self):
         class TracedModule(torch.nn.Module):
             def __init__(self):
-                super(TracedModule, self).__init__()
+                super().__init__()
                 self.param = torch.nn.Parameter(torch.rand(4, 3), requires_grad=False)
 
             def forward(self, x):
@@ -12385,7 +12265,7 @@
         with self.assertRaisesRegex(RuntimeError, "must be registered as submodules"):
             class ScriptMod(torch.jit.ScriptModule):
                 def __init__(self):
-                    super(ScriptMod, self).__init__()
+                    super().__init__()
                     self.param = torch.nn.Parameter(torch.rand(3, 4), requires_grad=False)
 
                 @torch.jit.script_method
@@ -12407,7 +12287,7 @@
 
         class TracedModule(torch.nn.Module):
             def __init__(self):
-                super(TracedModule, self).__init__()
+                super().__init__()
                 self.param = torch.nn.Parameter(torch.rand(4, 3))
 
             def forward(self, x):
@@ -12424,7 +12304,7 @@
     def test_call_python_mod_from_traced_module(self):
         class PythonModule(torch.nn.Module):
             def __init__(self):
-                super(PythonModule, self).__init__()
+                super().__init__()
                 self.param = torch.nn.Parameter(torch.rand(5, 7))
 
             def forward(self, x):
@@ -12432,7 +12312,7 @@
 
         class TracedModule(torch.nn.Module):
             def __init__(self):
-                super(TracedModule, self).__init__()
+                super().__init__()
                 self.param = torch.nn.Parameter(torch.rand(4, 5))
                 self.mod = PythonModule()
 
@@ -12515,7 +12395,7 @@
     def test_call_python_mod_from_script_fn(self):
         class PythonModule(torch.nn.Module):
             def __init__(self):
-                super(PythonModule, self).__init__()
+                super().__init__()
                 self.param = torch.nn.Parameter(torch.rand(5, 7))
 
             def forward(self, x):
@@ -12546,9 +12426,6 @@
     def test_call_script_mod_from_script_fn(self):
         with self.assertRaisesRegex(RuntimeError, "Cannot call a ScriptModule that is not a submodule of the caller"):
             class ScriptMod(torch.jit.ScriptModule):
-                def __init__(self):
-                    super(ScriptMod, self).__init__()
-
                 @torch.jit.script_method
                 def forward(self, x):
                     return torch.mm(x, torch.zeros([4, 3]))
@@ -12566,7 +12443,7 @@
 
         class ScriptMod(torch.jit.ScriptModule):
             def __init__(self):
-                super(ScriptMod, self).__init__()
+                super().__init__()
                 self.param = torch.nn.Parameter(torch.rand(4, 3))
 
             @torch.jit.script_method
@@ -12580,7 +12457,7 @@
     def test_call_python_mod_from_script_module(self):
         class PythonMod(torch.nn.Module):
             def __init__(self):
-                super(PythonMod, self).__init__()
+                super().__init__()
                 self.param = torch.nn.Parameter(torch.rand(3, 5))
 
             @torch.jit.ignore
@@ -12589,7 +12466,7 @@
 
         class ScriptMod(torch.jit.ScriptModule):
             def __init__(self):
-                super(ScriptMod, self).__init__()
+                super().__init__()
                 self.param = torch.nn.Parameter(torch.rand(4, 3))
                 self.pm = PythonMod()
 
@@ -12610,7 +12487,7 @@
 
         class ScriptMod(torch.jit.ScriptModule):
             def __init__(self):
-                super(ScriptMod, self).__init__()
+                super().__init__()
                 self.param = torch.nn.Parameter(torch.rand(4, 3))
 
             @torch.jit.script_method
@@ -12625,7 +12502,7 @@
     def test_call_script_mod_from_script_module(self):
         class ScriptMod1(torch.jit.ScriptModule):
             def __init__(self):
-                super(ScriptMod1, self).__init__()
+                super().__init__()
                 self.param = torch.nn.Parameter(torch.rand(3, 5))
 
             @torch.jit.script_method
@@ -12634,7 +12511,7 @@
 
         class ScriptMod(torch.jit.ScriptModule):
             def __init__(self):
-                super(ScriptMod, self).__init__()
+                super().__init__()
                 self.param = torch.nn.Parameter(torch.rand(4, 3))
                 self.tm = ScriptMod1()
 
@@ -12653,7 +12530,7 @@
         with self.assertRaisesRegex(RuntimeError, "Cannot call a ScriptModule that is not a submodule of the caller"):
             class ScriptMod(torch.jit.ScriptModule):
                 def __init__(self):
-                    super(ScriptMod, self).__init__()
+                    super().__init__()
                     self.param = torch.nn.Parameter(torch.rand(3, 3))
 
                 @torch.jit.script_method
@@ -12947,7 +12824,7 @@
     def test_annot_string_py3_method(self):
         class TestModule(torch.jit.ScriptModule):
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
 
         code = '''
             def foo(self, x : {input}, y : Tuple[Tensor, Tensor]) -> Tuple[{output}, {output}]:
@@ -12979,7 +12856,7 @@
     def test_annot_string_mypy_method(self):
         class TestModule(torch.jit.ScriptModule):
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
 
         code = '''
         def foo(self, x, y):
@@ -13176,7 +13053,7 @@
 
         class TestLinear(torch.nn.Module):
             def __init__(self, in_features, out_features):
-                super(TestLinear, self).__init__()
+                super().__init__()
                 self.in_features = in_features
                 self.out_features = out_features
                 self.weight = torch.nn.Parameter(torch.empty(out_features, in_features))
@@ -13197,7 +13074,7 @@
         # Initialize a ScriptModule that uses the weak module above multiple times
         class Strong(torch.jit.ScriptModule):
             def __init__(self):
-                super(Strong, self).__init__()
+                super().__init__()
                 self.fc1 = TestLinear(10, 10)
                 self.fc1.weight = torch.nn.Parameter(weights)
                 self.fc1.bias = torch.nn.Parameter(bias)
@@ -13226,15 +13103,12 @@
 
     def test_module_copying(self):
         class Submodule(torch.nn.Module):
-            def __init__(self):
-                super(Submodule, self).__init__()
-
             def forward(self, x):
                 return x + 100
 
         class Weak(torch.nn.Module):
             def __init__(self, in_features, out_features):
-                super(Weak, self).__init__()
+                super().__init__()
                 self.weight = torch.nn.Parameter(torch.ones(out_features, in_features))
                 self.bias = torch.nn.Parameter(torch.ones(out_features))
                 self.register_buffer("buffer", torch.ones(out_features))
@@ -13246,7 +13120,7 @@
 
         class Strong(torch.jit.ScriptModule):
             def __init__(self, weak):
-                super(Strong, self).__init__()
+                super().__init__()
                 self.weak = weak
 
             @torch.jit.script_method
@@ -13319,9 +13193,6 @@
         class A(nn.Module):
             __jit_ignored_attributes__ = ["ignored", "ignored_return_val"]
 
-            def __init__(self):
-                super().__init__()
-
             @property
             def ignored(self):
                 raise ValueError("shouldn't be called")
@@ -13694,7 +13565,7 @@
             __constants__ = ['number']
 
             def __init__(self, number):
-                super(Root, self).__init__()
+                super().__init__()
                 self.register_buffer('buffer1', torch.ones(2, 2))
                 self.register_buffer('buffer2', torch.ones(2, 2))
                 self.number = number
@@ -13713,7 +13584,7 @@
             __constants__ = ['number']
 
             def __init__(self, number, submodule):
-                super(M, self).__init__()
+                super().__init__()
                 self.register_buffer('buffer1', torch.ones(2, 2))
                 self.register_buffer('buffer2', torch.ones(2, 2))
                 self.number = number
@@ -13750,7 +13621,7 @@
         # Check simpler module
         class NoArgState(torch.nn.Module):
             def __init__(self):
-                super(NoArgState, self).__init__()
+                super().__init__()
                 self.register_buffer('buffer1', torch.ones(2, 2))
                 self.register_buffer('buffer2', torch.ones(2, 2))
 
@@ -14274,7 +14145,7 @@
             __constants__ = ['val']
 
             def __init__(self, val):
-                super(Mod, self).__init__()
+                super().__init__()
                 self.val = val
 
             def forward(self):
@@ -14359,9 +14230,6 @@
         make_global(Point)
 
         class M(torch.nn.Module):
-            def __init__(self):
-                super(M, self).__init__()
-
             def forward(self, point: Point):
                 return point
 
@@ -14385,9 +14253,6 @@
         make_global(Point)
 
         class M(torch.nn.Module):
-            def __init__(self):
-                super(M, self).__init__()
-
             def forward(self, point: Point):
                 return point
 
@@ -14412,9 +14277,6 @@
         make_global(Point)
 
         class M(torch.nn.Module):
-            def __init__(self):
-                super(M, self).__init__()
-
             def forward(self, point: Point):
                 return point
 
@@ -14439,9 +14301,6 @@
         make_global(Point)
 
         class M(torch.nn.Module):
-            def __init__(self):
-                super(M, self).__init__()
-
             def forward(self, point: Point):
                 return point
 
@@ -14706,9 +14565,6 @@
             torch.jit.script(null_overload_driver)
 
         class OverloadMisuse(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             @torch.jit._overload_method
             def forward(self, x: int):
                 pass
@@ -14767,9 +14623,6 @@
 
     def test_method_overloading(self):
         class Over(torch.nn.Module):
-            def __init__(self):
-                super(Over, self).__init__()
-
             @torch.jit._overload_method  # noqa: F811
             def forward(self, x):  # noqa: F811
                 # type: (Tuple[Tensor, Tensor]) -> Tensor
@@ -14788,7 +14641,7 @@
 
         class S(torch.jit.ScriptModule):
             def __init__(self):
-                super(S, self).__init__()
+                super().__init__()
                 self.weak = Over()
 
             @torch.jit.script_method
@@ -14804,9 +14657,6 @@
         self.assertEqual(over((x)), x + 20)
 
         class Unannotated(torch.nn.Module):
-            def __init__(self):
-                super(Unannotated, self).__init__()
-
             @torch.jit._overload_method  # noqa: F811
             def hello(self, x):  # noqa: F811
                 pass
@@ -14827,9 +14677,6 @@
             torch.jit.script(w)
 
         class CompileOverloadError(torch.nn.Module):
-            def __init__(self):
-                super(CompileOverloadError, self).__init__()
-
             @torch.jit._overload_method  # noqa: F811
             def hello(self, x):  # noqa: F811
                 # type: (str) -> (int)
@@ -14853,9 +14700,6 @@
         # testing overload declared first, then non-overload
         with self.assertRaisesRegex(Exception, "Overloads are not useable when a module"):
             class W3(torch.nn.Module):
-                def __init__(self):
-                    super(W3, self).__init__()
-
                 @torch.jit._overload_method  # noqa: F811
                 def forward(self, x):  # noqa: F811
                     # type: (int) -> int
@@ -14873,9 +14717,6 @@
             b = torch.jit.script(a)
 
             class W3(torch.nn.Module):
-                def __init__(self):
-                    super(W3, self).__init__()
-
                 def forward(self, x):  # noqa: F811
                     return x + 5 + 10
 
@@ -14884,9 +14725,6 @@
 
         # testing non-overload declared first, then overload
         class W2(torch.nn.Module):
-            def __init__(self):
-                super(W2, self).__init__()
-
             def hello(self, x1, x2):
                 return x1 + x2
 
@@ -14897,9 +14735,6 @@
         self.assertEqual(a(torch.tensor(1)), torch.tensor(2))
 
         class W2(torch.nn.Module):
-            def __init__(self):
-                super(W2, self).__init__()
-
             @torch.jit._overload_method  # noqa: F811
             def hello(self, x):  # noqa: F811
                 pass
@@ -14936,7 +14771,7 @@
     def test_nn_LSTM_with_layers(self):
         class M(torch.jit.ScriptModule):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.rnn = nn.LSTM(2, 3, 2, dropout=0)
 
             @torch.jit.script_method
@@ -14945,7 +14780,7 @@
 
         class Eager(torch.nn.Module):
             def __init__(self):
-                super(Eager, self).__init__()
+                super().__init__()
                 self.rnn = nn.LSTM(2, 3, 2, dropout=0)
 
             def forward(self, x, lengths, h0, c0):
@@ -14962,7 +14797,7 @@
 
         class S(torch.jit.ScriptModule):
             def __init__(self):
-                super(S, self).__init__()
+                super().__init__()
                 self.x = torch.nn.LSTM(5, 5)
 
             @torch.jit.script_method
@@ -14980,7 +14815,7 @@
 
         class SeqLengthGRU(torch.jit.ScriptModule):
             def __init__(self):
-                super(SeqLengthGRU, self).__init__()
+                super().__init__()
                 self.x = torch.nn.GRU(5, 5)
 
             @torch.jit.script_method
@@ -14989,7 +14824,7 @@
 
         class TensorGRU(torch.jit.ScriptModule):
             def __init__(self):
-                super(TensorGRU, self).__init__()
+                super().__init__()
                 self.x = torch.nn.GRU(5, 5)
 
             @torch.jit.script_method
@@ -15099,7 +14934,7 @@
 
         class MyModule(torch.jit.ScriptModule):
             def __init__(self, embed_dim, num_heads):
-                super(MyModule, self).__init__()
+                super().__init__()
                 sample_q = torch.randn(3, 2, embed_dim)
                 sample_kv = torch.randn(3, 2, embed_dim)
                 attention = nn.MultiheadAttention(embed_dim, num_heads)
@@ -15135,7 +14970,7 @@
 
         class MyModule(torch.jit.ScriptModule):
             def __init__(self, transformer, sample_q, sample_kv):
-                super(MyModule, self).__init__()
+                super().__init__()
                 transformer.eval()
 
                 self.mod = torch.jit.trace(transformer,
@@ -15184,7 +15019,7 @@
     def test_weak_cuda(self):
         class M(torch.jit.ScriptModule):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.lstm = torch.nn.LSTM(5, 5)
                 self.lstm.cuda()
 
@@ -15201,7 +15036,7 @@
         with warnings.catch_warnings(record=True) as warns:
             class M(torch.jit.ScriptModule):
                 def __init__(self):
-                    super(M, self).__init__()
+                    super().__init__()
                     tensor = torch.zeros(1, requires_grad=False)
                     self.register_buffer('some_state', torch.nn.Parameter(tensor))
 
@@ -15228,9 +15063,6 @@
 
     def test_ignored_as_value(self):
         class Model(nn.Module):
-            def __init__(self):
-                super(Model, self).__init__()
-
             @torch.jit.unused
             def tuple_ignored(self, x):
                 # type: (Tensor) -> Tuple[Tensor, Tensor]
@@ -15263,9 +15095,6 @@
 
     def test_module_error(self):
         class MyModule(torch.nn.Module):
-            def __init__(self):
-                super(MyModule, self).__init__()
-
             def forward(self, foo):
                 return foo
 
@@ -15286,7 +15115,7 @@
     def test_module_attrs(self):
         class M(torch.jit.ScriptModule):
             def __init__(self, table):
-                super(M, self).__init__()
+                super().__init__()
                 self.table = torch.jit.Attribute(table, Dict[str, torch.Tensor])
                 self.x = torch.nn.Parameter(torch.tensor([100.0]))
 
@@ -15304,7 +15133,7 @@
     def test_module_none_attrs(self):
         class MyMod(torch.jit.ScriptModule):
             def __init__(self):
-                super(MyMod, self).__init__()
+                super().__init__()
                 self.optional_value = None
 
             @torch.jit.script_method
@@ -15350,7 +15179,7 @@
 
         class M(torch.jit.ScriptModule):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 for name, value, the_type in tester.get_pickle_values():
                     setattr(self, name, torch.jit.Attribute(value, the_type))
 
@@ -15390,7 +15219,7 @@
 
         class M(torch.jit.ScriptModule):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 for name, value, the_type in tester.get_pickle_values():
                     setattr(self, "_" + name, torch.jit.Attribute(value, the_type))
 
@@ -15469,7 +15298,7 @@
     def test_submodule_attribute_serialization(self):
         class S(torch.jit.ScriptModule):
             def __init__(self, list_data):
-                super(S, self).__init__()
+                super().__init__()
                 self.table = torch.jit.Attribute({"I": "am", "a test": "test"}, Dict[str, str])
                 self.list = torch.jit.Attribute(list_data, List[Tuple[int, int]])
 
@@ -15479,7 +15308,7 @@
 
         class M(torch.jit.ScriptModule):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.table = torch.jit.Attribute({"this": "is", "a different": "dict"}, Dict[str, str])
                 self.tensor = torch.jit.Attribute(torch.randn(2, 2), torch.Tensor)
                 self.s1 = S([(1, 2)])
@@ -15496,7 +15325,7 @@
     def test_serialization_big_ints(self):
         class M(torch.jit.ScriptModule):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.int32_max = torch.jit.Attribute(2**31 - 1, int)
                 self.int32_min = torch.jit.Attribute(-2**31, int)
                 self.uint32_max = torch.jit.Attribute(2**32, int)
@@ -15528,7 +15357,7 @@
     def test_serialization_sharing(self):
         class M(torch.jit.ScriptModule):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.list = torch.jit.Attribute([], List[str])
 
             @torch.jit.script_method
@@ -15585,7 +15414,7 @@
     def test_dtype_attr(self):
         class Foo(torch.nn.Module):
             def __init__(self):
-                super(Foo, self).__init__()
+                super().__init__()
                 self.dtype = torch.zeros([]).dtype
 
             def forward(self):
@@ -15598,7 +15427,7 @@
     def test_named_buffers_are_iterable(self):
         class MyMod(torch.nn.Module):
             def __init__(self):
-                super(MyMod, self).__init__()
+                super().__init__()
                 self.mod = (torch.nn.ReLU())
                 self.mod2 = (torch.nn.ReLU())
                 self.mod3 = torch.nn.Sequential(torch.nn.Sequential(torch.nn.ReLU()))
@@ -15637,7 +15466,7 @@
     def test_static_if_prop(self):
         class MaybeHasAttr(torch.nn.Module):
             def __init__(self, add_attr):
-                super(MaybeHasAttr, self).__init__()
+                super().__init__()
                 if add_attr:
                     self.maybe_attr = 1
 
@@ -15649,7 +15478,7 @@
 
         class MaybeHasAttr2(torch.nn.Module):
             def __init__(self, add_attr):
-                super(MaybeHasAttr2, self).__init__()
+                super().__init__()
                 if add_attr:
                     self.maybe_attr = 1
 
@@ -15724,7 +15553,7 @@
                 __constants__ = ['fname']
 
                 def __init__(self, tensor):
-                    super(M, self).__init__()
+                    super().__init__()
                     self.fname = fname
                     self.tensor = torch.nn.Parameter(tensor)
 
@@ -15748,7 +15577,7 @@
                 __constants__ = ['fname']
 
                 def __init__(self, tensor):
-                    super(M, self).__init__()
+                    super().__init__()
                     self.fname = fname
                     self.tensor = torch.nn.Parameter(tensor)
 
@@ -15806,7 +15635,7 @@
     def test_get_set_state_with_tensors(self):
         class M(torch.nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.tensor = torch.randn(2, 2)
 
             @torch.jit.export
@@ -15940,7 +15769,7 @@
                          b   # type: int
                          ):
                 # type: (...) -> None
-                super(M, self).__init__()
+                super().__init__()
                 self.a = a  # type: int
                 self.b = b  # type: int
 
@@ -15955,9 +15784,6 @@
 
     def test_module_method_reassignment(self):
         class Foo(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def _forward(self, x):
                 return x
 
@@ -15979,9 +15805,6 @@
 
     def test_save_load_attr_error(self):
         class Inner(nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 return x
 
@@ -16061,9 +15884,6 @@
     def test_signed_float_zero(self):
 
         class MyModule(torch.nn.Module):
-            def __init__(self):
-                super(MyModule, self).__init__()
-
             def forward(self, x):
                 return torch.div(x, -0.)
 
@@ -16072,9 +15892,6 @@
 
     def test_index_with_tuple(self):
         class MyModule(torch.nn.Module):
-            def __init__(self):
-                super(MyModule, self).__init__()
-
             def forward(self, x):
                 return x[(1,)]
 
@@ -16082,9 +15899,6 @@
 
     def test_context_manager(self):
         class MyModule(torch.nn.Module):
-            def __init__(self):
-                super(MyModule, self).__init__()
-
             def forward(self, x, y):
                 p = x + y
                 q = p + 2.0
@@ -16273,7 +16087,7 @@
                 __constants__ = submodule_constants
 
                 def __init__(self):
-                    super(TheModule, self).__init__()
+                    super().__init__()
                     self.submodule = nn_module(*constructor_args)
 
             def make_module(script):
diff --git a/test/test_jit_autocast.py b/test/test_jit_autocast.py
index d311eb6..6fbb04b 100644
--- a/test/test_jit_autocast.py
+++ b/test/test_jit_autocast.py
@@ -664,9 +664,6 @@
     @unittest.skipIf(not TEST_CUDA, "No cuda")
     def test_jit_freeze_autocast_basic(self):
         class TestModule(torch.nn.Module):
-            def __init__(self):
-                super(TestModule, self).__init__()
-
             def forward(self, x, y):
                 with torch.cuda.amp.autocast():
                     return torch.mm(x, y)
@@ -691,7 +688,7 @@
     def test_jit_freeze_autocast_constants(self):
         class TestModule(torch.nn.Module):
             def __init__(self):
-                super(TestModule, self).__init__()
+                super().__init__()
                 self.x = torch.rand((3, 4), dtype=torch.float).cuda()
 
             def forward(self, y):
@@ -753,7 +750,7 @@
 
 class convbn(torch.nn.Module):
     def __init__(self, bias_enabled=True):
-        super(convbn, self).__init__()
+        super().__init__()
         self.conv = torch.nn.Conv2d(3, 64, 7, stride=2, bias=bias_enabled)
         self.bn = torch.nn.BatchNorm2d(64)
 
@@ -762,7 +759,7 @@
 
 class TestJitTraceAutocast(JitTestCase):
     def setUp(self):
-        super(TestJitTraceAutocast, self).setUp()
+        super().setUp()
         self.previous_default_dtype = torch.get_default_dtype()
         torch.set_default_dtype(torch.float32)
         self.models = [MnistNet(),
@@ -776,7 +773,7 @@
     def tearDown(self):
         torch._C._jit_set_autocast_mode(self.previous_jit_autocast_pass)
         torch.set_default_dtype(self.previous_default_dtype)
-        super(TestJitTraceAutocast, self).tearDown()
+        super().tearDown()
 
     def test_generate_autocast_jit_trace_model(self):
         def test_generate_autocast_jit_trace_model(model, x):
@@ -821,11 +818,9 @@
 
     def test_cat_promote(self):
         class TestModel(torch.nn.Module):
-            def __init__(self):
-                super(TestModel, self).__init__()
-
             def forward(self, a, b):
                 return torch.cat([a, b], 0)
+
         with torch.jit.fuser("none"):
             # In this testcase, we will check whether cat has done the promotion in AMP with mixed dtype inputs.
             # To avoid the fusion group from TE, we will disable the fuser here.
diff --git a/test/test_jit_cuda_fuser.py b/test/test_jit_cuda_fuser.py
index 8f9b467..310bb29 100644
--- a/test/test_jit_cuda_fuser.py
+++ b/test/test_jit_cuda_fuser.py
@@ -117,7 +117,7 @@
 class TestCudaFuser(JitTestCase):
     def assertEqual(self, *args, **kwargs):
         kwargs["exact_layout"] = True
-        super(JitTestCase, self).assertEqual(*args, **kwargs)
+        super().assertEqual(*args, **kwargs)
 
     def _getSubgraphInFusion(self, graph):
         num_node = 0
@@ -137,7 +137,7 @@
         return ret[1]
 
     def setUp(self):
-        super(TestCudaFuser, self).setUp()
+        super().setUp()
 
         self.skip_node_list = []
         disabled_ops = ("aten::batch_norm",
@@ -191,7 +191,7 @@
 
         if(RUN_NVFUSER):
             self.cuda_fuser_options.restore()
-        super(TestCudaFuser, self).tearDown()
+        super().tearDown()
 
     def _run_helper(self, jit_op, op, *args, check_stride=False, num_fusion=1, check_runs=1):
         seed = 123
@@ -1432,7 +1432,7 @@
             __constants__ = ['reduction_axis', 'keepdim']
 
             def __init__(self):
-                super(MyReduction, self).__init__()
+                super().__init__()
                 self.reduction_axis = reduction_axis
                 self.keepdim = keepdim
 
@@ -1577,7 +1577,7 @@
             __constants__ = ['norm_shape']
 
             def __init__(self, elementwise_affine=True):
-                super(MyLayerNorm, self).__init__()
+                super().__init__()
                 self.norm_shape = norm_shape
                 if elementwise_affine:
                     self.weight = torch.randn(norm_shape, dtype=dtype, device=device)
@@ -1660,18 +1660,12 @@
                      *,
                      layer_dtype=torch.float32):
         class MyBatchNorm(torch.nn.Module):
-            def __init__(self):
-                super(MyBatchNorm, self).__init__()
-
             def forward(self, x: torch.Tensor, r_mean: torch.Tensor, r_var: torch.Tensor):
                 o = torch.nn.functional.batch_norm(x, r_mean, r_var, training=True)
                 o = torch.relu(o)
                 return o
 
         class MyInstanceNorm(torch.nn.Module):
-            def __init__(self):
-                super(MyInstanceNorm, self).__init__()
-
             def forward(self, x: torch.Tensor, r_mean: torch.Tensor, r_var: torch.Tensor):
                 o = torch.nn.functional.instance_norm(x, r_mean, r_var, use_input_stats=True)
                 o = torch.relu(o)
@@ -1824,7 +1818,7 @@
             __constants__ = ['reduction_axis']
 
             def __init__(self):
-                super(MySoftmax, self).__init__()
+                super().__init__()
                 self.reduction_axis = reduction_axis
 
             def forward(self, x: torch.Tensor, y: torch.Tensor):
@@ -1836,7 +1830,7 @@
             __constants__ = ['reduction_axis']
 
             def __init__(self):
-                super(MyLogSoftmax, self).__init__()
+                super().__init__()
                 self.reduction_axis = reduction_axis
 
             def forward(self, x: torch.Tensor, y: torch.Tensor):
@@ -3252,7 +3246,7 @@
 
         class MyModule(torch.nn.Module):
             def __init__(self, num_features=10, affine=True, track_running_stats=True):
-                super(MyModule, self).__init__()
+                super().__init__()
                 self.bn = torch.nn.BatchNorm2d(num_features,
                                                1e-5,
                                                affine=affine,
@@ -3510,7 +3504,7 @@
     def test_remove_output_used_only_in_dtype(self):
         class MyModule(torch.nn.Module):
             def __init__(self, num_features=4):
-                super(MyModule, self).__init__()
+                super().__init__()
                 self.bn0 = torch.nn.BatchNorm2d(num_features)
                 self.bn1 = torch.nn.BatchNorm2d(num_features)
 
@@ -3543,7 +3537,7 @@
     def test_fix_shape_expression_bn(self):
         class MyModule(torch.nn.Module):
             def __init__(self, num_features=4):
-                super(MyModule, self).__init__()
+                super().__init__()
                 self.bn = torch.nn.BatchNorm2d(num_features)
 
             def forward(self, x, y):
@@ -3651,7 +3645,7 @@
     def _bias_view_relu_helper(self, shape, output_shape, dtype, device, error):
         class BiasViewRelu(torch.nn.Module):
             def __init__(self):
-                super(BiasViewRelu, self).__init__()
+                super().__init__()
                 self.bias = torch.nn.Parameter(torch.randn(shape, dtype=dtype, device=device), requires_grad=False)
                 with torch.no_grad():
                     self.bias.fill_(10)
@@ -3690,7 +3684,7 @@
     def _alias_bias_view_relu_helper(self, shape, output_shape, dtype, device, error):
         class BiasViewRelu(torch.nn.Module):
             def __init__(self):
-                super(BiasViewRelu, self).__init__()
+                super().__init__()
                 self.bias = torch.nn.Parameter(torch.randn(shape, dtype=dtype, device=device), requires_grad=False)
                 with torch.no_grad():
                     self.bias.fill_(10)
@@ -3840,7 +3834,7 @@
     def _bias_flatten_relu_helper(self, shape, start_dim, end_dim, dtype, device, error):
         class BiasFlattenRelu(torch.nn.Module):
             def __init__(self):
-                super(BiasFlattenRelu, self).__init__()
+                super().__init__()
                 self.bias = torch.nn.Parameter(torch.randn(shape, dtype=dtype, device=device), requires_grad=False)
                 with torch.no_grad():
                     self.bias.fill_(10)
@@ -3860,7 +3854,7 @@
     def _alias_bias_flatten_relu_helper(self, shape, start_dim, end_dim, dtype, device, error):
         class BiasFlattenRelu(torch.nn.Module):
             def __init__(self):
-                super(BiasFlattenRelu, self).__init__()
+                super().__init__()
                 self.bias = torch.nn.Parameter(torch.randn(shape, dtype=dtype, device=device), requires_grad=False)
                 with torch.no_grad():
                     self.bias.fill_(10)
@@ -3938,7 +3932,7 @@
         # modeled after LTC linear layer
         class LTC(torch.nn.Module):
             def __init__(self):
-                super(LTC, self).__init__()
+                super().__init__()
                 self.weight = torch.nn.Parameter(torch.randn([1024, 1024], dtype=dtype, device=device), requires_grad=False)
                 self.bias = torch.nn.Parameter(torch.randn([1, 1024], dtype=dtype, device=device), requires_grad=False)
 
@@ -3975,9 +3969,6 @@
 
     def _bias_squeeze_relu_helper(self, shape, dtype, device, error):
         class BiasSqueezeRelu(torch.nn.Module):
-            def __init__(self):
-                super(BiasSqueezeRelu, self).__init__()
-
             def forward(self, inputs: torch.Tensor, bias: torch.Tensor):
                 o = inputs + bias
                 o = torch.squeeze(o)
@@ -4001,9 +3992,6 @@
 
     def _alias_bias_squeeze_relu_helper(self, shape, dtype, device, error):
         class BiasSqueezeRelu(torch.nn.Module):
-            def __init__(self):
-                super(BiasSqueezeRelu, self).__init__()
-
             def forward(self, inputs: torch.Tensor, bias: torch.Tensor):
                 o = torch.squeeze(inputs)
                 inputs.add_(bias)
@@ -4060,9 +4048,6 @@
 
     def _bias_unsqueeze_relu_helper(self, shape, dtype, device, error):
         class BiasUnsqueezeRelu(torch.nn.Module):
-            def __init__(self):
-                super(BiasUnsqueezeRelu, self).__init__()
-
             def forward(self, inputs: torch.Tensor, bias: torch.Tensor):
                 o = inputs + bias
                 o = torch.unsqueeze(o, 0)
@@ -4086,9 +4071,6 @@
 
     def _alias_bias_unsqueeze_relu_helper(self, shape, dtype, device, error):
         class BiasUnsqueezeRelu(torch.nn.Module):
-            def __init__(self):
-                super(BiasUnsqueezeRelu, self).__init__()
-
             def forward(self, inputs : torch.Tensor, bias : torch.Tensor):
                 o = torch.unsqueeze(inputs, 0)
                 inputs.add_(bias)
@@ -4760,9 +4742,6 @@
         old_guard = torch._C._jit_set_nvfuser_guard_mode(True)
 
         class ConvModule(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 return x.sin().sigmoid()
 
@@ -4931,9 +4910,6 @@
                      "Requires fusion optimization pass to be effective")
     def test_issue_1785(self):
         class Fusion(torch.nn.Module):
-            def __init__(self):
-                super(Fusion, self).__init__()
-
             def forward(self, x, a, b):
                 out = torch.mul(x.unsqueeze(-1), a)
                 out = out + b
diff --git a/test/test_jit_disabled.py b/test/test_jit_disabled.py
index 72d4146..6bb694b 100644
--- a/test/test_jit_disabled.py
+++ b/test/test_jit_disabled.py
@@ -46,9 +46,10 @@
     def test_attribute(self):
         _program_string = """
 import torch
+
 class Foo(torch.jit.ScriptModule):
     def __init__(self, x):
-        super(Foo, self).__init__()
+        super().__init__()
         self.x = torch.jit.Attribute(x, torch.Tensor)
 
     def forward(self, input):
@@ -64,8 +65,6 @@
 import torch
 
 class AModule(torch.jit.ScriptModule):
-    def __init__(self):
-        super(AModule, self).__init__()
     @torch.jit.script_method
     def forward(self, input):
         pass
@@ -80,9 +79,6 @@
 import torch
 
 class AModule(torch.nn.Module):
-    def __init__(self):
-        super(AModule, self).__init__()
-
     def forward(self, input):
         pass
 
diff --git a/test/test_jit_fuser.py b/test/test_jit_fuser.py
index ebdd2ee..ef3843d 100644
--- a/test/test_jit_fuser.py
+++ b/test/test_jit_fuser.py
@@ -512,7 +512,7 @@
     def test_fuse_decompose_normalization(self):
         class ResLike(torch.jit.ScriptModule):
             def __init__(self, norm_module):
-                super(ResLike, self).__init__()
+                super().__init__()
                 self.nm = norm_module
 
             @torch.jit.script_method
@@ -823,7 +823,7 @@
             __constants__ = ['d']
 
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.d = torch.device('cuda')
 
             @torch.jit.script_method
diff --git a/test/test_jit_fuser_te.py b/test/test_jit_fuser_te.py
index 08e2911..711a44b 100644
--- a/test/test_jit_fuser_te.py
+++ b/test/test_jit_fuser_te.py
@@ -969,7 +969,7 @@
             __constants__ = ['d']
 
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.d = torch.device('cuda')
 
             @torch.jit.script_method
@@ -1236,7 +1236,7 @@
 
         class MyMod(torch.nn.Module):
             def __init__(self, dtype):
-                super(MyMod, self).__init__()
+                super().__init__()
                 self.dtype = dtype
 
             def forward(self, x):
diff --git a/test/test_jit_llga_fuser.py b/test/test_jit_llga_fuser.py
index 12bd955..16e1bc4 100644
--- a/test/test_jit_llga_fuser.py
+++ b/test/test_jit_llga_fuser.py
@@ -174,7 +174,7 @@
     def test_eltwise(self, dtype):
         class M(nn.Module):
             def __init__(self, eltwise_fn):
-                super(M, self).__init__()
+                super().__init__()
                 self.eltwise = eltwise_fn
 
             def forward(self, x):
@@ -234,9 +234,6 @@
     @dtypes(torch.float32, torch.bfloat16)
     def test_variable_kernel_avg_pool2d(self, dtype):
         class M(nn.Module):
-            def __init__(self):
-                super(M, self).__init__()
-
             def forward(self, x):
                 x = F.avg_pool2d(x, kernel_size=(x.size(2), x.size(3)), padding=0, count_include_pad=False)
                 return x
@@ -387,7 +384,7 @@
     def test_conv2d_eltwise(self, dtype):
         class M(nn.Module):
             def __init__(self, eltwise_fn):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv1 = nn.Conv2d(32, 32, 3, padding=1, bias=True)
                 self.conv2 = nn.Conv2d(32, 32, 3, padding=1, bias=False)
                 self.eltwise = eltwise_fn
@@ -419,7 +416,7 @@
     def test_conv2d_silu(self, dtype):
         class M(nn.Module):
             def __init__(self, inplace):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv1 = nn.Conv2d(32, 32, 3, padding=1, bias=True)
                 self.conv2 = nn.Conv2d(32, 32, 3, padding=1, bias=True)
                 self.eltwise = nn.SiLU(inplace=inplace)
@@ -451,7 +448,7 @@
     def test_ensure_tensor_is_rewrapped(self, dtype):
         class M(nn.Module):
             def __init__(self, eltwise_fn):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv1 = nn.Conv2d(32, 32, 3, padding=1, bias=True)
                 self.conv2 = nn.Conv2d(32, 32, 3, padding=1, bias=True)
                 self.conv3 = nn.Conv2d(32, 32, 3, padding=1, bias=True)
@@ -490,7 +487,7 @@
     def test_conv2d_clamp(self, dtype):
         class M(nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv1 = nn.Conv2d(32, 32, 3, padding=1, bias=True)
                 self.conv2 = nn.Conv2d(32, 32, 3, padding=1, bias=True)
                 self.conv3 = nn.Conv2d(32, 32, 3, padding=1, bias=True)
@@ -523,7 +520,7 @@
     def test_conv2d_bn(self, dtype):
         class M(nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv1 = nn.Conv2d(32, 32, 3, padding=1, bias=True)
                 self.bn1 = nn.BatchNorm2d(32)
 
@@ -545,7 +542,7 @@
     def test_conv2d_bn_relu(self, dtype):
         class M(nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv1 = nn.Conv2d(32, 32, 3, padding=1, bias=True)
                 self.bn1 = nn.BatchNorm2d(32)
 
@@ -569,7 +566,7 @@
     def test_bn2d_eltwise(self, dtype):
         class M(nn.Module):
             def __init__(self, eltwise_fn):
-                super(M, self).__init__()
+                super().__init__()
                 self.eltwise = eltwise_fn
                 self.bn = nn.BatchNorm2d(32)
 
@@ -591,7 +588,7 @@
     def test_linear_eltwise(self, dtype):
         class M(nn.Module):
             def __init__(self, eltwise_fn, bias):
-                super(M, self).__init__()
+                super().__init__()
                 self.linear = nn.Linear(28, 64, bias)
                 self.eltwise = eltwise_fn
 
@@ -616,7 +613,7 @@
     def test_conv2d_sum(self, dtype):
         class M(nn.Module):
             def __init__(self, bias=False):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv1 = nn.Conv2d(32, 32, 3, padding=1, bias=bias)
                 self.bn1 = nn.BatchNorm2d(32)
                 self.conv2 = nn.Conv2d(32, 32, 3, padding=1, bias=bias)
@@ -649,7 +646,7 @@
     def test_wildcard(self, dtype):
         class M(nn.Module):
             def __init__(self):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv1 = nn.Conv2d(32, 32, 3, padding=1, bias=True)
                 self.eltwise = nn.ReLU()
 
@@ -678,9 +675,6 @@
     @dtypes(torch.int32)
     def test_wildcard_unsupported_dtype(self, dtype):
         class M(nn.Module):
-            def __init__(self):
-                super(M, self).__init__()
-
             def forward(self, x):
                 y = x // 2
                 return y
@@ -703,7 +697,7 @@
     def test_rewrap_tensor_input_to_pytorch(self, dtype):
         class M(nn.Module):
             def __init__(self, eltwise_fn):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv1 = nn.Conv2d(32, 32, 3, padding=1, bias=True)
                 self.conv2 = nn.Conv2d(32, 32, 3, padding=1, bias=True)
                 self.eltwise = eltwise_fn
diff --git a/test/test_metal.py b/test/test_metal.py
index 35b3ed4..6b9b29e 100644
--- a/test/test_metal.py
+++ b/test/test_metal.py
@@ -64,7 +64,7 @@
 
         class Conv2D(torch.nn.Module):
             def __init__(self):
-                super(Conv2D, self).__init__()
+                super().__init__()
                 self.weight = torch.nn.Parameter(torch.rand(conv_weight_shape), requires_grad=False)
                 self.bias = torch.nn.Parameter(torch.rand(conv_bias_shape), requires_grad=False)
                 self.strides = strides
@@ -84,7 +84,7 @@
 
         class Conv2DRelu(torch.nn.Module):
             def __init__(self):
-                super(Conv2DRelu, self).__init__()
+                super().__init__()
                 self.weight = torch.nn.Parameter(torch.rand(conv_weight_shape), requires_grad=False)
                 self.bias = torch.nn.Parameter(torch.rand(conv_bias_shape), requires_grad=False)
                 self.strides = strides
@@ -123,7 +123,7 @@
 
         class Conv2DHardtanh(torch.nn.Module):
             def __init__(self):
-                super(Conv2DHardtanh, self).__init__()
+                super().__init__()
                 self.weight = torch.nn.Parameter(torch.rand(conv_weight_shape), requires_grad=False)
                 self.bias = torch.nn.Parameter(torch.rand(conv_bias_shape), requires_grad=False)
                 self.strides = strides
diff --git a/test/test_mkldnn_fusion.py b/test/test_mkldnn_fusion.py
index 4a176ae..fad3e77 100644
--- a/test/test_mkldnn_fusion.py
+++ b/test/test_mkldnn_fusion.py
@@ -62,7 +62,7 @@
     def test_single_conv(self):
         class M(nn.Module):
             def __init__(self, in_channels, out_channels, bias, **kwargs):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(in_channels, out_channels, bias=bias, **kwargs)
 
             def forward(self, x):
@@ -101,7 +101,7 @@
     def test_conv_unary_fusion_nnc(self):
         class M(nn.Module):
             def __init__(self, unary_fn, in_channels, out_channels, bias, **kwargs):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(in_channels, out_channels, bias=bias, **kwargs)
                 self.unary = unary_fn
 
@@ -130,7 +130,7 @@
     def test_unsupported_conv(self):
         class M(nn.Module):
             def __init__(self, m, in_channels, out_channels, bias, **kwargs):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv = m(in_channels, out_channels, bias=bias, **kwargs)
 
             def forward(self, x):
@@ -193,7 +193,7 @@
     def test_linear_unary_fusion_ops(self):
         class M(nn.Module):
             def __init__(self, unary_fn, in_channels, out_channels, bias, **kwargs):
-                super(M, self).__init__()
+                super().__init__()
                 self.linear = torch.nn.Linear(
                     in_channels, out_channels, bias=bias, **kwargs
                 )
@@ -223,7 +223,7 @@
     def test_conv_unary_fusion_ops(self):
         class M(nn.Module):
             def __init__(self, unary_fn, dim, in_channels, out_channels, dilation, groups, bias, **kwargs):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv = CONV_MODULES[dim](in_channels, out_channels, dilation=dilation, groups=groups, bias=bias, **kwargs)
                 self.unary = unary_fn
 
@@ -259,7 +259,7 @@
     def test_conv_binary_fusion_ops(self):
         class M(nn.Module):
             def __init__(self, binary_fn, dim, in_channels, out_channels, dilation, groups, bias, **kwargs):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv = CONV_MODULES[dim](in_channels, out_channels, dilation=dilation, groups=groups, bias=bias, **kwargs)
                 self.binary = binary_fn
 
@@ -307,7 +307,7 @@
     def test_linear_binary_fusion_ops(self):
         class M(nn.Module):
             def __init__(self, binary_fn, in_channels, out_channels, bias, **kwargs):
-                super(M, self).__init__()
+                super().__init__()
                 self.linear = torch.nn.Linear(
                     in_channels, out_channels, bias=bias, **kwargs
                 )
@@ -336,7 +336,7 @@
     def test_conv_transpose_unary_fusion_ops(self):
         class M(nn.Module):
             def __init__(self, unary_fn, dim, in_channels, out_channels, kernel_size, **kwargs):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv_transpose = CONV_TRANSPOSE_MODULES[dim](in_channels, out_channels, kernel_size, **kwargs)
                 self.unary = unary_fn
 
diff --git a/test/test_mobile_optimizer.py b/test/test_mobile_optimizer.py
index e77fce3..a6c0a06 100644
--- a/test/test_mobile_optimizer.py
+++ b/test/test_mobile_optimizer.py
@@ -54,7 +54,7 @@
 
         class MyTestModule(torch.nn.Module):
             def __init__(self):
-                super(MyTestModule, self).__init__()
+                super().__init__()
                 self.conv_weight = torch.nn.Parameter(torch.rand(conv_weight_shape))
                 self.conv_bias = torch.nn.Parameter(torch.rand((conv_bias_shape)))
                 self.linear_weight = torch.nn.Parameter(torch.rand(linear_weight_shape))
@@ -86,7 +86,7 @@
 
         class BNTestModule(torch.nn.Module):
             def __init__(self):
-                super(BNTestModule, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(1, 20, 5, 1)
                 self.bn = torch.nn.BatchNorm2d(num_features=20)
                 self.bn.eps = 0.0023
@@ -167,7 +167,7 @@
 
         class MyMobileOptimizedTagTest(torch.nn.Module):
             def __init__(self):
-                super(MyMobileOptimizedTagTest, self).__init__()
+                super().__init__()
                 self.linear_weight = torch.nn.Parameter(torch.rand(linear_weight_shape))
                 self.linear_bias = torch.nn.Parameter(torch.rand((weight_output_dim)))
 
@@ -184,7 +184,7 @@
 
         class MyPreserveMethodsTest(torch.nn.Module):
             def __init__(self):
-                super(MyPreserveMethodsTest, self).__init__()
+                super().__init__()
                 self.linear_weight = torch.nn.Parameter(torch.rand(linear_weight_shape))
                 self.linear_bias = torch.nn.Parameter(torch.rand((weight_output_dim)))
 
@@ -208,7 +208,7 @@
 
         class OptimizeNoForwardTest(torch.nn.Module):
             def __init__(self):
-                super(OptimizeNoForwardTest, self).__init__()
+                super().__init__()
                 self.l = nn.Linear(10, 100)
                 self.l2 = nn.Linear(100, 1)
                 self.d = nn.Dropout(p=0.2)
@@ -234,7 +234,7 @@
 
         class BNTestNoForwardModule(torch.nn.Module):
             def __init__(self):
-                super(BNTestNoForwardModule, self).__init__()
+                super().__init__()
                 self.conv = torch.nn.Conv2d(1, 20, 5, 1)
                 self.bn = torch.nn.BatchNorm2d(num_features=20)
                 self.bn.eps = 0.0023
@@ -273,7 +273,7 @@
 
         class Child(nn.Module):
             def __init__(self):
-                super(Child, self).__init__()
+                super().__init__()
                 self.conv2 = nn.Conv2d(1, 1, 1)
 
             def forward(self, x):
@@ -282,7 +282,7 @@
 
         class Parent(nn.Module):
             def __init__(self):
-                super(Parent, self).__init__()
+                super().__init__()
                 self.quant = torch.ao.quantization.QuantStub()
                 self.conv1 = nn.Conv2d(1, 1, 1)
                 self.child = Child()
@@ -308,7 +308,7 @@
     def test_generate_mobile_module_lints(self):
         class MyTestModule(torch.nn.Module):
             def __init__(self):
-                super(MyTestModule, self).__init__()
+                super().__init__()
                 self.fc = torch.nn.Linear(4, 4)
                 self.dropout = torch.nn.Dropout(p=0.5)
 
@@ -319,7 +319,7 @@
 
         class MyBNModule(torch.nn.Module):
             def __init__(self):
-                super(MyBNModule, self).__init__()
+                super().__init__()
                 self.bn = torch.nn.BatchNorm2d(4, affine=True)
 
             def forward(self, inputs):
@@ -327,9 +327,6 @@
                 return bn
 
         class MyBundledInputModule(torch.nn.Module):
-            def __init__(self):
-                super(MyBundledInputModule, self).__init__()
-
             def forward(self, inputs):
                 return inputs
 
@@ -359,16 +356,10 @@
     @skipIfNoXNNPACK
     def test_preserve_bundled_inputs_methods(self):
         class MyBundledInputModule(torch.nn.Module):
-            def __init__(self):
-                super(MyBundledInputModule, self).__init__()
-
             def forward(self, inputs):
                 return inputs
 
         class MyIncompleteBundledInputModule(torch.nn.Module):
-            def __init__(self):
-                super(MyIncompleteBundledInputModule, self).__init__()
-
             def forward(self, inputs):
                 return inputs
 
@@ -419,7 +410,7 @@
 
         class Standalone(nn.Module):
             def __init__(self):
-                super(Standalone, self).__init__()
+                super().__init__()
                 self.quant = torch.ao.quantization.QuantStub()
                 self.conv1 = nn.Conv2d(1, 1, 1)
                 self.conv2 = nn.Conv2d(1, 1, 1)
@@ -440,7 +431,7 @@
 
         class Child(nn.Module):
             def __init__(self):
-                super(Child, self).__init__()
+                super().__init__()
                 self.conv1 = nn.Conv2d(1, 1, 1)
 
             def forward(self, x):
@@ -449,7 +440,7 @@
 
         class Parent(nn.Module):
             def __init__(self):
-                super(Parent, self).__init__()
+                super().__init__()
                 self.quant = torch.ao.quantization.QuantStub()
                 self.conv1 = nn.Conv2d(1, 1, 1)
                 self.child = Child()
@@ -521,7 +512,7 @@
     def test_clone_module_with_class(self):
         class MyInnerTestModule(torch.nn.Module):
             def __init__(self):
-                super(MyInnerTestModule, self).__init__()
+                super().__init__()
                 self.pqr = torch.Tensor([10., 20., 30.])
 
             def forward(self, inputs):
@@ -533,7 +524,7 @@
 
         class MyTestModule(torch.nn.Module):
             def __init__(self):
-                super(MyTestModule, self).__init__()
+                super().__init__()
                 self.abc = 23
                 self.pqr = torch.Tensor([1., 2., 3.])
                 self.inner = MyInnerTestModule()
diff --git a/test/test_mps.py b/test/test_mps.py
index 2ee068c..a8d17ba 100644
--- a/test/test_mps.py
+++ b/test/test_mps.py
@@ -6229,13 +6229,13 @@
     def _create_basic_net(self):
         class Layer(nn.Module):
             def __init__(self):
-                super(Layer, self).__init__()
+                super().__init__()
                 self.layer_dummy_param = Parameter(torch.empty(3, 5))
                 self.register_buffer('layer_dummy_buf', torch.zeros(1, 3, 3, 7))
 
         class Net(nn.Module):
             def __init__(self):
-                super(Net, self).__init__()
+                super().__init__()
                 self.l1 = Layer()
                 self.dummy_param = Parameter(torch.empty(3, 5))
                 self.register_buffer('dummy_buf', torch.zeros(7, 3, 3, 1))
diff --git a/test/test_multiprocessing.py b/test/test_multiprocessing.py
index 65a9dc7..5349092 100644
--- a/test/test_multiprocessing.py
+++ b/test/test_multiprocessing.py
@@ -34,7 +34,7 @@
 
 class SubProcess(mp.Process):
     def __init__(self, tensor):
-        super(SubProcess, self).__init__()
+        super().__init__()
         self.tensor = tensor
         self.daemon = True
 
diff --git a/test/test_nn.py b/test/test_nn.py
index 9b85151..fc1d623 100644
--- a/test/test_nn.py
+++ b/test/test_nn.py
@@ -172,12 +172,10 @@
                 self.mixin_init = True
 
         class MyModuleWithMixinBefore(MyMixin, nn.Module):
-            def __init__(self):
-                super().__init__()
+            pass
 
         class MyModuleWithMixinAfter(nn.Module, MyMixin):
-            def __init__(self):
-                super().__init__()
+            pass
 
         self.assertTrue(hasattr(MyModuleWithMixinBefore(), 'mixin_init'))
         self.assertFalse(hasattr(MyModuleWithMixinAfter(), 'mixin_init'))
@@ -197,7 +195,7 @@
     def test_share_memory(self):
         class Net(nn.Module):
             def __init__(self):
-                super(Net, self).__init__()
+                super().__init__()
                 self.p = nn.Parameter(torch.eye(5))
                 self.par = nn.ParameterList()
                 self.par.append(nn.Parameter(torch.randn(10)))
@@ -379,7 +377,7 @@
     def test_call_supports_python_dict_output(self):
         class Net(nn.Module):
             def __init__(self):
-                super(Net, self).__init__()
+                super().__init__()
                 self.l1 = nn.Linear(10, 20)
                 self.register_backward_hook(self.hook)
                 self.check_backward_hook_flag = False
@@ -407,7 +405,7 @@
     def test_train_errors_for_invalid_mode(self):
         class SubclassNet(nn.Module):
             def __init__(self):
-                super(SubclassNet, self).__init__()
+                super().__init__()
                 self.l1 = nn.Linear(2, 2)
 
             def forward(self, inputs):
@@ -480,7 +478,7 @@
     def test_modules(self):
         class Net(nn.Module):
             def __init__(self):
-                super(Net, self).__init__()
+                super().__init__()
                 self.l1 = l
                 self.l2 = l
                 self.param = torch.empty(3, 5)
@@ -493,7 +491,7 @@
     def test_named_modules(self):
         class Net(nn.Module):
             def __init__(self):
-                super(Net, self).__init__()
+                super().__init__()
                 self.l1 = l
                 self.l2 = l
                 self.param = torch.empty(3, 5)
@@ -2472,7 +2470,7 @@
 
         class CustomState(nn.Module):
             def __init__(self):
-                super(CustomState, self).__init__()
+                super().__init__()
                 self.param = torch.nn.Parameter(torch.ones(1))
                 self.sub = torch.nn.Linear(5, 5)
 
@@ -2562,9 +2560,6 @@
     def test_extra_state_missing_set_extra_state(self):
 
         class MyModule(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def get_extra_state(self):
                 return {
                     'foo': 5
@@ -2577,9 +2572,6 @@
     def test_extra_state_missing_get_extra_state(self):
 
         class MyModule(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def set_extra_state(self):
                 pass
 
@@ -2690,7 +2682,7 @@
     def test_container_copy(self):
         class Model(nn.Module):
             def __init__(self):
-                super(Model, self).__init__()
+                super().__init__()
                 self.linear = nn.Linear(4, 5)
 
             def forward(self, input):
@@ -7284,7 +7276,7 @@
 
 class UnpoolingNet(nn.Module):
     def __init__(self, pool, unpool):
-        super(UnpoolingNet, self).__init__()
+        super().__init__()
         self.pool = pool
         self.unpool = unpool
 
@@ -11521,7 +11513,7 @@
     def test_clip_grad_norm_multi_device(self, devices, foreach):
         class TestModel(nn.Module):
             def __init__(self):
-                super(TestModel, self).__init__()
+                super().__init__()
                 self.layer1 = nn.Linear(10, 10)
                 self.layer2 = nn.Linear(10, 10)
 
diff --git a/test/test_nnapi.py b/test/test_nnapi.py
index 60f2c89..ebc066d 100644
--- a/test/test_nnapi.py
+++ b/test/test_nnapi.py
@@ -393,9 +393,6 @@
 
     def test_detach(self):
         class DetachModule(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
             def forward(self, x):
                 y = x.detach()
                 return torch.nn.functional.relu(y)
diff --git a/test/test_optim.py b/test/test_optim.py
index 3ea7b49..3c0e18d 100644
--- a/test/test_optim.py
+++ b/test/test_optim.py
@@ -1792,7 +1792,7 @@
 
 class SchedulerTestNet(torch.nn.Module):
     def __init__(self):
-        super(SchedulerTestNet, self).__init__()
+        super().__init__()
         self.conv1 = torch.nn.Conv2d(1, 1, 1)
         self.conv2 = torch.nn.Conv2d(1, 1, 1)
 
@@ -1818,7 +1818,7 @@
     exact_dtype = True
 
     def setUp(self):
-        super(TestLRScheduler, self).setUp()
+        super().setUp()
         self.net = SchedulerTestNet()
         self.opt = SGD(
             [
@@ -3967,7 +3967,7 @@
 
 class SWATestDNN(torch.nn.Module):
     def __init__(self, input_features):
-        super(SWATestDNN, self).__init__()
+        super().__init__()
         self.n_features = 100
         self.fc1 = torch.nn.Linear(input_features, self.n_features)
         self.bn = torch.nn.BatchNorm1d(self.n_features)
@@ -3983,7 +3983,7 @@
 
 class SWATestCNN(torch.nn.Module):
     def __init__(self, input_channels):
-        super(SWATestCNN, self).__init__()
+        super().__init__()
         self.n_features = 10
         self.conv1 = torch.nn.Conv2d(
             input_channels, self.n_features, kernel_size=3, padding=1
diff --git a/test/test_serialization.py b/test/test_serialization.py
index 2a19af1..d03bc88 100644
--- a/test/test_serialization.py
+++ b/test/test_serialization.py
@@ -873,7 +873,7 @@
 
     def run(self, *args, **kwargs):
         with serialization_method(use_zip=False):
-            return super(TestOldSerialization, self).run(*args, **kwargs)
+            return super().run(*args, **kwargs)
 
 
 class TestSerialization(TestCase, SerializationMixin):
@@ -1012,7 +1012,7 @@
 
     def run(self, *args, **kwargs):
         with serialization_method(use_zip=True):
-            return super(TestSerialization, self).run(*args, **kwargs)
+            return super().run(*args, **kwargs)
 
 
 class TestWrapperSubclass(torch.Tensor):
diff --git a/test/test_sparse.py b/test/test_sparse.py
index c466dd2..bd37c21 100644
--- a/test/test_sparse.py
+++ b/test/test_sparse.py
@@ -61,7 +61,7 @@
 
 class CrossRefSparseFakeMode(torch._subclasses.CrossRefFakeMode):
     def __init__(self):
-        super(CrossRefSparseFakeMode, self).__init__(
+        super().__init__(
             self.ignore_op, check_strides=False,
             check_aliasing=False,
         )  # TODO: enable stride/alias checking
diff --git a/test/test_static_runtime.py b/test/test_static_runtime.py
index b3087ee..032e677 100644
--- a/test/test_static_runtime.py
+++ b/test/test_static_runtime.py
@@ -178,7 +178,7 @@
 
 class SubModule(nn.Module):
     def __init__(self):
-        super(SubModule, self).__init__()
+        super().__init__()
         self.a = 11
         self.b = 2
 
@@ -188,7 +188,7 @@
 
 class SubModule2(nn.Module):
     def __init__(self):
-        super(SubModule2, self).__init__()
+        super().__init__()
         self.a = 12
         self.b = 2
 
@@ -199,7 +199,7 @@
 
 class TestModule(nn.Module):
     def __init__(self):
-        super(TestModule, self).__init__()
+        super().__init__()
         self.sub1 = SubModule()
         self.sub2 = SubModule2()
         self.a = 3
diff --git a/test/test_tensorboard.py b/test/test_tensorboard.py
index f69e79c..15031c7 100644
--- a/test/test_tensorboard.py
+++ b/test/test_tensorboard.py
@@ -67,7 +67,7 @@
         return SummaryWriter(temp_dir)
 
     def tearDown(self):
-        super(BaseTestCase, self).tearDown()
+        super().tearDown()
         # Remove directories created by SummaryWriter
         for temp_dir in self.temp_dirs:
             if os.path.exists(temp_dir):
@@ -562,7 +562,7 @@
 
         class myLinear(torch.nn.Module):
             def __init__(self):
-                super(myLinear, self).__init__()
+                super().__init__()
                 self.l = torch.nn.Linear(3, 5)
 
             def forward(self, x):
@@ -682,7 +682,7 @@
         # the add_graph call and still continue.
         class myMLP(torch.nn.Module):
             def __init__(self):
-                super(myMLP, self).__init__()
+                super().__init__()
                 self.input_len = 1 * 28 * 28
                 self.fc1 = torch.nn.Linear(self.input_len, 1200)
                 self.fc2 = torch.nn.Linear(1200, 1200)
diff --git a/test/test_tensorexpr.py b/test/test_tensorexpr.py
index cf894f3..e58b577 100644
--- a/test/test_tensorexpr.py
+++ b/test/test_tensorexpr.py
@@ -15,14 +15,14 @@
 
 class BaseTestClass(JitTestCase):
     def setUp(self):
-        super(BaseTestClass, self).setUp()
+        super().setUp()
         self.tensorexpr_options = TensorExprTestOptions()
         self.devices = ['cpu'] if not torch.cuda.is_available() else ['cpu', 'cuda']
         self.dtypes = [torch.float32, torch.bfloat16] if LLVM_ENABLED else [torch.float32]
 
     def tearDown(self):
         self.tensorexpr_options.restore()
-        super(BaseTestClass, self).tearDown()
+        super().tearDown()
 
     def assertLastGraphAllFused(self):
         self.assertAllFused(torch.jit.last_executed_optimized_graph())
@@ -1532,7 +1532,7 @@
     def test_alias_analysis_module(self):
         class AliasModule(nn.Module):
             def __init__(self):
-                super(AliasModule, self).__init__()
+                super().__init__()
                 torch.manual_seed(1337)
                 self.a = torch.randn(128, 128)
                 self.b = torch.randn(128, 128)
@@ -1570,7 +1570,7 @@
     def test_alias_analysis_inputs(self):
         class AliasModule(nn.Module):
             def __init__(self):
-                super(AliasModule, self).__init__()
+                super().__init__()
                 torch.manual_seed(1337)
                 self.a = torch.randn(128, 128)
                 self.b = torch.randn(128, 128)
@@ -1603,7 +1603,7 @@
     def test_alias_analysis_input_and_module(self):
         class AliasModule(nn.Module):
             def __init__(self):
-                super(AliasModule, self).__init__()
+                super().__init__()
                 torch.manual_seed(1337)
                 self.a = torch.randn(128, 128)
                 self.b = torch.randn(128, 128)
diff --git a/test/test_throughput_benchmark.py b/test/test_throughput_benchmark.py
index 75003c9..1bfdab9 100644
--- a/test/test_throughput_benchmark.py
+++ b/test/test_throughput_benchmark.py
@@ -7,7 +7,7 @@
 
 class TwoLayerNet(torch.jit.ScriptModule):
     def __init__(self, D_in, H, D_out):
-        super(TwoLayerNet, self).__init__()
+        super().__init__()
         self.linear1 = torch.nn.Linear(D_in, H)
         self.linear2 = torch.nn.Linear(2 * H, D_out)
 
@@ -21,7 +21,7 @@
 
 class TwoLayerNetModule(torch.nn.Module):
     def __init__(self, D_in, H, D_out):
-        super(TwoLayerNetModule, self).__init__()
+        super().__init__()
         self.linear1 = torch.nn.Linear(D_in, H)
         self.linear2 = torch.nn.Linear(2 * H, D_out)
 
diff --git a/test/test_utils.py b/test/test_utils.py
index adb74d4..184e2d3 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -104,7 +104,7 @@
         class Net(nn.Module):
 
             def __init__(self):
-                super(Net, self).__init__()
+                super().__init__()
                 self.counter = 0
 
             def forward(self, input_var):
@@ -190,7 +190,7 @@
     def test_checkpoint_module_list(self):
         class ModuleListNet(nn.Module):
             def __init__(self):
-                super(ModuleListNet, self).__init__()
+                super().__init__()
                 module_list = [
                     nn.Linear(100, 50),
                     nn.ReLU(),
diff --git a/test/test_vulkan.py b/test/test_vulkan.py
index 37b52d3..a9093f4 100644
--- a/test/test_vulkan.py
+++ b/test/test_vulkan.py
@@ -67,7 +67,7 @@
 
         class Conv2D(torch.nn.Module):
             def __init__(self):
-                super(Conv2D, self).__init__()
+                super().__init__()
                 self.weight = torch.nn.Parameter(torch.rand(conv_weight_shape), requires_grad=False)
                 self.bias = torch.nn.Parameter(torch.rand(conv_bias_shape), requires_grad=False)
                 self.strides = strides
@@ -87,7 +87,7 @@
 
         class Conv2DRelu(torch.nn.Module):
             def __init__(self):
-                super(Conv2DRelu, self).__init__()
+                super().__init__()
                 self.weight = torch.nn.Parameter(torch.rand(conv_weight_shape), requires_grad=False)
                 self.bias = torch.nn.Parameter(torch.rand(conv_bias_shape), requires_grad=False)
                 self.strides = strides
@@ -126,7 +126,7 @@
 
         class Conv2DHardtanh(torch.nn.Module):
             def __init__(self):
-                super(Conv2DHardtanh, self).__init__()
+                super().__init__()
                 self.weight = torch.nn.Parameter(torch.rand(conv_weight_shape), requires_grad=False)
                 self.bias = torch.nn.Parameter(torch.rand(conv_bias_shape), requires_grad=False)
                 self.strides = strides
diff --git a/test/test_xnnpack_integration.py b/test/test_xnnpack_integration.py
index 17ac2d9..ab764a6 100644
--- a/test/test_xnnpack_integration.py
+++ b/test/test_xnnpack_integration.py
@@ -191,7 +191,7 @@
     def test_linear(self, batch_size, data_shape, weight_output_dim, use_bias):
         class Linear(torch.nn.Module):
             def __init__(self, weight, bias=None):
-                super(Linear, self).__init__()
+                super().__init__()
                 self.weight = weight
                 self.bias = bias
 
@@ -200,7 +200,7 @@
 
         class LinearPrePacked(torch.nn.Module):
             def __init__(self, weight, bias=None):
-                super(LinearPrePacked, self).__init__()
+                super().__init__()
                 self.packed_weight_bias = torch.ops.prepacked.linear_clamp_prepack(weight, bias)
 
             def forward(self, x):
@@ -266,7 +266,7 @@
                     format):
         class Conv2D(torch.nn.Module):
             def __init__(self, weight, bias, strides, paddings, dilations, groups):
-                super(Conv2D, self).__init__()
+                super().__init__()
                 self.weight = weight
                 self.bias = bias
                 self.strides = strides
@@ -280,7 +280,7 @@
 
         class Conv2DPrePacked(torch.nn.Module):
             def __init__(self, weight, bias, strides, paddings, dilations, groups):
-                super(Conv2DPrePacked, self).__init__()
+                super().__init__()
                 self.packed_weight_bias = torch.ops.prepacked.conv2d_clamp_prepack(weight, bias,
                                                                                    strides, paddings, dilations, groups)
 
@@ -367,7 +367,7 @@
                               format):
         class Conv2DT(torch.nn.Module):
             def __init__(self, weight, bias, strides, paddings, output_paddings, dilations, groups):
-                super(Conv2DT, self).__init__()
+                super().__init__()
                 self.weight = weight
                 self.bias = bias
                 self.strides = strides
@@ -382,7 +382,7 @@
 
         class Conv2DTPrePacked(torch.nn.Module):
             def __init__(self, weight, bias, strides, paddings, output_paddings, dilations, groups):
-                super(Conv2DTPrePacked, self).__init__()
+                super().__init__()
                 self.packed_weight_bias = torch.ops.prepacked.conv2d_transpose_clamp_prepack(weight, bias,
                                                                                              strides, paddings,
                                                                                              output_paddings,
@@ -475,7 +475,7 @@
         class M(torch.nn.Module):
             def __init__(self, conv_weight, conv_bias, linear_weight, linear_bias,
                          strides, paddings, dilations, groups):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv_weight = conv_weight
                 self.conv_bias = conv_bias
                 self.linear_weight = linear_weight
@@ -495,7 +495,7 @@
         class MPrePacked(torch.nn.Module):
             def __init__(self, conv_weight, conv_bias, linear_weight, linear_bias,
                          strides, paddings, dilations, groups):
-                super(MPrePacked, self).__init__()
+                super().__init__()
                 self.conv2d_clamp_run_weight_bias = \
                     torch.ops.prepacked.conv2d_clamp_prepack(conv_weight, conv_bias,
                                                              strides, paddings, dilations, groups)
@@ -623,7 +623,7 @@
 
         class Linear(torch.nn.Module):
             def __init__(self):
-                super(Linear, self).__init__()
+                super().__init__()
                 self.weight = torch.nn.Parameter(torch.rand(weight_shape), requires_grad=False)
                 self.bias = torch.nn.Parameter(torch.rand((weight_output_dim)), requires_grad=False)
 
@@ -632,7 +632,7 @@
 
         class LinearNoBias(torch.nn.Module):
             def __init__(self):
-                super(LinearNoBias, self).__init__()
+                super().__init__()
                 self.weight = torch.nn.Parameter(torch.rand(weight_shape), requires_grad=False)
 
             def forward(self, x):
@@ -670,7 +670,7 @@
 
         class Conv2D(torch.nn.Module):
             def __init__(self):
-                super(Conv2D, self).__init__()
+                super().__init__()
                 self.weight = torch.nn.Parameter(torch.rand(conv_weight_shape), requires_grad=False)
                 self.bias = torch.nn.Parameter(torch.rand(conv_bias_shape), requires_grad=False)
                 self.strides = strides
@@ -684,7 +684,7 @@
 
         class Conv2DT(torch.nn.Module):
             def __init__(self):
-                super(Conv2DT, self).__init__()
+                super().__init__()
                 self.weight = torch.nn.Parameter(torch.rand(conv_transpose_weight_shape), requires_grad=False)
                 self.bias = torch.nn.Parameter(torch.rand(conv_bias_shape), requires_grad=False)
                 self.strides = strides
@@ -720,7 +720,7 @@
 
         class M(torch.nn.Module):
             def __init__(self, activation_fn=F.relu):
-                super(M, self).__init__()
+                super().__init__()
                 self.conv_weight = torch.nn.Parameter(torch.rand(conv_weight_shape), requires_grad=False)
                 self.conv_bias = torch.nn.Parameter(torch.rand((conv_bias_shape)), requires_grad=False)
                 self.linear_weight = torch.nn.Parameter(torch.rand(linear_weight_shape), requires_grad=False)
@@ -832,7 +832,7 @@
 
         class MFusionAntiPattern(torch.nn.Module):
             def __init__(self):
-                super(MFusionAntiPattern, self).__init__()
+                super().__init__()
                 self.linear_weight = torch.nn.Parameter(torch.rand(linear_weight_shape), requires_grad=False)
                 self.linear_bias = torch.nn.Parameter(torch.rand((weight_output_dim)), requires_grad=False)
                 self.strides = strides
@@ -860,7 +860,7 @@
 
         class MFusionAntiPatternParamMinMax(torch.nn.Module):
             def __init__(self):
-                super(MFusionAntiPatternParamMinMax, self).__init__()
+                super().__init__()
                 self.linear_weight = torch.nn.Parameter(torch.rand(linear_weight_shape), requires_grad=False)
                 self.linear_bias = torch.nn.Parameter(torch.rand((weight_output_dim)), requires_grad=False)
                 self.strides = strides
@@ -893,7 +893,7 @@
 
         class DecomposedLinearAddmm(torch.nn.Module):
             def __init__(self):
-                super(DecomposedLinearAddmm, self).__init__()
+                super().__init__()
                 self.weight = torch.nn.Parameter(torch.rand(weight_shape), requires_grad=False)
                 self.bias = torch.nn.Parameter(torch.rand((weight_output_dim)), requires_grad=False)
 
@@ -903,7 +903,7 @@
 
         class DecomposedLinearMatmulAdd(torch.nn.Module):
             def __init__(self):
-                super(DecomposedLinearMatmulAdd, self).__init__()
+                super().__init__()
                 self.weight = torch.nn.Parameter(torch.rand(weight_shape), requires_grad=False)
                 self.bias = torch.nn.Parameter(torch.rand((weight_output_dim)), requires_grad=False)
 
@@ -915,7 +915,7 @@
 
         class DecomposedLinearMatmul(torch.nn.Module):
             def __init__(self):
-                super(DecomposedLinearMatmul, self).__init__()
+                super().__init__()
                 self.weight = torch.nn.Parameter(torch.rand(weight_shape), requires_grad=False)
                 self.bias = torch.nn.Parameter(torch.rand((weight_output_dim)), requires_grad=False)
 
@@ -1018,7 +1018,7 @@
 
             class Conv1D(torch.nn.Module):
                 def __init__(self):
-                    super(Conv1D, self).__init__()
+                    super().__init__()
                     self.weight = torch.nn.Parameter(torch.rand(conv_weight_shape), requires_grad=False)
                     self.bias = torch.nn.Parameter(torch.rand(conv_bias_shape), requires_grad=False)
                     self.stride = stride
@@ -1080,7 +1080,7 @@
 
             class Net(torch.nn.Module):
                 def __init__(self):
-                    super(Net, self).__init__()
+                    super().__init__()
                     self.conv_weight = torch.nn.Parameter(torch.rand(conv_weight_shape), requires_grad=False)
                     self.conv_bias = torch.nn.Parameter(torch.rand(conv_bias_shape), requires_grad=False)
                     self.stride = stride
diff --git a/torch/_dynamo/test_minifier_common.py b/torch/_dynamo/test_minifier_common.py
index 9a1e580..247e73f 100644
--- a/torch/_dynamo/test_minifier_common.py
+++ b/torch/_dynamo/test_minifier_common.py
@@ -31,12 +31,6 @@
         cls._debug_dir_obj.cleanup()
         cls._exit_stack.close()
 
-    def setUp(self):
-        super().setUp()
-
-    def tearDown(self):
-        super().tearDown()
-
     # Search for the name of the first function defined in a code string.
     def _get_fn_name(self, code):
         fn_name_match = re.search(r"def (\w+)\(", code)
diff --git a/torch/ao/pruning/_experimental/data_sparsifier/base_data_sparsifier.py b/torch/ao/pruning/_experimental/data_sparsifier/base_data_sparsifier.py
index e6d0b98..6d6cf3f 100644
--- a/torch/ao/pruning/_experimental/data_sparsifier/base_data_sparsifier.py
+++ b/torch/ao/pruning/_experimental/data_sparsifier/base_data_sparsifier.py
@@ -29,8 +29,7 @@
 
 
 class _Container(nn.Module):
-    def __init__(self):
-        super().__init__()
+    pass
 
 
 class BaseDataSparsifier(base_sparsifier.BaseSparsifier):
diff --git a/torch/jit/_script.py b/torch/jit/_script.py
index 6e5370e..553a702 100644
--- a/torch/jit/_script.py
+++ b/torch/jit/_script.py
@@ -957,8 +957,7 @@
 else:
     # TODO MAKE SURE THAT DISABLING WORKS
     class RecursiveScriptClass:  # type: ignore[no-redef]
-        def __init__(self):
-            super().__init__()
+        pass
 
     class ScriptModule(torch.nn.Module):  # type: ignore[no-redef]
         def __init__(self, arg=None):
diff --git a/torch/testing/_internal/common_quantization.py b/torch/testing/_internal/common_quantization.py
index 179b6bc..86d5876 100644
--- a/torch/testing/_internal/common_quantization.py
+++ b/torch/testing/_internal/common_quantization.py
@@ -1021,10 +1021,6 @@
         self.assertTrue(expected_name in str(q_embeddingbag))
 
 class QuantizationLiteTestCase(QuantizationTestCase):
-
-    def setUp(self):
-        super().setUp()
-
     def _create_quantized_model(self, model_class: Type[torch.nn.Module], **kwargs):
         # Creates quantized model for testing mobile script modules
         qengine = "qnnpack"