[ONNX] ONNX dev branch merge 01-06-2021 (#50163)

Summary:
[ONNX] ONNX dev branch merge 01-06-2021
- [ONNX] Support onnx if/loop sequence output in opset 13 - (https://github.com/pytorch/pytorch/issues/49270)
- Symbolic function for torch.square (https://github.com/pytorch/pytorch/issues/49446)
- [ONNX] Add checks in ONNXSetDynamicInputShape (https://github.com/pytorch/pytorch/issues/49783) …
- [ONNX] Enable export of aten::__derive_index (https://github.com/pytorch/pytorch/issues/49514) …
- [ONNX] Update symbolic for unfold (https://github.com/pytorch/pytorch/issues/49378) …
- [ONNX] Update the order of initializers in the exported graph so that it matches the order of the inputs. (https://github.com/pytorch/pytorch/issues/49798)
- [ONNX] Enable opset 13 ops (https://github.com/pytorch/pytorch/issues/49612) …
- [ONNX] Improve error message for supported model input types in ONNX export API. (https://github.com/pytorch/pytorch/issues/50119)
- [ONNX] Add a post-pass for If folding (https://github.com/pytorch/pytorch/issues/49410)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/50163

Reviewed By: pbelevich

Differential Revision: D25821059

Pulled By: SplitInfinity

fbshipit-source-id: 9f511a93d9d5812d0ab0a49d61ed0fa5f8066948
diff --git a/.jenkins/caffe2/test.sh b/.jenkins/caffe2/test.sh
index 9fd031f..e6f43b6 100755
--- a/.jenkins/caffe2/test.sh
+++ b/.jenkins/caffe2/test.sh
@@ -165,7 +165,7 @@
   # JIT C++ extensions require ninja, so put it into PATH.
   export PATH="/var/lib/jenkins/.local/bin:$PATH"
   if [[ "$BUILD_ENVIRONMENT" == *py3* ]]; then
-    pip install -q --user onnxruntime==1.5.2
+    pip install -q --user onnxruntime==1.6.0
   fi
   "$ROOT_DIR/scripts/onnx/test.sh"
 fi
diff --git a/aten/src/ATen/core/interned_strings.h b/aten/src/ATen/core/interned_strings.h
index a65a48d..9b5ebbf 100644
--- a/aten/src/ATen/core/interned_strings.h
+++ b/aten/src/ATen/core/interned_strings.h
@@ -346,6 +346,7 @@
   _(onnx, ReduceL2)                  \
   _(onnx, Conv)                      \
   _(onnx, BatchNormalization)        \
+  _(onnx, ReduceProd)                \
   FORALL_ATTR_BASE_SYMBOLS(_)        \
   _(attr, Subgraph)                  \
   _(attr, ReverseSubgraph)           \
diff --git a/scripts/onnx/test.sh b/scripts/onnx/test.sh
index f0409cb..6918619 100755
--- a/scripts/onnx/test.sh
+++ b/scripts/onnx/test.sh
@@ -72,7 +72,7 @@
 fi
 if [[ "$BUILD_ENVIRONMENT" == *ort_test2* ]]; then
   # Update the loop for new opsets
-  for i in $(seq 10 12); do
+  for i in $(seq 10 13); do
     pytest "${args[@]}" \
       "$top_dir/test/onnx/test_pytorch_onnx_onnxruntime.py::TestONNXRuntime_opset$i"
   done
diff --git a/test/onnx/expect/TestOperators.test_batchnorm.expect b/test/onnx/expect/TestOperators.test_batchnorm.expect
index 40a373e..ec5d878 100644
--- a/test/onnx/expect/TestOperators.test_batchnorm.expect
+++ b/test/onnx/expect/TestOperators.test_batchnorm.expect
@@ -26,6 +26,12 @@
   initializer {
     dims: 2
     data_type: 1
+    name: "weight"
+    raw_data: "\000\000\200?\000\000\200?"
+  }
+  initializer {
+    dims: 2
+    data_type: 1
     name: "bias"
     raw_data: "\000\000\000\000\000\000\000\000"
   }
@@ -41,12 +47,6 @@
     name: "running_var"
     raw_data: "\000\000\200?\000\000\200?"
   }
-  initializer {
-    dims: 2
-    data_type: 1
-    name: "weight"
-    raw_data: "\000\000\200?\000\000\200?"
-  }
   input {
     name: "input"
     type {
diff --git a/test/onnx/expect/TestOperators.test_batchnorm_1d.expect b/test/onnx/expect/TestOperators.test_batchnorm_1d.expect
index 27b4e18..4a87406 100644
--- a/test/onnx/expect/TestOperators.test_batchnorm_1d.expect
+++ b/test/onnx/expect/TestOperators.test_batchnorm_1d.expect
@@ -26,6 +26,12 @@
   initializer {
     dims: 2
     data_type: 1
+    name: "weight"
+    raw_data: "\000\000\200?\000\000\200?"
+  }
+  initializer {
+    dims: 2
+    data_type: 1
     name: "bias"
     raw_data: "\000\000\000\000\000\000\000\000"
   }
@@ -41,12 +47,6 @@
     name: "running_var"
     raw_data: "\000\000\200?\000\000\200?"
   }
-  initializer {
-    dims: 2
-    data_type: 1
-    name: "weight"
-    raw_data: "\000\000\200?\000\000\200?"
-  }
   input {
     name: "input"
     type {
diff --git a/test/onnx/expect/TestOperators.test_batchnorm_onnx_irv4.expect b/test/onnx/expect/TestOperators.test_batchnorm_onnx_irv4.expect
index 7223161..6e6530c 100644
--- a/test/onnx/expect/TestOperators.test_batchnorm_onnx_irv4.expect
+++ b/test/onnx/expect/TestOperators.test_batchnorm_onnx_irv4.expect
@@ -26,6 +26,12 @@
   initializer {
     dims: 2
     data_type: 1
+    name: "weight"
+    raw_data: "\000\000\200?\000\000\200?"
+  }
+  initializer {
+    dims: 2
+    data_type: 1
     name: "bias"
     raw_data: "\000\000\000\000\000\000\000\000"
   }
@@ -41,12 +47,6 @@
     name: "running_var"
     raw_data: "\000\000\200?\000\000\200?"
   }
-  initializer {
-    dims: 2
-    data_type: 1
-    name: "weight"
-    raw_data: "\000\000\200?\000\000\200?"
-  }
   input {
     name: "input"
     type {
diff --git a/test/onnx/expect/TestOperators.test_batchnorm_training.expect b/test/onnx/expect/TestOperators.test_batchnorm_training.expect
index 0980885..507d109 100644
--- a/test/onnx/expect/TestOperators.test_batchnorm_training.expect
+++ b/test/onnx/expect/TestOperators.test_batchnorm_training.expect
@@ -30,6 +30,12 @@
   initializer {
     dims: 2
     data_type: 1
+    name: "weight"
+    raw_data: "\000\000\200?\000\000\200?"
+  }
+  initializer {
+    dims: 2
+    data_type: 1
     name: "bias"
     raw_data: "\000\000\000\000\000\000\000\000"
   }
@@ -45,12 +51,6 @@
     name: "running_var"
     raw_data: "fff?fff?"
   }
-  initializer {
-    dims: 2
-    data_type: 1
-    name: "weight"
-    raw_data: "\000\000\200?\000\000\200?"
-  }
   input {
     name: "input"
     type {
diff --git a/test/onnx/expect/TestOperators.test_layer_norm_aten.expect b/test/onnx/expect/TestOperators.test_layer_norm_aten.expect
index c1c4f80..1ef3fb4 100644
--- a/test/onnx/expect/TestOperators.test_layer_norm_aten.expect
+++ b/test/onnx/expect/TestOperators.test_layer_norm_aten.expect
@@ -35,15 +35,15 @@
     dims: 10
     dims: 10
     data_type: 1
-    name: "bias"
-    raw_data: "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"
+    name: "weight"
+    raw_data: "\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?"
   }
   initializer {
     dims: 10
     dims: 10
     data_type: 1
-    name: "weight"
-    raw_data: "\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?"
+    name: "bias"
+    raw_data: "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"
   }
   input {
     name: "input"
diff --git a/test/onnx/expect/TestOperators.test_linear.expect b/test/onnx/expect/TestOperators.test_linear.expect
index abb5a07..4eb21f3 100644
--- a/test/onnx/expect/TestOperators.test_linear.expect
+++ b/test/onnx/expect/TestOperators.test_linear.expect
@@ -28,17 +28,17 @@
   name: "torch-jit-export"
   initializer {
     dims: 5
-    data_type: 1
-    name: "bias"
-    raw_data: "\324BO\276@\245T>\350\377\245\275\374u\336\276&\212\304>"
-  }
-  initializer {
-    dims: 5
     dims: 4
     data_type: 1
     name: "weight"
     raw_data: "\212\332\356>@\265u>p\303E\275 \320\306\274\354\201\221>\004\354\261\276\2746*>8\247)\276\340\035\224>\024\2446\276\200\211\312<\224\344,>D\356\257>\320\202\226\275\364\213\351>z\226\330\276\310\250\266\275\352F\377\276\000\250)=\244K\021>"
   }
+  initializer {
+    dims: 5
+    data_type: 1
+    name: "bias"
+    raw_data: "\324BO\276@\245T>\350\377\245\275\374u\336\276&\212\304>"
+  }
   input {
     name: "input"
     type {
diff --git a/test/onnx/test_pytorch_onnx_onnxruntime.py b/test/onnx/test_pytorch_onnx_onnxruntime.py
index 26896bc..33d428b 100644
--- a/test/onnx/test_pytorch_onnx_onnxruntime.py
+++ b/test/onnx/test_pytorch_onnx_onnxruntime.py
@@ -6,6 +6,7 @@
 import io
 import itertools
 import copy
+import os
 
 from torch.nn.utils import rnn as rnn_utils
 from model_defs.lstm_flattening_result import LstmFlatteningResult
@@ -18,6 +19,13 @@
 from typing import List
 import model_defs.word_language_model as word_language_model
 import torchvision
+from torchvision import ops
+from torchvision.models.detection.image_list import ImageList
+from torchvision.models.detection.transform import GeneralizedRCNNTransform
+from torchvision.models.detection.rpn import AnchorGenerator, RPNHead, RegionProposalNetwork
+from torchvision.models.detection.roi_heads import RoIHeads
+from torchvision.models.detection.faster_rcnn import FastRCNNPredictor, TwoMLPHead
+from collections import OrderedDict
 import onnx
 
 def to_numpy(tensor):
@@ -76,7 +84,7 @@
                    example_outputs=None, do_constant_folding=True,
                    dynamic_axes=None, test_with_inputs=None,
                    input_names=None, output_names=None,
-                   fixed_batch_size=False):
+                   fixed_batch_size=False, dict_check=True):
     model.eval()
 
     if input is None:
@@ -90,13 +98,16 @@
             input = (input,)
         input_args = copy.deepcopy(input)
         input_kwargs = {}
-        if isinstance(input_args[-1], dict):
+        if dict_check and isinstance(input_args[-1], dict):
             input_kwargs = input_args[-1]
             input_args = input_args[:-1]
         output = model(*input_args, **input_kwargs)
         if isinstance(output, torch.Tensor):
             output = (output,)
 
+        if not dict_check and isinstance(input[-1], dict):
+            input = input + ({},)
+
         ort_sess = convert_to_onnx(model, input=input, opset_version=self.opset_version,
                                    example_outputs=output, do_constant_folding=do_constant_folding,
                                    keep_initializers_as_inputs=self.keep_initializers_as_inputs,
@@ -122,6 +133,71 @@
                 ort_outs = run_ort(ort_sess, test_input)
                 ort_compare_with_pytorch(ort_outs, output, rtol, atol)
 
+def _init_test_generalized_rcnn_transform():
+    min_size = 100
+    max_size = 200
+    image_mean = [0.485, 0.456, 0.406]
+    image_std = [0.229, 0.224, 0.225]
+    transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)
+    return transform
+
+def _init_test_rpn():
+    anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
+    aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
+    rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
+    out_channels = 256
+    rpn_head = RPNHead(out_channels, rpn_anchor_generator.num_anchors_per_location()[0])
+    rpn_fg_iou_thresh = 0.7
+    rpn_bg_iou_thresh = 0.3
+    rpn_batch_size_per_image = 256
+    rpn_positive_fraction = 0.5
+    rpn_pre_nms_top_n = dict(training=2000, testing=1000)
+    rpn_post_nms_top_n = dict(training=2000, testing=1000)
+    rpn_nms_thresh = 0.7
+
+    rpn = RegionProposalNetwork(
+        rpn_anchor_generator, rpn_head,
+        rpn_fg_iou_thresh, rpn_bg_iou_thresh,
+        rpn_batch_size_per_image, rpn_positive_fraction,
+        rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)
+    return rpn
+
+def _init_test_roi_heads_faster_rcnn():
+    out_channels = 256
+    num_classes = 91
+
+    box_fg_iou_thresh = 0.5
+    box_bg_iou_thresh = 0.5
+    box_batch_size_per_image = 512
+    box_positive_fraction = 0.25
+    bbox_reg_weights = None
+    box_score_thresh = 0.05
+    box_nms_thresh = 0.5
+    box_detections_per_img = 100
+
+    box_roi_pool = ops.MultiScaleRoIAlign(
+        featmap_names=['0', '1', '2', '3'],
+        output_size=7,
+        sampling_ratio=2)
+
+    resolution = box_roi_pool.output_size[0]
+    representation_size = 1024
+    box_head = TwoMLPHead(
+        out_channels * resolution ** 2,
+        representation_size)
+
+    representation_size = 1024
+    box_predictor = FastRCNNPredictor(
+        representation_size,
+        num_classes)
+
+    roi_heads = RoIHeads(
+        box_roi_pool, box_head, box_predictor,
+        box_fg_iou_thresh, box_bg_iou_thresh,
+        box_batch_size_per_image, box_positive_fraction,
+        bbox_reg_weights,
+        box_score_thresh, box_nms_thresh, box_detections_per_img)
+    return roi_heads
 
 class TestONNXRuntime(unittest.TestCase):
     from torch.onnx.symbolic_helper import _export_onnx_opset_version
@@ -136,18 +212,19 @@
         if torch.cuda.is_available():
             torch.cuda.manual_seed_all(0)
         np.random.seed(seed=0)
+        os.environ['ALLOW_RELEASED_ONNX_OPSET_ONLY'] = '0'
         self.is_script_test_enabled = True
 
     def run_test(self, model, input, rtol=1e-3, atol=1e-7, do_constant_folding=True,
                  batch_size=2, use_gpu=True, dynamic_axes=None, test_with_inputs=None,
-                 input_names=None, output_names=None, fixed_batch_size=False):
+                 input_names=None, output_names=None, fixed_batch_size=False, dict_check=True):
         def _run_test(m):
             return run_model_test(self, m, batch_size=batch_size,
                                   input=input, use_gpu=use_gpu, rtol=rtol, atol=atol,
                                   do_constant_folding=do_constant_folding,
                                   dynamic_axes=dynamic_axes, test_with_inputs=test_with_inputs,
                                   input_names=input_names, output_names=output_names,
-                                  fixed_batch_size=fixed_batch_size)
+                                  fixed_batch_size=fixed_batch_size, dict_check=dict_check)
         if self.is_script_test_enabled and self.use_new_jit_passes:
             script_model = torch.jit.script(model)
             _run_test(script_model)
@@ -217,6 +294,7 @@
         x = torch.tensor([2], dtype=torch.long)
         self.run_model_test_with_external_data(model, x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)  # Because external data format was released with Opset 9.
     def test_mobilenet_v2_with_external_data(self):
         model = torchvision.models.mobilenet_v2(pretrained=True)
@@ -321,6 +399,7 @@
         # Only support CPU version, since tracer is not working in GPU RNN.
         self.run_test(model, (x, model.hidden))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     @disableScriptTest()  # Faster RCNN model is not scriptable
     def test_faster_rcnn(self):
@@ -329,6 +408,8 @@
         model.eval()
         x = torch.randn(2, 3, 200, 300, requires_grad=True)
         self.run_test(model, (x,), rtol=1e-3, atol=1e-5)
+        self.run_test(model, (x,), input_names=["images_tensors"], output_names=["outputs"],
+                      dynamic_axes={"images_tensors": [0, 1, 2, 3], "outputs": [0, 1, 2, 3]}, rtol=1e-3, atol=1e-5)
 
     def get_image_from_url(self, url):
         import os
@@ -355,6 +436,39 @@
         images = [image]
         return images
 
+    @skipIfUnsupportedOpsetVersion([13])
+    def test_paste_mask_in_image(self):
+        # disable profiling
+        torch._C._jit_set_profiling_executor(False)
+        torch._C._jit_set_profiling_mode(False)
+
+        masks = torch.rand(10, 1, 26, 26)
+        boxes = torch.rand(10, 4)
+        boxes[:, 2:] += torch.rand(10, 2)
+        boxes *= 50
+        o_im_s = (100, 100)
+        from torchvision.models.detection.roi_heads import paste_masks_in_image
+        out = paste_masks_in_image(masks, boxes, o_im_s)
+        jit_trace = torch.jit.trace(paste_masks_in_image,
+                                    (masks, boxes,
+                                     [torch.tensor(o_im_s[0]),
+                                      torch.tensor(o_im_s[1])]))
+        out_trace = jit_trace(masks, boxes, [torch.tensor(o_im_s[0]), torch.tensor(o_im_s[1])])
+
+        assert torch.all(out.eq(out_trace))
+
+        masks2 = torch.rand(20, 1, 26, 26)
+        boxes2 = torch.rand(20, 4)
+        boxes2[:, 2:] += torch.rand(20, 2)
+        boxes2 *= 100
+        o_im_s2 = (200, 200)
+        from torchvision.models.detection.roi_heads import paste_masks_in_image
+        out2 = paste_masks_in_image(masks2, boxes2, o_im_s2)
+        out_trace2 = jit_trace(masks2, boxes2, [torch.tensor(o_im_s2[0]), torch.tensor(o_im_s2[1])])
+
+        assert torch.all(out2.eq(out_trace2))
+
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     @disableScriptTest()
     def test_mask_rcnn(self):
@@ -362,7 +476,35 @@
                                                                              max_size=300)
         images = self.get_test_images()
         self.run_test(model, (images,), rtol=1e-3, atol=1e-5)
+        self.run_test(model, (images,), input_names=["images_tensors"], output_names=["boxes", "labels", "scores", "masks"],
+                      dynamic_axes={"images_tensors": [0, 1, 2], "boxes": [0, 1], "labels": [0],
+                                    "scores": [0], "masks": [0, 1, 2]}, rtol=1e-3, atol=1e-5)
 
+    def test_heatmaps_to_keypoints(self):
+        # disable profiling
+        torch._C._jit_set_profiling_executor(False)
+        torch._C._jit_set_profiling_mode(False)
+
+        maps = torch.rand(10, 1, 26, 26)
+        rois = torch.rand(10, 4)
+        from torchvision.models.detection.roi_heads import heatmaps_to_keypoints
+        out = heatmaps_to_keypoints(maps, rois)
+        jit_trace = torch.jit.trace(heatmaps_to_keypoints, (maps, rois))
+        out_trace = jit_trace(maps, rois)
+
+        assert torch.all(out[0].eq(out_trace[0]))
+        assert torch.all(out[1].eq(out_trace[1]))
+
+        maps2 = torch.rand(20, 2, 21, 21)
+        rois2 = torch.rand(20, 4)
+        from torchvision.models.detection.roi_heads import heatmaps_to_keypoints
+        out2 = heatmaps_to_keypoints(maps2, rois2)
+        out_trace2 = jit_trace(maps2, rois2)
+
+        assert torch.all(out2[0].eq(out_trace2[0]))
+        assert torch.all(out2[1].eq(out_trace2[1]))
+
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     @disableScriptTest()
     def test_keypoint_rcnn(self):
@@ -370,19 +512,27 @@
                                                                                      max_size=300)
         images = self.get_test_images()
         self.run_test(model, (images,), rtol=1e-3, atol=1e-5)
+        self.run_test(model, (images,), input_names=["images_tensors"],
+                      output_names=["outputs1", "outputs2", "outputs3", "outputs4"],
+                      dynamic_axes={"images_tensors": [0, 1, 2]},
+                      rtol=1e-3, atol=1e-5)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @disableScriptTest()
     def test_word_language_model_RNN_TANH(self):
         self.run_word_language_model("RNN_TANH")
 
+    @skipIfUnsupportedOpsetVersion([13])
     @disableScriptTest()
     def test_word_language_model_RNN_RELU(self):
         self.run_word_language_model("RNN_RELU")
 
+    @skipIfUnsupportedOpsetVersion([13])
     @disableScriptTest()
     def test_word_language_model_LSTM(self):
         self.run_word_language_model("LSTM")
 
+    @skipIfUnsupportedOpsetVersion([13])
     @disableScriptTest()
     def test_word_language_model_GRU(self):
         self.run_word_language_model("GRU")
@@ -610,6 +760,7 @@
         z = torch.randn(2, 3)
         self.run_test(Model(), (x, None, z))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     def test_cste_script(self):
         class MyModel(torch.jit.ScriptModule):
@@ -878,37 +1029,44 @@
         else:
             self.run_test(Squeeze(d), x1)
 
+    @skipIfUnsupportedOpsetVersion([13])
     def test_squeeze_without_no_op(self):
         x = torch.randn(2, 1, 4)
         self.squeeze_model_tests(1, x, None)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     def test_squeeze_dynamic(self):
         x_squeeze = torch.randn(2, 1, 4)
         x_noop = torch.randn(2, 2, 3)
         self.squeeze_model_tests(1, x_squeeze, x_noop)
 
+    @skipIfUnsupportedOpsetVersion([13])
     def test_squeeze_neg_without_no_op(self):
         x = torch.randn(2, 1, 4)
         self.squeeze_model_tests(-2, x, None)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     def test_squeeze_neg(self):
         x_squeeze = torch.randn(2, 1, 4)
         x_noop = torch.randn(2, 2, 3)
         self.squeeze_model_tests(-2, x_squeeze, x_noop)
 
+    @skipIfUnsupportedOpsetVersion([13])
     def test_squeeze_all_dims(self):
         x_squeeze = torch.randn(2, 1, 4)
         x_noop = torch.randn(2, 2, 3)
         self.squeeze_model_tests(None, x_squeeze, x_noop)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     def test_squeeze_no_op(self):
         x_noop = torch.randn(2, 1, 4)
         x_squeeze = torch.randn(2, 2, 1)
         self.squeeze_model_tests(2, x_noop, x_squeeze)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     def test_squeeze_runtime_dim(self):
         class Squeeze(torch.nn.Module):
@@ -922,6 +1080,7 @@
         self.run_test(Squeeze(), (d1, d4), test_with_inputs=[(d3, d4)])
         self.run_test(Squeeze(), (d3, d4), test_with_inputs=[(d1, d3)])
 
+    @skipIfUnsupportedOpsetVersion([13])
     def test_unsqueeze(self):
         class Unsqueeze(torch.nn.Module):
             def forward(self, x):
@@ -1121,6 +1280,7 @@
         y = torch.randn(2, 3, 4)
         self.run_test(FloorDivModule(), (x, y))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     def test_floordiv(self):
         class FloordivModule(torch.nn.Module):
@@ -1198,6 +1358,7 @@
         y = torch.arange(1, 2 * 3 * 4 + 1).reshape(2, 3, 4).to(torch.double)
         self.run_test(torch.jit.script(DivModule()), (x, y))
 
+    @skipIfUnsupportedOpsetVersion([13])
     def test_slice_trace(self):
         class MyModule(torch.nn.Module):
             def forward(self, x):
@@ -1206,6 +1367,7 @@
         x = torch.randn(3)
         self.run_test(MyModule(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     def test_slice_neg(self):
         class NegSlice(torch.nn.Module):
             def forward(self, x):
@@ -1214,6 +1376,7 @@
         x = torch.randn(3, 4, 5)
         self.run_test(NegSlice(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     def test_slice_neg_large(self):
         class NegSlice(torch.nn.Module):
             def forward(self, x):
@@ -1222,6 +1385,7 @@
         x = torch.randn(3, 4, 5, 6, 7)
         self.run_test(NegSlice(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     def test_slice_neg_large_negone(self):
         class NegSlice(torch.nn.Module):
             def forward(self, x):
@@ -1230,6 +1394,7 @@
         x = torch.randn(3, 4, 5, 6, 7)
         self.run_test(NegSlice(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     def test_slice_with_input_index(self):
         class InputIndexSlice(torch.nn.Module):
@@ -1241,6 +1406,7 @@
         y = torch.rand((22, 256))
         self.run_test(InputIndexSlice(), (x, y))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(10)
     @disableScriptTest()  # scripting tuple/list append
     def test_slice_dynamic(self):
@@ -1259,6 +1425,7 @@
                       dynamic_axes={'input_1': [0, 1, 2],
                                     'output_1': [0, 1, 2]})
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(10)
     def test_slice_dynamic_script(self):
         class DynamicSliceModel(torch.jit.ScriptModule):
@@ -1269,6 +1436,7 @@
         x = torch.rand(1, 2)
         self.run_test(DynamicSliceModel(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(10)
     def test_slice_dynamic_shape_script(self):
         class DynamicSliceModel(torch.nn.Module):
@@ -1278,6 +1446,7 @@
         x = torch.rand(1, 2, 3, 4)
         self.run_test(DynamicSliceModel(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(10)
     @disableScriptTest()   # scripting tuple/list append
     def test_slice_dynamic_to_end(self):
@@ -1293,6 +1462,14 @@
                       dynamic_axes={'input_1': [0, 1, 2],
                                     'output_1': [0, 1, 2]})
 
+    def test_square(self):
+        class Square(torch.nn.Module):
+            def forward(self, x):
+                return torch.square(x)
+
+        x = torch.randn(2, 3, 4)
+        self.run_test(Square(), x)
+
     @skipIfUnsupportedMinOpsetVersion(9)
     def test_arange_dynamic(self):
         class ArangeModel(torch.nn.Module):
@@ -1376,6 +1553,7 @@
         x = torch.tensor(6.2, dtype=torch.float)
         self.run_test(ArangeModel(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     def test_size(self):
         class SizeModel(torch.nn.Module):
@@ -1385,6 +1563,7 @@
         x = torch.randn(5, 3, 2)
         self.run_test(SizeModel(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     @disableScriptTest()  # x.stride() not scriptable
     def test_as_strided(self):
@@ -1399,6 +1578,7 @@
         x = torch.randn(5, 8, 7)
         self.run_test(Model(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @disableScriptTest()  # Ellipses followed by tensor indexing not scriptable
     def test_tensor_index_advanced_indexing_ellipsis(self):
         class MyModel(torch.nn.Module):
@@ -1408,6 +1588,7 @@
         m1 = torch.randn(3, 4, 5, 6, 7)
         self.run_test(MyModel(), (m1,))
 
+    @skipIfUnsupportedOpsetVersion([13])
     def test_tensor_index_advanced_indexing(self):
         class MyModel(torch.nn.Module):
             def forward(self, input):
@@ -1428,6 +1609,7 @@
 
         self.run_test(MyModel(), (m1,))
 
+    @skipIfUnsupportedOpsetVersion([13])
     def test_tensor_index_advanced_indexing_consecutive(self):
         class MyModel(torch.nn.Module):
             def forward(self, input):
@@ -1436,6 +1618,7 @@
         m1 = torch.randn(3, 4, 5, 6, 7)
         self.run_test(MyModel(), (m1,))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     def test_index_put(self):
         class IndexPutModel(torch.nn.Module):
@@ -1448,6 +1631,7 @@
         update = torch.ones(4)
         self.run_test(IndexPutModel(), (x, ind, update))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     def test_index_put_accumulate(self):
         class IndexPutModel(torch.nn.Module):
@@ -1459,6 +1643,7 @@
         update = torch.ones(4)
         self.run_test(IndexPutModel(), (x, ind, update))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     def test_index_put_slice_index(self):
         class IndexPutModel(torch.nn.Module):
@@ -1533,6 +1718,7 @@
         update = torch.arange(3 * 5).to(torch.float).view(3, 5)
         self.run_test(IndexPutModel8(), (x, update))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     @disableScriptTest()  # Ellipses followed by tensor indexing not scriptable
     def test_index_put_ellipsis(self):
@@ -1554,6 +1740,7 @@
         update = torch.randn(4, 1, 3, 2)
         self.run_test(IndexPutModel2(), (x, update))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     def test_index_put_loop(self):
         @torch.jit.script
@@ -1584,6 +1771,7 @@
                       dynamic_axes={'x': {0: 'seq_length', 1: 'batch_size'}}, test_with_inputs=[y])
 
     @skipIfUnsupportedMinOpsetVersion(11)
+    @skipIfUnsupportedOpsetVersion([13])
     def test_copy_(self):
         class CopyModel(torch.nn.Module):
             def forward(self, x, data):
@@ -1635,6 +1823,7 @@
         data = torch.randn(4)
         self.run_test(CopyModel4(), (x, ind, data))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     @disableScriptTest()  # Model not scriptable (output with shape doesn't match the broadcast shape)
     def test_copy_tracing(self):
@@ -1647,6 +1836,7 @@
         update = torch.randn(1, 2)
         self.run_test(CopyModel(), (x, update))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     def test_copy_ellipsis(self):
         class CopyModel(torch.nn.Module):
@@ -1662,9 +1852,10 @@
         update = torch.ones(1)
         self.run_test(CopyModel(), (x, update))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     # TODO: Limited scripting support with ellipsis indexing.
-    #       Due to dependency on input tensor rank being known.
+    # Due to dependency on input tensor rank being known.
     def test_copy_ellipsis_tracing(self):
         class CopyModel(torch.nn.Module):
             def forward(self, x, update):
@@ -1700,6 +1891,7 @@
         x = torch.randn(2, 3, 4)
         self.run_test(Rand(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     def test_random_dynamic_size(self):
         class RandN(torch.nn.Module):
@@ -1862,10 +2054,12 @@
                         self._interpolate_script(xi, mode_i, False, is_upsample, True)
                     self._interpolate_script(xi, mode_i, False, is_upsample)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @disableScriptTest()
     def test_interpolate_upsample(self):
         self._interpolate_tests(True)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @disableScriptTest()
     @skipIfUnsupportedMinOpsetVersion(9)
     def test_interpolate_function_substitution(self):
@@ -1896,11 +2090,13 @@
 
         self.run_test(TracingModule(), (x,))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(10)
     @disableScriptTest()
     def test_interpolate_downsample(self):
         self._interpolate_tests(False)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     @disableScriptTest()
     def test_interpolate_no_shape(self):
@@ -1916,6 +2112,7 @@
         y = torch.randn(16, 16, requires_grad=True)
         self.run_test(MyModel(), (x, y))
 
+    @skipIfUnsupportedOpsetVersion([13])
     def test_interpolate_adaptive_pooling_error(self):
         x = torch.randn(1, 2, 6, requires_grad=True)
         with self.assertRaises(RuntimeError) as cm:
@@ -1924,6 +2121,7 @@
         with self.assertRaises(RuntimeError) as cm:
             self._interpolate(x, "area", False, True)
 
+    @skipIfUnsupportedOpsetVersion([13])
     def test_groupnorm(self):
         model = torch.nn.GroupNorm(3, 6, 0.002)
         x = torch.randn(4, 6, 180, 180, 180)
@@ -1937,6 +2135,7 @@
         x = torch.randn(4, 6, 180, 180)
         self.run_test(model, x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @disableScriptTest()
     def test_groupnorm_noaffine(self):
         model = torch.nn.GroupNorm(4, 8, 0.002, affine=False)
@@ -1951,6 +2150,7 @@
         x = torch.randn(4, 6, 180, 180)
         self.run_test(model, x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     def test_listunpack(self):
         class ListUnpack(torch.jit.ScriptModule):
@@ -2254,6 +2454,7 @@
         input2 = torch.arange(24, dtype=torch.uint8).reshape(3, 4, 2)
         self.run_test(BitshiftModel(), (input, input2))
 
+    @skipIfUnsupportedOpsetVersion([13])
     def test_narrow(self):
         class NarrowModel(torch.nn.Module):
             def forward(self, input):
@@ -2262,6 +2463,7 @@
         x = torch.randn(3, 3, requires_grad=True)
         self.run_test(NarrowModel(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     def test_narrow_dynamic(self):
         class NarrowModel(torch.nn.Module):
@@ -2271,6 +2473,7 @@
         x = torch.randn(3, 3, requires_grad=True)
         self.run_test(NarrowModel(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     def test_index_fill(self):
         class IndexFillModel(torch.nn.Module):
@@ -2281,6 +2484,7 @@
         x = torch.randn(3, 4, 5, requires_grad=True)
         self.run_test(IndexFillModel(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     def test_index_copy(self):
         class IndexCopyModel(torch.nn.Module):
@@ -2308,8 +2512,6 @@
         x = torch.randn(3, 4)
         self.run_test(Select(), x)
 
-    # TODO: enable for opset 10 when ONNXRuntime version will be updated
-
     def test_index_select_constant_scaler_index(self):
         class IndexSelectScalerIndexModel(torch.nn.Module):
             def forward(self, x):
@@ -2524,6 +2726,7 @@
         indices = torch.tensor([[1, 0], [0, 1], [0, 1]], dtype=torch.int64)
         self.run_test(GatherModel(), input=(input, indices))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     def test_expand(self):
         class ExpandModel(torch.nn.Module):
@@ -2595,6 +2798,7 @@
         x = torch.randn(4, 5, dtype=torch.float)
         self.run_test(ReducedOpModule(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     def test_reduced_sum(self):
         return self._test_reduced_ops(op=torch.sum)
 
@@ -2682,6 +2886,7 @@
         x = torch.randn(3, 4, 5, requires_grad=True)
         self.run_test(Model(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     @disableScriptTest()  # scripting prim_dtype
     def test_lstm_no_hidden(self):
@@ -2696,6 +2901,7 @@
         input = torch.randn((10, 16, 16))
         self.run_test(LSTMModel(), (input,))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     @disableScriptTest()  # scripting prim_dtype
     def test_lstm_proj_no_hidden(self):
@@ -2712,6 +2918,7 @@
             self.run_test(LSTMModel(), (input,))
 
     @skipIfUnsupportedMinOpsetVersion(9)
+    @skipIfUnsupportedOpsetVersion([13])
     @disableScriptTest()
     def test_lstm(self):
         model = torch.nn.LSTM(RNN_INPUT_SIZE, RNN_HIDDEN_SIZE, 1, bidirectional=False)
@@ -2720,6 +2927,7 @@
         c0 = torch.randn(1, BATCH_SIZE, RNN_HIDDEN_SIZE)
         self.run_test(model, (input, (h0, c0)))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     @disableScriptTest()
     def test_lstm_default_init_state(self):
@@ -2727,6 +2935,7 @@
         input = torch.randn(RNN_SEQUENCE_LENGTH, BATCH_SIZE, RNN_INPUT_SIZE)
         self.run_test(model, input)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     @disableScriptTest()  # LSTMModel model not scriptable
     def test_lstm_fixed_batch_size(self):
@@ -2748,6 +2957,7 @@
         input2 = torch.randn(RNN_SEQUENCE_LENGTH, BATCH_SIZE, RNN_INPUT_SIZE)
         self.run_test(LSTMModel(), input, fixed_batch_size=True, test_with_inputs=[input2])
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     @disableScriptTest()
     def test_lstm_post_fix_init_state(self):
@@ -2772,6 +2982,7 @@
         self.run_test(model, input, dynamic_axes={'input' : {0 : 'seq', 1 : 'batch'}},
                       test_with_inputs=[input2])
 
+    @skipIfUnsupportedOpsetVersion([13])
     @disableScriptTest()
     def test_lstm_constant_folding(self):
         class LstmNet(torch.nn.Module):
@@ -2799,6 +3010,7 @@
         model2, input2 = get_LstmNet_model_and_inputs(5, 4, 3, batch_size2, 7, False)
         self.run_test(model2, input2, do_constant_folding=True)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     @disableScriptTest()
     def test_lstm_no_bias(self):
@@ -2824,6 +3036,7 @@
         for model, input in models_and_inputs:
             self.run_test(model, input)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @disableScriptTest()
     def test_rnn_no_bias(self):
         def make_model(layers, packed_sequence):
@@ -2863,6 +3076,7 @@
         for model, input in zip(models, inputs):
             self.run_test(model, input, batch_size=RNN_BATCH_SIZE)
 
+    @skipIfUnsupportedOpsetVersion([13])
     def test_gru_no_bias(self):
         class GruNet(torch.nn.Module):
             def __init__(self, input_size, hidden_size, num_layers, bidirectional):
@@ -2892,6 +3106,7 @@
         for model, input in models_and_inputs:
             self.run_test(model, input, do_constant_folding=True)
 
+    @skipIfUnsupportedOpsetVersion([13])
     def test_gru_constant_folding(self):
         class GruNet(torch.nn.Module):
             def __init__(self, input_size, hidden_size, num_layers, bidirectional):
@@ -3154,6 +3369,7 @@
         input = torch.ones(7, 3, 5)
         self._argmin_argmax_model(input)
 
+    @skipIfUnsupportedOpsetVersion([13])
     def test_repeat(self):
         class RepeatModel(torch.nn.Module):
             def forward(self, x, y):
@@ -3173,6 +3389,7 @@
         x = torch.randint(10, (4, 2, 3, 4), dtype=torch.int32)
         self.run_test(ViewModel(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     def test_view_dynamic(self):
         class ViewModel(torch.nn.Module):
             def forward(self, input, other):
@@ -3182,6 +3399,7 @@
         shape = torch.randn(6, 4)
         self.run_test(ViewModel(), (x, shape))
 
+    @skipIfUnsupportedOpsetVersion([13])
     def test_view_dynamic_zero_dim(self):
         class ViewModel(torch.nn.Module):
             def forward(self, input):
@@ -3294,6 +3512,7 @@
         ind = torch.tensor(-2, dtype=torch.long)
         self.run_test(GetItemModel(), (x, y, z, ind))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @disableScriptTest()  # torch.nonzero(x, as_tuple=True) is not scriptable.
     @skipIfUnsupportedMinOpsetVersion(9)
     def test_nonzero(self):
@@ -3340,6 +3559,7 @@
         self.run_test(LenModel(), x, input_names=['input'], dynamic_axes={'input': {0: 'seq'}},
                       test_with_inputs=(torch.randn(5, 5),))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     def test_len_list(self):
         class LenListModel(torch.jit.ScriptModule):
@@ -3393,6 +3613,7 @@
         x = torch.randn(5, 4, 3)
         self.run_test(torch.jit.script(SplitModel3()), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     @disableScriptTest()
     def test_split_size_as_list(self):
@@ -3409,6 +3630,7 @@
         split_sizes = [torch.tensor(2), torch.tensor(4)]
         self.run_test(SplitModel(), (x, split_sizes))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     def test_split_size_with_slice(self):
         class SplitModule(torch.nn.Module):
@@ -3479,6 +3701,7 @@
         x = torch.randn(4, 5, 6)
         self.run_test(ConcatDynamicModel(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     def test_stack(self):
         class StackModel(torch.nn.Module):
             def forward(self, x, y, z):
@@ -3489,6 +3712,7 @@
         z = torch.randn(3, 4, 5)
         self.run_test(StackModel(), (x, y, z))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     def test_stack_dynamic(self):
         class StackDynamicModel(torch.jit.ScriptModule):
@@ -3527,6 +3751,7 @@
         inputs = torch.zeros(1, 2, 3, dtype=torch.long)
         self.run_test(model, inputs)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     def test_loop_with_list(self):
         class ListLoopModel(torch.jit.ScriptModule):
@@ -3564,6 +3789,7 @@
         x = torch.randn(5, 3, 3)
         self.run_test(model, x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     def test_loop_multi_dim(self):
         class LoopMultiDimModel(torch.jit.ScriptModule):
@@ -3578,6 +3804,7 @@
         y = torch.ones(1, dtype=torch.long)
         self.run_test(model, (x, y))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     def test_list(self):
         class ListModel(torch.jit.ScriptModule):
@@ -3599,6 +3826,7 @@
         inputs = torch.randn(16, 1)
         self.run_test(model, inputs)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     def test_tensor_factories(self):
         class TensorFactory(torch.nn.Module):
@@ -3608,6 +3836,7 @@
         x = torch.randn(2, 3, 4)
         self.run_test(TensorFactory(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     def test_tensor_factories_script(self):
         class TensorFactory(torch.jit.ScriptModule):
@@ -3618,6 +3847,7 @@
         x = torch.randn(2, 3, 4)
         self.run_test(TensorFactory(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     def test_tensor_like_factories_script(self):
         class TensorFactory(torch.jit.ScriptModule):
@@ -3630,6 +3860,7 @@
         x = torch.randn(2, 3, 4)
         self.run_test(TensorFactory(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     def test_eye(self):
         class TensorFactory(torch.nn.Module):
@@ -3652,6 +3883,7 @@
         x = torch.randn(2, 3, 4)
         self.run_test(Zero_(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     def test_new_zeros(self):
         class Zero_(torch.nn.Module):
@@ -3675,6 +3907,7 @@
         x = torch.randn(2, 3)
         self.run_test(List(), (x,))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     @disableScriptTest()
     def test_list_pass(self):
@@ -3714,6 +3947,7 @@
         y = torch.randn(1, 2, 3)
         self.run_test(List(), (x, y))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     def test_new_empty(self):
         class Emtpy(torch.nn.Module):
@@ -3723,6 +3957,7 @@
         x = torch.randn(2, 3, 4)
         self.run_test(Emtpy(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     def test_new_full(self):
         class Full(torch.nn.Module):
@@ -3732,6 +3967,7 @@
         x = torch.randn(2, 3, 4)
         self.run_test(Full(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     def test_inplace_list(self):
         class Arithmetic(torch.jit.ScriptModule):
@@ -3828,6 +4064,7 @@
         x = torch.arange(16).view(2, 2, 4).to(torch.float32)
         self.run_test(MaskedFillModel2(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     def test_masked_scatter(self):
         class MaskedScatterModel(torch.nn.Module):
@@ -3846,6 +4083,7 @@
         x = torch.randn(3, 4, 5, requires_grad=True)
         self.run_test(MaskedSelectModel(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     @disableScriptTest()  # dtype not available
     def test_index_put_to_masked_fill(self):
@@ -3860,6 +4098,7 @@
         constant = torch.tensor(5, dtype=torch.float)
         self.run_test(MaskedFillModel(), (mask, constant))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     @disableScriptTest()  # dtype not available
     def test_index_put_to_masked_scatter(self):
@@ -3980,6 +4219,7 @@
         x = torch.randn(4, 2, 3, requires_grad=True)
         self.run_test(NormModel(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     def test_unfold(self):
         class UnfoldModel(torch.nn.Module):
             def forward(self, x):
@@ -3992,6 +4232,7 @@
                       input_names=['x'],
                       test_with_inputs=[y])
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfONNXShapeInference(False)
     def test_unfold_infer_shape(self):
         class UnfoldModule(torch.jit.ScriptModule):
@@ -4007,6 +4248,17 @@
         x = torch.randn(32, 3, 64)
         self.run_test(UnfoldModule(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
+    @skipIfUnsupportedMinOpsetVersion(12)
+    def test_unfold_dynamic_inputs(self):  # unfold whose size/step are read from x.shape, not literals
+        class UnfoldModel(torch.nn.Module):
+            def forward(self, x):
+                return x.unfold(dimension=2, size=x.shape[1], step=x.shape[1] - 1)
+
+        x = torch.randn(4, 2, 4, requires_grad=True)  # x.shape[1] == 2, so size=2 and step=1 here
+        self.run_test(UnfoldModel(), x)
+
+    @skipIfUnsupportedOpsetVersion([13])
     def test_prelu(self):
         class PReluModel(torch.nn.Module):
             def __init__(self):
@@ -4058,6 +4310,7 @@
         x = torch.randint(10, (2, 3))
         self.run_test(FModModel(), x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     def test_glu(self):
         class GluModel(torch.nn.Module):
@@ -4141,7 +4394,7 @@
 
     @disableScriptTest()  # error in propagate as assign input shape
     @skipIfUnsupportedMinOpsetVersion(10)
-    @skipIfUnsupportedOpsetVersion([12])  # Due to ONNX Loop shape inference issue
+    @skipIfUnsupportedOpsetVersion([12, 13])  # Due to ONNX Loop shape inference issue
     def test_embedding_bag(self):
         model = torch.nn.EmbeddingBag(10, 5, mode='sum', scale_grad_by_freq=True)
         input = torch.randint(10, (7,))
@@ -4157,9 +4410,8 @@
         input = torch.randint(10, (7, 5))
         self.run_test(model, (input))
 
-    @disableScriptTest()  # scripting prim::Uninitialized, prim::dtype, prim::unchecked_cast
-    @skipIfUnsupportedMinOpsetVersion(10)
-    @skipIfUnsupportedOpsetVersion([12])  # Due to ONNX Loop shape inference issue
+    @skipIfUnsupportedMinOpsetVersion(11)
+    @skipIfUnsupportedOpsetVersion([12, 13])  # Due to ONNX Loop shape inference issue
     def test_embedding_bag_1d_per_sample_weights(self):
         class EmbeddingModel(torch.nn.Module):
             def forward(self, embedding_matrix, input, offset, weights):
@@ -4173,9 +4425,8 @@
         embedding_matrix = torch.rand(10, 15)
         self.run_test(model, (embedding_matrix, x, offset, w))
 
-    @disableScriptTest()  # scripting prim::Uninitialized, prim::dtype, prim::unchecked_cast
-    @skipIfUnsupportedMinOpsetVersion(10)
-    @skipIfUnsupportedOpsetVersion([12])  # Due to ONNX Loop shape inference issue
+    @skipIfUnsupportedMinOpsetVersion(11)
+    @skipIfUnsupportedOpsetVersion([12, 13])  # Due to ONNX Loop shape inference issue
     def test_embedding_bag_2d_per_sample_weights(self):
         class EmbeddingModel(torch.nn.Module):
             def forward(self, embedding_matrix, input, weights):
@@ -4337,6 +4588,7 @@
         model = MyModule()
         self.run_test(model, (x, y))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     def test_ones_bool(self):
         class MyModule(torch.nn.Module):
@@ -4383,6 +4635,7 @@
         self.run_test(model, x)
 
     # Dynamic padding is added in opset 11
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     @disableScriptTest()  # Functional module not scriptable
     def test_pad_types(self):
@@ -4418,6 +4671,124 @@
         self.assertEqual('Unsupported: ONNX export of Pad in opset 9. The sizes of the padding must be constant. ' +
                          'Please try opset version 11.', the_exception.args[0])
 
+    @skipIfUnsupportedMinOpsetVersion(9)
+    def test_if_fold(self):  # every condition below depends only on input rank, numel or dtype
+        class IfFoldModel(torch.nn.Module):
+            def forward(self, y):
+                if y.dim() == 2:  # True for the 2-D (3, 4) input below
+                    y = y + 4
+                    y = y + 2
+                else:
+                    y = y - 1
+                return y
+        x = torch.ones((3, 4), dtype=torch.int)
+        self.run_test(IfFoldModel(), x)
+
+        class IfFoldModel(torch.nn.Module):
+            def forward(self, y):
+                if y.numel() > 1:  # True: the (3, 4) input has 12 elements
+                    y = y + 4
+                else:
+                    y = y + 2
+                return y
+
+        x = torch.ones((3, 4), dtype=torch.int)
+        self.run_test(IfFoldModel(), x)
+
+        class IfFoldModel(torch.nn.Module):
+            def forward(self, y):
+                if y.dim() != 3:  # True for the 2-D input
+                    y = y + 4
+                    y = y + 2
+                else:
+                    return y
+                return y
+
+        x = torch.ones((3, 4), dtype=torch.int)
+        self.run_test(IfFoldModel(), x)
+
+        class IfFoldModel(torch.nn.Module):
+            def forward(self, y):
+                if y.dim() >= 1:  # True for the 2-D input
+                    y = y + 4
+                else:
+                    y = y - 1
+                return y
+
+        x = torch.ones((3, 4), dtype=torch.int)
+        self.run_test(IfFoldModel(), x)
+
+        class IfFoldModel(torch.nn.Module):
+            def forward(self, y):
+                if y.dim() <= 1:  # False for the 2-D input, so the else branch runs
+                    y = y + 4
+                else:
+                    y = y + 2
+                return y
+
+        x = torch.ones((3, 4), dtype=torch.int)
+        self.run_test(IfFoldModel(), x)
+
+        class IfFoldModel(torch.nn.Module):
+            def forward(self, y):
+                if y.dim() < 3 and y.dtype == torch.int:  # both operands True for the int (3, 4) input
+                    y = y + 4
+                    y = y + 2
+                else:
+                    return y
+                return y
+
+        x = torch.ones((3, 4), dtype=torch.int)
+        self.run_test(IfFoldModel(), x)
+
+        class IfFoldModel(torch.nn.Module):
+            def forward(self, y):
+                if y.dim() == 3 and y.dtype == torch.int:  # rank check is False, so the else branch runs
+                    y = y + 4
+                    y = y + 2
+                else:
+                    y = y + 1
+                return y
+
+        x = torch.ones((3, 4), dtype=torch.int)
+        self.run_test(IfFoldModel(), x)
+
+        class IfFoldModel(torch.nn.Module):
+            def forward(self, y):
+                if y.numel() != 0 and y.dim() == 2:  # both operands True for the (3, 4) input
+                    y = y + 4
+                    y = y + 2
+                else:
+                    return y
+                return y
+
+        x = torch.ones((3, 4), dtype=torch.int)
+        self.run_test(IfFoldModel(), x)
+
+        class IfFoldModel(torch.nn.Module):
+            def forward(self, x, y):
+                if x.numel() == y.numel():  # True: both inputs below have 12 elements
+                    y = x + y
+                else:
+                    y = y - x
+                return y
+
+        x = torch.ones((3, 4), dtype=torch.int)
+        y = torch.ones((3, 4), dtype=torch.int)
+        self.run_test(IfFoldModel(), (x, y))
+
+        class IfFoldModel(torch.nn.Module):
+            def forward(self, x, y):
+                if x.numel() != y.numel():  # False: element counts match, so the else branch runs
+                    y = x + y
+                else:
+                    y = y - x
+                return y
+
+        x = torch.ones((3, 4), dtype=torch.int)
+        y = torch.ones((3, 4), dtype=torch.int)
+        self.run_test(IfFoldModel(), (x, y))
+
     @skipIfUnsupportedMinOpsetVersion(11)
     @skipIfONNXShapeInference(False)
     def test_uninitialized(self):
@@ -4469,6 +4840,7 @@
         x = torch.randn(2, 2, 4, 4)
         self.run_test(model, x)
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(11)
     def test_im2col(self):
         class Unfold(torch.nn.Module):
@@ -4492,6 +4864,7 @@
 
     # This test checks output scalar type in the ONNX graph should not be null
     # https://github.com/pytorch/pytorch/issues/28607
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(10)
     def test_trace_script(self):
         @torch.jit.script
@@ -4565,8 +4938,8 @@
         x = torch.randn(3, 4)
         self.run_test(EinsumModelTranspose(), input=(x,))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(12)
-    @disableScriptTest()  # shape/type inference
     def test_crossentropyloss(self):
         for ignore_index in [-100, 1]:
             x = torch.randn(3, 5)
@@ -4664,7 +5037,7 @@
 
         self.run_test(CrossEntropyLossMeanWeight(ignore_index), input=(x, y))
 
-
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     def test_kldiv_loss(self):
 
@@ -4731,8 +5104,8 @@
 
         self.run_test(KLDivLossMiniBatchMean(), input=(x, y))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(12)
-    @disableScriptTest()  # shape/type inference
     def test_nllloss(self):
         class NLLModel(torch.nn.Module):
             def __init__(self):
@@ -4752,8 +5125,8 @@
         target[target == 1] = -100
         self.run_test(NLLModel(), (input, target))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(12)
-    @disableScriptTest()  # shape/type inference
     def test_nllloss_2d_none(self):
         class NLLModel(torch.nn.Module):
             def __init__(self):
@@ -4774,8 +5147,8 @@
         target[target == 1] = -100
         self.run_test(NLLModel(), (input, target))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(12)
-    @disableScriptTest()  # shape/type inference
     def test_nllloss_2d_mean(self):
         class NLLModel(torch.nn.Module):
             def __init__(self):
@@ -4796,8 +5169,8 @@
         target[target == 1] = -100
         self.run_test(NLLModel(), (input, target))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(12)
-    @disableScriptTest()  # shape/type inference
     def test_nllloss_2d_sum(self):
         class NLLModel(torch.nn.Module):
             def __init__(self):
@@ -4818,8 +5191,8 @@
         target[target == 1] = -100
         self.run_test(NLLModel(), (input, target))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(12)
-    @disableScriptTest()  # shape/type inference
     def test_nllloss_2d_mean_weights(self):
         class NLLModel(torch.nn.Module):
             def __init__(self):
@@ -4840,8 +5213,8 @@
         target[target == 1] = -100
         self.run_test(NLLModel(), (input, target))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(12)
-    @disableScriptTest()  # shape/type inference
     def test_nllloss_2d_mean_ignore_index(self):
         class NLLModel(torch.nn.Module):
             def __init__(self):
@@ -4859,8 +5232,8 @@
         target = torch.empty(N, 8, 8, dtype=torch.long).random_(0, C)
         self.run_test(NLLModel(), (input, target))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(12)
-    @disableScriptTest()  # shape/type inference
     def test_nllloss_2d_mean_ignore_index_weights(self):
         class NLLModel(torch.nn.Module):
             def __init__(self):
@@ -4888,6 +5261,7 @@
         mat2 = torch.randn(3, 3)
         self.run_test(M(), input=(mat1, mat2))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)  # Because where op is not supported for opset < 9.
     def test_where_with_bool_tensor(self):
         class M(torch.nn.Module):
@@ -4899,6 +5273,7 @@
         mat2 = torch.ones(2, 3)
         self.run_test(M(), input=(mat1, mat2))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)  # Because where op is not supported for opset < 9.
     def test_where_with_byte_tensor(self):
         class M(torch.nn.Module):
@@ -5001,6 +5376,7 @@
         z = torch.ones(2, 3, 1)
         self.run_test(Model(), (x, y, z))
 
+    @skipIfUnsupportedOpsetVersion([13])
     @skipIfUnsupportedMinOpsetVersion(9)
     def test_where_condition(self):
         class Model1(torch.nn.Module):
@@ -5035,6 +5411,49 @@
         x = torch.randn(1, 2, 3, requires_grad=True)
         self.run_test(EmptyBranchModel(), x)
 
+    def test_derive_index(self):  # loops whose start/stop are derived from len(x), with steps of +/-2 and +/-3
+        class MyModule(torch.nn.Module):
+            def forward(self, x: torch.Tensor):
+                j = []
+                for idx in range(len(x) - 1, -len(x), -2):  # len(x) == 5 below: idx = 4, 2, 0, -2, -4
+                    y = x[idx]
+                    j += [x * y]
+                return j
+
+        x = torch.randn(5, 13)
+        self.run_test(MyModule(), x)
+
+        class MyModule(torch.nn.Module):
+            def forward(self, x: torch.Tensor):
+                j = []
+                for idx in range(-len(x), len(x) - 1, 2):  # len(x) == 5 below: idx = -5, -3, -1, 1, 3
+                    y = x[idx]
+                    j += [x * y]
+                return j
+
+        x = torch.randn(5, 13)
+        self.run_test(MyModule(), x)
+
+        class MyModule(torch.nn.Module):
+            def forward(self, x: torch.Tensor):
+                j = []
+                for idx in range(len(x) - 1, -len(x), -3):  # with len(x) == 5: idx = 4, 1, -2
+                    y = x[idx]
+                    j += [x * y]
+                return j
+
+        self.run_test(MyModule(), x)  # reuses the (5, 13) tensor created for the previous case
+
+        class MyModule(torch.nn.Module):
+            def forward(self, x: torch.Tensor):
+                j = []
+                for idx in range(-len(x), len(x) - 1, 3):  # with len(x) == 5: idx = -5, -2, 1
+                    y = x[idx]
+                    j += [x * y]
+                return j
+
+        self.run_test(MyModule(), x)  # reuses the same (5, 13) tensor again
+
     @skipIfONNXShapeInference(False)
     @skipIfUnsupportedMinOpsetVersion(11)
     def test_if_transpose(self):
@@ -5051,6 +5470,25 @@
                       output_names=['output_1'],
                       dynamic_axes={'output_1': [0, 1]})
 
+    @skipIfONNXShapeInference(False)
+    @skipIfUnsupportedMinOpsetVersion(13)
+    @skipIfUnsupportedOpsetVersion([13])  # NOTE(review): with min opset 13, likely never runs yet - confirm
+    def test_if_list(self):  # list built inside an if/else and returned through torch.stack
+        class IfModel(torch.nn.Module):
+            def forward(self, x, y, cond):
+                res = []
+                if cond:
+                    res = res + [x]
+                else:
+                    res = res + [y]
+                # TODO: remove torch.stack once graph sequence output is supported.
+                return torch.stack(res)
+
+        x = torch.randn(2, 3)
+        y = torch.randn(3, 3)  # first dim differs from x: (3, 3) vs (2, 3)
+        cond = torch.tensor(1, dtype=torch.bool)  # scalar True, so eager execution takes the branch appending x
+        self.run_test(torch.jit.script(IfModel()), (x, y, cond))
+
     def test_onnx_proto_checker(self):
         class Model(torch.nn.Module):
             def __init__(self):
@@ -5226,6 +5664,7 @@
         x = torch.randn(6, 4, 3, 3)
         self.run_test(FakeQuantizePerTensorModel(), (x))
 
+    @skipIfUnsupportedOpsetVersion([13])
     def test_batchnorm_training(self):
         class MyModule(torch.nn.Module):
             def __init__(self):
@@ -5349,6 +5788,7 @@
 
         np.testing.assert_allclose(ratio_pytorch, ratio_ort, rtol=0.01, atol=0.01)
 
+    @skipIfUnsupportedOpsetVersion([13])
     def test_conv_bn(self):
         class MyModule(torch.nn.Module):
             def __init__(self):
@@ -5421,6 +5861,296 @@
         [np.testing.assert_allclose(ort_out1, ort_out2, atol=1e-7, rtol=0.001) for ort_out1, ort_out2 in
          zip(ort_outs1, ort_outs2)]
 
+    @skipIfUnsupportedOpsetVersion([13])
+    def test_initializer_sequence(self):
+        class MyModule(torch.nn.Module):
+            def __init__(self, input_size, hidden_size, num_classes):
+                super(MyModule, self).__init__()
+                self.fc1 = torch.nn.Linear(input_size, hidden_size)
+                self.relu = torch.nn.ReLU()
+                self.fc2 = torch.nn.Linear(hidden_size, num_classes)
+
+            def forward(self, x):
+                out = self.fc1(x)
+                out = self.relu(out)
+                out = self.fc2(out)
+                return out
+
+        test_model = MyModule(3, 4, 10)
+        state_dict_list = [k for (k, v) in test_model.state_dict().items()]
+        named_params_list = [k for (k, v) in test_model.named_parameters()]
+
+        x = torch.randn(32, 3)
+        f = io.BytesIO()
+        torch.onnx._export(test_model, (x,), f, _retain_param_name=True)
+        loaded_model = onnx.load_from_string(f.getvalue())
+
+        actual_list = [p.name for p in loaded_model.graph.initializer]
+        assert actual_list == state_dict_list, \
+            "Initializers' sequence is not as same as state_dict(). Expected: (" \
+            + ', '.join(state_dict_list) + "). Actual:(" + ', '.join(actual_list) + ")."
+        assert actual_list == named_params_list, \
+            "Initializers' sequence is not as same as named_parameters(). Expected: (" \
+            + ', '.join(named_params_list) + "). Actual:(" + ', '.join(actual_list) + ")."
+
+    def test_initializer_sequence_script_model(self):
+        def list_is_expected(short_list, long_list) -> bool:
+            if (len(short_list) > len(long_list)):
+                return False
+
+            for i in range(len(short_list)):
+                if (short_list[i] not in long_list[i]):
+                    return False
+
+            return True
+
+        def loop(x, y):
+            for i in range(int(y)):
+                x = x + i
+            return x
+
+        class MyModule(torch.nn.Module):
+            def __init__(self, input_size, hidden_size, num_classes):
+                super(MyModule, self).__init__()
+                self.fc1 = torch.nn.Linear(input_size, hidden_size)
+                self.relu = torch.nn.ReLU()
+                self.fc2 = torch.nn.Linear(hidden_size, num_classes)
+
+            def forward(self, x, y):
+                x = loop(x, y)
+                out = self.fc1(x)
+                out = self.relu(out)
+                out = self.fc2(out)
+                return out
+
+        test_model = torch.jit.script(MyModule(3, 4, 10))
+        state_dict_list = [k for (k, v) in test_model.state_dict().items()]
+        named_params_list = [k for (k, v) in test_model.named_parameters()]
+
+        x = torch.ones(2, 3, dtype=torch.float)
+        y = torch.tensor(5, dtype=torch.long)
+        example_output = (test_model(x, y),)
+        f = io.BytesIO()
+
+        torch.onnx.export(test_model, (x, y), f, example_outputs=example_output, _retain_param_name=True)
+        loaded_model = onnx.load_from_string(f.getvalue())
+
+        actual_list = [p.name for p in loaded_model.graph.initializer]
+        assert list_is_expected(state_dict_list, actual_list), \
+            "ScriptModel - Initializers' sequence is not as same as state_dict(). Expected: (" \
+            + ', '.join(state_dict_list) + "). Actual:(" + ', '.join(actual_list) + ")."
+        assert list_is_expected(named_params_list, actual_list), \
+            "ScriptModel - Initializers' sequence is not as same as named_parameters(). Expected: (" \
+            + ', '.join(named_params_list) + "). Actual:(" + ', '.join(actual_list) + ")."
+
+    @skipIfUnsupportedOpsetVersion([13])
+    @skipIfUnsupportedMinOpsetVersion(11)
+    def test_nms(self):
+        boxes = torch.rand(5, 4)
+        boxes[:, 2:] += torch.rand(5, 2)
+        scores = torch.randn(5)
+
+        class Module(torch.nn.Module):
+            def forward(self, boxes, scores):
+                return ops.nms(boxes, scores, 0.5)
+
+        self.run_test(Module(), (boxes, scores))
+
+    @skipIfUnsupportedOpsetVersion([13])
+    @skipIfUnsupportedMinOpsetVersion(11)
+    def test_clip_boxes_to_image(self):
+        boxes = torch.randn(5, 4) * 500
+        boxes[:, 2:] += boxes[:, :2]
+        size = torch.randn(200, 300)
+
+        size_2 = torch.randn(300, 400)
+
+        class Module(torch.nn.Module):
+            def forward(self, boxes, size):
+                return ops.boxes.clip_boxes_to_image(boxes, size.shape)
+
+        self.run_test(Module(), (boxes, size),
+                      input_names=["boxes", "size"],
+                      dynamic_axes={"size": [0, 1]},
+                      test_with_inputs=[(boxes, size_2)])
+
+    @skipIfUnsupportedOpsetVersion([13])
+    @skipIfUnsupportedMinOpsetVersion(11)
+    def test_roi_align(self):
+        x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
+        single_roi = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32)
+        model = ops.RoIAlign((5, 5), 1, 2)
+        self.run_test(model, (x, single_roi))
+
+    @skipIfUnsupportedOpsetVersion([13])
+    @skipIfUnsupportedMinOpsetVersion(11)
+    def test_roi_align_aligned(self):
+        x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
+        single_roi = torch.tensor([[0, 1.5, 1.5, 3, 3]], dtype=torch.float32)
+        model = ops.RoIAlign((5, 5), 1, 2, aligned=True)
+        self.run_test(model, (x, single_roi))
+
+        x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
+        single_roi = torch.tensor([[0, 0.2, 0.3, 4.5, 3.5]], dtype=torch.float32)
+        model = ops.RoIAlign((5, 5), 0.5, 3, aligned=True)
+        self.run_test(model, (x, single_roi))
+
+        x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
+        single_roi = torch.tensor([[0, 0.2, 0.3, 4.5, 3.5]], dtype=torch.float32)
+        model = ops.RoIAlign((5, 5), 1.8, 2, aligned=True)
+        self.run_test(model, (x, single_roi))
+
+        x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
+        single_roi = torch.tensor([[0, 0.2, 0.3, 4.5, 3.5]], dtype=torch.float32)
+        model = ops.RoIAlign((2, 2), 2.5, 0, aligned=True)
+        self.run_test(model, (x, single_roi))
+
+    @skipIfUnsupportedMinOpsetVersion(11)
+    def test_roi_pool(self):
+        x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
+        rois = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32)
+        pool_h = 5
+        pool_w = 5
+        model = ops.RoIPool((pool_h, pool_w), 2)
+        self.run_test(model, (x, rois))
+
+    @skipIfUnsupportedOpsetVersion([13])
+    @skipIfUnsupportedMinOpsetVersion(11)
+    def test_resize_images(self):
+        class TransformModule(torch.nn.Module):
+            def __init__(self):
+                super(TransformModule, self).__init__()
+                self.transform = _init_test_generalized_rcnn_transform()
+
+            def forward(self, images):
+                return self.transform.resize(images, None)[0]
+
+        input = torch.rand(3, 10, 20)
+        input_test = torch.rand(3, 100, 150)
+        self.run_test(TransformModule(), (input,),
+                      input_names=["input1"], dynamic_axes={"input1": [0, 1, 2]},
+                      test_with_inputs=[(input_test,)])
+
+    @skipIfUnsupportedOpsetVersion([13])
+    @skipIfUnsupportedMinOpsetVersion(11)
+    def test_transform_images(self):
+
+        class TransformModule(torch.nn.Module):
+            def __init__(self):
+                super(TransformModule, self).__init__()
+                self.transform = _init_test_generalized_rcnn_transform()
+
+            def forward(self, images):
+                return self.transform(images)[0].tensors
+
+        input = torch.rand(3, 100, 200), torch.rand(3, 200, 200)
+        input_test = torch.rand(3, 100, 200), torch.rand(3, 200, 200)
+        self.run_test(TransformModule(), (input,), test_with_inputs=[(input_test,)])
+
+    def get_features(self, images):
+        s0, s1 = images.shape[-2:]
+        features = [
+            ('0', torch.rand(2, 256, s0 // 4, s1 // 4)),
+            ('1', torch.rand(2, 256, s0 // 8, s1 // 8)),
+            ('2', torch.rand(2, 256, s0 // 16, s1 // 16)),
+            ('3', torch.rand(2, 256, s0 // 32, s1 // 32)),
+            ('4', torch.rand(2, 256, s0 // 64, s1 // 64)),
+        ]
+        features = OrderedDict(features)
+        return features
+
+    @skipIfUnsupportedOpsetVersion([13])
+    @skipIfUnsupportedMinOpsetVersion(11)
+    def test_rpn(self):
+        class RPNModule(torch.nn.Module):
+            def __init__(self):
+                super(RPNModule, self).__init__()
+                self.rpn = _init_test_rpn()
+
+            def forward(self, images, features):
+                images = ImageList(images, [i.shape[-2:] for i in images])
+                return self.rpn(images, features)
+
+        images = torch.rand(2, 3, 150, 150)
+        features = self.get_features(images)
+        images2 = torch.rand(2, 3, 80, 80)
+        test_features = self.get_features(images2)
+
+        model = RPNModule()
+        model.eval()
+        model(images, features)
+        self.run_test(model, (images, features),
+                      input_names=["input1", "input2", "input3", "input4", "input5", "input6"],
+                      dynamic_axes={"input1": [0, 1, 2, 3], "input2": [0, 1, 2, 3],
+                                    "input3": [0, 1, 2, 3], "input4": [0, 1, 2, 3],
+                                    "input5": [0, 1, 2, 3], "input6": [0, 1, 2, 3]},
+                      test_with_inputs=[(images2, test_features)],
+                      dict_check=False)
+
+    @skipIfUnsupportedOpsetVersion([13])
+    @skipIfUnsupportedMinOpsetVersion(11)
+    def test_multi_scale_roi_align(self):
+
+        class TransformModule(torch.nn.Module):
+            def __init__(self):
+                super(TransformModule, self).__init__()
+                self.model = ops.MultiScaleRoIAlign(['feat1', 'feat2'], 3, 2)
+                self.image_sizes = [(512, 512)]
+
+            def forward(self, input, boxes):
+                return self.model(input, boxes, self.image_sizes)
+
+        i = OrderedDict()
+        i['feat1'] = torch.rand(1, 5, 64, 64)
+        i['feat2'] = torch.rand(1, 5, 16, 16)
+        boxes = torch.rand(6, 4) * 256
+        boxes[:, 2:] += boxes[:, :2]
+
+        i1 = OrderedDict()
+        i1['feat1'] = torch.rand(1, 5, 64, 64)
+        i1['feat2'] = torch.rand(1, 5, 16, 16)
+        boxes1 = torch.rand(6, 4) * 256
+        boxes1[:, 2:] += boxes1[:, :2]
+
+        self.run_test(TransformModule(), (i, [boxes],), test_with_inputs=[(i1, [boxes1],)])
+
+    @skipIfUnsupportedOpsetVersion([13])
+    @skipIfUnsupportedMinOpsetVersion(11)
+    def test_roi_heads(self):
+        class RoiHeadsModule(torch.nn.Module):
+            def __init__(self):
+                super(RoiHeadsModule, self).__init__()
+                self.transform = _init_test_generalized_rcnn_transform()
+                self.rpn = _init_test_rpn()
+                self.roi_heads = _init_test_roi_heads_faster_rcnn()
+
+            def forward(self, images, features):
+                original_image_sizes = [img.shape[-2:] for img in images]
+                images = ImageList(images, [i.shape[-2:] for i in images])
+                proposals, _ = self.rpn(images, features)
+                detections, _ = self.roi_heads(features, proposals, images.image_sizes)
+                detections = self.transform.postprocess(detections,
+                                                        images.image_sizes,
+                                                        original_image_sizes)
+                return detections
+
+        images = torch.rand(2, 3, 100, 100)
+        features = self.get_features(images)
+        images2 = torch.rand(2, 3, 150, 150)
+        test_features = self.get_features(images2)
+
+        model = RoiHeadsModule()
+        model.eval()
+        model(images, features)
+
+        self.run_test(model, (images, features),
+                      input_names=["input1", "input2", "input3", "input4", "input5", "input6"],
+                      dynamic_axes={"input1": [0, 1, 2, 3], "input2": [0, 1, 2, 3], "input3": [0, 1, 2, 3],
+                                    "input4": [0, 1, 2, 3], "input5": [0, 1, 2, 3], "input6": [0, 1, 2, 3]},
+                      test_with_inputs=[(images2, test_features)],
+                      dict_check=False)
+
+
 def make_test(name, base, layer, bidirectional, initial_state,
               variable_length, dropout,
               **extra_kwargs):
@@ -5432,6 +6162,7 @@
 
     # Cannot export with older opsets because of 'ConstantFill' op
     # ConstantFill was a temp op removed at opset 8. This is no longer supported by onnxruntime
+    @skipIfUnsupportedOpsetVersion([13])
     @disableScriptTest()  # Test code not scriptable
     @skipIfUnsupportedMinOpsetVersion(9)
     def f(self):
@@ -5556,6 +6287,12 @@
                                     dict(TestONNXRuntime.__dict__, opset_version=12,
                                          keep_initializers_as_inputs=False))
 
+# opset 13 tests
+TestONNXRuntime_opset13 = type(str("TestONNXRuntime_opset13"),
+                               (unittest.TestCase,),
+                               dict(TestONNXRuntime.__dict__, opset_version=13,
+                                    keep_initializers_as_inputs=False,
+                                    onnx_shape_inference=True))
 
 # opset 9 tests, with use_new_jit_passes=True for using new jit API,
 # and with keep_initializers_as_inputs=False for IR version 4 style export.
diff --git a/tools/build_variables.bzl b/tools/build_variables.bzl
index 33a38d5..783cc40 100644
--- a/tools/build_variables.bzl
+++ b/tools/build_variables.bzl
@@ -530,6 +530,7 @@
     "torch/csrc/jit/passes/onnx/fixup_onnx_controlflow.cpp",
     "torch/csrc/jit/passes/onnx/list_model_parameters.cpp",
     "torch/csrc/jit/passes/onnx/function_substitution.cpp",
+    "torch/csrc/jit/passes/onnx/fold_if_node.cpp",
     "torch/csrc/jit/passes/onnx/helper.cpp",
     "torch/csrc/jit/passes/onnx/peephole.cpp",
     "torch/csrc/jit/passes/onnx/preprocess_for_onnx.cpp",
diff --git a/torch/_C/__init__.pyi.in b/torch/_C/__init__.pyi.in
index 26c57a6..18b3e72 100644
--- a/torch/_C/__init__.pyi.in
+++ b/torch/_C/__init__.pyi.in
@@ -258,7 +258,6 @@
 def _jit_pass_onnx_set_dynamic_input_shape(graph: Graph, dynamic_axes: Dict[str, Dict[_int, str]], input_names: List[str]) -> None: ...
 def _jit_pass_onnx_graph_shape_type_inference(graph: Graph, opset_version: _int) -> None: ...
 def _jit_pass_onnx_assign_output_shape(graph: Graph, tensors: List[Tensor], onnx_shape_inference: _bool = False) -> None: ...
-def _jit_pass_fixup_onnx_loop_node_inputs(n: Node) -> None: ...
 def _jit_pass_onnx_remove_inplace_ops_for_onnx(graph: Graph) -> None: ...
 def _jit_pass_remove_inplace_ops(graph: Graph) -> None: ...
 def _jit_pass_canonicalize_graph_fuser_ops(graph: Graph) -> None: ...
@@ -285,6 +284,7 @@
 def _jit_pass_onnx_peephole(graph: Graph, opset_version: _int, fixed_batch_size: _bool) -> None: ...
 def _jit_pass_dce_allow_deleting_nodes_with_side_effects(graph: Graph) -> None: ...
 def _jit_pass_onnx_function_substitution(graph: Graph) -> None: ...
+def _jit_pass_onnx_fold_if(graph: Graph) -> None: ...
 def _jit_pass_lower_graph(graph: Graph, m: Module) -> Tuple[Graph, List[IValue]]: ...
 def _jit_pass_inline_fork_wait(graph: Graph) -> None: ...
 def _jit_pass_onnx_eval_peephole(graph: Graph, paramsDict: Dict[str, IValue]) -> Dict[str, IValue]: ...
diff --git a/torch/csrc/jit/passes/onnx/fixup_onnx_controlflow.h b/torch/csrc/jit/passes/onnx/fixup_onnx_controlflow.h
index e2097cc..fad7611 100644
--- a/torch/csrc/jit/passes/onnx/fixup_onnx_controlflow.h
+++ b/torch/csrc/jit/passes/onnx/fixup_onnx_controlflow.h
@@ -5,7 +5,6 @@
 namespace torch {
 namespace jit {
 
-void FixupONNXLoopNodeInputs(Node* node);
 std::vector<Value*> FixupONNXControlflowNode(Node* n, int opset_version);
 
 } // namespace jit
diff --git a/torch/csrc/jit/passes/onnx/fold_if_node.cpp b/torch/csrc/jit/passes/onnx/fold_if_node.cpp
new file mode 100644
index 0000000..dbf24bc
--- /dev/null
+++ b/torch/csrc/jit/passes/onnx/fold_if_node.cpp
@@ -0,0 +1,269 @@
+#include <torch/csrc/jit/passes/onnx/fold_if_node.h>
+#include <torch/csrc/jit/passes/onnx/helper.h>
+#include <torch/csrc/jit/passes/onnx/scalar_type_analysis.h>
+#include <torch/torch.h>
+
+#include <c10/util/Optional.h>
+#include <algorithm>
+
+namespace torch {
+namespace jit {
+
+namespace onnx {
+using namespace ::c10::onnx;
+}
+
+// This function determines whether the If node can be folded.
+static bool isStaticCondition(Node* node) {
+  TORCH_INTERNAL_ASSERT(
+      node->kind() == onnx::If || node->kind() == onnx::Not ||
+      node->kind() == onnx::Identity);
+  auto cast_node = node->input()->node();
+  if (cast_node->kind() != onnx::Cast)
+    cast_node = node;
+  auto prev_node = cast_node->input()->node();
+
+  if (prev_node->kind() == onnx::Not || prev_node->kind() == onnx::Identity ||
+      prev_node->kind() == onnx::If)
+    return isStaticCondition(prev_node);
+
+  auto compare_node = prev_node;
+  if (compare_node->kind() == onnx::Equal ||
+      compare_node->kind() == onnx::Greater ||
+      compare_node->kind() == onnx::Less ||
+      compare_node->kind() == onnx::GreaterOrEqual ||
+      compare_node->kind() == onnx::LessOrEqual) {
+    for (size_t i = 0; i < compare_node->inputs().size(); i++) {
+      auto sym = compare_node->inputs()[i]
+                     ->type()
+                     ->cast<TensorType>()
+                     ->symbolic_sizes();
+      if (!(compare_node->inputs()[i]->node()->kind() == onnx::Constant ||
+            compare_node->inputs()[i]->node()->kind() == onnx::Size ||
+            compare_node->inputs()[i]->node()->kind() == onnx::ReduceProd))
+        return false;
+      if (compare_node->inputs()[i]->node()->kind() != onnx::Constant) {
+        auto shape_node = compare_node->inputs()[i]->node()->input()->node();
+        auto shape =
+            shape_node->input()->type()->cast<TensorType>()->symbolic_sizes();
+
+        // ONNX shape and type inference cannot determine the shape of the input
+        if (!shape.rank())
+          return false;
+
+        // If dynamic_axes are used on inputs to ReduceProd node, don't fold If
+        // node
+        auto dynamic_axes = shape.isComplete();
+        if (!dynamic_axes &&
+            compare_node->inputs()[i]->node()->kind() == onnx::ReduceProd)
+          return false;
+      }
+    }
+    return true;
+  } else if (compare_node->kind() == onnx::Constant) {
+    return true;
+  }
+  return false;
+}
+
+// Find the index of `input` in `outputs`; returns nullopt if it is absent.
+static c10::optional<int> findIndex(
+    c10::ArrayRef<torch::jit::Value*> outputs,
+    Value* input) {
+  c10::optional<int> idx = c10::nullopt;
+  for (size_t i = 0; i < outputs.size(); i++) {
+    if (input == outputs[i]) {
+      idx = i;
+      break;
+    }
+  }
+  return idx;
+}
+
+// This function returns the value of the constant-folded subblock
+// that is input to the If node.
+static bool constantFoldedConditionValue(Node* node) {
+  TORCH_INTERNAL_ASSERT(node->kind() == onnx::If);
+  // usually Cast node precedes If node in the graph, but
+  // there are some rare scenarios when that is not the case.
+  auto cast_node = node->input()->node();
+  if (cast_node->kind() != onnx::Cast)
+    cast_node = node;
+  auto prev_node = cast_node->input()->node();
+  if (prev_node->kind() == onnx::If) {
+    int cond = 1 - (int)constantFoldedConditionValue(prev_node);
+    Block* block = prev_node->blocks()[cond];
+    auto outputs = cast_node->input()->node()->outputs();
+    auto cast_input = cast_node->input();
+    int idx = findIndex(outputs, cast_input).value();
+    prev_node = block->outputs()[idx]->node();
+  }
+
+  if (prev_node->kind() == onnx::Constant) {
+    const at::Tensor& val = prev_node->t(attr::value);
+    return at::is_nonzero(val);
+  }
+
+  if (prev_node->kind() == onnx::Identity &&
+      prev_node->input()->node()->kind() == onnx::Constant) {
+    auto val = prev_node->input()->node()->t(attr::value);
+    return at::is_nonzero(val);
+  }
+
+  Node* compare_node = nullptr;
+  if (prev_node->kind() == onnx::Not) {
+    compare_node = prev_node->input()->node();
+  } else if (cast_node->inputs().size() > 0) {
+    compare_node = cast_node->input()->node();
+  }
+  TORCH_INTERNAL_ASSERT(compare_node != nullptr);
+  ScalarTypeAnalysisNodeForONNX(compare_node);
+  std::vector<at::Tensor> inputs;
+  for (size_t i = 0; i < compare_node->inputs().size(); i++) {
+    auto input_node = compare_node->inputs()[i]->node();
+    if (input_node->kind() == onnx::Constant) {
+      const at::Tensor& val = input_node->t(attr::value);
+      inputs.push_back(val);
+    } else { // input_node is either onnx::Size or onnx::ReduceProd
+      auto shape_node = input_node->input()->node();
+      auto shape =
+          shape_node->input()->type()->cast<TensorType>()->symbolic_sizes();
+
+      at::Tensor val;
+      if (input_node->kind() == onnx::Size) {
+        auto rank = shape.rank();
+        val = c10::scalar_to_tensor((int64_t)*rank);
+      } else if (input_node->kind() == onnx::ReduceProd) {
+        auto sizes = shape.sizes();
+        int64_t prod = 1;
+        for (int64_t i = 0; i < (int64_t)*shape.rank(); i++) {
+          auto dim = sizes.value()[i].static_size();
+          prod *= dim;
+        }
+        val = c10::scalar_to_tensor(prod);
+      }
+
+      inputs.push_back(val);
+    }
+  }
+
+  at::Tensor res;
+  if (compare_node->kind() == onnx::Equal) {
+    res = at::eq(inputs[0], inputs[1]);
+    if (prev_node->kind() == onnx::Not)
+      res = at::not_equal(inputs[0], inputs[1]);
+  } else if (
+      compare_node->kind() == onnx::Greater && prev_node->kind() != onnx::Not) {
+    res = at::greater(inputs[0], inputs[1]);
+  } else if (
+      (prev_node->kind() == onnx::Not && compare_node->kind() == onnx::Less) ||
+      compare_node->kind() == onnx::GreaterOrEqual) {
+    res = at::greater_equal(inputs[0], inputs[1]);
+  } else if (
+      compare_node->kind() == onnx::Less && prev_node->kind() != onnx::Not) {
+    res = at::less(inputs[0], inputs[1]);
+  } else if (
+      (prev_node->kind() == onnx::Not &&
+       compare_node->kind() == onnx::Greater) ||
+      compare_node->kind() == onnx::LessOrEqual) {
+    res = at::less_equal(inputs[0], inputs[1]);
+  } else {
+    TORCH_INTERNAL_ASSERT(
+        false, "Condition value of the If node could not be constant-folded!");
+  }
+
+  return at::is_nonzero(res);
+}
+
+// This pass replaces the If node with its then or else branch, depending on
+// the value of the constant-folded subblock that is input to the If node.
+//
+// Example:
+// before post pass
+// graph(%y.2 : Int(3, 4, strides=[4, 1], requires_grad=0, device=cpu)):
+//   %4 : Long(2, strides=[1], device=cpu) = onnx::Shape(%y.2)
+//   %5 : Long(device=cpu) = onnx::Size(%4)
+//   %12 : Long(requires_grad=0, device=cpu) = onnx::Constant[value={2}]()
+//   %6 : Bool(device=cpu) = onnx::Equal(%5, %12)
+//   %11 : bool = onnx::Cast[to=9](%6)
+//   %7 : Int(3, 4, strides=[4, 1], device=cpu) = onnx::If(%11)
+//     block0():
+//       %13 : Int(requires_grad=0, device=cpu) = onnx::Constant[value={4}]()
+//       %8 : Int(3, 4, strides=[4, 1], device=cpu) = onnx::Add(%y.2, %13)
+//       %14 : Int(requires_grad=0, device=cpu) = onnx::Constant[value={2}]()
+//       %9 : Int(3, 4, strides=[4, 1], device=cpu) = onnx::Add(%8, %14)
+//       -> (%9)
+//     block1():
+//       %y.1 : Int(3, 4, strides=[4, 1], requires_grad=0, device=cpu) =
+//       onnx::Identity(%y.2)
+//       -> (%y.1)
+//   return (%7)
+
+// after post pass
+// graph(%y.2 : Int(3, 4, strides=[4, 1], requires_grad=0, device=cpu)):
+//   %4 : Long(2, strides=[1], device=cpu) = onnx::Shape(%y.2)
+//   %5 : Long(device=cpu) = onnx::Size(%4)
+//   %12 : Long(requires_grad=0, device=cpu) = onnx::Constant[value={2}]()
+//   %6 : Bool(device=cpu) = onnx::Equal(%5, %12)
+//   %11 : bool = onnx::Cast[to=9](%6)
+//   %13 : Int(requires_grad=0, device=cpu) = onnx::Constant[value={4}]()
+//   %8 : Int(3, 4, strides=[4, 1], device=cpu) = onnx::Add(%y.2, %13)
+//   %14 : Int(requires_grad=0, device=cpu) = onnx::Constant[value={2}]()
+//   %9 : Int(3, 4, strides=[4, 1], device=cpu) = onnx::Add(%8, %14)
+//   return (%9)
+
+static void foldIfNode(Block* b) {
+  for (auto it = b->nodes().begin(), end = b->nodes().end(); it != end; ++it) {
+    for (auto* child_block : it->blocks()) {
+      foldIfNode(child_block);
+    }
+    if (it->kind() == onnx::If) {
+      auto if_node = *it;
+      if (isStaticCondition(if_node)) {
+        Block* then_block = it->blocks()[0];
+        Block* else_block = it->blocks()[1];
+        Block* block = else_block;
+        if (constantFoldedConditionValue(if_node))
+          block = then_block;
+
+        std::vector<Node*> nodes_in_valid_path;
+        for (auto* valid_node : block->nodes()) {
+          nodes_in_valid_path.push_back(valid_node);
+        }
+        Node* cur = if_node;
+        for (auto* valid_node : nodes_in_valid_path) {
+          valid_node->moveAfter(cur);
+          cur = valid_node;
+        }
+        for (size_t i = 0; i < block->return_node()->inputs().size(); ++i) {
+          if_node->outputs()[i]->replaceAllUsesWith(
+              block->return_node()->inputs()[i]);
+        }
+        it->removeAllInputs();
+        it.destroyCurrent();
+      }
+    }
+  }
+}
+
+// This pass folds an If node when its condition (subblock) can be
+// constant-folded. Currently ONNX Runtime runs shape and type inference on
+// both branches of the If operator, regardless of which branch executes at
+// runtime. This can cause runtime errors in some cases:
+// 1. The condition of the If node is based on the shape / size of the input
+// 2. The then and else branches have different return types
+// Folding the If node can prevent these runtime errors in ONNX Runtime.
+void FoldIfNodeONNX(Block* b) {
+  foldIfNode(b);
+}
+
+bool ConditionValueONNX(Node* n) {
+  return constantFoldedConditionValue(n);
+}
+
+bool IsStaticConditionONNX(Node* n) {
+  return isStaticCondition(n);
+}
+
+} // namespace jit
+} // namespace torch
diff --git a/torch/csrc/jit/passes/onnx/fold_if_node.h b/torch/csrc/jit/passes/onnx/fold_if_node.h
new file mode 100644
index 0000000..9180142
--- /dev/null
+++ b/torch/csrc/jit/passes/onnx/fold_if_node.h
@@ -0,0 +1,14 @@
+#pragma once
+
+#include <torch/csrc/jit/ir/ir.h>
+
+namespace torch {
+namespace jit {
+
+void FoldIfNodeONNX(Block* b);
+bool ConditionValueONNX(Node* n);
+bool IsStaticConditionONNX(Node* n);
+
+} // namespace jit
+
+} // namespace torch
diff --git a/torch/csrc/jit/passes/onnx/peephole.cpp b/torch/csrc/jit/passes/onnx/peephole.cpp
index 3a0bc9a..8e23024 100644
--- a/torch/csrc/jit/passes/onnx/peephole.cpp
+++ b/torch/csrc/jit/passes/onnx/peephole.cpp
@@ -697,7 +697,8 @@
         // (%10)
         origLogSoftmaxNode = prev->input(0)->node();
         auto transpose = origLogSoftmaxNode->input(0)->node();
-        origLogSoftmaxNode->replaceInput(0, transpose->inputs().at(0));
+        if (transpose->inputs().size() > 0)
+          origLogSoftmaxNode->replaceInput(0, transpose->inputs().at(0));
       } else if (
           prev->kind() == onnx::Reshape &&
           prev->input(0)->node()->kind() == onnx::Transpose &&
@@ -749,9 +750,6 @@
           // onnx::Reshape(%35, %36) return (%37)
           auto nllloss_output = origNllLossNode->output(0)->uses()[0].user;
           TORCH_INTERNAL_ASSERT(nllloss_output->kind() == onnx::Reshape);
-          TORCH_INTERNAL_ASSERT(
-              nllloss_output->inputs()[1]->node()->kind() ==
-              prim::ListConstruct);
           // make output of reshape the output of nllloss
           nllloss_output->replaceAllUsesWith(origNllLossNode);
           origNllLossNode->output(0)->copyMetadata(nllloss_output->output(0));
diff --git a/torch/csrc/jit/passes/onnx/scalar_type_analysis.cpp b/torch/csrc/jit/passes/onnx/scalar_type_analysis.cpp
index 22a0f67..cffa3a7 100644
--- a/torch/csrc/jit/passes/onnx/scalar_type_analysis.cpp
+++ b/torch/csrc/jit/passes/onnx/scalar_type_analysis.cpp
@@ -271,21 +271,25 @@
       CreateProfiledTensorTypeWithScalarType(output_tensor_type, scalar_type));
 }
 
+static void ImplicitCastNodeForONNX(Node* n) {
+  if (IsImplicitCastSupported(n->kind())) {
+    auto expected_scalar_type = InferExpectedScalarType(n);
+    if (expected_scalar_type) {
+      UpdateScalarTypeForInputs(n, *expected_scalar_type);
+      if (!IsComparisonOp(n->kind())) {
+        UpdateScalarTypeForOutput(n, *expected_scalar_type);
+      }
+    }
+  }
+}
+
 static void ImplicitCastForONNX(Block* block) {
   for (auto it = block->nodes().begin(); it != block->nodes().end(); ++it) {
     for (auto sub : it->blocks()) {
       ImplicitCastForONNX(sub);
     }
 
-    if (IsImplicitCastSupported(it->kind())) {
-      auto expected_scalar_type = InferExpectedScalarType(*it);
-      if (expected_scalar_type) {
-        UpdateScalarTypeForInputs(*it, *expected_scalar_type);
-        if (!IsComparisonOp(it->kind())) {
-          UpdateScalarTypeForOutput(*it, *expected_scalar_type);
-        }
-      }
-    }
+    ImplicitCastNodeForONNX(*it);
   }
   EliminateDeadCode(
       block, true, DCESideEffectPolicy::ALLOW_DELETING_NODES_WITH_SIDE_EFFECTS);
@@ -296,5 +300,9 @@
   ImplicitCastForONNX(graph->block());
 }
 
+void ScalarTypeAnalysisNodeForONNX(Node* n) {
+  ImplicitCastNodeForONNX(n);
+}
+
 } // namespace jit
 } // namespace torch
diff --git a/torch/csrc/jit/passes/onnx/scalar_type_analysis.h b/torch/csrc/jit/passes/onnx/scalar_type_analysis.h
index 65fc227..8a39085 100644
--- a/torch/csrc/jit/passes/onnx/scalar_type_analysis.h
+++ b/torch/csrc/jit/passes/onnx/scalar_type_analysis.h
@@ -6,6 +6,7 @@
 namespace jit {
 
 TORCH_API void ScalarTypeAnalysisForONNX(const std::shared_ptr<Graph>& graph);
+void ScalarTypeAnalysisNodeForONNX(Node* n);
 
 } // namespace jit
-} // namespace torch
\ No newline at end of file
+} // namespace torch
diff --git a/torch/csrc/jit/passes/onnx/shape_type_inference.cpp b/torch/csrc/jit/passes/onnx/shape_type_inference.cpp
index 4c2bf32..89ef8f5 100644
--- a/torch/csrc/jit/passes/onnx/shape_type_inference.cpp
+++ b/torch/csrc/jit/passes/onnx/shape_type_inference.cpp
@@ -1,6 +1,7 @@
 #include <torch/csrc/jit/passes/onnx/shape_type_inference.h>
 
 #include <torch/csrc/jit/jit_log.h>
+#include <torch/csrc/jit/passes/onnx/fold_if_node.h>
 #include <torch/csrc/jit/passes/onnx/helper.h>
 #include <torch/csrc/jit/passes/onnx/scalar_type_analysis.h>
 #include <torch/csrc/jit/serialization/export.h>
@@ -199,78 +200,89 @@
   return true;
 }
 
+Value* CloneValueFromListConstruct(Value* v, std::shared_ptr<Graph> n_graph) {
+  auto lc_node = v->node();
+  TORCH_INTERNAL_ASSERT(lc_node->kind() == ::c10::prim::ListConstruct);
+  // In jit/passes/onnx/peephole.cpp::eraseListConstruct,
+  // prim::ListConstruct is converted to onnx::Concat. The conversion should
+  // eventually be moved to symbolic. For now, treat this operator as a
+  // special case, and change from list type to tensor type. The scalar type
+  // is preserved. If the elemtype is Int, insert an onnx::Concat node into
+  // the graph.
+  TypePtr elem = v->type()->cast<ListType>()->getElementType();
+  c10::optional<at::ScalarType> scalar_type = c10::nullopt;
+  if (elem->cast<IntType>()) {
+    scalar_type = at::kLong;
+
+    auto lc_node = v->node();
+    // ListConstruct Int[] output case: we need to transform it to an ONNX
+    // Concat to ensure the output is a single tensor (dynamic) type that
+    // can be consumed as an input.
+    std::vector<Value*> unsqueezed;
+    for (auto* input : lc_node->inputs()) {
+      Node* unsqueezed_node =
+          n_graph->insertNode(n_graph->create(::c10::onnx::Unsqueeze, 1));
+      auto new_input = n_graph->addInput();
+      new_input->copyMetadata(input);
+      unsqueezed_node->addInput(new_input);
+      unsqueezed_node->is_(attr::axes, {0});
+      unsqueezed.emplace_back(unsqueezed_node->output());
+    }
+    Node* concat_node =
+        n_graph->insertNode(n_graph->create(::c10::onnx::Concat, 1));
+    concat_node->i_(attr::axis, 0);
+    for (auto v : unsqueezed) {
+      concat_node->addInput(v);
+    }
+    return concat_node->output();
+  } else if (elem->cast<FloatType>()) {
+    scalar_type = at::kFloat;
+  } else if (elem->cast<BoolType>()) {
+    scalar_type = at::kBool;
+  } else if (auto t_type = elem->cast<TensorType>()) {
+    scalar_type = t_type->scalarType();
+  }
+
+  auto input = n_graph->addInput();
+  if (scalar_type) {
+    auto v_type = TensorType::create(
+        scalar_type.value(),
+        at::kCPU,
+        c10::SymbolicShape(),
+        c10::VaryingShape<c10::Stride>{},
+        {});
+    input->setType(v_type);
+  }
+  return input;
+}
+
 // Clone the node n for the new graph.
 Node* CloneNodeToGraph(Node* n, std::shared_ptr<Graph> n_graph) {
   auto clone_node = n_graph->createClone(n, [&n_graph](Value* v) {
     auto v_n = v->node();
-    if (v_n->kind() == ::c10::onnx::Constant) {
-      // Clone the input if it is constant.
-      auto constant_n = n_graph->insertNode(
-          n_graph->createClone(v_n, [](Value* v) { return v; }));
-      return constant_n->output();
-    } else if (v_n->kind() == ::c10::prim::ListConstruct) {
-      // In jit/passes/onnx/peephole.cpp::eraseListConstruct,
-      // prim::ListConstruct is converted to onnx::Concat. The conversion should
-      // eventually be moved to symbolic. For now, treat this operator as
-      // special case, and change from list type to tensor type. The scalar type
-      // is preserved. If the elemtype is Int, insert a onnx::Concat node into
-      // the graph.
-      TypePtr elem = v->type()->cast<ListType>()->getElementType();
-      c10::optional<at::ScalarType> scalar_type = c10::nullopt;
-      if (elem->cast<IntType>()) {
-        scalar_type = at::kLong;
-
-        auto lc_node = v->node();
-        // ListConstruct Int[] output case, we need to transform to ONNX
-        // Concat to ensure the output is a single tensor(dynamic) type in
-        // order to be consumed as inputs
-        std::vector<Value*> unsqueezed;
-        for (auto* input : lc_node->inputs()) {
-          Node* unsqueezed_node =
-              n_graph->insertNode(n_graph->create(::c10::onnx::Unsqueeze, 1));
-          auto new_input = n_graph->addInput();
-          new_input->copyMetadata(input);
-          unsqueezed_node->addInput(new_input);
-          unsqueezed_node->is_(attr::axes, {0});
-          unsqueezed.emplace_back(unsqueezed_node->output());
-        }
-        Node* concat_node =
-            n_graph->insertNode(n_graph->create(::c10::onnx::Concat, 1));
-        concat_node->i_(attr::axis, 0);
-        for (auto v : unsqueezed) {
-          concat_node->addInput(v);
-        }
-        return concat_node->output();
-      } else if (elem->cast<FloatType>()) {
-        scalar_type = at::kFloat;
-      } else if (elem->cast<BoolType>()) {
-        scalar_type = at::kBool;
-      } else if (auto t_type = elem->cast<TensorType>()) {
-        scalar_type = t_type->scalarType();
+    switch (v_n->kind()) {
+      case ::c10::onnx::Constant: {
+        // Clone the input if it is constant.
+        auto constant_n = n_graph->insertNode(
+            n_graph->createClone(v_n, [](Value* v) { return v; }));
+        return constant_n->output();
       }
-
-      auto input = n_graph->addInput();
-      if (scalar_type) {
-        auto v_type = TensorType::create(
-            scalar_type.value(),
-            at::kCPU,
-            c10::SymbolicShape(),
-            c10::VaryingShape<c10::Stride>{},
-            {});
-        input->setType(v_type);
+      case ::c10::prim::ListConstruct: {
+        return CloneValueFromListConstruct(v, n_graph);
       }
-      return input;
-    } else if (v_n->kind() == ::c10::prim::PackPadded) {
-      auto input = n_graph->addInput();
-      input->copyMetadata(v_n->input(0));
-      return input;
-    } else {
-      // If the input is not constant, we cannot depend on its value
-      // in shape inference. Set it to graph input in the new graph,
-      // and copy over metadata, such as datatype and shape.
-      auto input = n_graph->addInput();
-      input->copyMetadata(v);
-      return input;
+      case ::c10::prim::PackPadded: {
+        auto input = n_graph->addInput();
+        input->copyMetadata(v_n->input(0));
+        return input;
+      }
+      default: {
+        // If the input is not constant, we cannot depend on its value
+        // in shape inference. Set it to graph input in the new graph,
+        // and copy over metadata, such as datatype and shape.
+        auto input = n_graph->addInput();
+        input->copyMetadata(v);
+        return input;
+      }
     }
   });
   return clone_node;
@@ -325,9 +337,41 @@
   }
 }
 
+// This function checks whether the blocks of an If node have the same return
+// type.
+bool IsBlockReturnTypeSame(Node* n) {
+  TORCH_INTERNAL_ASSERT(n->kind() == ::c10::onnx::If);
+  auto then_block = n->blocks()[0];
+  auto else_block = n->blocks()[1];
+  for (size_t i = 0; i < n->outputs().size(); i++) {
+    // check the type
+    auto then_block_type = then_block->outputs()[i]->type();
+    auto else_block_type = else_block->outputs()[i]->type();
+    if (then_block_type->cast<TensorType>() &&
+        else_block_type->cast<TensorType>()) {
+      if (then_block_type->cast<TensorType>()->scalarType() !=
+          else_block_type->cast<TensorType>()->scalarType()) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
 // Any additional post process that are specific to individual node kind.
 void SpecialPostProcess(Node* n) {
   switch (n->kind()) {
+    case ::c10::onnx::If: {
+      if (!IsBlockReturnTypeSame(n) && IsStaticConditionONNX(n)) {
+        auto cond = ConditionValueONNX(n);
+        auto block_idx = cond ? 0 : 1;
+        for (size_t i = 0; i < n->outputs().size(); i++) {
+          n->outputs()[i]->setType(
+              n->blocks()[block_idx]->outputs()[i]->type());
+        }
+      }
+      break;
+    }
     case ::c10::onnx::SequenceInsert: {
       // Special case when input sequence to SequenceInsert is empty.
       // onnx Sequence type requires element type to be set.
@@ -378,6 +422,26 @@
   }
 }
 
+void FetchBlockInputMetadataFromParent(Block* b) {
+  auto n = b->owningNode();
+  if (nullptr != n && n->kind() == ::c10::onnx::Loop) {
+    // Copy node input metadata to subgraph input.
+    for (size_t i = 0; i < n->inputs().size(); ++i) {
+      b->inputs().at(i)->copyMetadata(n->inputs().at(i));
+    }
+  }
+}
+
+void ONNXShapeTypeInference(Block* b, int opset_version) {
+  FetchBlockInputMetadataFromParent(b);
+  for (auto n : b->nodes()) {
+    for (auto subblock : n->blocks()) {
+      ONNXShapeTypeInference(subblock, opset_version);
+    }
+    ONNXShapeTypeInference(n, opset_version);
+  }
+}
+
 } // namespace
 
 void ONNXShapeTypeInference(Node* n, int opset_version) {
@@ -464,7 +528,10 @@
         continue;
       }
 
-      auto shape = input_tensor_type->symbolic_sizes().sizes().value();
+      auto shape_ref = input_tensor_type->symbolic_sizes().sizes();
+      TORCH_CHECK(
+          shape_ref.has_value(), "Input tensor shape should have value.");
+      auto shape = shape_ref.value();
 
       for (auto pair : axes_names) {
         auto axis = pair.first;
@@ -472,6 +539,10 @@
         if (name_to_sym.find(name) == name_to_sym.end()) {
           name_to_sym[name] = ::c10::ShapeSymbol::newSymbol();
         }
+        TORCH_CHECK(
+            axis < shape.size(),
+            "Dynamic shape axis should be no more than the shape dimension for ",
+            name);
         shape[axis] = name_to_sym[name];
       }
 
@@ -497,9 +568,7 @@
 }
 
 void ONNXShapeTypeInference(std::shared_ptr<Graph>& graph, int opset_version) {
-  for (auto n : graph->nodes()) {
-    ONNXShapeTypeInference(n, opset_version);
-  }
+  ONNXShapeTypeInference(graph->block(), opset_version);
 }
 
 } // namespace jit
diff --git a/torch/csrc/jit/python/init.cpp b/torch/csrc/jit/python/init.cpp
index 99a0330..8a3bdba 100644
--- a/torch/csrc/jit/python/init.cpp
+++ b/torch/csrc/jit/python/init.cpp
@@ -40,6 +40,7 @@
 #include <torch/csrc/jit/passes/onnx/eliminate_unused_items.h>
 #include <torch/csrc/jit/passes/onnx/eval_peephole.h>
 #include <torch/csrc/jit/passes/onnx/fixup_onnx_controlflow.h>
+#include <torch/csrc/jit/passes/onnx/fold_if_node.h>
 #include <torch/csrc/jit/passes/onnx/function_substitution.h>
 #include <torch/csrc/jit/passes/onnx/list_model_parameters.h>
 #include <torch/csrc/jit/passes/onnx/peephole.h>
@@ -153,6 +154,11 @@
       .def("_jit_pass_lower_all_tuples", LowerAllTuples)
       .def("_jit_pass_onnx_function_substitution", ONNXFunctionCallSubstitution)
       .def(
+          "_jit_pass_onnx_fold_if",
+          [](std::shared_ptr<Graph>& graph) {
+            return FoldIfNodeONNX(graph->block());
+          })
+      .def(
           "_jit_pass_onnx_peephole",
           [](std::shared_ptr<Graph>& graph,
              int opset_version,
@@ -478,7 +484,6 @@
           })
       .def("_jit_pass_onnx_block", BlockToONNX)
       .def("_jit_pass_fixup_onnx_controlflow_node", FixupONNXControlflowNode)
-      .def("_jit_pass_fixup_onnx_loop_node_inputs", FixupONNXLoopNodeInputs)
       .def("_jit_pass_canonicalize_graph_fuser_ops", CanonicalizeOps)
       .def("_jit_pass_decompose_ops", DecomposeOps)
       .def("_jit_pass_specialize_autogradzero", specializeAutogradZero)
@@ -955,7 +960,7 @@
           "get_record",
           [](PyTorchStreamReader& self, const std::string& key) {
             at::DataPtr data;
-            size_t size;
+            size_t size = 0;
             std::tie(data, size) = self.getRecord(key);
             return py::bytes(reinterpret_cast<const char*>(data.get()), size);
           })
@@ -1210,7 +1215,7 @@
       auto fork_node = graph->insertNode(graph->create(prim::TracedFork, 1));
       auto body_block = fork_node->addBlock();
 
-      Value* node_output;
+      Value* node_output = nullptr;
       py::object py_func_output;
       // Insert new trace ops into the fork op's sub-block
       WithInsertPoint guard(body_block);
diff --git a/torch/csrc/jit/python/python_arg_flatten.cpp b/torch/csrc/jit/python/python_arg_flatten.cpp
index adb77ea..ea7fac0 100644
--- a/torch/csrc/jit/python/python_arg_flatten.cpp
+++ b/torch/csrc/jit/python/python_arg_flatten.cpp
@@ -67,9 +67,9 @@
     args.desc.structure.push_back(D::NoneType);
   } else {
     std::string msg =
-        "Only tuples, lists and Variables supported as JIT inputs/outputs. "
-        "Dictionaries and strings are also accepted but their usage is not "
-        "recommended. But got unsupported type ";
+        "Only tuples, lists and Variables are supported as JIT inputs/outputs. "
+        "Dictionaries and strings are also accepted, but their usage is not "
+        "recommended. Here, received an input of unsupported type: ";
     msg += THPUtils_typename(obj);
     throw std::runtime_error(msg);
   }
diff --git a/torch/csrc/jit/serialization/export.cpp b/torch/csrc/jit/serialization/export.cpp
index 2afccb3..ae0f41f 100644
--- a/torch/csrc/jit/serialization/export.cpp
+++ b/torch/csrc/jit/serialization/export.cpp
@@ -221,6 +221,14 @@
       bool use_external_data_format = false,
       const std::string& onnx_file_path = std::string());
 
+  void AddInitializersIntoGraphProto(
+      onnx::GraphProto* graph_proto,
+      const Block* block,
+      const std::map<std::string, at::Tensor>& initializers =
+          std::map<std::string, at::Tensor>(),
+      bool use_external_data_format = false,
+      const std::string& onnx_file_path = std::string());
+
   virtual void EncodeTensor(
       onnx::TensorProto* tensor_proto,
       const at::Tensor& tensor,
@@ -553,14 +561,33 @@
           onnx_file_path);
     }
   }
+  AddInitializersIntoGraphProto(
+      graph_proto,
+      block,
+      initializers,
+      use_external_data_format,
+      onnx_file_path);
+}
+
+void EncoderBase::AddInitializersIntoGraphProto(
+    onnx::GraphProto* graph_proto,
+    const Block* block,
+    const std::map<std::string, at::Tensor>& initializers,
+    bool use_external_data_format,
+    const std::string& onnx_file_path) {
   AT_ASSERT(block->inputs().size() >= initializers.size());
-  for (auto& name_tensor_pair : initializers) {
+
+  for (auto input : block->inputs()) {
+    auto name_tensor_pair = initializers.find(input->debugName());
+    if (name_tensor_pair == initializers.end()) {
+      continue;
+    }
     auto p = graph_proto->add_initializer();
-    p->set_name(name_tensor_pair.first);
+    p->set_name(name_tensor_pair->first);
     EncodeTensor(
         p,
-        name_tensor_pair.second,
-        name_tensor_pair.first,
+        name_tensor_pair->second,
+        name_tensor_pair->first,
         use_external_data_format,
         onnx_file_path);
   }
diff --git a/torch/onnx/__init__.py b/torch/onnx/__init__.py
index e1ee00d..5b3aaea 100644
--- a/torch/onnx/__init__.py
+++ b/torch/onnx/__init__.py
@@ -173,7 +173,7 @@
             opset version of the onnx submodule. Since ONNX's latest opset may
             evolve before next stable release, by default we export to one stable
             opset version. Right now, supported stable opset version is 9.
-            The opset_version must be _onnx_master_opset or in _onnx_stable_opsets
+            The opset_version must be _onnx_main_opset or in _onnx_stable_opsets
             which are defined in torch/onnx/symbolic_helper.py
         do_constant_folding (bool, default False): If True, the constant-folding
             optimization is applied to the model during export. Constant-folding
diff --git a/torch/onnx/symbolic_helper.py b/torch/onnx/symbolic_helper.py
index 4cc3f47..187dcfc 100644
--- a/torch/onnx/symbolic_helper.py
+++ b/torch/onnx/symbolic_helper.py
@@ -553,7 +553,7 @@
 
 
 _default_onnx_opset_version = 9
-_onnx_master_opset = 10
+_onnx_main_opset = 13
 _onnx_stable_opsets = [7, 8, 9, 10, 11, 12]
 _export_onnx_opset_version = _default_onnx_opset_version
 
@@ -563,7 +563,7 @@
     if opset_version == _default_onnx_opset_version:
         _export_onnx_opset_version = opset_version
         return
-    if opset_version in _onnx_stable_opsets + [_onnx_master_opset]:
+    if opset_version in _onnx_stable_opsets + [_onnx_main_opset]:
         _export_onnx_opset_version = opset_version
         return
     raise ValueError("Unsupported ONNX opset version: " + str(opset_version))
diff --git a/torch/onnx/symbolic_opset11.py b/torch/onnx/symbolic_opset11.py
index 8a729d2..85c7bf9 100644
--- a/torch/onnx/symbolic_opset11.py
+++ b/torch/onnx/symbolic_opset11.py
@@ -898,6 +898,7 @@
         return sym_help._onnx_unsupported('embedding_bag with scale_grad_by_freq for training mode')
 
     loop_condition = g.op("Constant", value_t=torch.tensor(1))
+    loop_condition = g.op("Cast", loop_condition, to_i=9)
     zero = g.op("Constant", value_t=torch.tensor([0]))
 
     indices_len = g.op("Unsqueeze",
@@ -918,6 +919,7 @@
 
     loop_block = _add_block(loop.node())
     block_input_iter = _add_input_to_block(loop_block)
+    cond = _add_input_to_block(loop_block)
 
     indices_start = loop_block.op("Gather", offsets_starts, block_input_iter, axis_i=0)
     indices_end = loop_block.op("Gather", offsets_ends, block_input_iter, axis_i=0)
@@ -940,10 +942,9 @@
     else:
         embeddings = loop_block.op("ReduceMax", embeddings, axes_i=[0], keepdims_i=0)
 
-    _add_output_to_block(loop_block, loop_condition)
+    cond_out = loop_block.op("Cast", loop_condition, to_i=9)
+    _add_output_to_block(loop_block, cond_out)
     _add_output_to_block(loop_block, embeddings)
-    # This pass does all required type casting for loop inputs (condition and iter)
-    torch._C._jit_pass_fixup_onnx_loop_node_inputs(loop.node())
 
     # aten::embedding_bag returns a tuple of 4 elements: output, offset2bag, bag_size, max_indices.
     # But the last three outputs are not used in torch.nn.EmbeddingBag or torch.nn.functional.embedding_bag.
diff --git a/torch/onnx/symbolic_opset12.py b/torch/onnx/symbolic_opset12.py
index e3a6c41..cd67fd5 100644
--- a/torch/onnx/symbolic_opset12.py
+++ b/torch/onnx/symbolic_opset12.py
@@ -1,7 +1,8 @@
 
 import torch
 import torch.onnx.symbolic_helper as sym_help
-from torch.onnx.symbolic_helper import parse_args, _parse_arg
+from torch.onnx.symbolic_helper import parse_args, _parse_arg, _unimplemented
+from torch.onnx.utils import _add_block, _add_input_to_block, _add_output_to_block
 
 
 # EDITING THIS FILE? READ THIS FIRST!
@@ -92,3 +93,63 @@
 
 def le(g, input, other):
     return g.op('LessOrEqual', input, other)
+
+@parse_args('v', 'i', 'v', 'v')
+def unfold(g, input, dimension, size, step):
+    size = sym_help._maybe_get_const(size, 'i')
+    step = sym_help._maybe_get_const(step, 'i')
+    if not sym_help._is_value(size) and not sym_help._is_value(step):
+        from torch.onnx.symbolic_opset9 import unfold as _unfold
+        return _unfold(g, input, dimension, size, step)
+    if sym_help._operator_export_type == torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK:
+        return g.op("ATen", input, operator_s="unfold", dimension_i=dimension, size_i=size, step_i=step)
+
+    sizedim = sym_help._get_tensor_dim_size(input, dimension)
+    if sizedim is not None:
+        low_start = g.op("Constant", value_t=torch.tensor(0))
+        low_end = g.op("Constant", value_t=torch.tensor(sizedim))
+        hi_end = g.op("Constant", value_t=torch.tensor(sizedim + 1))
+        low_indices = g.op("Range", low_start, low_end, step)
+        hi_indices = g.op("Range", size, hi_end, step)
+
+        low_size = sym_help._size_helper(g, low_indices, g.op("Constant", value_t=torch.tensor(0)))
+        hi_size = sym_help._size_helper(g, hi_indices, g.op("Constant", value_t=torch.tensor(0)))
+
+        ndim = sym_help._get_tensor_rank(input)
+        perm = list(range(0, ndim))
+        perm.append(perm.pop(dimension))
+
+        unsqueeze_list = []
+        loop_condition = g.op("Constant", value_t=torch.tensor(1))
+        loop_condition = g.op("Cast", loop_condition, to_i=9)
+        loop_len = g.op("Min", low_size, hi_size)
+        loop = g.op("Loop", loop_len, loop_condition)
+
+        loop_block = _add_block(loop.node())
+        block_input_iter = _add_input_to_block(loop_block)
+        cond = _add_input_to_block(loop_block)
+
+        starts = loop_block.op("Gather", low_indices, block_input_iter)
+        ends = loop_block.op("Gather", hi_indices, block_input_iter)
+        axes = loop_block.op("Constant", value_t=torch.tensor([2]))
+        starts = loop_block.op("Unsqueeze", starts, axes_i=[0])
+        ends = loop_block.op("Unsqueeze", ends, axes_i=[0])
+        stack = loop_block.op("Slice", input, starts, ends, axes)
+
+        unsqueeze = loop_block.op("Unsqueeze", loop_block.op("Transpose", stack, perm_i=perm), axes_i=[dimension])
+        unsqueeze_list.append(unsqueeze)
+        concat = loop_block.op("Concat", *unsqueeze_list, axis_i=0)
+
+        cond_out = loop_block.op("Cast", loop_condition, to_i=9)
+        _add_output_to_block(loop_block, cond_out)
+        _add_output_to_block(loop_block, concat)
+
+        loop_output = loop.node().output()
+        perm = [0, 1, 2, 3, 4]
+        perm[0], perm[dimension + 1] = perm[dimension + 1], perm[0]
+        transpose = g.op("Transpose", loop_output, perm_i=perm)
+        squeeze = g.op("Squeeze", transpose, axes_i=[0])
+
+        return squeeze
+    else:
+        return _unimplemented("Unfold", "input size not accessible")
diff --git a/torch/onnx/symbolic_opset13.py b/torch/onnx/symbolic_opset13.py
new file mode 100644
index 0000000..001a201
--- /dev/null
+++ b/torch/onnx/symbolic_opset13.py
@@ -0,0 +1,110 @@
+# EDITING THIS FILE? READ THIS FIRST!
+# see Note [Edit Symbolic Files] in symbolic_helper.py
+
+# This file exports ONNX ops for opset 13
+from torch.onnx.symbolic_helper import _block_list_in_opset
+import torch
+import torch.onnx.symbolic_helper as sym_help
+from torch.onnx.symbolic_helper import parse_args
+
+block_listed_operators = ['embedding_bag']
+
+for block_listed_op in block_listed_operators:
+    vars()[block_listed_op] = _block_list_in_opset(block_listed_op)
+
+
+@parse_args('v', 'i', 'none')
+def softmax(g, input, dim, dtype=None):
+    softmax = g.op('Softmax', input, axis_i=dim)
+    if dtype and dtype.node().kind() != 'prim::Constant':
+        parsed_dtype = sym_help._get_const(dtype, 'i', 'dtype')
+        softmax = g.op("Cast", softmax, to_i=sym_help.scalar_type_to_onnx[parsed_dtype])
+
+    return softmax
+
+
+@parse_args('v', 'i', 'none')
+def log_softmax(g, input, dim, dtype=None):
+    return_op = g.op("LogSoftmax", input, axis_i=dim)
+    if dtype and dtype.node().kind() != 'prim::Constant':
+        parsed_dtype = sym_help._get_const(dtype, 'i', 'dtype')
+        return_op = g.op("Cast", return_op, to_i=sym_help.scalar_type_to_onnx[parsed_dtype])
+    return return_op
+
+
+@parse_args('v', 'v', 'i')
+def frobenius_norm(g, self, dim=None, keepdim=False):
+    dim_val = sym_help._maybe_get_const(dim, 'is')
+    if not sym_help._is_value(dim_val) and len(dim_val) == 0:
+        return g.op("ReduceL2", self, keepdims_i=0)
+    sqr = g.op('Mul', self, self)
+    sumsqr = g.op('ReduceSum', sqr, dim, keepdims_i=keepdim)
+    return g.op('Sqrt', sumsqr)
+
+
+@parse_args('v', 'v', 'i', 'i')
+def split(g, self, split_size_or_sizes, dim, _outputs=None):
+    if not sym_help._is_split_static(split_size_or_sizes, _outputs):
+        split_out = g.op("SplitToSequence", self, split_size_or_sizes, axis_i=dim)
+        if _outputs is None:
+            return split_out
+        # Convert to multiple slice nodes iff number of splits and number of outputs are statically known.
+        if sym_help._is_packed_list(split_size_or_sizes) and \
+                len(sym_help._unpack_list(split_size_or_sizes)) == _outputs:
+            split_sizes = [g.op("Unsqueeze", v, g.op("Constant", value_t=torch.tensor([0])))
+                           for v in sym_help._unpack_list(split_size_or_sizes)]
+            start = g.op("Constant", value_t=torch.tensor([0], dtype=torch.long))
+            axis = g.op("Constant", value_t=torch.tensor([dim], dtype=torch.long))
+            res = []
+            for i in range(_outputs):
+                end = g.op("Add", start, split_sizes[i])  # split_sizes is a list of same length as _outputs
+                res.append(g.op("Slice", self, start, end, axis))
+                start = end
+            return res
+        return [g.op("SequenceAt", split_out, g.op("Constant", value_t=torch.tensor([i], dtype=torch.long)))
+                for i in range(_outputs)]
+
+    split_val = split_size_or_sizes.node()['value']
+    if split_val.dim() > 0:
+        return g.op("Split", self, split_size_or_sizes, axis_i=dim, outputs=_outputs)
+    split_size = sym_help._get_const(split_size_or_sizes, 'i', 'split_size')
+
+    size = self.type().sizes()[dim]
+    splits = [split_size] * (size // split_size)
+    leftover = size % split_size
+    if leftover:
+        splits.append(leftover)
+    splits = g.op("Constant", value_t=torch.tensor(splits))
+    return g.op("Split", self, splits, axis_i=dim, outputs=_outputs)
+
+
+def split_with_sizes(g, self, split_sizes, dim, _outputs=None):
+    return split(g, self, split_sizes, dim, _outputs)
+
+
+def unsafe_split(g, self, split_size_or_sizes, dim, _outputs=None):
+    return split(g, self, split_size_or_sizes, dim, _outputs)
+
+
+def unsafe_split_with_sizes(g, self, split_sizes, dim, _outputs=None):
+    return split_with_sizes(g, self, split_sizes, dim, _outputs)
+
+
+@parse_args('v', 'i', 'i')
+def unbind(g, self, dim=0, _outputs=None):
+    if _outputs is None:
+        return g.op("SplitToSequence",
+                    self,
+                    g.op("Constant", value_t=torch.tensor(1, dtype=torch.long)),
+                    axis_i=dim, keepdims_i=0)
+
+    splits = g.op("Constant", value_t=torch.tensor([1] * _outputs))
+    outputs = g.op("Split", self, splits, axis_i=dim, outputs=_outputs)
+    outputs = [outputs] if _outputs == 1 else outputs
+    squeezed_outputs = [g.op("Squeeze", out, g.op("Constant", value_t=torch.tensor([dim]))) for out in outputs]
+    return squeezed_outputs
+
+
+def glu(g, input, dim):
+    first, second = g.op('Split', input, dim, outputs=2)
+    return g.op('Mul', first, g.op('Sigmoid', second))
diff --git a/torch/onnx/symbolic_opset9.py b/torch/onnx/symbolic_opset9.py
index 2b72add..ada7318 100644
--- a/torch/onnx/symbolic_opset9.py
+++ b/torch/onnx/symbolic_opset9.py
@@ -610,6 +610,10 @@
         return g.op("Gather", self, index, axis_i=dim)
 
 
+def square(g, self):
+    return g.op("Mul", self, self)
+
+
 def squeeze(g, self, dim=None):
     if dim is None:
         return g.op("Squeeze", self)
@@ -2327,10 +2331,28 @@
     return g.op("Constant", value_t=torch.BoolTensor([0]))
 
 
+def __isnot_(g, self, other):
+    if sym_help._is_none(other):
+        if sym_help._is_none(self):
+            return g.op("Constant", value_t=torch.BoolTensor([0]))
+        return g.op("Constant", value_t=torch.BoolTensor([1]))
+    return ne(g, self, other)
+
+
+# prim::unchecked_cast exists to refine the type of a Value:
+# if x is an optional Tensor, unchecked_cast will cast
+# x to Tensor, so the rest of the graph knows that x is a Tensor.
+# It does nothing at runtime and is therefore a no-op in ONNX.
+def prim_unchecked_cast(g, self):
+    return self
+
+
 def prim_dtype(g, self):
     dtype = sym_help._try_get_scalar_type(self)
+    if dtype is None:
+        dtype = "Float"
     dtype = sym_help.scalar_type_to_onnx.index(sym_help.cast_pytorch_to_onnx[dtype])
-    return g.op("Constant", value_t=torch.IntTensor([dtype]))
+    return g.op("Constant", value_t=torch.tensor(dtype))
 
 
 # tolist is currently supported only for 1D input tensors.
@@ -2819,3 +2841,21 @@
         if offset:
             ind = g.op("Add", ind, g.op("Constant", torch.tensor([offset])))
         return g.op("Gather", self_1d, ind)
+
+
+def __derive_index(g, index, start, step):
+    return g.op("Add", start, g.op("Mul", index, step))
+
+
+# Source code for aten op can be found here: pytorch/torch/csrc/jit/runtime/register_prim_ops.cpp
+# if (step > 0 && lo < hi) {
+#   push(stack, 1 + (hi - 1 - lo) / step);
+# } else if (step < 0 && lo > hi) {
+#   push(stack, 1 + (lo - 1 - hi) / (0 - step));
+# } else {
+#  push(stack, 0);
+# }
+def __range_length(g, lo, hi, step):
+    sub = g.op("Sub", hi, lo)
+    div = g.op("Ceil", true_divide(g, sub, step))
+    return g.op("Cast", div, to_i=sym_help.cast_pytorch_to_onnx['Long'])
diff --git a/torch/onnx/symbolic_registry.py b/torch/onnx/symbolic_registry.py
index c059e8f..f748f63 100644
--- a/torch/onnx/symbolic_registry.py
+++ b/torch/onnx/symbolic_registry.py
@@ -12,8 +12,8 @@
 _registry: Dict[Tuple[str, int], Dict] = {}
 
 _symbolic_versions: Dict[Union[int, str], Any] = {}
-from torch.onnx.symbolic_helper import _onnx_stable_opsets
-for opset_version in _onnx_stable_opsets:
+from torch.onnx.symbolic_helper import _onnx_stable_opsets, _onnx_main_opset
+for opset_version in _onnx_stable_opsets + [_onnx_main_opset]:
     module = importlib.import_module('torch.onnx.symbolic_opset{}'.format(opset_version))
     _symbolic_versions[opset_version] = module
 
@@ -91,7 +91,7 @@
 
 def get_op_supported_version(opname, domain, version):
     iter_version = version
-    while iter_version <= _onnx_stable_opsets[-1]:
+    while iter_version <= _onnx_main_opset:
         ops = [op[0] for op in get_ops_in_version(iter_version)]
         if opname in ops:
             return iter_version
diff --git a/torch/onnx/utils.py b/torch/onnx/utils.py
index 7872e78..4e98608 100644
--- a/torch/onnx/utils.py
+++ b/torch/onnx/utils.py
@@ -207,6 +207,8 @@
         torch._C._jit_pass_onnx_scalar_type_analysis(graph)
         torch._C._jit_pass_lint(graph)
 
+        torch._C._jit_pass_onnx_fold_if(graph)
+
         from torch.onnx.symbolic_helper import _export_onnx_opset_version
         torch._C._jit_pass_onnx_peephole(graph, _export_onnx_opset_version, fixed_batch_size)
         torch._C._jit_pass_lint(graph)
@@ -1136,9 +1138,9 @@
         raise RuntimeError("Failed to register operator {}. The domain {} is already a used domain."
                            .format(symbolic_name, ns))
     import torch.onnx.symbolic_registry as sym_registry
-    from torch.onnx.symbolic_helper import _onnx_stable_opsets
+    from torch.onnx.symbolic_helper import _onnx_stable_opsets, _onnx_main_opset
 
-    for version in _onnx_stable_opsets:
+    for version in _onnx_stable_opsets + [_onnx_main_opset]:
         if version >= opset_version:
             sym_registry.register_op(op_name, symbolic_fn, ns, version)