Move ONNX integration tests from onnx-fb-universe to PyTorch repo (#7397)

* Move ONNX integration tests from onnx-fb-universe to PyTorch repo

* Switch to using torchvision

* Delete the single RNN operator tests; they are already covered by the e2e tests in test_caffe2.py

* Mirror the fix in onnx-fb-universe to bypass the CUDA check:

https://github.com/onnxbot/onnx-fb-universe/commit/667326d84b31183f5cacdcba4b3861150bd79ad6
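
For reference, a typical local workflow with these scripts might look like the
following (hypothetical invocation; the -p/--parallel flag is defined in
scripts/onnx/test.sh below):

    ./scripts/onnx/install-develop.sh   # build caffe2, onnx, and pytorch in develop mode
    ./scripts/onnx/test.sh --parallel   # run test/onnx under pytest with 3 xdist workers
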
diff --git a/scripts/onnx/install-develop.sh b/scripts/onnx/install-develop.sh
new file mode 100755
index 0000000..7a61210
--- /dev/null
+++ b/scripts/onnx/install-develop.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+set -ex
+
+# realpath might not be available on macOS
+script_path=$(python -c "import os; import sys; print(os.path.realpath(sys.argv[1]))" "${BASH_SOURCE[0]}")
+top_dir=$(dirname "$(dirname "$(dirname "$script_path")")")
+tp2_dir="$top_dir/third_party"
+
+pip install ninja
+
+# Install caffe2
+pip install -r "$top_dir/caffe2/requirements.txt"
+python setup_caffe2.py develop
+
+# Install onnx
+pip install -e "$tp2_dir/onnx"
+
+# Install pytorch
+pip install -r "$top_dir/requirements.txt"
+python setup.py build develop
diff --git a/scripts/onnx/install.sh b/scripts/onnx/install.sh
new file mode 100755
index 0000000..a253642
--- /dev/null
+++ b/scripts/onnx/install.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+set -ex
+
+# realpath might not be available on macOS
+script_path=$(python -c "import os; import sys; print(os.path.realpath(sys.argv[1]))" "${BASH_SOURCE[0]}")
+top_dir=$(dirname "$(dirname "$(dirname "$script_path")")")
+tp2_dir="$top_dir/third_party"
+BUILD_DIR="$top_dir/build"
+mkdir -p "$BUILD_DIR"
+
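+# Wrapper around `pip install`: on CI it zeroes ccache stats first (when
+# sccache is not in use), times the install, and reports sccache/ccache stats.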
+_pip_install() {
+    if [[ -n "$CI" ]]; then
+        if [[ -z "${SCCACHE_BUCKET}" ]]; then
+            ccache -z
+        fi
+    fi
+    if [[ -n "$CI" ]]; then
+        time pip install "$@"
+    else
+        pip install "$@"
+    fi
+    if [[ -n "$CI" ]]; then
+        if [[ -n "${SCCACHE_BUCKET}" ]]; then
+            sccache --show-stats
+        else
+            ccache -s
+        fi
+    fi
+}
+
+pip install -r "$top_dir/caffe2/requirements.txt"
+python setup_caffe2.py install
+
+# Install onnx
+_pip_install -b "$BUILD_DIR/onnx" "file://$tp2_dir/onnx#egg=onnx"
+
+# Install pytorch
+pip install -r "$top_dir/requirements.txt"
+_pip_install -b "$BUILD_DIR/pytorch" "file://$top_dir#egg=torch"
diff --git a/scripts/onnx/test.sh b/scripts/onnx/test.sh
new file mode 100755
index 0000000..e58d1fc
--- /dev/null
+++ b/scripts/onnx/test.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+set -ex
+
+UNKNOWN=()
+
+# defaults
+PARALLEL=0
+
+while [[ $# -gt 0 ]]
+do
+    arg="$1"
+    case $arg in
+        -p|--parallel)
+            PARALLEL=1
+            shift # past argument
+            ;;
+        *) # unknown option
+            UNKNOWN+=("$1") # save it in an array for later
+            shift # past argument
+            ;;
+    esac
+done
+set -- "${UNKNOWN[@]}" # leave UNKNOWN
+
+pip install pytest
+if [[ $PARALLEL == 1 ]]; then
+    pip install pytest-xdist
+fi
+
+# realpath might not be available on macOS
+script_path=$(python -c "import os; import sys; print(os.path.realpath(sys.argv[1]))" "${BASH_SOURCE[0]}")
+top_dir=$(dirname "$(dirname "$(dirname "$script_path")")")
+test_paths=(
+    "$top_dir/test/onnx"
+)
+
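+# Run pytest under catchsegv (where available) to get a backtrace on segfaults.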
+if hash catchsegv 2>/dev/null; then
+    PYTEST="catchsegv pytest"
+else
+    PYTEST="pytest"
+fi
+
+cd "$top_dir"
+if [[ $PARALLEL == 1 ]]; then
+    $PYTEST -n 3 "${test_paths[@]}"
+else
+    $PYTEST "${test_paths[@]}"
+fi
diff --git a/test/onnx/debug_embed_params.py b/test/onnx/debug_embed_params.py
new file mode 100644
index 0000000..e5c3e34
--- /dev/null
+++ b/test/onnx/debug_embed_params.py
@@ -0,0 +1,67 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import sys
+import itertools
+
+# Bail out gracefully if torch is unavailable, so the caffe2-torch test is skipped.
+try:
+    import torch
+    import torch.jit
+    from torch.autograd import Variable
+    import torch.autograd.function as function
+except ImportError:
+    print('Cannot import torch, hence caffe2-torch test will not run.')
+    sys.exit(0)
+
+import onnx
+import caffe2.python.onnx.backend as c2
+from test_pytorch_common import flatten
+
+
+torch.set_default_tensor_type('torch.FloatTensor')
+
+
+def run_embed_params(proto, model, input, state_dict=None, use_gpu=True):
+    """
+    This is only a helper debug function so we can test embed_params=False
+    case as well on pytorch front
+    This should likely be removed from the release version of the code
+    """
+    device = 'CPU'
+    if use_gpu:
+        device = 'CUDA'
+    model_def = onnx.ModelProto.FromString(proto)
+    onnx.checker.check_model(model_def)
+    prepared = c2.prepare(model_def, device=device)
+
+    if state_dict:
+        parameters = []
+        # Passed in state_dict may have a different order.  Make
+        # sure our order is consistent with the model's order.
+        # TODO: Even better: keyword arguments!
+        for k in model.state_dict():
+            if k not in state_dict:
+                # A newer PyTorch Module may add a parameter that an older
+                # pre-trained state_dict lacks; fall back to the model's own value.
+                # TODO: Please don't export unnecessary parameters.
+                parameters.append(model.state_dict()[k])
+            else:
+                parameters.append(state_dict[k])
+    else:
+        parameters = list(model.state_dict().values())
+
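+    # Bind each ONNX graph input (the exported inputs followed by the
+    # parameters) to its backing tensor as a numpy array, keyed by input name.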
+    W = {}
+    for k, v in zip(model_def.graph.input, flatten((input, parameters))):
+        if isinstance(v, Variable):
+            W[k.name] = v.data.cpu().numpy()
+        else:
+            W[k.name] = v.cpu().numpy()
+
+    caffe2_out = prepared.run(inputs=W)
+
+    return caffe2_out
diff --git a/test/onnx/expect/TestOperators.test_add_broadcast.expect b/test/onnx/expect/TestOperators.test_add_broadcast.expect
new file mode 100644
index 0000000..9e01aeb
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_add_broadcast.expect
@@ -0,0 +1,70 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    input: "1"
+    output: "2"
+    op_type: "Add"
+    attribute {
+      name: "broadcast"
+      i: 1
+      type: INT
+    }
+    attribute {
+      name: "axis"
+      i: 1
+      type: INT
+    }
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: DOUBLE
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: DOUBLE
+        shape {
+          dim {
+            dim_value: 3
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "2"
+    type {
+      tensor_type {
+        elem_type: DOUBLE
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_add_size1_broadcast.expect b/test/onnx/expect/TestOperators.test_add_size1_broadcast.expect
new file mode 100644
index 0000000..9dd22ca
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_add_size1_broadcast.expect
@@ -0,0 +1,73 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    input: "1"
+    output: "2"
+    op_type: "Add"
+    attribute {
+      name: "broadcast"
+      i: 1
+      type: INT
+    }
+    attribute {
+      name: "axis"
+      i: 0
+      type: INT
+    }
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: DOUBLE
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: DOUBLE
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "2"
+    type {
+      tensor_type {
+        elem_type: DOUBLE
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_add_size1_right_broadcast.expect b/test/onnx/expect/TestOperators.test_add_size1_right_broadcast.expect
new file mode 100644
index 0000000..9e01aeb
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_add_size1_right_broadcast.expect
@@ -0,0 +1,70 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    input: "1"
+    output: "2"
+    op_type: "Add"
+    attribute {
+      name: "broadcast"
+      i: 1
+      type: INT
+    }
+    attribute {
+      name: "axis"
+      i: 1
+      type: INT
+    }
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: DOUBLE
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: DOUBLE
+        shape {
+          dim {
+            dim_value: 3
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "2"
+    type {
+      tensor_type {
+        elem_type: DOUBLE
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_add_size1_singleton_broadcast.expect b/test/onnx/expect/TestOperators.test_add_size1_singleton_broadcast.expect
new file mode 100644
index 0000000..204ba9d
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_add_size1_singleton_broadcast.expect
@@ -0,0 +1,73 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    input: "1"
+    output: "2"
+    op_type: "Add"
+    attribute {
+      name: "broadcast"
+      i: 1
+      type: INT
+    }
+    attribute {
+      name: "axis"
+      i: 0
+      type: INT
+    }
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: DOUBLE
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: DOUBLE
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 3
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "2"
+    type {
+      tensor_type {
+        elem_type: DOUBLE
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_addconstant.expect b/test/onnx/expect/TestOperators.test_addconstant.expect
new file mode 100644
index 0000000..2d4a555
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_addconstant.expect
@@ -0,0 +1,64 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    output: "1"
+    op_type: "Constant"
+    attribute {
+      name: "value"
+      t {
+        data_type: DOUBLE
+        raw_data: "\000\000\000\000\000\000\360?"
+      }
+      type: TENSOR
+    }
+  }
+  node {
+    input: "0"
+    input: "1"
+    output: "2"
+    op_type: "Add"
+    attribute {
+      name: "broadcast"
+      i: 1
+      type: INT
+    }
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: DOUBLE
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "2"
+    type {
+      tensor_type {
+        elem_type: DOUBLE
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_addmm.expect b/test/onnx/expect/TestOperators.test_addmm.expect
new file mode 100644
index 0000000..76c5271
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_addmm.expect
@@ -0,0 +1,109 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    input: "1"
+    input: "2"
+    output: "3"
+    op_type: "Gemm"
+    attribute {
+      name: "alpha"
+      f: 1
+      type: FLOAT
+    }
+    attribute {
+      name: "beta"
+      f: 1
+      type: FLOAT
+    }
+    attribute {
+      name: "broadcast"
+      i: 1
+      type: INT
+    }
+  }
+  node {
+    input: "0"
+    input: "1"
+    input: "3"
+    output: "4"
+    op_type: "Gemm"
+    attribute {
+      name: "alpha"
+      f: 1
+      type: FLOAT
+    }
+    attribute {
+      name: "beta"
+      f: 1
+      type: FLOAT
+    }
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "2"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "4"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_at_op.expect b/test/onnx/expect/TestOperators.test_at_op.expect
new file mode 100644
index 0000000..f618a92
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_at_op.expect
@@ -0,0 +1,52 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    input: "0"
+    output: "1"
+    op_type: "ATen"
+    attribute {
+      name: "operator"
+      s: "add"
+      type: STRING
+    }
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_basic.expect b/test/onnx/expect/TestOperators.test_basic.expect
new file mode 100644
index 0000000..ebcf22b
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_basic.expect
@@ -0,0 +1,75 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    input: "1"
+    output: "2"
+    op_type: "Add"
+  }
+  node {
+    input: "0"
+    input: "2"
+    output: "3"
+    op_type: "Mul"
+  }
+  node {
+    input: "3"
+    output: "4"
+    op_type: "Tanh"
+  }
+  node {
+    input: "4"
+    output: "5"
+    op_type: "Sigmoid"
+  }
+  node {
+    input: "5"
+    output: "6"
+    op_type: "Neg"
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "6"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_batchnorm.expect b/test/onnx/expect/TestOperators.test_batchnorm.expect
new file mode 100644
index 0000000..f01abbb
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_batchnorm.expect
@@ -0,0 +1,168 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    input: "1"
+    input: "2"
+    input: "3"
+    input: "4"
+    output: "6"
+    op_type: "BatchNormalization"
+    attribute {
+      name: "epsilon"
+      f: 9.99999974737875e-06
+      type: FLOAT
+    }
+    attribute {
+      name: "is_test"
+      i: 1
+      type: INT
+    }
+    attribute {
+      name: "momentum"
+      f: 1
+      type: FLOAT
+    }
+  }
+  name: "torch-jit-export"
+  initializer {
+    dims: 2
+    data_type: FLOAT
+    name: "1"
+    raw_data: "\330=\221>|\037(?"
+  }
+  initializer {
+    dims: 2
+    data_type: FLOAT
+    name: "2"
+    raw_data: "\000\000\000\000\000\000\000\000"
+  }
+  initializer {
+    dims: 2
+    data_type: FLOAT
+    name: "3"
+    raw_data: "\000\000\000\000\000\000\000\000"
+  }
+  initializer {
+    dims: 2
+    data_type: FLOAT
+    name: "4"
+    raw_data: "\000\000\200?\000\000\200?"
+  }
+  initializer {
+    data_type: INT64
+    name: "5"
+    raw_data: "\000\000\000\000\000\000\000\000"
+  }
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "2"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "3"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "4"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "5"
+    type {
+      tensor_type {
+        elem_type: INT64
+        shape {
+        }
+      }
+    }
+  }
+  output {
+    name: "6"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_batchnorm_1d.expect b/test/onnx/expect/TestOperators.test_batchnorm_1d.expect
new file mode 100644
index 0000000..1de9b35
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_batchnorm_1d.expect
@@ -0,0 +1,176 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    output: "6"
+    op_type: "Unsqueeze"
+    attribute {
+      name: "axes"
+      ints: 2
+      type: INTS
+    }
+  }
+  node {
+    input: "6"
+    input: "1"
+    input: "2"
+    input: "3"
+    input: "4"
+    output: "7"
+    op_type: "BatchNormalization"
+    attribute {
+      name: "epsilon"
+      f: 9.99999974737875e-06
+      type: FLOAT
+    }
+    attribute {
+      name: "is_test"
+      i: 1
+      type: INT
+    }
+    attribute {
+      name: "momentum"
+      f: 1
+      type: FLOAT
+    }
+  }
+  node {
+    input: "7"
+    output: "8"
+    op_type: "Squeeze"
+    attribute {
+      name: "axes"
+      ints: 2
+      type: INTS
+    }
+  }
+  name: "torch-jit-export"
+  initializer {
+    dims: 2
+    data_type: FLOAT
+    name: "1"
+    raw_data: "\217~,?b\265\251>"
+  }
+  initializer {
+    dims: 2
+    data_type: FLOAT
+    name: "2"
+    raw_data: "\000\000\000\000\000\000\000\000"
+  }
+  initializer {
+    dims: 2
+    data_type: FLOAT
+    name: "3"
+    raw_data: "\000\000\000\000\000\000\000\000"
+  }
+  initializer {
+    dims: 2
+    data_type: FLOAT
+    name: "4"
+    raw_data: "\000\000\200?\000\000\200?"
+  }
+  initializer {
+    data_type: INT64
+    name: "5"
+    raw_data: "\000\000\000\000\000\000\000\000"
+  }
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "2"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "3"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "4"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "5"
+    type {
+      tensor_type {
+        elem_type: INT64
+        shape {
+        }
+      }
+    }
+  }
+  output {
+    name: "8"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_batchnorm_training.expect b/test/onnx/expect/TestOperators.test_batchnorm_training.expect
new file mode 100644
index 0000000..9bdadb5
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_batchnorm_training.expect
@@ -0,0 +1,172 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    input: "1"
+    input: "2"
+    input: "3"
+    input: "4"
+    output: "6"
+    output: "7"
+    output: "8"
+    output: "batch_norm_dead_output-9"
+    output: "batch_norm_dead_output-10"
+    op_type: "BatchNormalization"
+    attribute {
+      name: "epsilon"
+      f: 9.99999974737875e-06
+      type: FLOAT
+    }
+    attribute {
+      name: "is_test"
+      i: 0
+      type: INT
+    }
+    attribute {
+      name: "momentum"
+      f: 0.899999976158142
+      type: FLOAT
+    }
+  }
+  name: "torch-jit-export"
+  initializer {
+    dims: 2
+    data_type: FLOAT
+    name: "1"
+    raw_data: "\330=\221>|\037(?"
+  }
+  initializer {
+    dims: 2
+    data_type: FLOAT
+    name: "2"
+    raw_data: "\000\000\000\000\000\000\000\000"
+  }
+  initializer {
+    dims: 2
+    data_type: FLOAT
+    name: "3"
+    raw_data: "\315\314\314=\315\314\314="
+  }
+  initializer {
+    dims: 2
+    data_type: FLOAT
+    name: "4"
+    raw_data: "fff?fff?"
+  }
+  initializer {
+    data_type: INT64
+    name: "5"
+    raw_data: "\001\000\000\000\000\000\000\000"
+  }
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "2"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "3"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "4"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "5"
+    type {
+      tensor_type {
+        elem_type: INT64
+        shape {
+        }
+      }
+    }
+  }
+  output {
+    name: "6"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_chunk.expect b/test/onnx/expect/TestOperators.test_chunk.expect
new file mode 100644
index 0000000..2806bd8
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_chunk.expect
@@ -0,0 +1,65 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    output: "1"
+    output: "2"
+    op_type: "Split"
+    attribute {
+      name: "axis"
+      i: 0
+      type: INT
+    }
+    attribute {
+      name: "split"
+      ints: 2
+      ints: 1
+      type: INTS
+    }
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 3
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "2"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_clip.expect b/test/onnx/expect/TestOperators.test_clip.expect
new file mode 100644
index 0000000..97958cc
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_clip.expect
@@ -0,0 +1,56 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    output: "1"
+    op_type: "Clip"
+    attribute {
+      name: "max"
+      f: 0.5
+      type: FLOAT
+    }
+    attribute {
+      name: "min"
+      f: -0.5
+      type: FLOAT
+    }
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_concat2.expect b/test/onnx/expect/TestOperators.test_concat2.expect
new file mode 100644
index 0000000..78e9643
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_concat2.expect
@@ -0,0 +1,68 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    input: "1"
+    output: "2"
+    op_type: "Concat"
+    attribute {
+      name: "axis"
+      i: 1
+      type: INT
+    }
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "2"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 6
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_conv.expect b/test/onnx/expect/TestOperators.test_conv.expect
new file mode 100644
index 0000000..c7be917
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_conv.expect
@@ -0,0 +1,121 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    input: "1"
+    output: "2"
+    op_type: "Conv"
+    attribute {
+      name: "dilations"
+      ints: 1
+      ints: 1
+      type: INTS
+    }
+    attribute {
+      name: "group"
+      i: 1
+      type: INT
+    }
+    attribute {
+      name: "kernel_shape"
+      ints: 3
+      ints: 3
+      type: INTS
+    }
+    attribute {
+      name: "pads"
+      ints: 0
+      ints: 0
+      ints: 0
+      ints: 0
+      type: INTS
+    }
+    attribute {
+      name: "strides"
+      ints: 1
+      ints: 1
+      type: INTS
+    }
+  }
+  name: "torch-jit-export"
+  initializer {
+    dims: 13
+    dims: 16
+    dims: 3
+    dims: 3
+    data_type: FLOAT
+    name: "1"
+    raw_data: "\022\254`=\314$\241\275\"2\032=\342\322:=\022kx=\300<\313\272@\035\234\275@\362w\275\3564n=\000\351\217:\210\305\025\274\026\332c=\316\222I\275;fO\275\276\337\231\275\211 \252=\246\344[=;~`\275\233c\005\275f@\361\274\272\351\\=\310\003\030\275\300\216\337\274\360o\230\274z\262{=\320\n\216\274\000z\366;P\254\217;^\023]=d\326\234\274\027d\202\275\"\027R=\000\2673\275+\220\230=>\r9=\313.w\275h\020\030\2756\312\031\2755\007\243=v\010\001\275\240\026k\274\242/t=\215\347\222=\336e1=i\316\210=f\325\373\274\006\364\005\275\316\215(=0+\035=\246\016K\275K\'\004\275\362\177\001=2Pi=9\271\221==$\231=R^_=\220_\200;\300\305\035\274\256p@=\034\260\375\274\260X\355;\241^\243=\300\361\305:8\236\226\275;\227\013\275\212{.=D\366\213\275\2500\234\275\336%4=\033.\232=L\345\211\274f\344^=\276\351,=\271\274\245=\256e\033\275\310\3441\275\300H\341\272 \200b\274\200\303F\273\360\333U\275\233\021\226=\026da=(#\036\275E\204\236=\253e\246\275P\347%=.*\023=\357\271\203=\2739\221=\300\020\305:\216\3368=\273h1\275\266\360\247\275@\337C\274\254\240\213<Pc\301\274P\245\361;\\0\210\274\000!\245:\n\307\030=\340Ux\274\330\305\010\275\350\373M\274\216\244B\275\250x>\274\006\306h\275O\026\234\275\220\202\207\275J\224e=\345\344\240=\374\337\274\274z\263Y=h\324R<\\:\245\275\242\000n=r\026\032=\274\316\364<\230\261\025<\343\020&\275P\277\322\273x\376$\274N\227\006=P]\344\273\000e\201\272`\023P<\024\324\216<X\232&=\\\221\351<W\255\234\275L\330\323<\205@\227=\376\347D\275\325\r\235=\246kX=\262Jc=&\"o=\333\025\000\275\3343\351\274\236h9=\004\240\313<\340\312\222<\026I\247\275\020\233\251;\340g\366\274\200=\327\274]\260\213=\232<*=L\\\226\274l\265\332\274\003$\242=\343\367\214\275\217\212\247=\000/\215\272R>\216\275\206\312\326\274`\267\013;\036\355\240\275Pdi\275X\210.\274\353-&\275\002q==\3261\001=FUG=Xa8\274\305\031\200=\326\017&\275\030\007\005=\020\262\351;D\333\230\274\203\305\206\275\256\335N=\206\3408=xM{\275oM\224=\300-k\275\345\264\234=\01686\275x\202\014=\350\246\027\274d\325\212\275\020$V\275V\256\257\274r\263k=\203\243\017\275\241w\212=\264\232\327<6\035\003\275HPl\275Ha\213\275\254\337\251<`b_\273D\263\227< \377\347\274\377G\223\275\342Z.=>Ii=8\257M\275\032\325C=LV\360<|\t\344<\306\325\016\275\0262T\275\200\rB\273\033\324\\\275$<\251<\326G\t=@[\204\272\320i\247<\246~%\2752(d=+\366\244\275\324N\251\275\030\262\222\275\2374\217\275 ^\245\2740\t\244\273\276H]=\326\250\312\274\370\017O\275\266\305B=\3064\212\275\316\350t\275V\025\251\275\000\206)\271\240\220\016;\220\034\207;\372\377[=z\026x= \227\005\274\030\350\"\275\216\223Y\275\0208\347;\000C\255;\223j\232\275\222d0=\240l0\273\240\235}\273\277\234\221\275rb0=\235\255\241=\250\202\031<\252\345\\=\203Y\221=Ve\375\274s:)\275\273\034\240=\020\372\220;\300\245\211:\253\250\020\275\000\340\336\270\240\327e<J\376c=\020A\356<;\013?\2750\027\337;\030\355\035\275@\230\302<lu\350<`NA\275e6\244=\312\024r=\300\347\263\274,M\251\275`\177s\274\200%8\274\035\353\221=6Y\020\275\235\212\243=\2302*\275\202\367\242\275J\250\233\275\034\331\272\274uJ\234=L&\365\274d\323\234\275\243\324F\275@\253,\275*\245\225\275\013?\252\275\3510\232=\342\206P=\200\331g<\033\2714\275\3304U\275\277r\230=*:/=\347\035\242\275 \270y\274\256\000\027=nC\033\275\030G<<\266B\033=\334\364\224<\200R\007;6y\204\275\014\221\326<h>3\275\034C\232\274S\232\020\275\254\255\365<\332\350\247\275\300\371\256:\356\3246=\020\230\377<F\226\335\274Fh\003=\220\334\357\273$p\351<\332\247S= 
\365\302\274E~\247=?\013\244=\034?\340<\373\360\226=p\250\251<\303IT\275n\221\002\275\232\340b=\034\311\371<\322\200#=\334\230\274\274\216\314\001=\306\346\342\274>\312H\275\264\001\264<\366b\364\274[;[\2754O\241\275\276\204\021=\340\344F;\222]n=\325%\250=`\017\016\275@\372Z<K\025\215=\355\206\230=\200\272\005;\226\236\235\275P\376j\275\212N\014=\300\273\374\274&+\013=2]q=\032\010\232\275/\366\223=\024\262\227\275c`\217\2759\033\224=\340\302j\273v\031b=\240\315m\273\227\330\210=@W\005\275\274\237\223\274\240\255C\273\333\034\033\275\236\324p\275 \212K\273\242;\022=`\003\023=/3\247\275<\257\255\274\200e\242\274\013\025\226\275\263\267!\275\322\204w=\030\320k\275\001\343\215=\010\364z\274\260\033\242\273\000\224\005\272p|\260;@\231\340\272z\212\006=@\023\036=i\n\215=\320\021\373<d\357\246<\306\321\214\275\tP\223=P`l\275K\"v\275\200\003{<\332/\003=\"\335-=L\246\252\275\217\247\207\275\303hz\275\344N\247\275@i\020=s\220\033\275\030\324_\274\320\231\232<\205\307\222=\270\3757\274\224\317\244<\002\326Q=\216&}=\2400I<\346G\336\274}\323\211=\276\232d=\034\200\237\275\n\344X=0\276\214\274\274\320\235\275b)f=\266f\010=\2000\310;\240\301N\273\223\210\250\275\000\224+\273\235W\222=Fg\026\275\344\177\207\275\276$\031=\250\355L\274\332`B=\234\376\366<\253\221\234=\210\362@\274\004\357\246\274\326\202/=\234\016\355\274\235\310\202=\333(\223\275\306V\342\274\256\031N=x\232C\274 )\023\273\000\361\340\271+LU\275\000\2424\2744)\362<\266OZ\275\361`\210=g\214\246=@\325\274;\300\227\005\274\212{a=E9\217=xXA\274\340\321n;;\247\215\275\300nK\274\030\315i\274p\301\370;`\023^\275bUz=\342\240\243\275h\036%<\330\304[\274\327$\211=\220\260\230;\214L\257\274\372\336M=\211\223\234=T4\216<\260=\212\275v\270`=N\2751=\036.n=T0\306<!p\210=\344\251\353<\206M\n\275P\'{\275b\207n=\000h\373;\353\004k\275\336\225p\275\206G\273\274\256\344(=|l\344<\231\363\233=\220\264\373\273\310\337f\275pX\243\273\n\341\013=fq&=\203\217\204\275\230\266\021\274\263\334\t\275$\233\320<\340O\"\275\"\206K=<<\347\274 E\334<8\025\033=\233\002U\275\\\203\254\2749\226\223=\265\341\204={\236\207=\254\211\372<\326\023S\275\220[\324\273\361\300\235=\2405X<\224\270\246\274\347\324\246= \320\032\274X\377\010<\000\260\264\273x\273\"\275\"\206{=\350\264\030\275\341\215\200=E\203\237=\360p\221\273\266\"D\275y\217\212=L\256\340<,\335\215<\356i_\275\007\221\240\275\264\033\351<\330\2636<\3661@\275\230E|\274\340\2007\275\306\242<\275j\203]=\232D\002=xg.\274\234\276\346<\036\'k\275:\204l=\264q\377<\233P\\\275 \013%=\014\245\235\275\252\2048=\367\031\232=\300\216\"<\030\374\016=\226\306Q=N\206A\275\235)\246=f\203X=\016\376\014=xx\000=\000W6\275\\g\352<\226\312\335\274\000G\325\273\373R7\275\253\2677\275\3067u\275\374\333\247<\207z\230=\230\037g<\200\207^\275\226\005C\275\026-\204\275\320R\212\275\330\320:\275[\270\204=\340@\034\274m\204\240=@\3344<\340\325\016\274|\306\224\274 \274\364\274\032U.= \2433\274\006Lg\275\301\272\221=C\220\241\2754\024\234<\303\3655\275\037\215\230=h\257@\274\020\360\366;>^\023\275XS0<\001\231\247=\354X\340<\243\316\215=\360\346\004=H\265#<\260\361\201\275w\240\212= 
._\274M\022\206=\0047\226<\303\302-\275\204\367\364<C\371\\\275\307B\220\275\240MV<\300)\275\273\310Sx<\316\256\026\275`(\010\274\340|\013\274\274\344\227\274\334\346\230\275\2009\242<\330\351\"<\006#\363\274\022\027m=\374#\344<\222\265\t=k\217\005\275\232<2=\240\031&\275\232\036\233\275\206\226\220\275u\034\222=\326\224%\275\310\343\250\2754\365\377<\332@4=\020\275\214;8\263r<\227\367\204=\316\013\245\275\326a\000=pT\220;\323\327\225\275\310\022Z<\014\353\365<t\001\205\274,\265\314<\254\315\214\275H9;\275Px\255;\025\r\242=@\373N\274\025\201\211=\036#@=\370\227\r<(\204\240\275WJ\244=t\366\241\274,n\223\275\3466t\2753\235\247\275\000jK\274Q\350\215=\020\230\351<\024\271\262<\240\2749\273\026Q\007\275\200\234\006:0\364\265;\325\230\231=\320\002\266\274>bT=\233\027V\275\006yv=\334m\375<\322\013]=rEx=80b<\200\222F<0\267\357\273Nm|\275\366\302K=\234{\357< z+;\000\370\247<`v ;\034o\226\275\346,\024=\033]\215=\314\233\320\274\002\237F=\200\034T\2750\024\330;\024L\207<@\376\357:\002\261:=\276\246\202\275\016g\016\275\002\312j=\206L\312\274\177\237\226=\234x\300\274\000\010{\274\274$\344\274\006\033;\275\213\352\r\275\300\223\274<\352_{=&\266X= \250\t\2740\332\204<\000\3702:\371\"\214=\026|\240\275\326\313\247\275\035\177\201=lp\262\274\324\222\261<\272\314]=\314\002\224\274\3209\375;\003?A\275\256\247\035\275\215i\235=Z\326,=\022ua=\240R\034=\220\367e\275\004\370\210<`aN\275\026\0137\2754\261\331<l\242\212<\376V9= \216\\\274\000y\365:\032A\002=\240;h;\262t\022=(\373\004\275X\\\000<\230\270W\274@U\367<>\243\020=3\314\227\275\312\002+=y\001\237=\334\377\241<\004/\236<\330\005\177\275\r\007\225=P\332\274\273/\005\251\275B \032=\274\255\312\274+;j\275`\2347\274\252\242==\334)\205<\300\365\320<\266P\024=z\316X=~{Q=\277:\241=\216\333\205\275\000g\2469\351\255\245=h\2529\275\200!\212<\016.V=vD\245\275\001q\224=T\225\332<\\/\255\274\370\325\232\275\240\357C<>\277F=\243!W\275h\262~\275\000\030{;\035\346\226=\\\277\310\274X\325\016=$G\240\275$\247\207\274\270`+\275\3000C<\350\371 
=X\316(<0w\225<\270\244\241\275,\330\332\274F\250\025\275\323\013\224=F\236p\275\212\3235=%\371\203=\236\316N\275\200Rd\272\347\007\230\275\030\227a\274\036\220\r\275\370:\007=\006\016\030\275p8\353;{Z\232=\344\031\204\275\372\035\021=\224\036\231<\334\'\247<\252>1=<_\310<\020\035\260\274@\2463\275\346V\263\274\344K\242\275\300\327\000\275\006K0=\327\004\201=P\021\211;\340\236-\274\264\036\246\274m`\220=\213\037\246\275\317c\251\275R\367.=\013\241\'\275\314\r\275<\320\010\317;\034\255\272\274\310\245\002\275\200\327\251<\311\363\245=D\220\301<\332\320!=\233;\234\275\223\363\224\275\201\356\224=\240\245&<\250\324\025\275a\202\246=bt1=^y\017=\340v\345<V\224\244\275\366\035A=\340\247:\275+\037:\275\372\022\"=\000\177)<$\005\315<\010\017><\304,\250\274\366\255\226\275V\345L=\222\261D=\0302w\275s\352\035\275\020\316\364\273\377\342\245\275\010\247V<(\2774<@\271\263:\000\236H\273@2\216<\010\370y\275LO\250\27405\350;\343K8\275\243\360\206=d\266\251<\000\234\3329\026\213\033=`\rp\275\332\202J=\313\320-\275\333\035m\275\275E\246=\373Z\233=\321\300\240=\374]\207\275\272/\245\275\370\304\n=\200\304<\273\362J\016=\324\333\235\275M\252\212=\333l\027\2750\346\300\274J\360v=,/\277<\017\372\220=D\370\211\275\314M\213\275,\213\245<\260\360\217\275\240\215J\274\000\271\2269\232Qh=0?\262\273\340T\016<8\263_\274\212r/=r\263}=L\307\231<`\314\001\275R+\001=w:\236=\353[\225=\210d\034\275k%\021\275\200\233t\272w\342\223=o\200\212=\026\036d=`\001\311<;\343\244=\312\037\242\275\036\"[=\2706*\275\374\302\361\274\350\216\032\275p\263\214\275|9\252<8\177w\274\370)\004\274\270\301c\275\242\331F=\014r\212<`\307e\273\265\232\211=Z\315B=\333vi\275K\365/\275\000\177\327<\n[}=\240\266\002\274x\234\000<\014^\220\275N\301\242\275\335\237\222=\2408t\274\300\3277<\034\236\240<Hf$=\246l\373\274\236\"\210\275{\340\227=m\252\237=\253\024H\275P\366\277\274\222)k=\246\027+\275\000\317\330<\200\270\223;\000\233\225\274P\255\003\275\016\206\003=@\366}\274`1\020<8\336z\275KS\235=\200\276\364;3\010G\2751\r\220=03m\275\026\255$=@\364\314\272\000\233\336\271l{\245<\246bu=S\017\243\275\274\023\363\274\260%\366\274&!\r\275\372\3137=+\226\243=`!R<\230\262K\274\257\223\240\2758\235>\274q\036\201=5\356\236=\226\300\246\275\300*\206<vH<\275\356\367\007=\252\341Y=@A\211;\204K\222\275\314h\225<\326z$\275\202YZ=$^\234\274\226#P=\000\037\270;\362\034\036=\036\233\030\275\300~\247\273`\230\013;W\005\243\275`\375\311<P\013\374<\024\235\310<|\344\347\274\220$\233\275I\270\245=^\353J=+\247J\275\274\264\321<\262aF=\242\317\242\275F,3\275\202\362\006=\235U\200=\253 S\275\200\326\343;\375\214\225=:\3600=\2563]=\266\225a\275\350/d\274\376x1\275\226@\366\274\\\334\250\274\236\220W\275$\r\226\275\004\337\246\275\202\2521=j\272x=\216q:\275[\233*\275\256O\240\275L\363\203<\271\'\225=0\014\216\275\270\240\227\275|\222\311<&\233\273\274I\322\213=4\326\275<\314\353\233\274\352Q\221\2750\267\220\274\217\230\203=\375\026\235=#H\201\275\253\030\233=\320\017\301<V\354\017=\213-P\275@?\303\272@\303\200<|\n\317<\376\315X\275F\226G={dP\275\227E\207=\306\262\021\275+\335\243\275\266+\270\274\305\302\203=\016\300B=H\236\'<\354v\267<\310I\"<\020v\341;\252\001\037=;\356\232\275(|@<N\2640\275\310#c\274\342\265F=\360%\324\273\374g\371\274\0064K\275\210\332\"=1\014\245= \366F\273\260\252\245;\332\014s=\222%X=T#\345<f\266\317\2740\367\252\273\320\314\362\274|\030\302\274\214?\346<D\213\220\275L\202\251\275X=\n\274\344U\206\275\231?\227=p\271\234\274`\005\\\273\265]\244=\314\346\207\275\373\250\001\275\300\216\363<\332r\n=\326\370\237\275J\316==\016 
\003=\276qW=\273\352G\275\233\256\233\275\030$b\275\000}!\275\235\311\203=\340\232?\273!\346\230=\026\202@=\2730n\275\311\242\247=\200\025]\274\377\231\225=\373\372\211=n=\247\275\030\017\030=\032W%=\253\325x\2758\331[\274\274\331\261<$\026\257<\214\004\203\2746\010\345\274C\010\210\275.\r0=js?=2\323,=\000\303<\274\000t\317<P\375f\275\326\0307\275\314V\230<JMT=\274\203\227<p\327\365;_p\226\275\n\377E=\314\222\255\274{\300\226=\214\261\221\274:\301t=\260w\231\275\366\253\373\2747\360\221=\245\244\211=\302\004x==Z\226=\230\316\032<\322\366E=l5\253<\360\347\353\273^\2515\275d\324\206<\036\215.=\364\371\371<\261\023\214=\200\000d;v\035K=\206\"\037\275\323\241\227\275\206>\255\274+\373y\275\336\316;=\350\3401<D\364\243<oy\217=H\212%\275\320\304\223\275+yg\275\232\345X=\2053\235=\305Z\227=\251f\206=[\005\234\275\000\343\336\273Hx3<@C\261\273\245\335\233=\240\r\232\275\340T\037<\024m\247\274\267?\200\275N\374\234\275\260T\364\274=\360\211=+\022$\275\304\267\245\275&\313i\275\321G\234=\006yD=(+!\2748\'\n=2x\000=\323\374\244\275\220\020\223<\200jC<\340\2677\275\270\345\021<\002\364>=\014\276\207\275@\320#<\220\270\316<\257\307\223=\361\334\247=\005\003\235=\000>\345:\230\320%\275\212\306J=(\0353<\032\225\005=\213\\\213=\300\033\314<\203nE\275\0323\'=\266Q$=\223\342\247=c\327\204\2754o\254<\360Q\265\273\260\325X\275z\266D=r\007X=\310\3731<\227\352\242=\177\353\204=\n#N=\240\204\361<^\224\010=f\237\376\274\224B\360<u\214\250=h5^<\360\262\275<\346@\353\274I:\211=\272\276;=j\272A=\336\201\021\275\245\250\234=4V\340<p\216H\275@>\243\274,\354\203\275\274\351\234<\224\370\231\274\000]\346\271\223\247\020\275\260\363\243\275\362\036z=)\217\250=\333\326A\275R\022i=>a\033\2750t\341<\227\211\227=\002\266{=\035\263\232=\200\267\366\274\204I\250\275\370\372W\274D\347\242\274\0014\233=\362\204]=.\033&\275]\355\203=\370\035n\275\300\000l\274\020*\315<\374\363\313<\332MG=l\266\327\274!\003\212=y\225\200=\254{\235\274\306\377\033\275\374\300\351<\000\327\362:8\342&\2742\311[=\347\205\226\275\213\307\220=\243\3657\275\363$\201=ey\243=\374\221\257<\330ZJ<\334\275\376<\336\323\234\275\323h\222=\342\360I=\01662=\275\345\203=\"\265\203\275F\3662=P|\205;\351\344\234=>A\031=44\221\2758j\205\275\350\241K\274\026\363\340\274`\3308\274\252s/=~B-=\254\260\252\274\032\247P=\014K\247\275G\032\214=&\342\020\275U\300\231=\265\227\243=nt\037=I\260\246=\300G\321;\214\256\275\274v\327{=\253d\233=\352\360j=\270\035U\274\252\223\177=\300\372\305:@\031\276\272\323\206\000\275\330\036\221\275\230XG<\234Y\251\275\306\262\025\275f\216\200\275&<\202\275vIc\275\310W/\2750\355\332<\250\315\205\275\354-\354\274b\n/=\246\267\350\274\256^\026\2758\226\016=z\0027=\020q\177\275F\205p\275sA\201=@|\364\274\003]$\275\014\006\335< \261\014\274\324A\342<>\347\222\275!\360\236=\240\266}\274\270\360\035\275\200\325m;\240\"\373\274b@.=~\310s\275\203\305d\275H\3649\275\000\'\3369\334\377\206<oo\224\275@T\253;6!a\275\210\214\234\275\003\205\211=\3321Q=\023\326;\275\236C!\275\266,&\275\243\3338\275\023PP\275\250\350N\274c\375\022\275\\l\257\274E\355\241=.\311:=8\242@<\020\030\370;\000\370\020\275 
\310\343<\355@\212=\3636h\275\320\357\214\274\223\272|\275\274#\270\274-\254\225=\n\321\r=2\245D=\024\377\201<@\327\277:\000F\023<\353\0339\275\364Y\307<Z\332Z=z\202o=\356\000U\275\273Co\275\306\021\231\275do\201\274\323X\212=^vV\275\321\200\252=\200H\007\272z\0207=\200\344\033\272\"\234_=\266\332B\275<\260\217<hR\231\275\000\370=<\n\216\236\275~Qv\275p\275\215\274\273\373w\275\233\372\"\275q\332\217=X\357\025<\000\260^\275v{\037\275\200\224\226\273\310@\\\275\036-\026=\362\242j=\220-\346\273\034\301\260\274\020\336\332<\340\301;<\300)\364\274\346K1\275\200\374\242<\266BQ=\340pz\273l\003\333\274\234\352\352\274D\214\303<h\272q<\242\265!=\020\240$\275\353?\001\275\3704\024\275\226@\016\275\370\231\n<\272\016X=\300\360\225\274\323jK\275(\037p<V\362\004\275`B\271<\246\372V=\033\252F\275\004\303\217\275$\362\355<\340ux\274\006\177\"=CV\232=b}[=\0309\032=\223L\222=\006\002\341\274<%\200\274\264\004\211<\220k(\275p\t\241\275J\303E=6\"l\275^\361g=N\206i=\202c8=Z\031g=\353\353\223\275\360\003\306<\277\037\227\275\273>l\275H\274 <\036\247]\275\343\"2\275\\^\250<\304\353\330<;\214Z\2753\003h\2758\211\n\275@q\354\273h\243\037= D\033;\226Tq\275qn\233=\376\007\213\275P\263\266<\020\355\206<\275\222\222=RC4=<\217\231<\274J\203\275 0\016<\334\334\210\274\032\0024=\251/\226=\274f\310<\000\353\316\272\016\345\000=\022Jf=2*4=\210\374#\275\206L\001\275\005\007\227=\242\227:=\360\307\357<ZN<=X\230D<\333\351a\275P!\252<\240\246r\275\242\203G=\242\2173=0\240\321\273\243\260F\275\032\t5=\200\n\203<\206\311A=\000\203e\274D\336\241\274\360\310\277\273\200\001?;\262\374\235\275N\300J=\202x.=\335\016\201=\355o\224=\214\031\267<(\363\005=\206\266P=@V\202;\201\352\221=\340H\260\274\260\322\355;[\350\223\275*}R=\266\306\360\274\346!_=:6\222\275\300m\224\275\362c\210\275\031\254\241=(\277E\274\360\013\360;\300\010\321\273#\240\"\275\323\027\030\275\373\016P\275JQ\252\275P6\272<C\356\237\275\364\t\214\2746\017\024=\303\230\213\275G\317\212\275@\220_\274\276\336\036=\014#\214\274\316U!\275\365^\212=*\250\033=\314\304\200\275\031,\205=\375\010\224=p\025\357\273\310f&\275\316,:\275\20062\274Y\367\215=\2315\236=>\223M\275x\027\017\274\301\264\245=\302\322$=<\377\241<\273e\233=\n(.=p\370\352<\303\240+\275 \253\367\274\302\037\n=\220j\320\274mP\204=<\355\305<@\027\241<\302\033Z=\316\303\027=\343\005E\2750`\237\274\223w\234\275\310\3579<\177\013\244\275`\222\315<H\215G\275H\370\003\274\353\022_\275\330\312\222\275\340\336P\275\010\002\027\275\200\310S\274`\201l\273\347\277\213\275de\252\275\272\020\022=\323\265\236=J\302O=\314\002\210\275P\277\332;\305\354\215=\240v\217\275k\303X\275\230p\037\2746\235\355\274\373\017\231=\206Bl\275\240\215\010<\266\300\256\274VN\023=\364\237\270<\246\241\363\274\240\213+;\223\241\216\275{\376\014\275\342`\\=\026]\321\274\250\243G\275\312\347z=\003/\213\275k\303\224=\346\316\001\275$\317\300<X<\222\275<*\246<\200\375\361\273\263\244\202=\206\321S\275Kv\226=\006\260\222\275\002\211\211\275ZD-=\000\237\313:\323\242\r\275\232-v=\202\013\216\275P\312\356;\266\245\235\275\251\272\222=`\000\301<\360\004\333<\361\306\220=\313\366\007\275z\2220=|`\307\274\340\316\235<U\366\232=`\326)<J\336;=\360\316\201\274\2700F\2754S\371<X\251c<\000>\314\274[\274*\275\250\003I<\021\352\240=[.\234=\360\232\227\273\262\241/=\203\315\031\275\346\305~\275H\354\037<PXM\275}\036\215=\270\301\231\275s\261\215\275%\346\236=\276\2764=\210\341V\274;\336\240=\020;;\275 
\316\230\274\330bz\275\200\261N\275\253S2\275g\262\237\275hv\225\275\020\343X\275\000v\353\274&r\323\274\364\204\246\275\262\201\243\275\364,\243<h\263\013\275Y\010\201=F\220\035\275\260\035\003\275\341\005\217=\026@>\275\230\3054\275\020i\205\274\332\023\177= \353y\275\222_\251\2753f\237=\205\254\240=d!\375<p\006\236\275\000\307O<\302\316\237\275lh\305\274\220\250\214;\204c\321<\020\362\335<\373M\217=\266F\356\274xR\037=\000\224D\273\010\2632<xY\025=\334s\224<\340(\023<`\227\243<\210\224\215\275\216\320z\275\023B[\275 \275\014\273\340\300O\273\333\354\245=\347<\241\275\322zE=\024\010\245<\032\022@=\374\262\277\274\000,@<\340S\003\275\220%\245<D\325\215\274\220\374\320\274\375\335\203=\302B\241\275\000\310\372\272\274\233\244\275\360\020\272\274 \2706;{QE\275\370\0204<#:\231=\236hI\275\342\')=\300\022\227:\336\023\010=hiT\275\243f \275\260\225\235<\350o\013\275\365j\252=\000\020\274;\300>1\275@\310\371<\365s\233=~ZT\275\000\326\200\272\324b\340<\323V\226=\000\374\2659>\264\006\275^\"q\275\376p-=\300\357\256\274\216\352Q\275H\t8\274\220\362\323;\274s\213<v\207I=\034\311\325\274\001J\220=\304\354\223<\266\266E=\034\240\314\274\360*\340;/\304\245\275\357\371\221=\320\223\212\274\243gV\275\316\371q=Lu\201\274\036\370e= \'\034=\254V\361\274,y\212\27434\025\275tG\301<\240\347B<b\022g=\355\'\211= \001\222\274g\254\226\275@#\337;\324\266\234\274B\374f=\'\274\224=\370{\000\275\036F==\223\010\242=@\232\267:+\364`\275\242\302#=\362xh=\356\342J=\360\257\t\275\034G\362<XDs\274&\2743\275\204J\235<`w9\273\345\210\221=\316h\016\275\346\210\365\274v\345\314\274\030\255\006\274\023~I\275\230\026\001<4\'\270<\200eZ\273\326cs\275\310\\\026<\222\357b= \023\016;hjV\275?\302\202=(\210Y< \244q<\320\224*\275\360\271;\275\330J\013\275X\337\034=A\233\240=\340\036\200\275\300\262?<\372c^=\213\027\204=x[6\275\236\304]\275\306Y\021=\200=\\\275\340cO\275\252aZ=\036Yw\275\022[0=\366\264#=\000\326c\271=\350\206=\200\300\223;\202\271?=8\3606<\314\351\203<,\275\242\274\006\223;\275\254\244\264<w\241\245=\242\010\237\275.r\024\275\342\266\177=\314F\206<\000(\2529\002\350\225\275\330\272H\2750KI\275\000v\241\2747\r\204\275N\330\236\275\362;L=\332\364H=v\252z\275D\333\225\275\250\374\247\275\312MD=z\260\'=u\366\224=\\5\344<\260\242\376;\334]\353\274\\\213\364\274`\366\373<&\021,\275\233\362=\275\262\352a=+mg\275`~\016<\266C =\202:\210\275"
+  }
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 20
+          }
+          dim {
+            dim_value: 16
+          }
+          dim {
+            dim_value: 50
+          }
+          dim {
+            dim_value: 40
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 13
+          }
+          dim {
+            dim_value: 16
+          }
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 3
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "2"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 20
+          }
+          dim {
+            dim_value: 13
+          }
+          dim {
+            dim_value: 48
+          }
+          dim {
+            dim_value: 38
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_convtranspose.expect b/test/onnx/expect/TestOperators.test_convtranspose.expect
new file mode 100644
index 0000000..bfecf52
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_convtranspose.expect
@@ -0,0 +1,127 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    input: "1"
+    output: "2"
+    op_type: "ConvTranspose"
+    attribute {
+      name: "dilations"
+      ints: 1
+      ints: 1
+      type: INTS
+    }
+    attribute {
+      name: "group"
+      i: 1
+      type: INT
+    }
+    attribute {
+      name: "kernel_shape"
+      ints: 3
+      ints: 3
+      type: INTS
+    }
+    attribute {
+      name: "output_padding"
+      ints: 2
+      ints: 2
+      type: INTS
+    }
+    attribute {
+      name: "pads"
+      ints: 1
+      ints: 1
+      ints: 1
+      ints: 1
+      type: INTS
+    }
+    attribute {
+      name: "strides"
+      ints: 3
+      ints: 3
+      type: INTS
+    }
+  }
+  name: "torch-jit-export"
+  initializer {
+    dims: 3
+    dims: 3
+    dims: 3
+    dims: 3
+    data_type: FLOAT
+    name: "1"
+    raw_data: "\306\306\264=\020\351c\275\020\362\301<)\341\026>\326\253,\276v,\375=\214\350l\275\014] \275\352\215\273=x\267#\276H+\356\274\315R<>j\332\215=l\275\221\275\360\263+\275\327i\033>\206\350\224=w\226\005>2\316\375\275(Cc=$\351\014=\210h\t=\031\3756>\362.\332=\023\327\007>\336\017\255=b\246\245=D\035\t\275\016\356-\276Qx&\276\247z\365\275\211.\377\275\346\212\271=\304\005t=\330\304`\275z\036\026\276\343\001B>\201\026\024>\336q\273=\317\020C>\221\327\321\275\035t\340\275\231\264\346\275\260\247\030\274\331\365-\276\007\002\034>\2409\242;^G\304=\367r\330\275Pk\242\274\000\\\317<X\301W\275i\003:>K\022\264\275\264\301t=\204\266\005\276\020\233\272\274\020!Q\275\306\r\252=7\251\t>\360\266?=\240$\217\274P\332I\275\375\201\032>\260B:\275qD\016>Z]\257\275\361j\">\251GD>\217\026\007>J\016\342=\326\262\222=\352\313\301\275\300\206\026;\322\266\207=\310\371M\275\354O\r=R\326\346=>\2276\276`_3\276r\326\353\275"
+  }
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+          dim {
+            dim_value: 5
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 3
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "2"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 12
+          }
+          dim {
+            dim_value: 15
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_equal.expect b/test/onnx/expect/TestOperators.test_equal.expect
new file mode 100644
index 0000000..3ff6001
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_equal.expect
@@ -0,0 +1,63 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    input: "1"
+    output: "2"
+    op_type: "Equal"
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: INT32
+        shape {
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: INT32
+        shape {
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "2"
+    type {
+      tensor_type {
+        elem_type: INT8
+        shape {
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_exp.expect b/test/onnx/expect/TestOperators.test_exp.expect
new file mode 100644
index 0000000..53ae0ce
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_exp.expect
@@ -0,0 +1,46 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    output: "1"
+    op_type: "Exp"
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_flatten.expect b/test/onnx/expect/TestOperators.test_flatten.expect
new file mode 100644
index 0000000..a55811b
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_flatten.expect
@@ -0,0 +1,220 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    output: "1"
+    op_type: "Shape"
+  }
+  node {
+    input: "1"
+    output: "2"
+    op_type: "Slice"
+    attribute {
+      name: "axes"
+      ints: 0
+      type: INTS
+    }
+    attribute {
+      name: "ends"
+      ints: 1
+      type: INTS
+    }
+    attribute {
+      name: "starts"
+      ints: 0
+      type: INTS
+    }
+  }
+  node {
+    input: "2"
+    output: "3"
+    op_type: "Squeeze"
+    attribute {
+      name: "axes"
+      ints: 0
+      type: INTS
+    }
+  }
+  node {
+    input: "0"
+    output: "4"
+    op_type: "Shape"
+  }
+  node {
+    input: "4"
+    output: "5"
+    op_type: "Slice"
+    attribute {
+      name: "axes"
+      ints: 0
+      type: INTS
+    }
+    attribute {
+      name: "ends"
+      ints: 1
+      type: INTS
+    }
+    attribute {
+      name: "starts"
+      ints: 0
+      type: INTS
+    }
+  }
+  node {
+    input: "5"
+    output: "6"
+    op_type: "Squeeze"
+    attribute {
+      name: "axes"
+      ints: 0
+      type: INTS
+    }
+  }
+  node {
+    input: "6"
+    output: "7"
+    op_type: "Cast"
+    attribute {
+      name: "to"
+      i: 11
+      type: INT
+    }
+  }
+  node {
+    output: "8"
+    op_type: "Constant"
+    attribute {
+      name: "value"
+      t {
+        data_type: DOUBLE
+        raw_data: "\000\000\000\000\000\000\360?"
+      }
+      type: TENSOR
+    }
+  }
+  node {
+    input: "8"
+    input: "7"
+    output: "9"
+    op_type: "Div"
+    attribute {
+      name: "broadcast"
+      i: 1
+      type: INT
+    }
+  }
+  node {
+    output: "10"
+    op_type: "Constant"
+    attribute {
+      name: "value"
+      t {
+        data_type: DOUBLE
+        raw_data: "\000\000\000\000\000\0008@"
+      }
+      type: TENSOR
+    }
+  }
+  node {
+    input: "9"
+    input: "10"
+    output: "11"
+    op_type: "Mul"
+    attribute {
+      name: "broadcast"
+      i: 1
+      type: INT
+    }
+  }
+  node {
+    input: "11"
+    output: "12"
+    op_type: "Cast"
+    attribute {
+      name: "to"
+      i: 11
+      type: INT
+    }
+  }
+  node {
+    input: "3"
+    output: "13"
+    op_type: "Unsqueeze"
+    attribute {
+      name: "axes"
+      ints: 0
+      type: INTS
+    }
+  }
+  node {
+    input: "12"
+    output: "14"
+    op_type: "Unsqueeze"
+    attribute {
+      name: "axes"
+      ints: 0
+      type: INTS
+    }
+  }
+  node {
+    input: "13"
+    input: "14"
+    output: "15"
+    op_type: "Concat"
+    attribute {
+      name: "axis"
+      i: 0
+      type: INT
+    }
+  }
+  node {
+    input: "0"
+    input: "15"
+    output: "16"
+    op_type: "Reshape"
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "16"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 24
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
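The two Constant nodes in this flatten graph carry their payloads as little-endian raw_data. A short standard-library sketch decodes them to 1.0 and 24.0, the element count of the (1, 2, 3, 4) input that ends up as the flattened dim of 24:

import struct

# "\000\000\000\000\000\000\360?" and "\000\000\000\000\000\0008@" from the graph above
one = struct.unpack('<d', b"\x00\x00\x00\x00\x00\x00\xf0\x3f")[0]
numel = struct.unpack('<d', b"\x00\x00\x00\x00\x00\x00\x38\x40")[0]
assert (one, numel) == (1.0, 24.0)  # 2 * 3 * 4 = 24 trailing elements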
diff --git a/test/onnx/expect/TestOperators.test_index.expect b/test/onnx/expect/TestOperators.test_index.expect
new file mode 100644
index 0000000..73cbac2
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_index.expect
@@ -0,0 +1,68 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    output: "1"
+    op_type: "Slice"
+    attribute {
+      name: "axes"
+      ints: 0
+      type: INTS
+    }
+    attribute {
+      name: "ends"
+      ints: 1
+      type: INTS
+    }
+    attribute {
+      name: "starts"
+      ints: 0
+      type: INTS
+    }
+  }
+  node {
+    input: "1"
+    output: "2"
+    op_type: "Squeeze"
+    attribute {
+      name: "axes"
+      ints: 0
+      type: INTS
+    }
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "2"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_logsoftmax.expect b/test/onnx/expect/TestOperators.test_logsoftmax.expect
new file mode 100644
index 0000000..7f4d7d9
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_logsoftmax.expect
@@ -0,0 +1,63 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    output: "1"
+    op_type: "LogSoftmax"
+    attribute {
+      name: "axis"
+      i: 2
+      type: INT
+    }
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_max.expect b/test/onnx/expect/TestOperators.test_max.expect
new file mode 100644
index 0000000..e046e02
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_max.expect
@@ -0,0 +1,63 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    input: "1"
+    output: "2"
+    op_type: "Max"
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "2"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_maxpool.expect b/test/onnx/expect/TestOperators.test_maxpool.expect
new file mode 100644
index 0000000..4100a93
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_maxpool.expect
@@ -0,0 +1,68 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    output: "1"
+    op_type: "MaxPool"
+    attribute {
+      name: "kernel_shape"
+      ints: 3
+      type: INTS
+    }
+    attribute {
+      name: "pads"
+      ints: 0
+      ints: 0
+      type: INTS
+    }
+    attribute {
+      name: "strides"
+      ints: 2
+      type: INTS
+    }
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 20
+          }
+          dim {
+            dim_value: 16
+          }
+          dim {
+            dim_value: 50
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 20
+          }
+          dim {
+            dim_value: 16
+          }
+          dim {
+            dim_value: 24
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_mean.expect b/test/onnx/expect/TestOperators.test_mean.expect
new file mode 100644
index 0000000..52f34fb
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_mean.expect
@@ -0,0 +1,46 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    output: "1"
+    op_type: "Mean"
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
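The empty shape { } on this output appears to encode a rank-0 (scalar) result, as torch.mean over all elements produces. A numpy analogue of the same shape bookkeeping:

import numpy as np

x = np.ones((1, 2, 3, 4), dtype=np.float32)
assert x.mean().shape == ()  # rank-0, serialized above as an empty shape { }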
diff --git a/test/onnx/expect/TestOperators.test_min.expect b/test/onnx/expect/TestOperators.test_min.expect
new file mode 100644
index 0000000..0e80742
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_min.expect
@@ -0,0 +1,63 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    input: "1"
+    output: "2"
+    op_type: "Min"
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "2"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_mm.expect b/test/onnx/expect/TestOperators.test_mm.expect
new file mode 100644
index 0000000..b5fb325
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_mm.expect
@@ -0,0 +1,92 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    output: "2"
+    op_type: "Constant"
+    attribute {
+      name: "value"
+      t {
+        dims: 1
+        data_type: FLOAT
+        raw_data: "\000\000\000\000"
+      }
+      type: TENSOR
+    }
+  }
+  node {
+    input: "0"
+    input: "1"
+    input: "2"
+    output: "3"
+    op_type: "Gemm"
+    attribute {
+      name: "alpha"
+      f: 1
+      type: FLOAT
+    }
+    attribute {
+      name: "beta"
+      f: 0
+      type: FLOAT
+    }
+    attribute {
+      name: "broadcast"
+      i: 1
+      type: INT
+    }
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "3"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_non_float_params.expect b/test/onnx/expect/TestOperators.test_non_float_params.expect
new file mode 100644
index 0000000..cc3db60
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_non_float_params.expect
@@ -0,0 +1,76 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    input: "1"
+    output: "2"
+    op_type: "Add"
+  }
+  node {
+    input: "0"
+    input: "2"
+    output: "3"
+    op_type: "Mul"
+  }
+  name: "torch-jit-export"
+  initializer {
+    dims: 2
+    dims: 2
+    data_type: INT64
+    name: "1"
+    raw_data: "\001\000\000\000\000\000\000\000\002\000\000\000\000\000\000\000\003\000\000\000\000\000\000\000\004\000\000\000\000\000\000\000"
+  }
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: INT64
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: INT64
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "3"
+    type {
+      tensor_type {
+        elem_type: INT64
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_pad.expect b/test/onnx/expect/TestOperators.test_pad.expect
new file mode 100644
index 0000000..40abf3c
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_pad.expect
@@ -0,0 +1,75 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    output: "1"
+    op_type: "Pad"
+    attribute {
+      name: "mode"
+      s: "reflect"
+      type: STRING
+    }
+    attribute {
+      name: "pads"
+      ints: 0
+      ints: 0
+      ints: 0
+      ints: 2
+      ints: 0
+      ints: 0
+      ints: 1
+      ints: 3
+      type: INTS
+    }
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 9
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_params.expect b/test/onnx/expect/TestOperators.test_params.expect
new file mode 100644
index 0000000..6998f1e
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_params.expect
@@ -0,0 +1,91 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    input: "1"
+    output: "2"
+    op_type: "Add"
+  }
+  node {
+    input: "0"
+    input: "2"
+    output: "3"
+    op_type: "Mul"
+  }
+  node {
+    input: "3"
+    output: "4"
+    op_type: "Tanh"
+  }
+  node {
+    input: "4"
+    output: "5"
+    op_type: "Sigmoid"
+  }
+  node {
+    input: "5"
+    output: "6"
+    op_type: "Neg"
+  }
+  name: "torch-jit-export"
+  initializer {
+    dims: 2
+    dims: 2
+    data_type: FLOAT
+    name: "1"
+    raw_data: "\000\000\200?\000\000\000@\000\000@@\000\000\200@"
+  }
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "6"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_permute2.expect b/test/onnx/expect/TestOperators.test_permute2.expect
new file mode 100644
index 0000000..cfdba0b
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_permute2.expect
@@ -0,0 +1,80 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    output: "1"
+    op_type: "Transpose"
+    attribute {
+      name: "perm"
+      ints: 0
+      ints: 1
+      ints: 4
+      ints: 2
+      ints: 5
+      ints: 3
+      type: INTS
+    }
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_pow.expect b/test/onnx/expect/TestOperators.test_pow.expect
new file mode 100644
index 0000000..8be8e75
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_pow.expect
@@ -0,0 +1,81 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    input: "1"
+    output: "2"
+    op_type: "Pow"
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "2"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_prod.expect b/test/onnx/expect/TestOperators.test_prod.expect
new file mode 100644
index 0000000..de286f1
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_prod.expect
@@ -0,0 +1,51 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    output: "1"
+    op_type: "ReduceProd"
+    attribute {
+      name: "keepdims"
+      i: 0
+      type: INT
+    }
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_reduced_mean.expect b/test/onnx/expect/TestOperators.test_reduced_mean.expect
new file mode 100644
index 0000000..b8b8a7d
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_reduced_mean.expect
@@ -0,0 +1,65 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    output: "1"
+    op_type: "ReduceMean"
+    attribute {
+      name: "axes"
+      ints: 2
+      type: INTS
+    }
+    attribute {
+      name: "keepdims"
+      i: 0
+      type: INT
+    }
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_reduced_mean_keepdim.expect b/test/onnx/expect/TestOperators.test_reduced_mean_keepdim.expect
new file mode 100644
index 0000000..f40b1b5
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_reduced_mean_keepdim.expect
@@ -0,0 +1,68 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    output: "1"
+    op_type: "ReduceMean"
+    attribute {
+      name: "axes"
+      ints: 2
+      type: INTS
+    }
+    attribute {
+      name: "keepdims"
+      i: 1
+      type: INT
+    }
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_reduced_prod.expect b/test/onnx/expect/TestOperators.test_reduced_prod.expect
new file mode 100644
index 0000000..de047b6
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_reduced_prod.expect
@@ -0,0 +1,65 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    output: "1"
+    op_type: "ReduceProd"
+    attribute {
+      name: "axes"
+      ints: 2
+      type: INTS
+    }
+    attribute {
+      name: "keepdims"
+      i: 0
+      type: INT
+    }
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_reduced_prod_keepdim.expect b/test/onnx/expect/TestOperators.test_reduced_prod_keepdim.expect
new file mode 100644
index 0000000..c026d21
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_reduced_prod_keepdim.expect
@@ -0,0 +1,68 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    output: "1"
+    op_type: "ReduceProd"
+    attribute {
+      name: "axes"
+      ints: 2
+      type: INTS
+    }
+    attribute {
+      name: "keepdims"
+      i: 1
+      type: INT
+    }
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_reduced_sum.expect b/test/onnx/expect/TestOperators.test_reduced_sum.expect
new file mode 100644
index 0000000..7f12a0d
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_reduced_sum.expect
@@ -0,0 +1,65 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    output: "1"
+    op_type: "ReduceSum"
+    attribute {
+      name: "axes"
+      ints: 2
+      type: INTS
+    }
+    attribute {
+      name: "keepdims"
+      i: 0
+      type: INT
+    }
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_reduced_sum_keepdim.expect b/test/onnx/expect/TestOperators.test_reduced_sum_keepdim.expect
new file mode 100644
index 0000000..01dff8b
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_reduced_sum_keepdim.expect
@@ -0,0 +1,68 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    output: "1"
+    op_type: "ReduceSum"
+    attribute {
+      name: "axes"
+      ints: 2
+      type: INTS
+    }
+    attribute {
+      name: "keepdims"
+      i: 1
+      type: INT
+    }
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_repeat.expect b/test/onnx/expect/TestOperators.test_repeat.expect
new file mode 100644
index 0000000..7b6d687
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_repeat.expect
@@ -0,0 +1,72 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    output: "1"
+    op_type: "Constant"
+    attribute {
+      name: "value"
+      t {
+        dims: 4
+        data_type: INT64
+        raw_data: "\001\000\000\000\000\000\000\000\002\000\000\000\000\000\000\000\003\000\000\000\000\000\000\000\004\000\000\000\000\000\000\000"
+      }
+      type: TENSOR
+    }
+  }
+  node {
+    input: "0"
+    input: "1"
+    output: "2"
+    op_type: "Tile"
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "2"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 4
+          }
+          dim {
+            dim_value: 9
+          }
+          dim {
+            dim_value: 16
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
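The Tile repeats above are stored as raw little-endian int64s; decoding them gives (1, 2, 3, 4), which multiplies the (1, 2, 3, 4) input shape into the (1, 4, 9, 16) output shape recorded above:

import struct

raw = (b"\x01\x00\x00\x00\x00\x00\x00\x00"
       b"\x02\x00\x00\x00\x00\x00\x00\x00"
       b"\x03\x00\x00\x00\x00\x00\x00\x00"
       b"\x04\x00\x00\x00\x00\x00\x00\x00")
repeats = struct.unpack('<4q', raw)  # four little-endian int64 values
assert repeats == (1, 2, 3, 4)
assert [d * r for d, r in zip((1, 2, 3, 4), repeats)] == [1, 4, 9, 16]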
diff --git a/test/onnx/expect/TestOperators.test_repeat_dim_overflow.expect b/test/onnx/expect/TestOperators.test_repeat_dim_overflow.expect
new file mode 100644
index 0000000..6070932
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_repeat_dim_overflow.expect
@@ -0,0 +1,85 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    output: "1"
+    op_type: "Constant"
+    attribute {
+      name: "value"
+      t {
+        dims: 4
+        data_type: INT64
+        raw_data: "\001\000\000\000\000\000\000\000\001\000\000\000\000\000\000\000\001\000\000\000\000\000\000\000\002\000\000\000\000\000\000\000"
+      }
+      type: TENSOR
+    }
+  }
+  node {
+    input: "0"
+    input: "1"
+    output: "2"
+    op_type: "Reshape"
+  }
+  node {
+    output: "3"
+    op_type: "Constant"
+    attribute {
+      name: "value"
+      t {
+        dims: 4
+        data_type: INT64
+        raw_data: "\001\000\000\000\000\000\000\000\002\000\000\000\000\000\000\000\003\000\000\000\000\000\000\000\004\000\000\000\000\000\000\000"
+      }
+      type: TENSOR
+    }
+  }
+  node {
+    input: "2"
+    input: "3"
+    output: "4"
+    op_type: "Tile"
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "4"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 8
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_selu.expect b/test/onnx/expect/TestOperators.test_selu.expect
new file mode 100644
index 0000000..44ee8c9
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_selu.expect
@@ -0,0 +1,58 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    output: "1"
+    op_type: "Selu"
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_sqrt.expect b/test/onnx/expect/TestOperators.test_sqrt.expect
new file mode 100644
index 0000000..1a78045
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_sqrt.expect
@@ -0,0 +1,46 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    output: "1"
+    op_type: "Sqrt"
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_sum.expect b/test/onnx/expect/TestOperators.test_sum.expect
new file mode 100644
index 0000000..cc0e807
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_sum.expect
@@ -0,0 +1,46 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    output: "1"
+    op_type: "Sum"
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 3
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_symbolic_override.expect b/test/onnx/expect/TestOperators.test_symbolic_override.expect
new file mode 100644
index 0000000..16cdb9a
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_symbolic_override.expect
@@ -0,0 +1,103 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    input: "1"
+    input: "2"
+    output: "3"
+    op_type: "InstanceNormalization"
+    attribute {
+      name: "epsilon"
+      f: 9.99999971718069e-10
+      type: FLOAT
+    }
+  }
+  name: "torch-jit-export"
+  initializer {
+    dims: 10
+    data_type: FLOAT
+    name: "1"
+    raw_data: "\340e\355<\246\305\315>L\n\205>j\270\273>\300\314n=t]3?\200)T=\362\257\357>\217~,?b\265\251>"
+  }
+  initializer {
+    dims: 10
+    data_type: FLOAT
+    name: "2"
+    raw_data: "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"
+  }
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 10
+          }
+          dim {
+            dim_value: 32
+          }
+          dim {
+            dim_value: 32
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 10
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "2"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 10
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "3"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 10
+          }
+          dim {
+            dim_value: 32
+          }
+          dim {
+            dim_value: 32
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_symbolic_override_nested.expect b/test/onnx/expect/TestOperators.test_symbolic_override_nested.expect
new file mode 100644
index 0000000..6782bebb
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_symbolic_override_nested.expect
@@ -0,0 +1,104 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    input: "1"
+    input: "2"
+    output: "3"
+    op_type: "Sum"
+  }
+  node {
+    input: "0"
+    output: "4"
+    op_type: "Neg"
+  }
+  node {
+    input: "1"
+    output: "5"
+    op_type: "Neg"
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "2"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "3"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "4"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "5"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_transpose.expect b/test/onnx/expect/TestOperators.test_transpose.expect
new file mode 100644
index 0000000..4298fd2
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_transpose.expect
@@ -0,0 +1,41 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 2
+          }
+          dim {
+            dim_value: 2
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_type_as.expect b/test/onnx/expect/TestOperators.test_type_as.expect
new file mode 100644
index 0000000..697f6b7
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_type_as.expect
@@ -0,0 +1,35 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestOperators.test_view.expect b/test/onnx/expect/TestOperators.test_view.expect
new file mode 100644
index 0000000..aa76b6f
--- /dev/null
+++ b/test/onnx/expect/TestOperators.test_view.expect
@@ -0,0 +1,48 @@
+ir_version: 3
+producer_name: "pytorch"
+producer_version: "0.3"
+graph {
+  node {
+    input: "0"
+    output: "1"
+    op_type: "Flatten"
+    attribute {
+      name: "axis"
+      i: 1
+      type: INT
+    }
+  }
+  name: "torch-jit-export"
+  input {
+    name: "0"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "1"
+    type {
+      tensor_type {
+        elem_type: FLOAT
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 6
+}
diff --git a/test/onnx/expect/TestVerify.test_dynamic_model_structure.expect b/test/onnx/expect/TestVerify.test_dynamic_model_structure.expect
new file mode 100644
index 0000000..e39be34
--- /dev/null
+++ b/test/onnx/expect/TestVerify.test_dynamic_model_structure.expect
@@ -0,0 +1 @@
+When I exported your model with different inputs, the result
\ No newline at end of file
diff --git a/test/onnx/expect/TestVerify.test_embedded_constant_difference.expect b/test/onnx/expect/TestVerify.test_embedded_constant_difference.expect
new file mode 100644
index 0000000..5283f21
--- /dev/null
+++ b/test/onnx/expect/TestVerify.test_embedded_constant_difference.expect
@@ -0,0 +1,42 @@
+When I exported your model with different inputs, the result was different.
+(To get more information, run torch.onnx.verify(..., verbose=True))
+----------------------------------------------------------------------
+ERROR: Strings are not equal:
+
+  graph torch-jit-export (%name: "0"
+  type {
+    tensor_type {
+      elem_type: FLOAT
+      shape {
+        dim {
+          dim_value: 2
+        }
+        dim {
+          dim_value: 2
+        }
+      }
+    }
+  }
+  ) {
+-   %1 = Slice[axes = [0], ends = [1], starts = [0]](%0)
+?                                  ^             ^
++   %1 = Slice[axes = [0], ends = [2], starts = [1]](%0)
+?                                  ^             ^
+    %2 = Squeeze[axes = [0]](%1)
+    return %name: "2"
+  type {
+    tensor_type {
+      elem_type: FLOAT
+      shape {
+        dim {
+          dim_value: 2
+        }
+      }
+    }
+  }
+  
+  }
+
+  * A difference in model structure usually means that
+    your model has dynamic control flow.  These models are not
+    currently supported by the exporter.
\ No newline at end of file
diff --git a/test/onnx/expect/TestVerify.test_explicit_test_args.expect b/test/onnx/expect/TestVerify.test_explicit_test_args.expect
new file mode 100644
index 0000000..e39be34
--- /dev/null
+++ b/test/onnx/expect/TestVerify.test_explicit_test_args.expect
@@ -0,0 +1 @@
+When I exported your model with different inputs, the result
\ No newline at end of file
diff --git a/test/onnx/expect/TestVerify.test_jumbled_params.expect b/test/onnx/expect/TestVerify.test_jumbled_params.expect
new file mode 100644
index 0000000..2996bea
--- /dev/null
+++ b/test/onnx/expect/TestVerify.test_jumbled_params.expect
@@ -0,0 +1,8 @@
+When I exported your model with different inputs, the result was different.
+(To get more information, run torch.onnx.verify(..., verbose=True))
+----------------------------------------------------------------------
+ERROR: Parameters list differs: [] != [u'1']
+
+  * This is really strange! The second time I exported your model,
+    it had a different set of parameters.  Are you assigning Parameters
+    in the forward() of your model definition?
\ No newline at end of file
diff --git a/test/onnx/expect/TestVerify.test_modifying_params.expect b/test/onnx/expect/TestVerify.test_modifying_params.expect
new file mode 100644
index 0000000..7ddc4b4
--- /dev/null
+++ b/test/onnx/expect/TestVerify.test_modifying_params.expect
@@ -0,0 +1,5 @@
+
+Arrays are not equal
+
+(mismatch 100.0%)
+ x: array([3.], dty
\ No newline at end of file
diff --git a/test/onnx/expect/TestVerify.test_result_different.expect b/test/onnx/expect/TestVerify.test_result_different.expect
new file mode 100644
index 0000000..1320367
--- /dev/null
+++ b/test/onnx/expect/TestVerify.test_result_different.expect
@@ -0,0 +1,4 @@
+
+Not equal to tolerance rtol=0.001, atol=1e-07
+
+(mismatch 10
\ No newline at end of file
diff --git a/test/onnx/model_defs/__init__.py b/test/onnx/model_defs/__init__.py
new file mode 100644
index 0000000..8f07b0a
--- /dev/null
+++ b/test/onnx/model_defs/__init__.py
@@ -0,0 +1,4 @@
+from .squeezenet import *
+from .super_resolution import *
+from .op_test import *
+from .srresnet import *
diff --git a/test/onnx/model_defs/dcgan.py b/test/onnx/model_defs/dcgan.py
new file mode 100644
index 0000000..b65cd10
--- /dev/null
+++ b/test/onnx/model_defs/dcgan.py
@@ -0,0 +1,90 @@
+import torch
+import torch.nn as nn
+
+
+# configurable
+bsz = 64
+imgsz = 64
+nz = 100
+ngf = 64
+ndf = 64
+nc = 3
+
+
+# custom weights initialization called on netG and netD
+def weights_init(m):
+    classname = m.__class__.__name__
+    if classname.find('Conv') != -1:
+        m.weight.data.normal_(0.0, 0.02)
+    elif classname.find('BatchNorm') != -1:
+        m.weight.data.normal_(1.0, 0.02)
+        m.bias.data.fill_(0)
+
+
+class _netG(nn.Module):
+    def __init__(self, ngpu):
+        super(_netG, self).__init__()
+        self.ngpu = ngpu
+        self.main = nn.Sequential(
+            # input is Z, going into a convolution
+            nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False),
+            nn.BatchNorm2d(ngf * 8),
+            nn.ReLU(True),
+            # state size. (ngf*8) x 4 x 4
+            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
+            nn.BatchNorm2d(ngf * 4),
+            nn.ReLU(True),
+            # state size. (ngf*4) x 8 x 8
+            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
+            nn.BatchNorm2d(ngf * 2),
+            nn.ReLU(True),
+            # state size. (ngf*2) x 16 x 16
+            nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
+            nn.BatchNorm2d(ngf),
+            nn.ReLU(True),
+            # state size. (ngf) x 32 x 32
+            nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False),
+            nn.Tanh()
+            # state size. (nc) x 64 x 64
+        )
+
+    def forward(self, input):
+        if self.ngpu > 1 and isinstance(input.data, torch.cuda.FloatTensor):
+            output = nn.parallel.data_parallel(self.main, input, range(self.ngpu))
+        else:
+            output = self.main(input)
+        return output
+
+
+class _netD(nn.Module):
+    def __init__(self, ngpu):
+        super(_netD, self).__init__()
+        self.ngpu = ngpu
+        self.main = nn.Sequential(
+            # input is (nc) x 64 x 64
+            nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
+            nn.LeakyReLU(0.2, inplace=True),
+            # state size. (ndf) x 32 x 32
+            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
+            nn.BatchNorm2d(ndf * 2),
+            nn.LeakyReLU(0.2, inplace=True),
+            # state size. (ndf*2) x 16 x 16
+            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
+            nn.BatchNorm2d(ndf * 4),
+            nn.LeakyReLU(0.2, inplace=True),
+            # state size. (ndf*4) x 8 x 8
+            nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
+            nn.BatchNorm2d(ndf * 8),
+            nn.LeakyReLU(0.2, inplace=True),
+            # state size. (ndf*8) x 4 x 4
+            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
+            nn.Sigmoid()
+        )
+
+    def forward(self, input):
+        if self.ngpu > 1 and isinstance(input.data, torch.cuda.FloatTensor):
+            output = nn.parallel.data_parallel(self.main, input, range(self.ngpu))
+        else:
+            output = self.main(input)
+
+        return output.view(-1, 1)
diff --git a/test/onnx/model_defs/lstm_flattening_result.py b/test/onnx/model_defs/lstm_flattening_result.py
new file mode 100644
index 0000000..c55d25e
--- /dev/null
+++ b/test/onnx/model_defs/lstm_flattening_result.py
@@ -0,0 +1,7 @@
+from torch import nn
+
+
+class LstmFlatteningResult(nn.LSTM):
+    def forward(self, input, *fargs, **fkwargs):
+        output, (hidden, cell) = nn.LSTM.forward(self, input, *fargs, **fkwargs)
+        return output, hidden, cell
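The point of this wrapper: nn.LSTM returns output, (hidden, cell), and unpacking the tuple gives the exporter flat tensor outputs to trace. A minimal sketch of it in use (all sizes hypothetical):

import torch
from torch.autograd import Variable

lstm = LstmFlatteningResult(10, 20, 1)  # input_size, hidden_size, num_layers
inp = Variable(torch.randn(5, 3, 10))   # (seq_len, batch, input_size)
output, hidden, cell = lstm(inp)        # three tensors instead of output, (h, c)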
diff --git a/test/onnx/model_defs/mnist.py b/test/onnx/model_defs/mnist.py
new file mode 100644
index 0000000..dd54560
--- /dev/null
+++ b/test/onnx/model_defs/mnist.py
@@ -0,0 +1,22 @@
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class MNIST(nn.Module):
+
+    def __init__(self):
+        super(MNIST, self).__init__()
+        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
+        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
+        self.conv2_drop = nn.Dropout2d()
+        self.fc1 = nn.Linear(320, 50)
+        self.fc2 = nn.Linear(50, 10)
+
+    def forward(self, x):
+        x = F.relu(F.max_pool2d(self.conv1(x), 2))
+        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
+        x = x.view(-1, 320)
+        x = F.relu(self.fc1(x))
+        x = F.dropout(x, training=self.training)
+        x = self.fc2(x)
+        return F.log_softmax(x)
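The 320 in x.view(-1, 320) falls out of the standard 28x28 MNIST geometry: each 5x5 valid convolution followed by a 2x2 max-pool takes 28 -> 12 -> 4, leaving 20 channels of 4x4:

def conv_pool(size, kernel=5, pool=2):
    # valid convolution then non-overlapping max-pool
    return (size - kernel + 1) // pool

s = conv_pool(conv_pool(28))  # 28 -> 12 -> 4
assert 20 * s * s == 320      # matches the hard-coded view size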
diff --git a/test/onnx/model_defs/op_test.py b/test/onnx/model_defs/op_test.py
new file mode 100644
index 0000000..47c3bc2
--- /dev/null
+++ b/test/onnx/model_defs/op_test.py
@@ -0,0 +1,48 @@
+import torch
+import torch.nn as nn
+
+
+class DummyNet(nn.Module):
+
+    def __init__(self, num_classes=1000):
+        super(DummyNet, self).__init__()
+        self.features = nn.Sequential(
+            nn.LeakyReLU(0.02),
+            nn.BatchNorm2d(3),
+            nn.AvgPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False)
+        )
+
+    def forward(self, x):
+        output = self.features(x)
+        return output.view(-1, 1).squeeze(1)
+
+
+class ConcatNet(nn.Module):
+
+    def __init__(self):
+        super(ConcatNet, self).__init__()
+
+    def forward(self, inputs):
+        return torch.cat(inputs, 1)
+
+
+class PermuteNet(nn.Module):
+
+    def __init__(self):
+        super(PermuteNet, self).__init__()
+
+    def forward(self, input):
+        return input.permute(2, 3, 0, 1)
+
+
+class PReluNet(nn.Module):
+
+    def __init__(self):
+        super(PReluNet, self).__init__()
+        self.features = nn.Sequential(
+            nn.PReLU(3),
+        )
+
+    def forward(self, x):
+        output = self.features(x)
+        return output
diff --git a/test/onnx/model_defs/rnn_model_with_packed_sequence.py b/test/onnx/model_defs/rnn_model_with_packed_sequence.py
new file mode 100644
index 0000000..ee57984
--- /dev/null
+++ b/test/onnx/model_defs/rnn_model_with_packed_sequence.py
@@ -0,0 +1,17 @@
+from torch import nn
+from torch.nn.utils import rnn as rnn_utils
+
+
+class RnnModelWithPackedSequence(nn.Module):
+    def __init__(self, model, batch_first):
+        super(RnnModelWithPackedSequence, self).__init__()
+        self.model = model
+        self.batch_first = batch_first
+
+    def forward(self, input, *args):
+        args, seq_lengths = args[:-1], args[-1]
+        input = rnn_utils.pack_padded_sequence(input, seq_lengths, self.batch_first)
+        rets = self.model(input, *args)
+        ret, rets = rets[0], rets[1:]
+        ret, _ = rnn_utils.pad_packed_sequence(ret, self.batch_first)
+        return tuple([ret] + list(rets))
diff --git a/test/onnx/model_defs/squeezenet.py b/test/onnx/model_defs/squeezenet.py
new file mode 100644
index 0000000..3db99b3
--- /dev/null
+++ b/test/onnx/model_defs/squeezenet.py
@@ -0,0 +1,91 @@
+import math
+import torch
+import torch.nn as nn
+import torch.nn.init as init
+
+
+class Fire(nn.Module):
+
+    def __init__(self, inplanes, squeeze_planes,
+                 expand1x1_planes, expand3x3_planes):
+        super(Fire, self).__init__()
+        self.inplanes = inplanes
+        self.squeeze = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1)
+        self.squeeze_activation = nn.ReLU(inplace=True)
+        self.expand1x1 = nn.Conv2d(squeeze_planes, expand1x1_planes,
+                                   kernel_size=1)
+        self.expand1x1_activation = nn.ReLU(inplace=True)
+        self.expand3x3 = nn.Conv2d(squeeze_planes, expand3x3_planes,
+                                   kernel_size=3, padding=1)
+        self.expand3x3_activation = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        x = self.squeeze_activation(self.squeeze(x))
+        return torch.cat([
+            self.expand1x1_activation(self.expand1x1(x)),
+            self.expand3x3_activation(self.expand3x3(x))
+        ], 1)
+
+
+class SqueezeNet(nn.Module):
+
+    def __init__(self, version=1.0, num_classes=1000, ceil_mode=False):
+        super(SqueezeNet, self).__init__()
+        if version not in [1.0, 1.1]:
+            raise ValueError("Unsupported SqueezeNet version {version}:"
+                             "1.0 or 1.1 expected".format(version=version))
+        self.num_classes = num_classes
+        if version == 1.0:
+            self.features = nn.Sequential(
+                nn.Conv2d(3, 96, kernel_size=7, stride=2),
+                nn.ReLU(inplace=True),
+                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=ceil_mode),
+                Fire(96, 16, 64, 64),
+                Fire(128, 16, 64, 64),
+                Fire(128, 32, 128, 128),
+                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=ceil_mode),
+                Fire(256, 32, 128, 128),
+                Fire(256, 48, 192, 192),
+                Fire(384, 48, 192, 192),
+                Fire(384, 64, 256, 256),
+                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=ceil_mode),
+                Fire(512, 64, 256, 256),
+            )
+        else:
+            self.features = nn.Sequential(
+                nn.Conv2d(3, 64, kernel_size=3, stride=2),
+                nn.ReLU(inplace=True),
+                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=ceil_mode),
+                Fire(64, 16, 64, 64),
+                Fire(128, 16, 64, 64),
+                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=ceil_mode),
+                Fire(128, 32, 128, 128),
+                Fire(256, 32, 128, 128),
+                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=ceil_mode),
+                Fire(256, 48, 192, 192),
+                Fire(384, 48, 192, 192),
+                Fire(384, 64, 256, 256),
+                Fire(512, 64, 256, 256),
+            )
+        # Final convolution is initialized differently from the rest
+        final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
+        self.classifier = nn.Sequential(
+            nn.Dropout(p=0.5),
+            final_conv,
+            nn.ReLU(inplace=True),
+            nn.AvgPool2d(13)
+        )
+
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                if m is final_conv:
+                    init.normal(m.weight.data, mean=0.0, std=0.01)
+                else:
+                    init.kaiming_uniform(m.weight.data)
+                if m.bias is not None:
+                    m.bias.data.zero_()
+
+    def forward(self, x):
+        x = self.features(x)
+        x = self.classifier(x)
+        return x.view(x.size(0), self.num_classes)
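+
+
+# A minimal shape check, assuming the 224x224 inputs the tests use.  At that
+# size version 1.0 needs ceil_mode=True for the final feature map to reach
+# the 13x13 that AvgPool2d(13) expects; version 1.1 already does.
+if __name__ == '__main__':
+    for net in (SqueezeNet(version=1.0, ceil_mode=True), SqueezeNet(version=1.1)):
+        out = net(torch.randn(1, 3, 224, 224))
+        assert out.size() == (1, 1000), out.size()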
diff --git a/test/onnx/model_defs/srresnet.py b/test/onnx/model_defs/srresnet.py
new file mode 100644
index 0000000..0328d39
--- /dev/null
+++ b/test/onnx/model_defs/srresnet.py
@@ -0,0 +1,81 @@
+import math
+
+from torch import nn
+from torch.nn import init
+
+
+def _initialize_orthogonal(conv):
+    prelu_gain = math.sqrt(2)
+    init.orthogonal(conv.weight, gain=prelu_gain)
+    if conv.bias is not None:
+        conv.bias.data.zero_()
+
+
+class ResidualBlock(nn.Module):
+    def __init__(self, n_filters):
+        super(ResidualBlock, self).__init__()
+        self.conv1 = nn.Conv2d(n_filters, n_filters, kernel_size=3, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(n_filters)
+        self.prelu = nn.PReLU(n_filters)
+        self.conv2 = nn.Conv2d(n_filters, n_filters, kernel_size=3, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(n_filters)
+
+        # Orthogonal initialisation
+        _initialize_orthogonal(self.conv1)
+        _initialize_orthogonal(self.conv2)
+
+    def forward(self, x):
+        residual = self.prelu(self.bn1(self.conv1(x)))
+        residual = self.bn2(self.conv2(residual))
+        return x + residual
+
+
+class UpscaleBlock(nn.Module):
+    def __init__(self, n_filters):
+        super(UpscaleBlock, self).__init__()
+        self.upscaling_conv = nn.Conv2d(n_filters, 4 * n_filters, kernel_size=3, padding=1)
+        self.upscaling_shuffler = nn.PixelShuffle(2)
+        self.upscaling = nn.PReLU(n_filters)
+        _initialize_orthogonal(self.upscaling_conv)
+
+    def forward(self, x):
+        return self.upscaling(self.upscaling_shuffler(self.upscaling_conv(x)))
+
+
+class SRResNet(nn.Module):
+    def __init__(self, rescale_factor, n_filters, n_blocks):
+        super(SRResNet, self).__init__()
+        self.rescale_levels = int(math.log(rescale_factor, 2))
+        self.n_filters = n_filters
+        self.n_blocks = n_blocks
+
+        self.conv1 = nn.Conv2d(3, n_filters, kernel_size=9, padding=4)
+        self.prelu1 = nn.PReLU(n_filters)
+
+        for residual_block_num in range(1, n_blocks + 1):
+            residual_block = ResidualBlock(self.n_filters)
+            self.add_module('residual_block' + str(residual_block_num), nn.Sequential(residual_block))
+
+        self.skip_conv = nn.Conv2d(n_filters, n_filters, kernel_size=3, padding=1, bias=False)
+        self.skip_bn = nn.BatchNorm2d(n_filters)
+
+        for upscale_block_num in range(1, self.rescale_levels + 1):
+            upscale_block = UpscaleBlock(self.n_filters)
+            self.add_module('upscale_block' + str(upscale_block_num), nn.Sequential(upscale_block))
+
+        self.output_conv = nn.Conv2d(n_filters, 3, kernel_size=9, padding=4)
+
+        # Orthogonal initialisation
+        _initialize_orthogonal(self.conv1)
+        _initialize_orthogonal(self.skip_conv)
+        _initialize_orthogonal(self.output_conv)
+
+    def forward(self, x):
+        x_init = self.prelu1(self.conv1(x))
+        x = self.residual_block1(x_init)
+        for residual_block_num in range(2, self.n_blocks + 1):
+            x = getattr(self, 'residual_block' + str(residual_block_num))(x)
+        x = self.skip_bn(self.skip_conv(x)) + x_init
+        for upscale_block_num in range(1, self.rescale_levels + 1):
+            x = getattr(self, 'upscale_block' + str(upscale_block_num))(x)
+        return self.output_conv(x)
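+
+
+# A minimal shape sketch: with rescale_factor=4 the network applies
+# log2(4) = 2 PixelShuffle(2) upscale blocks, so spatial dims grow 4x.
+if __name__ == '__main__':
+    import torch
+    net = SRResNet(rescale_factor=4, n_filters=64, n_blocks=8)
+    out = net(torch.randn(1, 3, 24, 24))
+    assert out.size() == (1, 3, 96, 96), out.size()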
diff --git a/test/onnx/model_defs/super_resolution.py b/test/onnx/model_defs/super_resolution.py
new file mode 100644
index 0000000..d0ba46a
--- /dev/null
+++ b/test/onnx/model_defs/super_resolution.py
@@ -0,0 +1,30 @@
+import torch
+import torch.nn as nn
+import torch.nn.init as init
+
+
+class SuperResolutionNet(nn.Module):
+    def __init__(self, upscale_factor):
+        super(SuperResolutionNet, self).__init__()
+
+        self.relu = nn.ReLU()
+        self.conv1 = nn.Conv2d(1, 64, (5, 5), (1, 1), (2, 2))
+        self.conv2 = nn.Conv2d(64, 64, (3, 3), (1, 1), (1, 1))
+        self.conv3 = nn.Conv2d(64, 32, (3, 3), (1, 1), (1, 1))
+        self.conv4 = nn.Conv2d(32, upscale_factor ** 2, (3, 3), (1, 1), (1, 1))
+        self.pixel_shuffle = nn.PixelShuffle(upscale_factor)
+
+        self._initialize_weights()
+
+    def forward(self, x):
+        x = self.relu(self.conv1(x))
+        x = self.relu(self.conv2(x))
+        x = self.relu(self.conv3(x))
+        x = self.pixel_shuffle(self.conv4(x))
+        return x
+
+    def _initialize_weights(self):
+        init.orthogonal(self.conv1.weight, init.calculate_gain('relu'))
+        init.orthogonal(self.conv2.weight, init.calculate_gain('relu'))
+        init.orthogonal(self.conv3.weight, init.calculate_gain('relu'))
+        init.orthogonal(self.conv4.weight)
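+
+
+# A minimal shape sketch: conv4 emits upscale_factor**2 channels, which
+# PixelShuffle rearranges into a single channel upscale_factor times larger.
+if __name__ == '__main__':
+    net = SuperResolutionNet(upscale_factor=3)
+    out = net(torch.randn(1, 1, 64, 64))
+    assert out.size() == (1, 1, 192, 192), out.size()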
diff --git a/test/onnx/model_defs/word_language_model.py b/test/onnx/model_defs/word_language_model.py
new file mode 100644
index 0000000..8f3b4a2
--- /dev/null
+++ b/test/onnx/model_defs/word_language_model.py
@@ -0,0 +1,74 @@
+# The model is from here:
+#   https://github.com/pytorch/examples/blob/master/word_language_model/model.py
+
+import torch
+import torch.nn as nn
+from torch.autograd import Variable
+
+
+class RNNModel(nn.Module):
+    """Container module with an encoder, a recurrent module, and a decoder."""
+
+    def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers,
+                 dropout=0.5, tie_weights=False, batchsize=2):
+        super(RNNModel, self).__init__()
+        self.drop = nn.Dropout(dropout)
+        self.encoder = nn.Embedding(ntoken, ninp)
+        if rnn_type in ['LSTM', 'GRU']:
+            self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout)
+        else:
+            try:
+                nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type]
+            except KeyError:
+                raise ValueError("""An invalid option for `--model` was supplied,
+                                 options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']""")
+            self.rnn = nn.RNN(ninp, nhid, nlayers, nonlinearity=nonlinearity, dropout=dropout)
+        self.decoder = nn.Linear(nhid, ntoken)
+
+        # Optionally tie weights as in:
+        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
+        # https://arxiv.org/abs/1608.05859
+        # and
+        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
+        # https://arxiv.org/abs/1611.01462
+        if tie_weights:
+            if nhid != ninp:
+                raise ValueError('When using the tied flag, nhid must be equal to emsize')
+            self.decoder.weight = self.encoder.weight
+
+        self.init_weights()
+
+        self.rnn_type = rnn_type
+        self.nhid = nhid
+        self.nlayers = nlayers
+        self.hidden = self.init_hidden(batchsize)
+
+    @staticmethod
+    def repackage_hidden(h):
+        """Detach hidden states from their history."""
+        if isinstance(h, torch.Tensor):
+            return h.detach()
+        else:
+            return tuple(RNNModel.repackage_hidden(v) for v in h)
+
+    def init_weights(self):
+        initrange = 0.1
+        self.encoder.weight.data.uniform_(-initrange, initrange)
+        self.decoder.bias.data.fill_(0)
+        self.decoder.weight.data.uniform_(-initrange, initrange)
+
+    def forward(self, input, hidden):
+        emb = self.drop(self.encoder(input))
+        output, hidden = self.rnn(emb, hidden)
+        output = self.drop(output)
+        decoded = self.decoder(output.view(output.size(0) * output.size(1), output.size(2)))
+        self.hidden = RNNModel.repackage_hidden(hidden)
+        return decoded.view(output.size(0), output.size(1), decoded.size(1))
+
+    def init_hidden(self, bsz):
+        weight = next(self.parameters()).data
+        if self.rnn_type == 'LSTM':
+            return (Variable(weight.new(self.nlayers, bsz, self.nhid).zero_()),
+                    Variable(weight.new(self.nlayers, bsz, self.nhid).zero_()))
+        else:
+            return Variable(weight.new(self.nlayers, bsz, self.nhid).zero_())
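+
+
+# A minimal usage sketch mirroring the exporter tests: token ids shaped
+# (seq_len, batch), with the initial hidden state kept on the module.
+if __name__ == '__main__':
+    model = RNNModel('LSTM', ntoken=50, ninp=5, nhid=5, nlayers=2, batchsize=5)
+    tokens = Variable(torch.arange(0, 50).long().view(-1, 5))
+    decoded = model(tokens, model.hidden)
+    assert decoded.size() == (10, 5, 50), decoded.size()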
diff --git a/test/onnx/pytorch_helper.py b/test/onnx/pytorch_helper.py
new file mode 100644
index 0000000..17afa53
--- /dev/null
+++ b/test/onnx/pytorch_helper.py
@@ -0,0 +1,85 @@
+import io
+import torch.onnx
+import onnx
+from caffe2.python.onnx.backend import Caffe2Backend
+from caffe2.python.core import BlobReference, Net
+
+
+_next_idx = 0
+# Net.Clone takes a dict rather than a lambda for remapping blob names.
+# A lambda would be more flexible, so we fake a dict that forwards
+# lookups to one.
+
+
+class _FakeDict(object):
+    def __init__(self, fn):
+        self.fn = fn
+
+    def get(self, name, _):
+        return self.fn(name)
+
+
+def PyTorchModule(helper, model, sample_arguments, caffe2_inputs, prefix_name=None):
+    """
+    Embed an ONNX-exportable PyTorch Model into a Caffe2 model being built.
+
+    Arguments:
+        helper (caffe2.python.model_helper.ModelHelper): the model helper where
+            this imported network should be inserted
+        model (torch.nn.Module): the model to be exported
+        sample_arguments (tuple of arguments): the inputs to
+            the model, e.g., such that ``model(*sample_arguments)`` is a
+            valid invocation of the model.  Any non-Variable arguments will
+            be hard-coded into the exported model; any Variable arguments
+            will become inputs of the exported model, in the order they
+            occur in sample_arguments.  If sample_arguments is a Variable,
+            this is equivalent to having called it with a 1-ary tuple of
+            that Variable.  (Note: passing keyword arguments to the model
+            is not currently supported.  Give us a shout if you need it.)
+        caffe2_inputs (list of str or caffe2.python.core.BlobReference): the
+           caffe2 Blobs that should be inputs to this network. Must be
+           the same length as sample_arguments
+        prefix_name: prefix to add to each blob name; if None, a fresh
+           prefix pytorch_import_N/ is used
+    Returns:
+        A tuple of caffe2.python.core.BlobReference objects referring to the
+        model's outputs (a 1-tuple when the model returns a single value).
+    """
+    if prefix_name is None:
+        global _next_idx
+        prefix_name = 'pytorch_import_' + str(_next_idx) + '/'
+        _next_idx += 1
+
+    # TODO: handle the case where model cannot be exported
+    # and embed as a Python op in Caffe2
+    f = io.BytesIO()
+    torch.onnx.export(
+        model, sample_arguments, f, export_params=True)
+    onnx_model = onnx.load(io.BytesIO(f.getvalue()))
+    init_net, predict_net = Caffe2Backend.onnx_graph_to_caffe2_net(
+        onnx_model)
+
+    initialized = set([x.name for x in onnx_model.graph.initializer])
+    uninitialized_inputs = {x.name: i for i, x in enumerate(
+        onnx_model.graph.input) if x.name not in initialized}
+
+    if len(uninitialized_inputs) != len(caffe2_inputs):
+        raise ValueError('Expected {} inputs but found {}'.format(
+            len(uninitialized_inputs), len(caffe2_inputs)))
+
+    def remap_blob_name(name):
+        if name in uninitialized_inputs:
+            idx = uninitialized_inputs[name]
+            return str(caffe2_inputs[idx])
+        return prefix_name + name
+
+    predict_net = Net(predict_net).Clone('anon', _FakeDict(remap_blob_name))
+    helper.net.AppendNet(predict_net)
+
+    init_net = Net(init_net).Clone('anon', _FakeDict(remap_blob_name))
+    helper.param_init_net.AppendNet(init_net)
+
+    results = tuple([BlobReference(remap_blob_name(x.name), helper.net)
+                     for x in onnx_model.graph.output])
+    return results
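+
+
+# A minimal usage sketch; "data" is a placeholder blob name for the single
+# uninitialized input of the exported Linear net.
+if __name__ == '__main__':
+    import torch.nn
+    from caffe2.python.model_helper import ModelHelper
+    helper = ModelHelper(name="pytorch_import_demo")
+    linear = torch.nn.Linear(5, 7)
+    out, = PyTorchModule(helper, linear, (torch.zeros(1, 5),), ["data"])
+    print(out)  # BlobReference into helper.net for the Linear output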
diff --git a/test/onnx/test_caffe2.py b/test/onnx/test_caffe2.py
new file mode 100644
index 0000000..50a67d9
--- /dev/null
+++ b/test/onnx/test_caffe2.py
@@ -0,0 +1,765 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from functools import wraps
+import numpy as np
+import sys
+import unittest
+import itertools
+
+import torch.onnx
+import torch.onnx.operators
+from torch import nn
+from torch.autograd import Variable, function
+import torch.utils.model_zoo as model_zoo
+from torch.nn.utils import rnn as rnn_utils
+from debug_embed_params import run_embed_params
+import io
+
+# Import various models for testing
+from torchvision.models.alexnet import alexnet
+from torchvision.models.inception import inception_v3
+from torchvision.models.densenet import densenet121
+from torchvision.models.resnet import resnet50
+from torchvision.models.vgg import vgg16, vgg16_bn, vgg19, vgg19_bn
+
+from model_defs.squeezenet import SqueezeNet
+from model_defs.super_resolution import SuperResolutionNet
+from model_defs.srresnet import SRResNet
+import model_defs.dcgan as dcgan
+import model_defs.word_language_model as word_language_model
+from model_defs.mnist import MNIST
+from model_defs.lstm_flattening_result import LstmFlatteningResult
+from model_defs.rnn_model_with_packed_sequence import RnnModelWithPackedSequence
+
+import onnx
+import caffe2.python.onnx.backend as c2
+
+from test_pytorch_common import skipIfTravis, skipIfNoLapack, skipIfNoCuda
+import verify
+
+skip = unittest.skip
+
+
+def skipIfEmbed(func):
+    @wraps(func)
+    def wrapper(self):
+        if self.embed_params:
+            raise unittest.SkipTest("Skip embed_params verify test")
+        return func(self)
+    return wrapper
+
+
+# def import_model(proto, input, workspace=None, use_gpu=True):
+#    model_def = onnx.ModelProto.FromString(proto)
+#    onnx.checker.check_model(model_def)
+#
+#    if workspace is None:
+#        workspace = {}
+#    if isinstance(input, tuple):
+#        for i in range(len(input)):
+#            workspace[model_def.graph.input[i]] = input[i]
+#    else:
+#        workspace[model_def.graph.input[0]] = input
+#
+#    caffe2_out_workspace = c2.run_model(
+#        init_graph=None,
+#        predict_graph=graph_def,
+#        inputs=workspace,
+#        use_gpu=use_gpu)
+#    caffe2_out = caffe2_out_workspace[0]
+#    return caffe2_out
+
+
+def do_export(model, inputs, *args, **kwargs):
+    f = io.BytesIO()
+    out = torch.onnx._export(model, inputs, f, *args, **kwargs)
+    return f.getvalue(), out
+
+
+try:
+    import torch
+except ImportError:
+    print('Cannot import torch, hence caffe2-torch test will not run.')
+    sys.exit(0)
+
+torch.set_default_tensor_type('torch.FloatTensor')
+
+
+BATCH_SIZE = 2
+
+RNN_BATCH_SIZE = 7
+RNN_SEQUENCE_LENGTH = 11
+RNN_INPUT_SIZE = 5
+RNN_HIDDEN_SIZE = 3
+
+model_urls = {
+    'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth',
+    'dcgan_b': 'https://s3.amazonaws.com/pytorch/test_data/export/netG_bedroom_epoch_1-0649e76b.pth',
+    'dcgan_f': 'https://s3.amazonaws.com/pytorch/test_data/export/netG_faces_epoch_49-d86035a6.pth',
+    'densenet121': 'https://download.pytorch.org/models/densenet121-d66d3027.pth',
+    'inception_v3_google': 'https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth',
+    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
+    'srresNet': 'https://s3.amazonaws.com/pytorch/demos/srresnet-e10b2039.pth',
+    'super_resolution': 'https://s3.amazonaws.com/pytorch/test_data/export/superres_epoch100-44c6958e.pth',
+    'squeezenet1_0': 'https://download.pytorch.org/models/squeezenet1_0-a815701f.pth',
+    'squeezenet1_1': 'https://download.pytorch.org/models/squeezenet1_1-f364aa15.pth',
+    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
+    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
+}
+
+
+class TestCaffe2Backend(unittest.TestCase):
+    embed_params = False
+
+    def setUp(self):
+        torch.manual_seed(0)
+        if torch.cuda.is_available():
+            torch.cuda.manual_seed_all(0)
+        np.random.seed(seed=0)
+
+    def convert_cuda(self, model, input):
+        cuda_model = model.cuda()
+        # input might be nested - we want to move everything to GPU
+        cuda_input = function._nested_map(
+            lambda o: isinstance(o, Variable) or torch.is_tensor(o),
+            lambda o: o.cuda())(input)
+        return cuda_model, cuda_input
+
+    def run_debug_test(self, model, train, batch_size, state_dict=None,
+                       input=None, use_gpu=True):
+        """
+        TODO: remove this from the final release version.
+        This test is only for our debugging of the case where
+        embed_params=False.
+        """
+        model.train(train)
+        if state_dict is not None:
+            model.load_state_dict(state_dict)
+
+        # Either user specified input or random (deterministic) input
+        if input is None:
+            input = Variable(torch.randn(batch_size, 3, 224, 224),
+                             requires_grad=True)
+        if use_gpu:
+            model, input = self.convert_cuda(model, input)
+
+        onnxir, torch_out = do_export(model, input, export_params=self.embed_params, verbose=False)
+        if isinstance(torch_out, torch.autograd.Variable):
+            torch_out = (torch_out,)
+
+        caffe2_out = run_embed_params(onnxir, model, input, state_dict, use_gpu)
+        for i, (x, y) in enumerate(zip(torch_out, caffe2_out)):
+            np.testing.assert_almost_equal(x.data.cpu().numpy(), y, decimal=3)
+
+    def run_actual_test(self, model, train, batch_size, state_dict=None,
+                        input=None, use_gpu=True, rtol=0.001, atol=1e-7):
+        """
+        This is what the user-facing version will look like.
+        """
+        # set the training/test mode for the model
+        model.train(train)
+        # use the pre-trained model params if available
+        if state_dict is not None:
+            model.load_state_dict(state_dict)
+
+        # Either user specified input or random (deterministic) input
+        if input is None:
+            input = Variable(torch.randn(batch_size, 3, 224, 224),
+                             requires_grad=True)
+        # GPU-ize the model, if requested
+        if use_gpu:
+            model, input = self.convert_cuda(model, input)
+
+        # Verify the model runs the same in Caffe2
+        verify.verify(model, input, c2, rtol=rtol, atol=atol)
+
+    def run_model_test(self, model, train, batch_size, state_dict=None,
+                       input=None, use_gpu=True, rtol=0.001, atol=1e-7):
+        use_gpu_ = torch.cuda.is_available() and use_gpu
+        if self.embed_params:
+            self.run_actual_test(model, train, batch_size, state_dict, input,
+                                 use_gpu=use_gpu_, rtol=rtol, atol=atol)
+        else:
+            self.run_debug_test(model, train, batch_size, state_dict, input,
+                                use_gpu=use_gpu_)
+
+    def test_linear(self):
+        model = nn.Linear(1, 1)
+        input = Variable(torch.randn(1, 1), requires_grad=True)
+        self.run_model_test(model, train=False, batch_size=0, input=input)
+
+    def test_lstm_cell(self):
+        model = nn.LSTMCell(RNN_INPUT_SIZE, RNN_HIDDEN_SIZE)
+        input = Variable(torch.randn(BATCH_SIZE, RNN_INPUT_SIZE))
+        h0 = Variable(torch.randn(BATCH_SIZE, RNN_HIDDEN_SIZE))
+        c0 = Variable(torch.randn(BATCH_SIZE, RNN_HIDDEN_SIZE))
+        self.run_model_test(model, train=False, batch_size=BATCH_SIZE, input=(input, (h0, c0)), use_gpu=False)
+
+    def test_gru_cell(self):
+        model = nn.GRUCell(RNN_INPUT_SIZE, RNN_HIDDEN_SIZE)
+        input = Variable(torch.randn(BATCH_SIZE, RNN_INPUT_SIZE))
+        h0 = Variable(torch.randn(BATCH_SIZE, RNN_HIDDEN_SIZE))
+        self.run_model_test(model, train=False, batch_size=BATCH_SIZE, input=(input, h0), use_gpu=False)
+
+    def _dispatch_rnn_test(self, name, *args, **kwargs):
+        if name == 'elman':
+            self._elman_rnn_test(*args, **kwargs)
+        if name == 'lstm':
+            self._lstm_test(*args, **kwargs)
+        if name == 'gru':
+            self._gru_test(*args, **kwargs)
+
+    def _elman_rnn_test(self, layers, nonlinearity, bidirectional,
+                        initial_state, packed_sequence, dropout):
+        model = nn.RNN(RNN_INPUT_SIZE, RNN_HIDDEN_SIZE,
+                       layers,
+                       nonlinearity=nonlinearity,
+                       bidirectional=bidirectional,
+                       dropout=dropout)
+
+        if packed_sequence == 1:
+            model = RnnModelWithPackedSequence(model, False)
+        if packed_sequence == 2:
+            model = RnnModelWithPackedSequence(model, True)
+
+        def make_input(batch_size):
+            seq_lengths = np.random.randint(1, RNN_SEQUENCE_LENGTH + 1, size=batch_size)
+            seq_lengths = sorted(map(int, seq_lengths), reverse=True)
+            inputs = [Variable(torch.randn(l, RNN_INPUT_SIZE)) for l in seq_lengths]
+            inputs = rnn_utils.pad_sequence(inputs)
+            if packed_sequence == 2:
+                inputs = inputs.transpose(0, 1)
+            inputs = [inputs]
+
+            directions = 2 if bidirectional else 1
+
+            if initial_state:
+                h0 = Variable(torch.randn(directions * layers, batch_size, RNN_HIDDEN_SIZE))
+                inputs.append(h0)
+            if packed_sequence != 0:
+                inputs.append(Variable(torch.IntTensor(seq_lengths)))
+            if len(inputs) == 1:
+                input = inputs[0]
+            else:
+                input = tuple(inputs)
+            return input
+
+        input = make_input(RNN_BATCH_SIZE)
+        self.run_model_test(model, train=False, batch_size=RNN_BATCH_SIZE, input=input, use_gpu=False, atol=1e-7)
+
+        # test that the model still runs with a different batch size
+        onnxir, _ = do_export(model, input)
+        other_input = make_input(RNN_BATCH_SIZE + 1)
+        _ = run_embed_params(onnxir, model, other_input, use_gpu=False)
+
+    def _lstm_test(self, layers, bidirectional, initial_state,
+                   packed_sequence, dropout):
+        model = LstmFlatteningResult(
+            RNN_INPUT_SIZE, RNN_HIDDEN_SIZE, layers,
+            bidirectional=bidirectional, dropout=dropout)
+        if packed_sequence == 1:
+            model = RnnModelWithPackedSequence(model, False)
+        if packed_sequence == 2:
+            model = RnnModelWithPackedSequence(model, True)
+
+        def make_input(batch_size):
+            seq_lengths = np.random.randint(1, RNN_SEQUENCE_LENGTH + 1, size=batch_size)
+            seq_lengths = sorted(map(int, seq_lengths), reverse=True)
+            inputs = [Variable(torch.randn(l, RNN_INPUT_SIZE)) for l in seq_lengths]
+            inputs = rnn_utils.pad_sequence(inputs)
+            if packed_sequence == 2:
+                inputs = inputs.transpose(0, 1)
+            inputs = [inputs]
+
+            directions = 2 if bidirectional else 1
+
+            if initial_state:
+                h0 = Variable(torch.randn(directions * layers, batch_size, RNN_HIDDEN_SIZE))
+                c0 = Variable(torch.randn(directions * layers, batch_size, RNN_HIDDEN_SIZE))
+                inputs.append((h0, c0))
+            if packed_sequence != 0:
+                inputs.append(Variable(torch.IntTensor(seq_lengths)))
+            if len(inputs) == 1:
+                input = inputs[0]
+            else:
+                input = tuple(inputs)
+            return input
+
+        input = make_input(RNN_BATCH_SIZE)
+        self.run_model_test(model, train=False, batch_size=RNN_BATCH_SIZE, input=input, use_gpu=False)
+
+        # test that the model still runs with a different batch size
+        onnxir, _ = do_export(model, input)
+        other_input = make_input(RNN_BATCH_SIZE + 1)
+        _ = run_embed_params(onnxir, model, other_input, use_gpu=False)
+
+    def _gru_test(self, layers, bidirectional, initial_state,
+                  packed_sequence, dropout):
+        model = nn.GRU(RNN_INPUT_SIZE, RNN_HIDDEN_SIZE, layers,
+                       bidirectional=bidirectional, dropout=dropout)
+        if packed_sequence == 1:
+            model = RnnModelWithPackedSequence(model, False)
+        if packed_sequence == 2:
+            model = RnnModelWithPackedSequence(model, True)
+
+        def make_input(batch_size):
+            seq_lengths = np.random.randint(1, RNN_SEQUENCE_LENGTH + 1, size=batch_size)
+            seq_lengths = sorted(map(int, seq_lengths), reverse=True)
+            inputs = [Variable(torch.randn(l, RNN_INPUT_SIZE)) for l in seq_lengths]
+            inputs = rnn_utils.pad_sequence(inputs)
+            if packed_sequence == 2:
+                inputs = inputs.transpose(0, 1)
+            inputs = [inputs]
+
+            directions = 2 if bidirectional else 1
+
+            if initial_state:
+                h0 = Variable(torch.randn(directions * layers, batch_size, RNN_HIDDEN_SIZE))
+                inputs.append(h0)
+            if packed_sequence != 0:
+                inputs.append(Variable(torch.IntTensor(seq_lengths)))
+            if len(inputs) == 1:
+                input = inputs[0]
+            else:
+                input = tuple(inputs)
+            return input
+
+        input = make_input(RNN_BATCH_SIZE)
+        self.run_model_test(model, train=False, batch_size=RNN_BATCH_SIZE, input=input, use_gpu=False)
+
+        # test that the model still runs with a different batch size
+        onnxir, _ = do_export(model, input)
+        other_input = make_input(RNN_BATCH_SIZE + 1)
+        _ = run_embed_params(onnxir, model, other_input, use_gpu=False)
+
+    def test_alexnet(self):
+        state_dict = model_zoo.load_url(model_urls['alexnet'], progress=False)
+        self.run_model_test(alexnet(), train=False, batch_size=BATCH_SIZE,
+                            state_dict=state_dict, atol=1e-3)
+
+    @skipIfNoCuda
+    def test_dcgan(self):
+        # dcgan is flaky on some seeds, see:
+        # https://github.com/ProjectToffee/onnx/pull/70
+        torch.manual_seed(1)
+        if torch.cuda.is_available():
+            torch.cuda.manual_seed_all(1)
+
+        netD = dcgan._netD(1)
+        netD.apply(dcgan.weights_init)
+        input = Variable(torch.randn(BATCH_SIZE, 3, dcgan.imgsz, dcgan.imgsz))
+        self.run_model_test(netD, train=False, batch_size=BATCH_SIZE,
+                            input=input)
+
+        netG = dcgan._netG(1)
+        netG.apply(dcgan.weights_init)
+        state_dict = model_zoo.load_url(model_urls['dcgan_b'], progress=False)
+        # state_dict = model_zoo.load_url(model_urls['dcgan_f'], progress=False)
+        noise = Variable(
+            torch.randn(BATCH_SIZE, dcgan.nz, 1, 1).normal_(0, 1))
+        self.run_model_test(netG, train=False, batch_size=BATCH_SIZE,
+                            input=noise, state_dict=state_dict, rtol=1e-2, atol=1e-6)
+
+    @unittest.skipIf(not torch.cuda.is_available(),
+                     "model on net has cuda in it, awaiting fix")
+    def test_densenet(self):
+        state_dict = model_zoo.load_url(model_urls['densenet121'], progress=False)
+        self.run_model_test(densenet121(), train=False, batch_size=BATCH_SIZE,
+                            state_dict=state_dict, atol=1e-7)
+
+    @skip("doesn't match exactly...")
+    # TODO: figure out the numerical instabilities
+    def test_inception(self):
+        x = Variable(
+            torch.randn(BATCH_SIZE, 3, 299, 299), requires_grad=True)
+        # state_dict = model_zoo.load_url(model_urls['inception_v3_google'], progress=False)
+        state_dict = None
+        self.run_model_test(inception_v3(), train=False, batch_size=BATCH_SIZE,
+                            state_dict=state_dict, input=x)
+
+    def test_resnet(self):
+        state_dict = model_zoo.load_url(model_urls['resnet50'], progress=False)
+        self.run_model_test(resnet50(), train=False, batch_size=BATCH_SIZE,
+                            state_dict=state_dict, atol=1e-6)
+
+    def test_squeezenet(self):
+        sqnet_v1_1 = SqueezeNet(version=1.1)
+        state_dict = model_zoo.load_url(model_urls['squeezenet1_1'], progress=False)
+        # state_dict = model_zoo.load_url(model_urls['squeezenet1_0'], progress=False)
+        self.run_model_test(sqnet_v1_1, train=False, batch_size=BATCH_SIZE,
+                            state_dict=state_dict)
+
+    # @skip('takes long to run, LAPACK needed for gpu')
+    @skipIfNoLapack
+    @unittest.skip("This model takes too much memory")
+    def test_srresnet(self):
+        super_resolution_net = SRResNet(
+            rescale_factor=4, n_filters=64, n_blocks=8)
+        state_dict = model_zoo.load_url(model_urls['srresNet'], progress=False)
+        x = Variable(torch.randn(1, 3, 224, 224), requires_grad=True)
+        self.run_model_test(super_resolution_net, train=False,
+                            batch_size=1, state_dict=state_dict,
+                            input=x, use_gpu=False)
+
+    @skipIfTravis
+    @skipIfNoLapack
+    @skipIfNoCuda
+    def test_super_resolution(self):
+        super_resolution_net = SuperResolutionNet(upscale_factor=3)
+        state_dict = model_zoo.load_url(model_urls['super_resolution'], progress=False)
+        x = Variable(torch.randn(1, 1, 224, 224), requires_grad=True)
+        self.run_model_test(super_resolution_net, train=False,
+                            batch_size=BATCH_SIZE, state_dict=state_dict,
+                            input=x, use_gpu=False)
+
+    @unittest.skip("This model takes too much memory")
+    def test_vgg16(self):
+        state_dict = model_zoo.load_url(model_urls['vgg16'], progress=False)
+        self.run_model_test(vgg16(), train=False, batch_size=BATCH_SIZE,
+                            state_dict=state_dict)
+
+    @skip("disable to run tests faster...")
+    def test_vgg16_bn(self):
+        self.run_model_test(vgg16_bn(), train=False,
+                            batch_size=BATCH_SIZE)
+
+    @skip("disable to run tests faster...")
+    def test_vgg19(self):
+        state_dict = model_zoo.load_url(model_urls['vgg19'], progress=False)
+        self.run_model_test(vgg19(), train=False, batch_size=BATCH_SIZE,
+                            state_dict=state_dict)
+
+    @skip("disable to run tests faster...")
+    def test_vgg19_bn(self):
+        self.run_model_test(vgg19_bn(), train=False,
+                            batch_size=BATCH_SIZE)
+
+    def run_word_language_model(self, model_name):
+        ntokens = 50
+        emsize = 5
+        nhid = 5
+        nlayers = 5
+        dropout = 0.2
+        tied = False
+        batchsize = 5
+        model = word_language_model.RNNModel(model_name, ntokens, emsize,
+                                             nhid, nlayers, dropout, tied,
+                                             batchsize)
+        x = Variable(torch.arange(0, ntokens).long().view(-1, batchsize),
+                     requires_grad=False)
+        # Only the CPU version is supported, since the tracer does not work with GPU RNNs.
+        self.run_model_test(model, train=False, input=(x, model.hidden),
+                            batch_size=batchsize, use_gpu=False)
+
+    def test_word_language_model_RNN_TANH(self):
+        self.run_word_language_model("RNN_TANH")
+
+    def test_word_language_model_RNN_RELU(self):
+        self.run_word_language_model("RNN_RELU")
+
+    def test_word_language_model_LSTM(self):
+        self.run_word_language_model("LSTM")
+
+    def test_word_language_model_GRU(self):
+        self.run_word_language_model("GRU")
+
+    def test_batchnorm1d_special(self):
+        c = Variable(torch.randn(BATCH_SIZE, 224))
+        model = nn.BatchNorm1d(224)
+        self.run_model_test(model, train=True, input=c, batch_size=BATCH_SIZE)
+
+    def test_constant(self):
+        c = Variable(torch.randn(BATCH_SIZE, 3, 224, 224))
+
+        class MyModel(torch.nn.Module):
+            def __init__(self):
+                super(MyModel, self).__init__()
+
+            def forward(self, input):
+                return input + c.type_as(input)
+
+        self.run_model_test(MyModel(), train=False, batch_size=BATCH_SIZE)
+
+    def test_consumed_bn(self):
+        underlying = nn.BatchNorm2d(3)
+        self.run_model_test(underlying, train=True, batch_size=BATCH_SIZE)
+
+    def _test_index_generic(self, fn):
+        class MyModel(torch.nn.Module):
+            def __init__(self):
+                super(MyModel, self).__init__()
+
+            def forward(self, input):
+                return fn(input)
+
+        m1 = Variable(torch.randn(3, 4))
+        self.run_model_test(MyModel(), input=m1, train=False, batch_size=BATCH_SIZE)
+
+    def test_index_1d(self):
+        self._test_index_generic(lambda input: input[0])
+
+    def test_index_2d_1dimslice(self):
+        self._test_index_generic(lambda input: input[0:1, :])
+
+    def test_index_2d_sliceint(self):
+        self._test_index_generic(lambda input: input[1, :])
+
+    def test_index_2d_neg_slice(self):
+        self._test_index_generic(lambda input: input[0:-1, :])
+
+    # TODO: Slicing along two dimensions is currently unsupported by the caffe2
+    # backend. Revisit if this becomes supported in the future.
+    """
+    def test_index_2d_2dimslice(self):
+        self._test_index_generic(lambda input: input[0:1, 0:1])
+    """
+    """
+    def test_index_2d_neg_slice2dim(self):
+        self._test_index_generic(lambda input: input[0:-1, 0:-1])
+    """
+
+    def test_chunk(self):
+        class MyModel(torch.nn.Module):
+            def __init__(self):
+                super(MyModel, self).__init__()
+
+            def forward(self, input):
+                # Index because chunk returns a tuple, and the test runner
+                # doesn't support tuple comparison.
+                return input.chunk(20, dim=2)[-1]
+        self.run_model_test(MyModel(), train=False, batch_size=BATCH_SIZE)
+
+    def test_sqrt(self):
+        class MyModel(torch.nn.Module):
+            def __init__(self):
+                super(MyModel, self).__init__()
+
+            def forward(self, input):
+                return input.sqrt()
+        input = Variable(torch.empty(BATCH_SIZE, 10, 10).uniform_(4, 9))
+        self.run_model_test(MyModel(), train=False, input=input, batch_size=BATCH_SIZE)
+
+    def test_addconstant(self):
+        class MyModel(torch.nn.Module):
+            def __init__(self):
+                super(MyModel, self).__init__()
+
+            def forward(self, input):
+                return input + 1
+        self.run_model_test(MyModel(), train=False, batch_size=BATCH_SIZE)
+
+    def test_subconstant(self):
+        class MyModel(torch.nn.Module):
+            def __init__(self):
+                super(MyModel, self).__init__()
+
+            def forward(self, input):
+                return input - 1
+        self.run_model_test(MyModel(), train=False, batch_size=BATCH_SIZE)
+
+    def test_embedding(self):
+        model = nn.Embedding(10, 3, padding_idx=-1)
+        input = Variable(torch.LongTensor(list(range(10))[::-1]))
+        self.run_model_test(model, train=False, input=input, batch_size=BATCH_SIZE)
+
+    def test_constantpad2d(self):
+        model = nn.ConstantPad2d((1, 2, 3, 4), 3.5)
+        self.run_model_test(model, train=False, batch_size=BATCH_SIZE)
+
+    def test_reflectionpad2d(self):
+        model = nn.ReflectionPad2d((1, 2, 3, 4))
+        self.run_model_test(model, train=False, batch_size=BATCH_SIZE)
+
+    def test_replicationpad2d(self):
+        model = nn.ReplicationPad2d((1, 2, 3, 4))
+        self.run_model_test(model, train=False, batch_size=BATCH_SIZE)
+
+    def test_maxpool2d(self):
+        model = nn.MaxPool2d(5, padding=(1, 2))
+        self.run_model_test(model, train=False, batch_size=BATCH_SIZE)
+
+    def test_maxpool2d_single_padding(self):
+        model = nn.MaxPool2d(5, padding=2)
+        self.run_model_test(model, train=False, batch_size=BATCH_SIZE)
+
+    @unittest.skip("C2 and PyTorch have small difference in padding implementation")
+    def test_avgpool2d(self):
+        model = nn.AvgPool2d(5, padding=(2))
+        self.run_model_test(model, train=False, batch_size=BATCH_SIZE)
+
+    def test_avgpool2d_no_padding(self):
+        model = nn.AvgPool2d(5)
+        self.run_model_test(model, train=False, batch_size=BATCH_SIZE)
+
+    def test_mnist(self):
+        model = MNIST()
+        input = Variable(torch.randn(BATCH_SIZE, 1, 28, 28))
+        state_dict = None
+        # TODO: test with state_dict
+        self.run_model_test(model, train=False, input=input, batch_size=BATCH_SIZE,
+                            state_dict=state_dict)
+
+    def test_mm(self):
+        class MyModel(torch.nn.Module):
+            def __init__(self):
+                super(MyModel, self).__init__()
+
+            def forward(self, m1, m2):
+                return torch.mm(m1, m2)
+        m1 = Variable(torch.randn(3, 4))
+        m2 = Variable(torch.randn(4, 5))
+        self.run_model_test(MyModel(), train=False, input=(m1, m2), batch_size=BATCH_SIZE, use_gpu=False)
+
+    def test_addmm(self):
+        class MyModel(torch.nn.Module):
+            def __init__(self):
+                super(MyModel, self).__init__()
+
+            def forward(self, ma, m1, m2):
+                return torch.addmm(ma, m1, m2)
+        ma = Variable(torch.randn(5))
+        m1 = Variable(torch.randn(3, 4))
+        m2 = Variable(torch.randn(4, 5))
+        self.run_model_test(MyModel(), train=False, input=(ma, m1, m2), batch_size=BATCH_SIZE, use_gpu=False)
+
+    def test_softmax(self):
+        for i in range(2, 7):
+            model = nn.Softmax(dim=i - 1)
+            dims = [2] * (i - 2) + [3, 4]
+            input = Variable(torch.randn(*dims).fill_(1),
+                             requires_grad=True)
+            self.run_model_test(model, train=False, batch_size=BATCH_SIZE, input=input)
+
+    def test_logsoftmax(self):
+        for i in range(2, 7):
+            model = nn.LogSoftmax(dim=i - 1)
+            dims = [2] * (i - 2) + [3, 4]
+            input = Variable(torch.randn(*dims).fill_(1),
+                             requires_grad=True)
+            self.run_model_test(model, train=False, batch_size=BATCH_SIZE, input=input)
+
+    def test_convtranspose(self):
+        model = nn.ConvTranspose2d(3, 3, 3, stride=3, bias=False, padding=1, output_padding=2)
+        self.run_model_test(model, train=False, batch_size=BATCH_SIZE, atol=1e-7)
+
+    # NB: InstanceNorm model includes unused weights, so skip this in TestCaffe2BackendEmbed
+    # TODO: We should have another pass to eliminate the unused initializers in ONNX models.
+    @skipIfEmbed
+    def test_instance_norm(self):
+        underlying = nn.InstanceNorm2d(3)
+        self.run_model_test(underlying, train=False, batch_size=BATCH_SIZE)
+
+    def test_dynamic_sizes(self):
+        class MyModel(torch.nn.Module):
+            def __init__(self):
+                super(MyModel, self).__init__()
+
+            def forward(self, x):
+                shape = torch.onnx.operators.shape_as_tensor(x)
+                new_shape = torch.cat((torch.LongTensor([-1]), shape[0].view(1)))
+                return torch.onnx.operators.reshape_from_tensor_shape(x, new_shape)
+        x = Variable(torch.randn(3, 5, 7))
+        self.run_model_test(MyModel(), train=False, input=x, batch_size=BATCH_SIZE, use_gpu=False)
+
+    def test_advanced_broadcast(self):
+        class MyModel(torch.nn.Module):
+            def __init__(self):
+                super(MyModel, self).__init__()
+
+            def forward(self, x, y):
+                return torch.mul(x, y)
+        x = Variable(torch.randn(1, 5, 10))
+        y = Variable(torch.randn(1, 5, 1))
+        self.run_model_test(MyModel(), train=False, input=(x, y), batch_size=BATCH_SIZE, use_gpu=False)
+
+# A bit of metaprogramming to set up all the RNN tests.
+
+
+def make_test(name, base, layer, bidirectional, initial_state,
+              variable_length, dropout,
+              **extra_kwargs):
+    test_name = str('_'.join([
+        'test', name, layer[1],
+        bidirectional[1], initial_state[1],
+        variable_length[1], dropout[1]
+    ]))
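+    # e.g. "test_lstm_unilayer_forward_with_initial_state_without_sequence_lengths_with_dropout"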
+
+    def f(self):
+        self._dispatch_rnn_test(
+            base,
+            layers=layer[0],
+            bidirectional=bidirectional[0],
+            initial_state=initial_state[0],
+            packed_sequence=variable_length[0],
+            dropout=dropout[0],
+            **extra_kwargs)
+
+    f.__name__ = test_name
+    setattr(TestCaffe2Backend, f.__name__, f)
+
+
+def setup_rnn_tests():
+    layers_opts = [
+        (1, 'unilayer'),
+        (3, 'trilayer')
+    ]
+    bidirectional_opts = [
+        (False, 'forward'),
+        (True, 'bidirectional')
+    ]
+    initial_state_opts = [
+        (True, 'with_initial_state'),
+        (False, 'no_initial_state')
+    ]
+    variable_length_opts = [
+        (0, 'without_sequence_lengths'),
+        (1, 'with_variable_length_sequences'),
+        (2, 'with_batch_first_sequence_lengths')
+    ]
+    dropout_opts = [
+        (0.2, 'with_dropout'),
+        (0.0, 'without_dropout')
+    ]
+    test_count = 0
+    for (layer, bidirectional, initial_state, variable_length, dropout) in \
+        itertools.product(
+            layers_opts,
+            bidirectional_opts,
+            initial_state_opts,
+            variable_length_opts,
+            dropout_opts,
+    ):
+
+        for base, name, extra_kwargs in (
+                ('elman', 'elman_relu', {'nonlinearity': u'relu'}),
+                ('elman', 'elman_tanh', {'nonlinearity': u'tanh'}),
+                ('lstm', 'lstm', {}),
+                ('gru', 'gru', {})
+        ):
+            make_test(name, base, layer, bidirectional, initial_state,
+                      variable_length, dropout,
+                      **extra_kwargs)
+            test_count += 1
+
+    # sanity check that a representative example does exist
+    TestCaffe2Backend.test_gru_trilayer_forward_with_initial_state_without_sequence_lengths_with_dropout
+
+    # make sure no one accidentally disables all the tests without
+    # noticing
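+    # 2 layer depths x 2 directions x 2 initial-state options x 3 sequence
+    # variants x 2 dropout settings = 48 combinations, each over 4 RNN
+    # flavours (elman_relu, elman_tanh, lstm, gru) = 192 tests.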
+    assert test_count == 192, test_count
+setup_rnn_tests()
+
+# add the same test suite as above, but switch embed_params=False
+# to embed_params=True
+TestCaffe2BackendEmbed = type(str("TestCaffe2BackendEmbed"),
+                              (unittest.TestCase,),
+                              dict(TestCaffe2Backend.__dict__, embed_params=True))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/onnx/test_caffe2_common.py b/test/onnx/test_caffe2_common.py
new file mode 100644
index 0000000..dba6649
--- /dev/null
+++ b/test/onnx/test_caffe2_common.py
@@ -0,0 +1,41 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import glob
+import numpy as np
+import onnx.backend.test
+import caffe2.python.onnx.backend as c2
+import os
+from onnx import numpy_helper
+
+
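+# NB: despite its name, this returns the raw onnx.TensorProto; callers
+# convert to numpy with numpy_helper.to_array.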
+def load_tensor_as_numpy_array(f):
+    tensor = onnx.TensorProto()
+    with open(f, 'rb') as file:
+        tensor.ParseFromString(file.read())
+    return tensor
+
+
+def assert_similar(ref, real):
+    np.testing.assert_equal(len(ref), len(real))
+    for i in range(len(ref)):
+        np.testing.assert_allclose(ref[i], real[i], rtol=1e-3)
+
+
+def run_generated_test(model_file, data_dir, device='CPU'):
+    model = onnx.load(model_file)
+    input_num = len(glob.glob(os.path.join(data_dir, "input_*.pb")))
+    inputs = []
+    for i in range(input_num):
+        inputs.append(numpy_helper.to_array(load_tensor_as_numpy_array(
+            os.path.join(data_dir, "input_{}.pb".format(i)))))
+    output_num = len(glob.glob(os.path.join(data_dir, "output_*.pb")))
+    outputs = []
+    for i in range(output_num):
+        outputs.append(numpy_helper.to_array(load_tensor_as_numpy_array(
+            os.path.join(data_dir, "output_{}.pb".format(i)))))
+    prepared = c2.prepare(model, device=device)
+    c2_outputs = prepared.run(inputs)
+    assert_similar(outputs, c2_outputs)
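+
+
+# A minimal usage sketch (the layout follows the ONNX backend-test data
+# convention; the "test_operator_add" directory here is hypothetical):
+#
+#   run_generated_test("data/test_operator_add/model.onnx",
+#                      "data/test_operator_add/test_data_set_0")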
diff --git a/test/onnx/test_models.py b/test/onnx/test_models.py
new file mode 100644
index 0000000..e928855
--- /dev/null
+++ b/test/onnx/test_models.py
@@ -0,0 +1,163 @@
+from torchvision.models.alexnet import alexnet
+from torchvision.models.inception import inception_v3
+from torchvision.models.densenet import densenet121
+from torchvision.models.resnet import resnet50
+from torchvision.models.vgg import vgg16, vgg16_bn, vgg19, vgg19_bn
+
+from model_defs.mnist import MNIST
+from model_defs.word_language_model import RNNModel
+from model_defs.squeezenet import SqueezeNet
+from model_defs.super_resolution import SuperResolutionNet
+from model_defs.srresnet import SRResNet
+from model_defs.dcgan import _netD, _netG, weights_init, bsz, imgsz, nz
+from model_defs.op_test import DummyNet, ConcatNet, PermuteNet, PReluNet
+
+from test_pytorch_common import TestCase, run_tests, skipIfNoLapack, skipIfCI
+
+import torch
+import torch.onnx
+import torch.onnx.utils
+from torch.autograd import Variable, Function
+from torch.nn import Module
+
+import onnx
+import onnx.checker
+import onnx.helper
+
+import google.protobuf.text_format
+
+import io
+import unittest
+
+import caffe2.python.onnx.backend as backend
+
+from verify import verify
+
+if torch.cuda.is_available():
+    def toC(x):
+        return x.cuda()
+else:
+    def toC(x):
+        return x
+
+BATCH_SIZE = 2
+
+
+class TestModels(TestCase):
+    def exportTest(self, model, inputs, rtol=1e-2, atol=1e-7):
+        trace = torch.onnx.utils._trace(model, inputs)
+        torch._C._jit_pass_lint(trace.graph())
+        verify(model, inputs, backend, rtol=rtol, atol=atol)
+
+    def test_ops(self):
+        x = Variable(
+            torch.randn(BATCH_SIZE, 3, 224, 224).fill_(1.0)
+        )
+        self.exportTest(toC(DummyNet()), toC(x))
+
+    def test_prelu(self):
+        x = Variable(
+            torch.randn(BATCH_SIZE, 3, 224, 224).fill_(1.0)
+        )
+        self.exportTest(PReluNet(), x)
+
+    def test_concat(self):
+        input_a = Variable(torch.randn(BATCH_SIZE, 3))
+        input_b = Variable(torch.randn(BATCH_SIZE, 3))
+        inputs = ((toC(input_a), toC(input_b)), )
+        self.exportTest(toC(ConcatNet()), inputs)
+
+    def test_permute(self):
+        x = Variable(torch.randn(BATCH_SIZE, 3, 10, 12))
+        self.exportTest(PermuteNet(), x)
+
+    @unittest.skip("This model takes too much memory")
+    def test_srresnet(self):
+        x = Variable(torch.randn(1, 3, 224, 224).fill_(1.0))
+        self.exportTest(toC(SRResNet(rescale_factor=4, n_filters=64, n_blocks=8)), toC(x))
+
+    @skipIfCI
+    @skipIfNoLapack
+    def test_super_resolution(self):
+        x = Variable(
+            torch.randn(BATCH_SIZE, 1, 224, 224).fill_(1.0)
+        )
+        self.exportTest(toC(SuperResolutionNet(upscale_factor=3)), toC(x), atol=1e-6)
+
+    def test_alexnet(self):
+        x = Variable(
+            torch.randn(BATCH_SIZE, 3, 224, 224).fill_(1.0)
+        )
+        self.exportTest(toC(alexnet()), toC(x))
+
+    @unittest.skip("Waiting for https://github.com/pytorch/pytorch/pull/3100")
+    def test_mnist(self):
+        x = Variable(torch.randn(BATCH_SIZE, 1, 28, 28).fill_(1.0))
+        self.exportTest(toC(MNIST()), toC(x))
+
+    @skipIfCI
+    def test_vgg16(self):
+        # VGG 16-layer model (configuration "D")
+        x = Variable(torch.randn(BATCH_SIZE, 3, 224, 224).fill_(1.0))
+        self.exportTest(toC(vgg16()), toC(x))
+
+    @skipIfCI
+    def test_vgg16_bn(self):
+        # VGG 16-layer model (configuration "D") with batch normalization
+        x = Variable(torch.randn(BATCH_SIZE, 3, 224, 224).fill_(1.0))
+        self.exportTest(toC(vgg16_bn()), toC(x))
+
+    @skipIfCI
+    def test_vgg19(self):
+        # VGG 19-layer model (configuration "E")
+        x = Variable(torch.randn(BATCH_SIZE, 3, 224, 224).fill_(1.0))
+        self.exportTest(toC(vgg19()), toC(x))
+
+    @skipIfCI
+    def test_vgg19_bn(self):
+        # VGG 19-layer model (configuration 'E') with batch normalization
+        x = Variable(torch.randn(BATCH_SIZE, 3, 224, 224).fill_(1.0))
+        self.exportTest(toC(vgg19_bn()), toC(x))
+
+    def test_resnet(self):
+        # ResNet50 model
+        x = Variable(torch.randn(BATCH_SIZE, 3, 224, 224).fill_(1.0))
+        self.exportTest(toC(resnet50()), toC(x), atol=1e-6)
+
+    def test_inception(self):
+        x = Variable(
+            torch.randn(BATCH_SIZE, 3, 299, 299).fill_(1.0))
+        self.exportTest(toC(inception_v3()), toC(x))
+
+    def test_squeezenet(self):
+        # SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and
+        # <0.5MB model size.  SqueezeNet 1.1 has 2.4x less computation and
+        # slightly fewer params than 1.0, without sacrificing accuracy.
+        # Only version 1.1 is exported here; at 224x224 inputs, version 1.0
+        # needs ceil_mode pooling, which the ONNX exporter does not handle.
+        x = Variable(torch.randn(BATCH_SIZE, 3, 224, 224).fill_(1.0))
+        sqnet_v1_1 = SqueezeNet(version=1.1)
+        self.exportTest(toC(sqnet_v1_1), toC(x))
+
+    def test_densenet(self):
+        # Densenet-121 model
+        x = Variable(torch.randn(BATCH_SIZE, 3, 224, 224).fill_(1.0))
+        self.exportTest(toC(densenet121()), toC(x))
+
+    def test_dcgan_netD(self):
+        netD = _netD(1)
+        netD.apply(weights_init)
+        input = Variable(torch.Tensor(bsz, 3, imgsz, imgsz).normal_(0, 1))
+        self.exportTest(toC(netD), toC(input))
+
+    def test_dcgan_netG(self):
+        netG = _netG(1)
+        netG.apply(weights_init)
+        input = Variable(torch.Tensor(bsz, nz, 1, 1).normal_(0, 1))
+        self.exportTest(toC(netG), toC(input))
+
+if __name__ == '__main__':
+    run_tests()
diff --git a/test/onnx/test_onnx_common.py b/test/onnx/test_onnx_common.py
new file mode 100644
index 0000000..9183a4b
--- /dev/null
+++ b/test/onnx/test_onnx_common.py
@@ -0,0 +1,17 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import os
+
+
+onnx_model_dir = os.path.join(os.path.dirname(
+    os.path.realpath(__file__)), os.pardir, "repos", "onnx", "onnx",
+    "backend", "test", "data")
+
+
+pytorch_converted_dir = os.path.join(onnx_model_dir, "pytorch-converted")
+
+
+pytorch_operator_dir = os.path.join(onnx_model_dir, "pytorch-operator")
diff --git a/test/onnx/test_operators.py b/test/onnx/test_operators.py
new file mode 100644
index 0000000..d749b31
--- /dev/null
+++ b/test/onnx/test_operators.py
@@ -0,0 +1,427 @@
+from test_pytorch_common import TestCase, run_tests, skipIfNoLapack, flatten
+import test_onnx_common
+
+import torch
+import torch.onnx
+from torch.autograd import Variable, Function
+from torch.nn import Module
+import torch.nn as nn
+
+import onnx
+import onnx.checker
+import onnx.helper
+
+import google.protobuf.text_format
+
+import itertools
+import io
+import unittest
+import inspect
+import argparse
+import glob
+import os
+import shutil
+import sys
+import common
+from onnx import numpy_helper
+
+_onnx_test = False
+
+
+def export_to_string(model, inputs, *args, **kwargs):
+    f = io.BytesIO()
+    with torch.no_grad():
+        torch.onnx.export(model, inputs, f, *args, **kwargs)
+    return f.getvalue()
+
+
+class FuncModule(Module):
+    def __init__(self, f, params=tuple()):
+        super(FuncModule, self).__init__()
+        self.f = f
+        self.params = nn.ParameterList(list(params))
+
+    def forward(self, *args):
+        return self.f(*itertools.chain(args, self.params))
+
+
+class TestOperators(TestCase):
+
+    def assertONNXExpected(self, binary_pb, subname=None):
+        model_def = onnx.ModelProto.FromString(binary_pb)
+        onnx.checker.check_model(model_def)
+        # doc_string contains a stack trace; strip it
+        onnx.helper.strip_doc_string(model_def)
+        self.assertExpected(google.protobuf.text_format.MessageToString(model_def, float_format='.15g'), subname)
+        return model_def
+
+    def assertONNX(self, f, args, params=tuple(), **kwargs):
+        if isinstance(f, nn.Module):
+            m = f
+        else:
+            m = FuncModule(f, params)
+        onnx_model_pb = export_to_string(m, args, **kwargs)
+        model_def = self.assertONNXExpected(onnx_model_pb)
+        if _onnx_test:
+            test_function = inspect.stack()[1][0].f_code.co_name
+            test_name = test_function[0:4] + "_operator" + test_function[4:]
+            output_dir = os.path.join(test_onnx_common.pytorch_operator_dir, test_name)
+            # Assumptions:
+            #     1) the old test data has been deleted before this run;
+            #     2) each test calls assertONNX at most once, otherwise the
+            #        data would be overwritten.
+            assert not os.path.exists(output_dir), "{} should not exist!".format(output_dir)
+            os.makedirs(output_dir)
+            with open(os.path.join(output_dir, "model.onnx"), 'wb') as file:
+                file.write(model_def.SerializeToString())
+            data_dir = os.path.join(output_dir, "test_data_set_0")
+            os.makedirs(data_dir)
+            if isinstance(args, Variable):
+                args = (args,)
+            for index, var in enumerate(flatten(args)):
+                tensor = numpy_helper.from_array(var.data.numpy())
+                with open(os.path.join(data_dir, "input_{}.pb".format(index)), 'wb') as file:
+                    file.write(tensor.SerializeToString())
+            outputs = m(*args)
+            if isinstance(outputs, Variable):
+                outputs = (outputs,)
+            for index, var in enumerate(flatten(outputs)):
+                tensor = numpy_helper.from_array(var.data.numpy())
+                with open(os.path.join(data_dir, "output_{}.pb".format(index)), 'wb') as file:
+                    file.write(tensor.SerializeToString())
+
+    def assertONNXRaises(self, err, f, args, params=tuple(), **kwargs):
+        if isinstance(f, nn.Module):
+            m = f
+        else:
+            m = FuncModule(f, params)
+        self.assertExpectedRaises(err, lambda: export_to_string(m, args, **kwargs))
+
+    def assertONNXRaisesRegex(self, err, reg, f, args, params=tuple(), **kwargs):
+        if isinstance(f, nn.Module):
+            m = f
+        else:
+            m = FuncModule(f, params)
+        with self.assertRaisesRegex(err, reg):
+            export_to_string(m, args, **kwargs)
+
+    def test_basic(self):
+        x = Variable(torch.Tensor([0.4]), requires_grad=True)
+        y = Variable(torch.Tensor([0.7]), requires_grad=True)
+        self.assertONNX(lambda x, y: -torch.sigmoid(torch.tanh(x * (x + y))), (x, y))
+
+    def test_view(self):
+        x = Variable(torch.Tensor([0]), requires_grad=True)
+        self.assertONNX(lambda x: x.view(1, 1), x)
+
+    def test_index(self):
+        x = Variable(torch.Tensor([[0]]), requires_grad=True)
+        self.assertONNX(lambda x: x[0], x)
+
+    def test_type_as(self):
+        x = Variable(torch.Tensor([0]), requires_grad=True)
+        self.assertONNX(lambda x: x.type_as(x), x)
+
+    def test_addconstant(self):
+        x = Variable(torch.DoubleTensor(2, 3), requires_grad=True)
+        self.assertONNX(lambda x: x + 1, x)
+
+    def test_add_broadcast(self):
+        x = Variable(torch.DoubleTensor(2, 3), requires_grad=True)
+        y = Variable(torch.DoubleTensor(3), requires_grad=True)
+        self.assertONNX(lambda x, y: x + y, (x, y))
+
+    def test_add_left_broadcast(self):
+        x = Variable(torch.DoubleTensor(3), requires_grad=True)
+        y = Variable(torch.DoubleTensor(2, 3), requires_grad=True)
+        self.assertONNXRaisesRegex(RuntimeError,
+                                   r"ONNX export failed: Could not export a broadcasted operation.*",
+                                   lambda x, y: x + y, (x, y), verbose=True)
+
+    def test_add_size1_broadcast(self):
+        x = Variable(torch.DoubleTensor(2, 3), requires_grad=True)
+        y = Variable(torch.DoubleTensor(2, 1), requires_grad=True)
+        self.assertONNX(lambda x, y: x + y, (x, y))
+
+    def test_add_size1_right_broadcast(self):
+        x = Variable(torch.DoubleTensor(2, 3), requires_grad=True)
+        y = Variable(torch.DoubleTensor(3), requires_grad=True)
+        self.assertONNX(lambda x, y: x + y, (x, y))
+
+    def test_add_size1_singleton_broadcast(self):
+        x = Variable(torch.DoubleTensor(2, 3), requires_grad=True)
+        y = Variable(torch.DoubleTensor(1, 3), requires_grad=True)
+        self.assertONNX(lambda x, y: x + y, (x, y))
+
+    def test_transpose(self):
+        x = Variable(torch.Tensor([[0, 1], [2, 3]]), requires_grad=True)
+        self.assertONNX(lambda x: x.transpose(0, 1).transpose(1, 0), x)
+
+    def test_chunk(self):
+        x = Variable(torch.Tensor([0, 1, 2]), requires_grad=True)
+        self.assertONNX(lambda x: x.chunk(2), x)
+
+    def test_concat2(self):
+        x = Variable(torch.randn(2, 3))
+        y = Variable(torch.randn(2, 3))
+        self.assertONNX(lambda inputs: torch.cat(inputs, 1), ((x, y),))
+
+    def test_mm(self):
+        m1 = Variable(torch.randn(2, 3), requires_grad=True)
+        m2 = Variable(torch.randn(3, 4), requires_grad=True)
+        self.assertONNX(torch.mm, (m1, m2))
+
+    def test_addmm(self):
+        m1 = Variable(torch.randn(2, 3), requires_grad=True)
+        m2 = Variable(torch.randn(3, 4), requires_grad=True)
+        m3 = Variable(torch.randn(4), requires_grad=True)
+        self.assertONNX(lambda x, y, z: torch.addmm(torch.addmm(z, x, y), x, y), (m1, m2, m3))
+
+    def test_permute2(self):
+        x = Variable(torch.Tensor([[[[[[0]]]]]]), requires_grad=True)
+        self.assertONNX(lambda x: x.permute(0, 1, 4, 2, 5, 3), x)
+
+    def test_pad(self):
+        x = Variable(torch.Tensor([[[[0, 1, 1, 1], [2, 3, 7, 7]]]]), requires_grad=True)
+        self.assertONNX(nn.ReflectionPad2d((2, 3, 0, 1)), x)
+
+    def test_params(self):
+        x = Variable(torch.Tensor([[1, 2], [3, 4]]), requires_grad=True)
+        y = nn.Parameter(torch.Tensor([[1, 2], [3, 4]]), requires_grad=True)
+        self.assertONNX(lambda x, y: -torch.sigmoid(torch.tanh(x * (x + y))), x, params=(y, ))
+
+    def test_non_float_params(self):
+        x = Variable(torch.LongTensor([[1, 2], [3, 4]]), requires_grad=True)
+        y = nn.Parameter(torch.LongTensor([[1, 2], [3, 4]]), requires_grad=True)
+        self.assertONNX(lambda x, y: x * (x + y), x, params=(y, ))
+
+    def test_symbolic_mismatch(self):
+        class MyFun(Function):
+            @staticmethod
+            def symbolic(g, x):
+                # The inside of this function should never be invoked, because
+                # we will fail due to an argument mismatch first.
+                assert False
+
+            @staticmethod
+            def forward(ctx, x, y):
+                return x + y
+
+        x = Variable(torch.randn(2, 2).fill_(1.0))
+        y = Variable(torch.randn(2, 2).fill_(1.0))
+        # NB: Don't use expect test here, the type error wobbles depending
+        # on Python version
+        with self.assertRaisesRegex(TypeError, "occurred when translating MyFun"):
+            export_to_string(FuncModule(MyFun().apply), (x, y))
+
+    # TODO: Do an nn style test for these
+    def test_batchnorm(self):
+        x = Variable(torch.randn(2, 2, 2, 2).fill_(1.0), requires_grad=True)
+        self.assertONNX(nn.BatchNorm2d(2), x)
+
+    def test_batchnorm_1d(self):
+        x = Variable(torch.randn(2, 2).fill_(1.0), requires_grad=True)
+        self.assertONNX(nn.BatchNorm1d(2), x)
+
+    def test_batchnorm_training(self):
+        x = Variable(torch.randn(2, 2, 2, 2).fill_(1.0), requires_grad=True)
+        self.assertONNX(nn.BatchNorm2d(2), x, training=True)
+
+    def test_conv(self):
+        x = Variable(torch.randn(20, 16, 50, 40).fill_(1.0), requires_grad=True)
+        self.assertONNX(nn.Conv2d(16, 13, 3, bias=False), x)
+
+    def test_convtranspose(self):
+        x = Variable(torch.randn(2, 3, 4, 5).fill_(1.0), requires_grad=True)
+        self.assertONNX(nn.ConvTranspose2d(3, 3, 3, stride=3, bias=False,
+                                           padding=1, output_padding=2), x)
+
+    def test_maxpool(self):
+        x = Variable(torch.randn(20, 16, 50))
+        self.assertONNX(nn.MaxPool1d(3, stride=2), x)
+
+    def test_at_op(self):
+        x = Variable(torch.randn(3, 4))
+
+        class MyFun(Function):
+
+            @staticmethod
+            def symbolic(g, x):
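+                # g.at() emits an ATen fallback node, i.e. an ONNX op that
+                # asks the backend to dispatch to the named ATen operator.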
+                return g.at("add", x, x)
+
+            @staticmethod
+            def forward(ctx, x):
+                return x + x
+
+        class MyModule(Module):
+            def forward(self, x):
+                return MyFun.apply(x)
+
+        self.assertONNX(MyModule(), x)
+
+    def test_clip(self):
+        x = Variable(torch.randn(3, 4), requires_grad=True)
+        self.assertONNX(lambda x: torch.clamp(x, min=-0.5, max=0.5), x)
+
+    def test_max(self):
+        x = Variable(torch.randn(3, 4), requires_grad=True)
+        y = Variable(torch.randn(3, 4), requires_grad=True)
+        self.assertONNX(lambda x, y: torch.max(x, y), (x, y))
+
+    def test_min(self):
+        x = Variable(torch.randn(3, 4), requires_grad=True)
+        y = Variable(torch.randn(3, 4), requires_grad=True)
+        self.assertONNX(lambda x, y: torch.min(x, y), (x, y))
+
+    def test_mean(self):
+        x = Variable(torch.randn(1, 2, 3, 4), requires_grad=True)
+        self.assertONNX(lambda x: torch.mean(x), x)
+
+    def test_reduced_mean(self):
+        x = Variable(torch.randn(1, 2, 3, 4), requires_grad=True)
+        self.assertONNX(lambda x: torch.mean(x, dim=2), x)
+
+    def test_reduced_mean_keepdim(self):
+        x = Variable(torch.randn(1, 2, 3, 4), requires_grad=True)
+        self.assertONNX(lambda x: torch.mean(x, dim=2, keepdim=True), x)
+
+    def test_sum(self):
+        x = Variable(torch.randn(1, 2, 3, 4), requires_grad=True)
+        self.assertONNX(lambda x: torch.sum(x), x)
+
+    def test_reduced_sum(self):
+        x = Variable(torch.randn(1, 2, 3, 4), requires_grad=True)
+        self.assertONNX(lambda x: torch.sum(x, dim=2), x)
+
+    def test_reduced_sum_keepdim(self):
+        x = Variable(torch.randn(1, 2, 3, 4), requires_grad=True)
+        self.assertONNX(lambda x: torch.sum(x, dim=2, keepdim=True), x)
+
+    def test_prod(self):
+        x = Variable(torch.randn(1, 2, 3, 4), requires_grad=True)
+        self.assertONNX(lambda x: torch.prod(x), x)
+
+    def test_reduced_prod(self):
+        x = Variable(torch.randn(1, 2, 3, 4), requires_grad=True)
+        self.assertONNX(lambda x: torch.prod(x, dim=2), x)
+
+    def test_reduced_prod_keepdim(self):
+        x = Variable(torch.randn(1, 2, 3, 4), requires_grad=True)
+        self.assertONNX(lambda x: torch.prod(x, dim=2, keepdim=True), x)
+
+    def test_sqrt(self):
+        x = Variable(torch.randn(3, 4), requires_grad=True)
+        self.assertONNX(lambda x: torch.sqrt(x), x)
+
+    def test_equal(self):
+        x = Variable(torch.randn(3, 4).int(), requires_grad=True)
+        y = Variable(torch.randn(3, 4).int(), requires_grad=True)
+        self.assertONNX(lambda x, y: x == y, (x, y))
+
+    def test_exp(self):
+        x = Variable(torch.randn(3, 4), requires_grad=True)
+        self.assertONNX(lambda x: x.exp(), x)
+
+    def test_flatten(self):
+        # Flatten is a special case of Reshape when the output is a 2-D tensor.
+        x = Variable(torch.randn(1, 2, 3, 4), requires_grad=True)
+        self.assertONNX(lambda x: x.view(x.size()[0], x.numel() // x.size()[0]), x)
+
+    def test_logsoftmax(self):
+        x = Variable(torch.randn(1, 2, 3, 4), requires_grad=True)
+        self.assertONNX(nn.LogSoftmax(dim=2), x)
+
+    def test_pow(self):
+        x = Variable(torch.randn(1, 2, 3, 4), requires_grad=True)
+        y = Variable(torch.randn(1, 2, 3, 4), requires_grad=True)
+        self.assertONNX(lambda x, y: x.pow(y), (x, y))
+
+    def test_selu(self):
+        x = Variable(torch.randn(1, 2, 3, 4), requires_grad=True)
+        self.assertONNX(nn.SELU(), x)
+
+    def test_repeat(self):
+        x = Variable(torch.randn(1, 2, 3, 4), requires_grad=True)
+        self.assertONNX(lambda x: x.repeat(1, 2, 3, 4), x)
+
+    def test_repeat_dim_overflow(self):
+        x = Variable(torch.randn(1, 2), requires_grad=True)
+        self.assertONNX(lambda x: x.repeat(1, 2, 3, 4), x)
+
+    def test_symbolic_override(self):
+        """Lifted from fast-neural-style: custom implementation of instance norm
+        to be mapped to ONNX operator"""
+
+        class CustomInstanceNorm(torch.nn.Module):
+            def __init__(self, dim, eps=1e-9):
+                super(CustomInstanceNorm, self).__init__()
+                self.scale = nn.Parameter(torch.FloatTensor(dim).uniform_())
+                self.shift = nn.Parameter(torch.FloatTensor(dim).zero_())
+                self.eps = eps
+
+            def forward(self, x):
+                return self._run_forward(x, self.scale, self.shift, eps=self.eps)
+
+            @staticmethod
+            @torch.onnx.symbolic_override(
+                lambda g, x, scale, shift, eps: g.op(
+                    'InstanceNormalization', x, scale, shift, epsilon_f=eps)
+            )
+            def _run_forward(x, scale, shift, eps):
+                # since we hand-roll instance norm, it doesn't perform well when run entirely in fp16
+                n = x.size(2) * x.size(3)
+                t = x.view(x.size(0), x.size(1), n)
+                mean = torch.mean(t, 2).unsqueeze(2).unsqueeze(3).expand_as(x)
+                # Calculate the biased var. torch.var returns unbiased var
+                var = torch.var(t, 2).unsqueeze(2).unsqueeze(3).expand_as(x) * ((float(n) - 1) / float(n))
+                scale_broadcast = scale.unsqueeze(1).unsqueeze(1).unsqueeze(0)
+                scale_broadcast = scale_broadcast.expand_as(x)
+                shift_broadcast = shift.unsqueeze(1).unsqueeze(1).unsqueeze(0)
+                shift_broadcast = shift_broadcast.expand_as(x)
+                out = (x - mean) / torch.sqrt(var + eps)
+                out = out * scale_broadcast + shift_broadcast
+                return out
+
+        instnorm = CustomInstanceNorm(10)
+        x = Variable(torch.randn(2, 10, 32, 32))
+        self.assertONNX(instnorm, x)
+
+    """
+    def test_rnn(self):
+        rnn = nn.RNN(30, 20, 2)
+        input = Variable(torch.randn(10, 32, 30))
+        output, hidden = rnn(input)
+        self.assertONNX(rnn, input)
+    """
+
+    def test_symbolic_override_nested(self):
+        def symb(g, x, y):
+            assert isinstance(x, torch._C.Value)
+            assert isinstance(y[0], torch._C.Value)
+            assert isinstance(y[1], torch._C.Value)
+            return g.op('Sum', x, y[0], y[1]), (
+                g.op('Neg', x), g.op('Neg', y[0]))
+
+        @torch.onnx.symbolic_override_first_arg_based(symb)
+        def foo(x, y):
+            return x + y[0] + y[1], (-x, -y[0])
+
+        class BigModule(torch.nn.Module):
+            def forward(self, x, y):
+                return foo(x, y)
+
+        inp = (Variable(torch.FloatTensor([1])),
+               (Variable(torch.FloatTensor([2])),
+                Variable(torch.FloatTensor([3]))))
+        BigModule()(*inp)
+        self.assertONNX(BigModule(), inp)
+
+
+if __name__ == '__main__':
+    onnx_test_flag = '--onnx-test'
+    _onnx_test = onnx_test_flag in common.UNITTEST_ARGS
+    if _onnx_test:
+        common.UNITTEST_ARGS.remove(onnx_test_flag)
+    if _onnx_test:
+        for d in glob.glob(os.path.join(test_onnx_common.pytorch_operator_dir, "test_operator_*")):
+            shutil.rmtree(d)
+    run_tests()
diff --git a/test/onnx/test_pytorch_common.py b/test/onnx/test_pytorch_common.py
new file mode 100644
index 0000000..f273965
--- /dev/null
+++ b/test/onnx/test_pytorch_common.py
@@ -0,0 +1,43 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import functools
+import os
+import unittest
+import sys
+import torch
+import torch.autograd.function as function
+
+pytorch_test_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+sys.path.insert(-1, pytorch_test_dir)
+
+from common import *
+
+torch.set_default_tensor_type('torch.FloatTensor')
+
+
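+# Decorator factory: the returned decorator skips the wrapped test whenever
+# 'condition()' evaluates truthy at call time (see the skipIf* helpers below).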
+def _skipper(condition, reason):
+    def decorator(f):
+        @functools.wraps(f)
+        def wrapper(*args, **kwargs):
+            if condition():
+                raise unittest.SkipTest(reason)
+            return f(*args, **kwargs)
+        return wrapper
+    return decorator
+
+
+skipIfNoCuda = _skipper(lambda: not torch.cuda.is_available(),
+                        'CUDA is not available')
+
+skipIfTravis = _skipper(lambda: os.getenv('TRAVIS'),
+                        'Skip in Travis')
+
+skipIfCI = _skipper(lambda: os.getenv('CI'),
+                    'Skip in CI')
+
+
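+# Flatten an arbitrarily nested structure of lists/tuples of Tensors into a
+# flat tuple of the Tensors it contains.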
+def flatten(x):
+    return tuple(function._iter_filter(lambda o: isinstance(o, torch.Tensor))(x))
diff --git a/test/onnx/test_pytorch_helper.py b/test/onnx/test_pytorch_helper.py
new file mode 100644
index 0000000..3da5d89
--- /dev/null
+++ b/test/onnx/test_pytorch_helper.py
@@ -0,0 +1,68 @@
+# Some standard imports
+import numpy as np
+import torch
+from torch import nn
+from torch.autograd import Variable
+import torch.onnx
+import torch.nn.init as init
+from caffe2.python.model_helper import ModelHelper
+from pytorch_helper import PyTorchModule
+import unittest
+from caffe2.python.core import workspace
+
+from test_pytorch_common import skipIfNoLapack
+
+
+class TestCaffe2Backend(unittest.TestCase):
+
+    @skipIfNoLapack
+    def test_helper(self):
+
+        class SuperResolutionNet(nn.Module):
+            def __init__(self, upscale_factor, inplace=False):
+                super(SuperResolutionNet, self).__init__()
+
+                self.relu = nn.ReLU(inplace=inplace)
+                self.conv1 = nn.Conv2d(1, 64, (5, 5), (1, 1), (2, 2))
+                self.conv2 = nn.Conv2d(64, 64, (3, 3), (1, 1), (1, 1))
+                self.conv3 = nn.Conv2d(64, 32, (3, 3), (1, 1), (1, 1))
+                self.conv4 = nn.Conv2d(32, upscale_factor ** 2, (3, 3), (1, 1), (1, 1))
+                self.pixel_shuffle = nn.PixelShuffle(upscale_factor)
+
+                self._initialize_weights()
+
+            def forward(self, x):
+                x = self.relu(self.conv1(x))
+                x = self.relu(self.conv2(x))
+                x = self.relu(self.conv3(x))
+                x = self.pixel_shuffle(self.conv4(x))
+                return x
+
+            def _initialize_weights(self):
+                init.orthogonal(self.conv1.weight, init.calculate_gain('relu'))
+                init.orthogonal(self.conv2.weight, init.calculate_gain('relu'))
+                init.orthogonal(self.conv3.weight, init.calculate_gain('relu'))
+                init.orthogonal(self.conv4.weight)
+
+        torch_model = SuperResolutionNet(upscale_factor=3)
+
+        fake_input = Variable(torch.randn(1, 1, 224, 224), requires_grad=True)
+
+        # use ModelHelper to create a C2 net
+        helper = ModelHelper(name="test_model")
+        start = helper.Sigmoid(['the_input'])
+        # Embed the ONNX-converted pytorch net inside it
+        toutput, = PyTorchModule(helper, torch_model, (fake_input,), [start])
+        output = helper.Sigmoid(toutput)
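+        # Sandwiching the converted net between two native Caffe2 ops
+        # exercises the blob-passing glue on both the input and output side.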
+
+        workspace.RunNetOnce(helper.InitProto())
+        workspace.FeedBlob('the_input', fake_input.data.numpy())
+        workspace.RunNetOnce(helper.Proto())
+        c2_out = workspace.FetchBlob(str(output))
+
+        torch_out = torch.sigmoid(torch_model(torch.sigmoid(fake_input)))
+
+        np.testing.assert_almost_equal(torch_out.data.cpu().numpy(), c2_out, decimal=3)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/onnx/test_verify.py b/test/onnx/test_verify.py
new file mode 100644
index 0000000..009010a
--- /dev/null
+++ b/test/onnx/test_verify.py
@@ -0,0 +1,121 @@
+import torch
+from torch.autograd import Variable, Function
+from torch.nn import Module, Parameter
+import caffe2.python.onnx.backend as backend
+from verify import verify
+
+from test_pytorch_common import TestCase, run_tests
+
+import unittest
+
+
+class TestVerify(TestCase):
+    maxDiff = None
+
+    def assertVerifyExpectFail(self, *args, **kwargs):
+        try:
+            verify(*args, **kwargs)
+        except AssertionError as e:
+            if str(e):
+                # Compare only a short prefix of the message, because the
+                # exact text depends on the system's formatting settings.
+                self.assertExpected(str(e)[:60])
+                return
+            else:
+                raise
+        # Don't put this inside the try block, or the except clause above
+        # would swallow the AssertionError it raises.
+        self.assertTrue(False, msg="verify() did not fail when expected to")
+
+    def test_result_different(self):
+        class BrokenAdd(Function):
+            @staticmethod
+            def symbolic(g, a, b):
+                return g.op("Add", a, b)
+
+            @staticmethod
+            def forward(ctx, a, b):
+                return a.sub(b)  # yahaha! you found me!
+
+        class MyModel(Module):
+            def forward(self, x, y):
+                return BrokenAdd().apply(x, y)
+
+        x = Variable(torch.Tensor([1, 2]))
+        y = Variable(torch.Tensor([3, 4]))
+        self.assertVerifyExpectFail(MyModel(), (x, y), backend)
+
+    def test_jumbled_params(self):
+        class MyModel(Module):
+            def __init__(self):
+                super(MyModel, self).__init__()
+
+            def forward(self, x):
+                y = x * x
+                self.param = Parameter(torch.Tensor([2]))
+                return y
+
+        x = Variable(torch.Tensor([1, 2]))
+        with self.assertRaisesRegex(RuntimeError, "state_dict changed"):
+            verify(MyModel(), x, backend)
+
+    def test_modifying_params(self):
+        class MyModel(Module):
+            def __init__(self):
+                super(MyModel, self).__init__()
+                self.param = Parameter(torch.Tensor([2]))
+
+            def forward(self, x):
+                y = x * x
+                self.param.data.add_(1.0)
+                return y
+
+        x = Variable(torch.Tensor([1, 2]))
+        self.assertVerifyExpectFail(MyModel(), x, backend)
+
+    def test_dynamic_model_structure(self):
+        class MyModel(Module):
+            def __init__(self):
+                super(MyModel, self).__init__()
+                self.iters = 0
+
+            def forward(self, x):
+                if self.iters % 2 == 0:
+                    r = x * x
+                else:
+                    r = x + x
+                self.iters += 1
+                return r
+
+        x = Variable(torch.Tensor([1, 2]))
+        self.assertVerifyExpectFail(MyModel(), x, backend)
+
+    @unittest.skip("Indexing is broken by #3725")
+    def test_embedded_constant_difference(self):
+        class MyModel(Module):
+            def __init__(self):
+                super(MyModel, self).__init__()
+                self.iters = 0
+
+            def forward(self, x):
+                r = x[self.iters % 2]
+                self.iters += 1
+                return r
+
+        x = Variable(torch.Tensor([[1, 2], [3, 4]]))
+        self.assertVerifyExpectFail(MyModel(), x, backend)
+
+    def test_explicit_test_args(self):
+        class MyModel(Module):
+            def forward(self, x):
+                if x.data.sum() == 1.0:
+                    return x + x
+                else:
+                    return x * x
+
+        x = Variable(torch.Tensor([[6, 2]]))
+        y = Variable(torch.Tensor([[2, -1]]))
+        self.assertVerifyExpectFail(MyModel(), x, backend, test_args=[(y,)])
+
+
+if __name__ == '__main__':
+    run_tests()
diff --git a/test/onnx/verify.py b/test/onnx/verify.py
new file mode 100644
index 0000000..f36415d
--- /dev/null
+++ b/test/onnx/verify.py
@@ -0,0 +1,448 @@
+import torch
+import torch.jit
+import torch.onnx
+
+import onnx
+import onnx.helper
+
+import numpy as np
+
+import difflib
+import contextlib
+import io
+
+
+def colonize(msg, sep=": "):
+    if not msg:
+        return ""
+    else:
+        return msg + sep
+
+
+class Errors(object):
+    """
+    An error-collecting object which supports error recovery.
+
+    It is intended to be used like a context manager:
+
+    >>> with Errors("Top-level error message") as errs:
+    >>>     ...
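+
+    A fuller, illustrative sketch of the intended pattern (x and y here are
+    assumed to be numpy arrays):
+
+    >>> with Errors("outputs differ") as errs:
+    >>>     errs.checkAlmostEqual(x, y, "output 0")  # record, keep going
+    >>>     errs.failIfErrs()  # short-circuit if anything was recorded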
+    """
+
+    def __init__(self, msg, rtol=1e-3, atol=1e-7):
+        self.msg = msg
+        self.errors = []
+        self.context = []
+        self.rtol = rtol
+        self.atol = atol
+
+        # Allocated upon instance creation so that multiple Errors
+        # can be used
+        class ShortCircuit(Exception):
+            pass
+        self.exc_class = ShortCircuit
+
+    def requireAlmostEqual(self, x, y, msg=None):
+        """
+        Test that x and y are nearly equal (equal within self.rtol
+        precision); aborts execution if they are not.
+        """
+        self.almostEqualAndThen(x, y, msg, self.failWith)
+
+    def checkAlmostEqual(self, x, y, msg=None):
+        """
+        Test that x and y are nearly equal (equal within self.rtol
+        precision), but continue execution even if they are not equal.
+
+        To prevent error cascades, you should remember to call 'failIfErrs'
+        at some later point in time.
+        """
+        self.almostEqualAndThen(x, y, msg, self.addErr)
+
+    def almostEqualAndThen(self, x, y, msg, k):
+        """
+        Helper for implementing 'requireAlmostEqual' and 'checkAlmostEqual'.
+        Upon failure, invokes continuation 'k' with the error message.
+
+        At the moment, only tests on 'numpy.ndarray' are supported.
+        """
+        if isinstance(x, np.ndarray) and isinstance(y, np.ndarray):
+            try:
+                np.testing.assert_allclose(x, y, rtol=self.rtol, atol=self.atol, equal_nan=False, verbose=True)
+            except AssertionError as e:
+                k("{}{}".format(colonize(msg), str(e).lstrip()))
+        else:
+            raise RuntimeError("Unsupported almost equal test")
+
+    def requireEqual(self, x, y, msg=None):
+        """
+        Test that x and y are equal; aborts execution if they are not.
+        """
+        self.equalAndThen(x, y, msg, self.failWith)
+
+    def checkEqual(self, x, y, msg=None):
+        """
+        Test that x and y are equal, but continue execution even if they are not equal.
+
+        To prevent error cascades, you should remember to call 'failIfErrs'
+        at some later point in time.
+        """
+        self.equalAndThen(x, y, msg, self.addErr)
+
+    # Bit-for-bit accuracy test
+    def equalAndThen(self, x, y, msg, k):
+        """
+        Helper for implementing 'requireEqual' and 'checkEqual'.  Upon failure,
+        invokes continuation 'k' with the error message.
+        """
+        if isinstance(x, onnx.TensorProto) and isinstance(y, onnx.TensorProto):
+            self.equalAndThen(x.name, y.name, msg, k)
+            # Use numpy for the comparison
+            t1 = onnx.numpy_helper.to_array(x)
+            t2 = onnx.numpy_helper.to_array(y)
+            new_msg = "{}In embedded parameter '{}'".format(colonize(msg), x.name)
+            self.equalAndThen(t1, t2, new_msg, k)
+        elif isinstance(x, np.ndarray) and isinstance(y, np.ndarray):
+            try:
+                np.testing.assert_equal(x, y)
+            except AssertionError as e:
+                k("{}{}".format(colonize(msg, ": "), str(e).lstrip()))
+        else:
+            if x != y:
+                # TODO: Better algorithm for lists
+                sx = str(x)
+                sy = str(y)
+                if len(sx) > 40 or len(sy) > 40 or '\n' in sx or '\n' in sy:
+                    # long form
+                    l = "=" * 50
+                    k("\n{}The value\n{}\n{}\n{}\n\ndoes not equal\n\n{}\n{}\n{}"
+                        .format(colonize(msg, ":\n"), l, sx, l, l, sy, l))
+                else:
+                    k("{}{} != {}".format(colonize(msg), sx, sy))
+
+    def requireMultiLineEqual(self, x, y, msg=None):
+        """
+        Test that long, multi-line strings x and y are equal;
+        aborts execution if they are not.
+        """
+        self.multiLineEqualAndThen(x, y, msg, self.failWith)
+
+    def multiLineEqualAndThen(self, x, y, msg, k):
+        """
+        Helper for implementing 'requireMultiLineEqual'.  Upon failure,
+        invokes continuation 'k' with the error message.
+        """
+        if msg is None:
+            msg = "Strings are not equal"
+        if x != y:
+            diff = difflib.ndiff(x.splitlines(True), y.splitlines(True))
+            k("{}{}".format(colonize(msg, ":\n\n"), "".join(diff)))
+
+    def addErr(self, msg):
+        """
+        Add an error to the error context, but continue executing.
+        """
+        # TODO: instead of immediately concatenating the context in the msg,
+        # attach it as metadata and make a decision how to format it later.
+        msg_w_ctx = msg
+        for c in reversed(self.context):
+            msg_w_ctx += "\n\n  * " + "\n    ".join(c.splitlines())
+        self.errors.append(msg_w_ctx)
+
+    def fail(self):
+        """
+        Immediately fail and short-circuit to the next recovery context.
+
+        NB: It is an error to 'fail' without having added any errors to
+        the error context.
+        """
+        raise self.exc_class()
+
+    def failWith(self, msg):
+        """
+        Add an error to the error context, and then short-circuit.
+        """
+        self.addErr(msg)
+        self.fail()
+
+    def failIfErrs(self):
+        """
+        If there are any errors in the error context, short-circuit.
+
+        This is used to prevent error cascades.
+        """
+        if self.errors:
+            self.fail()
+
+    def recover(parent_self):
+        """
+        Returns a context manager which can be used to recover in case of
+        an error.  Example usage:
+
+        >>> with errs.recover():
+        >>>     ...
+        """
+        class Recover(object):
+            def __enter__(self):
+                pass
+
+            def __exit__(self, exc_type, exc_value, traceback):
+                if exc_type == parent_self.exc_class:
+                    return True
+        return Recover()
+
+    def addErrCtxt(parent_self, msg):
+        """
+        Returns a context manager which encloses a fragment of code with
+        an extra contextual message, e.g., where an error occurred, or a hint
+        applicable to all errors in the area.  Example usage:
+
+        >>> with errs.addErrCtx("Some text"):
+        >>>     ...
+        """
+        class AddContext(object):
+            def __enter__(self):
+                parent_self.context.append(msg)
+
+            def __exit__(self, exc_type, exc_value, traceback):
+                parent_self.context.pop()
+        return AddContext()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        if self.errors:
+            errors_msg = "\n\n".join(map(lambda x: "ERROR: " + x, self.errors))
+            final_msg = "{}\n{}\n{}".format(self.msg, '-' * 70, errors_msg)
+            raise AssertionError(final_msg)
+        if exc_type == self.exc_class:
+            raise RuntimeError("ShortCircuit was raised, but no errors were recorded")
+
+
+@contextlib.contextmanager
+def set_training(model, mode):
+    """
+    A context manager to temporarily set the training mode of 'model'
+    to 'mode', resetting it when we exit the with-block.
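+
+    Illustrative usage (model being any nn.Module):
+
+    >>> with set_training(model, False):
+    >>>     outputs = model(*args)  # runs in eval mode; mode restored after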
+    """
+    old_mode = model.training
+    if old_mode != mode:
+        model.train(mode)
+    try:
+        yield
+    finally:
+        if old_mode != mode:
+            model.train(old_mode)
+
+
+def verify(model, args, backend, verbose=False, training=False, rtol=1e-3, atol=1e-7, test_args=2):
+    """
+    Export a model into ONNX, import it into a specified ONNX backend, and then
+    on a few random inputs verify that PyTorch and the backend produced the same
+    results.  Requires onnx to be installed.
+
+    This function may spuriously fail: some operators are implemented with
+    different numerical precision in an ONNX backend, and an unstable
+    network (e.g., Inception) may amplify these numerical differences.  This
+    situation is less likely if your model has been trained.  However,
+    if this is not the case, you may have found a bug!  Please report it to the
+    PyTorch developers.  You can also debug the issue yourself by removing
+    operators from the end of your model until verification passes.
+
+    For reproducibility, we recommend explicitly setting PyTorch's seed before
+    invoking this function.
+
+    Arguments:
+        model (torch.nn.Module): the model to be exported and verified
+        args (tuple of arguments): the inputs to
+            the model, e.g., such that ``model(*args)`` is a valid
+            invocation of the model.  Any non-Variable arguments will
+            be hard-coded into the exported model; any Variable arguments
+            will become inputs of the exported model, in the order they
+            occur in args.  If args is a Variable, this is equivalent
+            to having called it with a 1-ary tuple of that Variable.
+            (Note: passing keyword arguments to the model is not currently
+            supported.  Give us a shout if you need it.)
+        backend (onnx.backend module): ONNX backend to verify with
+        verbose (bool, default False): if True, print out a debug
+            description of the trace being exported.
+        training (bool, default False): export the model in training mode.  At
+            the moment, ONNX is oriented towards exporting models for inference
+            only, so you will generally not need to set this to True.
+        rtol (float, default 1e-3): relative precision required
+        atol (float, default 1e-7): absolute precision required
+        test_args (int or iterable of args, default 2):
+            either an integer specifying the number
+            of random arguments to generate, or an iterable producing arguments
+            to test under.
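+
+    Illustrative usage (assumes the caffe2 ONNX backend is installed and
+    MyModel is a stand-in for your own nn.Module):
+
+    >>> import caffe2.python.onnx.backend as backend
+    >>> torch.manual_seed(0)  # seed for reproducibility
+    >>> verify(MyModel(), (torch.randn(2, 3),), backend)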
+    """
+    def _nested_map(condition, fn, condition_msg=None):
+        def _map(obj):
+            if condition(obj):
+                return fn(obj)
+            elif obj is None:
+                return None
+            elif isinstance(obj, (list, tuple)):
+                return type(obj)(_map(x) for x in obj)
+            else:
+                raise ValueError("Auto nesting doesn't know how to process "
+                                 "an input object of type " + torch.typename(obj) +
+                                 (". Accepted types: " + condition_msg +
+                                  ", or lists/tuples of them"
+                                  if condition_msg else ""))
+
+        return _map
+
+    def _iter_filter(condition, allow_unknown=False, condition_msg=None):
+        def _iter(obj):
+            if condition(obj):
+                yield obj
+            elif obj is None:
+                return
+            elif isinstance(obj, (list, tuple)):
+                for o in obj:
+                    for var in _iter(o):
+                        yield var
+            elif allow_unknown:
+                yield obj
+            else:
+                raise ValueError("Auto nesting doesn't know how to process "
+                                 "an input object of type " + torch.typename(obj) +
+                                 (". Accepted types: " + condition_msg +
+                                  ", or lists/tuples of them"
+                                  if condition_msg else ""))
+
+        return _iter
+
+    def is_tensor(o):
+        return isinstance(o, torch.Tensor)
+
+    _iter_tensors = _iter_filter(is_tensor, condition_msg="Tensors")
+
+    def randomize_arg(arg):
+        new_data = arg.data.clone()
+        # For now, don't try randomizing non-float tensors; these
+        # are likely to be things like indices, where just randomly
+        # spattering some longs is unlikely to work.  One way we could
+        # make this work is to apply a random permutation or something.
+        if arg.is_floating_point():
+            new_data.uniform_()
+        return torch.autograd.Variable(new_data, requires_grad=arg.requires_grad)
+
+    randomize_args = _nested_map(is_tensor, randomize_arg)
+
+    def backend_args(args):
+        # TODO: onnx should accept iterables
+        return tuple(v.data.cpu().numpy() for v in _iter_tensors(args))
+
+    def load_bytes(b):
+        b.seek(0)
+        x = onnx.load(b)
+        # doc_string has stack traces - let's remove them to make comparison
+        # sane
+        onnx.helper.strip_doc_string(x)
+        return x
+
+    # Special case for common case of passing a single Tensor
+    if isinstance(args, torch.Tensor):
+        args = (args,)
+
+    with set_training(model, training):
+        proto_bytes = io.BytesIO()
+        torch_out = torch.onnx._export(model, args, proto_bytes, verbose=verbose)
+        proto = load_bytes(proto_bytes)
+        prepared = backend.prepare(proto)
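+        # prepare() compiles the exported graph once; each call to run()
+        # below re-exports the model with fresh inputs, checks that the
+        # exported graph is unchanged, and compares results with PyTorch.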
+
+        def run(args):
+            alt_proto_bytes = io.BytesIO()
+            torch_out = torch.onnx._export(model, args, alt_proto_bytes, verbose=verbose)
+            alt_proto = load_bytes(alt_proto_bytes)
+            if proto.SerializeToString() != alt_proto.SerializeToString():
+                # OK, let's try to figure out what happened.
+                msg = "When I exported your model with different inputs, the result was different."
+                if not verbose:
+                    msg += "\n(To get more information, run torch.onnx.verify(..., verbose=True))"
+                with Errors(msg, rtol=rtol, atol=atol) as errs:
+                    # First, check if we have the same number of parameters, and
+                    # that they're the same order.  If they don't, something has *really* gone wrong.
+                    initializer_order_hint = ("This is really strange! The second time I exported your model,\n"
+                                              "it had a different set of parameters.  Are you assigning Parameters\n"
+                                              "in the forward() of your model definition?")
+                    with errs.addErrCtxt(initializer_order_hint):
+                        errs.requireEqual(list(map(lambda x: x.name, proto.graph.initializer)),
+                                          list(map(lambda x: x.name, alt_proto.graph.initializer)),
+                                          msg="Parameters list differs")
+
+                    # Now check if the embedded parameters are actually the same
+                    initializer_hint = ("A difference in embedded parameters usually means that\n"
+                                        "your model is updating parameters/buffers even in inference\n"
+                                        "mode.  Look for a buggy nn.Module which isn't respecting train().\n")
+                    with errs.recover(), errs.addErrCtxt(initializer_hint):
+                        for x, y in zip(proto.graph.initializer, alt_proto.graph.initializer):
+                            errs.checkEqual(x, y)
+
+                    # Next, check if the model structure lines up.
+                    structure_hint = ("A difference in model structure usually means that\n"
+                                      "your model has dynamic control flow.  These models are not\n"
+                                      "currently supported by the exporter.")
+                    with errs.recover(), errs.addErrCtxt(structure_hint):
+                        # Delete initializers since we already tested them
+                        stripped_proto = onnx.ModelProto()
+                        stripped_proto.CopyFrom(proto)
+                        del stripped_proto.graph.initializer[:]
+
+                        stripped_alt_proto = onnx.ModelProto()
+                        stripped_alt_proto.CopyFrom(alt_proto)
+                        del stripped_alt_proto.graph.initializer[:]
+
+                        # Compare the printable graph representations first
+                        errs.requireMultiLineEqual(onnx.helper.printable_graph(stripped_proto.graph),
+                                                   onnx.helper.printable_graph(stripped_alt_proto.graph))
+
+                        # Compare the actual protobuf text formats now (not
+                        # very user-friendly!)
+                        errs.requireMultiLineEqual(str(stripped_proto), str(stripped_alt_proto))
+
+                        # One last ditch effort, using built-in equality on
+                        # protobufs
+                        errs.requireEqual(stripped_proto, stripped_alt_proto)
+
+                    errs.failIfErrs()
+
+                    # At this point, we should have figured out why the binary
+                    # protobufs differed, and short-circuited out of this code
+                    # with a helpful error message.  But what if we didn't?
+                    # We better still try to give a good error message in this
+                    # case.  We EXPECT these requires to fail.  If they don't,
+                    # that is a bug in verify
+                    errs.requireEqual(proto, alt_proto)
+                    errs.requireEqual(proto_bytes.getvalue(), alt_proto_bytes.getvalue())
+                    assert False
+
+            # TODO: test that the traced model also returns the same thing...
+            run_helper(torch_out, args)
+
+        # Factored out so we can avoid one run of the model
+        def run_helper(torch_out, args):
+            backend_out = prepared.run(backend_args(args))
+            if isinstance(torch_out, torch.Tensor):
+                torch_out = (torch_out,)
+            # NB: onnx backend NEVER returns bare numpy array
+            msg = "ONNX backend returned different results from PyTorch"
+            result_hint = ("If you are not using trained parameters, a difference in results\n"
+                           "could mean that your network is numerically unstable.  Otherwise\n"
+                           "it indicates a bug in PyTorch/ONNX; please file a bug report.")
+            with Errors(msg, rtol=rtol, atol=atol) as errs, errs.addErrCtxt(result_hint):
+                for i, (x, y) in enumerate(zip(torch_out, backend_out)):
+                    errs.checkAlmostEqual(x.data.cpu().numpy(), y, "In output {}".format(i))
+
+        run_helper(torch_out, args)
+
+        if isinstance(test_args, int):
+            for i in range(test_args):
+                run(randomize_args(args))
+        else:
+            for test_arg in test_args:
+                run(test_arg)