Move default arguments to function declaration

 * Make alpha, beta in addmm kwarg_only
 * Move kwarg_only arguments to the end
 * _out variants now have output arguments at the beginning
diff --git a/ATen.h b/ATen.h
index ba57b63..8792ad2 100644
--- a/ATen.h
+++ b/ATen.h
@@ -3,7 +3,6 @@
 #include "ATen/ATenGeneral.h"
 #include "ATen/Scalar.h"
 #include "ATen/Type.h"
-#include "ATen/TypeMethods.h"
 #include "ATen/Generator.h"
 #include "ATen/Context.h"
 #include "ATen/Storage.h"
diff --git a/CheckGenerator.h b/CheckGenerator.h
index d2013d5..ec7a23d 100644
--- a/CheckGenerator.h
+++ b/CheckGenerator.h
@@ -6,7 +6,9 @@
 namespace at {
 
 template <typename T>
-static inline T * check_generator(Generator* expr) {
+static inline T * check_generator(Generator * expr, Generator * defaultValue) {
+  if (!expr)
+    expr = defaultValue;
   if(auto result = dynamic_cast<T*>(expr))
     return result;
   runtime_error("Expected a '%s' but found '%s'", typeid(T).name(), typeid(expr).name());
diff --git a/Context.cpp b/Context.cpp
index a4a3093..63ff0a4 100644
--- a/Context.cpp
+++ b/Context.cpp
@@ -1,4 +1,5 @@
 #include "Context.h"
+
 #include <thread>
 #include <mutex>
 #include <sstream>
diff --git a/Declarations.cwrap b/Declarations.cwrap
new file mode 100644
index 0000000..c9deff3
--- /dev/null
+++ b/Declarations.cwrap
@@ -0,0 +1,4192 @@
+[[
+  name: THPTensor_(elementSize)
+  python_name: element_size
+  cpu_half: True
+  auto_gpu: False
+  only_register: True
+]]
+[[
+  name: THPTensor_(storage)
+  python_name: storage
+  cpu_half: True
+  auto_gpu: False
+  only_register: True
+]]
+[[
+  name: storageOffset
+  python_name: storage_offset
+  cpu_half: True
+  auto_gpu: False
+  return: long
+  arguments:
+    - THTensor* self
+]]
+[[
+  name: nDimension
+  python_name: ndimension
+  cpu_half: True
+  auto_gpu: False
+  return: long
+  arguments:
+    - THTensor* self
+]]
+[[
+  name: THPTensor_(nDimension)
+  python_name: dim
+  cpu_half: True
+  auto_gpu: False
+  only_register: True
+  method_flags: METH_KEYWORDS
+]]
+[[
+  python_name: index
+  name: THPTensor_(getValue)<true>
+  only_register: True
+  override_method_flags: METH_O
+]]
+[[
+  python_name: _set_index
+  name: THPTensor_(setIndex)
+  only_register: True
+]]
+[[
+  python_name: _check_advanced_indexing
+  name: THPTensor_(checkAdvancedIndexing)
+  cpu_half: False
+  only_register: True
+  override_method_flags: METH_O
+]]
+[[
+  python_name: _advanced_index_add
+  name: THPTensor_(advancedIndexAdd)
+  cpu_half: False
+  only_register: True
+]]
+[[
+  python_name: _advanced_index_select
+  name: THPTensor_(advancedIndexSelect)
+  cpu_half: False
+  only_register: True
+]]
+[[
+  name: resize_
+  return: self
+  cname: resize
+  cpu_half: True
+  before_call:
+    THPUtils_assert(arg_self->storage->flag & TH_STORAGE_RESIZABLE,
+      "calling resize_ on a tensor that has non-resizable storage. Clone it first "
+      "or create a new tensor instead.");
+  arguments:
+    - THTensor* self
+    - arg: THSize* size
+      long_args: True
+    - CONSTANT NULL
+]]
+[[
+  name: zeros
+  variants:
+    - function
+  auto_gpu: False
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - arg: THSize* size
+      long_args: True
+]]
+[[
+  name: zeros_like
+  cname: zerosLike
+  variants:
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* input
+]]
+[[
+  name: ones
+  variants:
+    - function
+  auto_gpu: False
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - arg: THSize* size
+      long_args: True
+]]
+[[
+  name: ones_like
+  cname: onesLike
+  variants:
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* input
+]]
+[[
+  name: numel
+  return: long
+  cname: nElement
+  cpu_half: True
+  auto_gpu: False
+  variants:
+    - method
+    - function
+  arguments:
+    - THTensor* self
+]]
+[[
+  name: THPTensor_(numel)
+  python_name: nelement
+  cpu_half: True
+  auto_gpu: False
+  only_register: True
+  method_flags: METH_KEYWORDS
+]]
+[[
+  name: set_
+  cname: set
+  cpu_half: True
+  auto_gpu: False
+  return: argument 0
+  options:
+    - cname: set
+      arguments:
+        - THTensor* self
+        - THTensor* source
+    - cname: setStorage
+      arguments:
+        - THTensor* self
+        - CONSTANT NULL, 0, NULL, NULL
+    - cname: setStorage
+      before_call: THLongStoragePtr __storage_size(THLongStorage_newWithSize1(THStorage_(size)(LIBRARY_STATE arg_storage)));
+      arguments:
+        - THTensor* self
+        - THStorage* storage
+        - CONSTANT 0
+        - CONSTANT __storage_size.get()
+        - CONSTANT NULL
+    - cname: setStorage
+      arguments:
+        - THTensor* self
+        - THStorage* sourceStorage
+        - long storage_offset
+        - THSize* size
+        - arg: THStride* stride
+          default: NULL
+]]
+[[
+  name: THPTensor_(select)
+  python_name: select
+  cpu_half: True
+  auto_gpu: False
+  only_register: True
+]]
+[[
+  name: THPTensor_(size)
+  python_name: size
+  cpu_half: True
+  auto_gpu: False
+  method_flags: METH_KEYWORDS
+  only_register: True
+]]
+[[
+  name: THPTensor_(stride)
+  python_name: stride
+  cpu_half: True
+  auto_gpu: False
+  method_flags: METH_KEYWORDS
+  only_register: True
+]]
+[[
+  name: fill_
+  cname: fill
+  return: self
+  arguments:
+    - THTensor* self
+    - real value
+]]
+[[
+  name: isSameSizeAs
+  python_name: is_same_size
+  cpu_half: True
+  auto_gpu: False
+  return: bool
+  arguments:
+    - THTensor* self
+    - THTensor* other
+]]
+[[
+  name: isContiguous
+  python_name: is_contiguous
+  cpu_half: True
+  auto_gpu: False
+  return: bool
+  arguments:
+    - THTensor* self
+]]
+[[
+  name: isSetTo
+  python_name: is_set_to
+  cpu_half: True
+  auto_gpu: False
+  return: bool
+  arguments:
+    - THTensor* self
+    - THTensor* tensor
+]]
+[[
+  name: maskedFill_
+  cname: maskedFill
+  python_name: masked_fill_
+  return: self
+  arguments:
+    - arg: THTensor* self
+      broadcast: mask inplace fallback types:Byte
+    - THBoolTensor* mask
+    - real value
+]]
+[[
+  name: maskedCopy_
+  cname: maskedCopy
+  python_name: masked_scatter_
+  return: self
+  arguments:
+    - arg: THTensor* self
+      broadcast: mask inplace fallback types:Byte
+    - THBoolTensor* mask
+    - THTensor* source
+]]
+[[
+  name: maskedSelect
+  python_name: masked_select
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - arg: THTensor* self
+      broadcast: mask fallback types:Byte
+    - THBoolTensor* mask
+]]
+[[
+  name: transpose
+  variants:
+    - method
+    - function
+  cname: newTranspose
+  cpu_half: True
+  auto_gpu: False
+  return: THTensor*
+  arguments:
+    - THTensor* self
+    - arg: long dim0
+      wrap_dim: self
+    - arg: long dim1
+      wrap_dim: self
+]]
+[[
+  name: transpose_
+  cname: transpose
+  cpu_half: True
+  auto_gpu: False
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* self
+    - arg: long dim0
+      wrap_dim: self
+    - arg: long dim1
+      wrap_dim: self
+]]
+[[
+  name: t
+  variants:
+    - method
+    - function
+  auto_gpu: False
+  cname: newTranspose
+  return: THTensor*
+  before_call: |
+    long ndim = arg_self->nDimension;
+    THPUtils_assert(ndim == 2, "t() expects a 2D tensor, but self is %ldD", ndim);
+  arguments:
+    - THTensor* self
+    - CONSTANT 0
+    - CONSTANT 1
+]]
+[[
+  name: t_
+  cname: transpose
+  auto_gpu: False
+  return: self
+  before_call: |
+    long ndim = arg_self->nDimension;
+    THPUtils_assert(ndim == 2, "t_() expects a 2D tensor, but self is %ldD", ndim);
+  arguments:
+    - THTensor* self
+    - THTensor* self
+    - CONSTANT 0
+    - CONSTANT 1
+]]
+[[
+  name: squeeze
+  cpu_half: True
+  variants:
+    - method
+    - function
+  return: argument 0
+  options:
+    - arguments:
+        - arg: THTensor* result
+          output: True
+        - THTensor* self
+    - cname: squeeze1d
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - THTensor* self
+        - arg: long dim
+          wrap_dim: self
+]]
+[[
+  name: squeeze_
+  cpu_half: True
+  return: self
+  options:
+    - cname: squeeze
+      arguments:
+        - THTensor* self
+        - THTensor* self
+    - cname: squeeze1d
+      arguments:
+        - THTensor* self
+        - THTensor* self
+        - arg: long dim
+          wrap_dim: self
+]]
+[[
+  name: unsqueeze
+  variants:
+    - method
+    - function
+  cpu_half: True
+  auto_gpu: False
+  return: argument 0
+  cname: unsqueeze1d
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+    - arg: long dim
+      wrap_dim: self+1
+]]
+[[
+  name: unsqueeze_
+  cpu_half: True
+  auto_gpu: False
+  return: self
+  cname: unsqueeze1d
+  arguments:
+    - THTensor* self
+    - THTensor* self
+    - arg: long dim
+      wrap_dim: self+1
+]]
+[[
+  name: nonzero
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THIndexTensor* result
+      output: True
+    - THTensor* self
+]]
+[[
+  name: contiguous
+  cname: newContiguous
+  return: THTensor*
+  arguments:
+    - THTensor* self
+]]
+[[
+  name: clone
+  cname: newClone
+  return: THTensor*
+  aten_sparse: True
+  arguments:
+    - THTensor* self
+]]
+[[
+  name: view
+  cname: newView
+  auto_gpu: False
+  return: THTensor*
+  arguments:
+    - THTensor* self
+    - arg: THSize* size
+      long_args: True
+]]
+[[
+  name: expand
+  cname: newExpand
+  return: THTensor*
+  arguments:
+    - THTensor* self
+    - arg: THSize* size
+      long_args: True
+]]
+[[
+  name: resizeAs_
+  python_name: resize_as_
+  cname: resizeAs
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* the_template
+]]
+[[
+  name: indexSelect
+  python_name: index_select
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+    - arg: long dim
+      wrap_dim: self
+    - THIndexTensor* index
+]]
+[[
+  name: indexCopy_
+  python_name: index_copy_
+  cname: indexCopy
+  return: argument 0
+  arguments:
+    - THTensor* self
+    - arg: long dim
+      wrap_dim: self
+    - THIndexTensor* index
+    - THTensor* source
+]]
+[[
+  name: indexAdd_
+  python_name: index_add_
+  cname: indexAdd
+  return: argument 0
+  arguments:
+    - THTensor* self
+    - arg: long dim
+      wrap_dim: self
+    - THIndexTensor* index
+    - THTensor* source
+]]
+[[
+  name: indexFill_
+  python_name: index_fill_
+  cname: indexFill
+  return: argument 0
+  arguments:
+    - THTensor* self
+    - arg: long dim
+      wrap_dim: self
+    - THIndexTensor* index
+    - real value
+]]
+[[
+  name: narrow
+  cpu_half: True
+  auto_gpu: False
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+    - arg: long dimension
+      wrap_dim: self
+    - long start
+    - long length
+]]
+[[
+  name: unfold
+  cpu_half: True
+  auto_gpu: False
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+    - arg: long dimension
+      wrap_dim: self
+    - long size
+    - long step
+]]
+[[
+  name: range
+  variants:
+    - function
+  backends:
+    - CPU
+    - CUDA
+  return: argument 0
+  before_arg_assign: |
+    PyErr_WarnEx(PyExc_UserWarning, "torch.range is deprecated in favor of torch.arange "
+        "and will be removed in 0.3. Note that arange generates values in [start; end), "
+        "not [start; end].", 1);
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - accreal start
+    - accreal end
+    - arg: accreal step
+      default: 1
+]]
+[[
+  name: arange
+  variants:
+    - function
+  backends:
+    - CPU
+    - CUDA
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - accreal start
+    - accreal end
+    - arg: accreal step
+      default: 1
+]]
+[[
+  name: scatter_
+  return: argument 0
+  options:
+    - cname: scatter
+      arguments:
+        - THTensor* self
+        - arg: long dim
+          wrap_dim: self
+        - THIndexTensor* index
+        - THTensor* src
+    - cname: scatterFill
+      arguments:
+        - THTensor* self
+        - arg: long dim
+          wrap_dim: self
+        - THIndexTensor* index
+        - real value
+]]
+[[
+  name: scatter_add_
+  return: argument 0
+  cname: scatterAdd
+  arguments:
+    - THTensor* self
+    - arg: long dim
+      wrap_dim: self
+    - THIndexTensor* index
+    - THTensor* src
+]]
+[[
+  name: gather
+  variants:
+    - method
+    - function
+  return: argument 0
+  before_call: |
+    THLongStoragePtr _size(THIndexTensor_(newSizeOf)(LIBRARY_STATE arg_index));
+    THTensor_(resize)(LIBRARY_STATE arg_result, _size, NULL);
+  arguments:
+    - arg: THTensor* result
+      output: True
+      resize: index
+    - THTensor* self
+    - arg: long dim
+      wrap_dim: self
+    - THIndexTensor* index
+]]
+[[
+  name: THPTensor_stateless_(cat)
+  python_name: cat
+  method_flags: METH_KEYWORDS
+  only_register: True
+  variants:
+    - function
+]]
+[[
+  name: data_ptr
+  defined_if: "!IS_DISTRIBUTED"
+  with_gil: True
+  auto_gpu: False
+  return: void*
+  cpu_half: True
+  cname: data
+  arguments:
+    - THTensor* self
+]]
+[[
+  name: equal
+  variants:
+    - method
+    - function
+  return: bool
+  arguments:
+    - THTensor* self
+    - THTensor* other
+]]
+[[
+  python_name: copy_
+  name: THPTensor_(copy_)
+  cpu_half: True
+  method_flags: METH_KEYWORDS
+  only_register: True
+]]
+[[
+  name: __and__
+  variants:
+    - method
+    - function
+  return: argument 0
+  options:
+    - cname: bitand
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - THTensor* self
+        - real value
+    - cname: cbitand
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - arg: THTensor* self
+          broadcast: other fallback
+        - THTensor* other
+]]
+[[
+  name: __iand__
+  variants:
+    - method
+    - function
+  return: argument 0
+  options:
+    - cname: bitand
+      arguments:
+        - THTensor* self
+        - THTensor* self
+        - real value
+    - cname: cbitand
+      arguments:
+        - THTensor* self
+        - arg: THTensor* self
+          broadcast: other inplace fallback
+        - THTensor* other
+]]
+[[
+  name: __or__
+  variants:
+    - method
+    - function
+  return: argument 0
+  options:
+    - cname: bitor
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - THTensor* self
+        - real value
+    - cname: cbitor
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - arg: THTensor* self
+          broadcast: other fallback
+        - THTensor* other
+]]
+[[
+  name: __ior__
+  variants:
+    - method
+    - function
+  return: argument 0
+  options:
+    - cname: bitor
+      arguments:
+        - THTensor* self
+        - THTensor* self
+        - real value
+    - cname: cbitor
+      arguments:
+        - THTensor* self
+        - arg: THTensor* self
+          broadcast: other inplace fallback
+        - THTensor* other
+]]
+[[
+  name: __xor__
+  variants:
+    - method
+    - function
+  return: argument 0
+  options:
+    - cname: bitxor
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - THTensor* self
+        - real value
+    - cname: cbitxor
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - arg: THTensor* self
+          broadcast: other fallback
+        - THTensor* other
+]]
+[[
+  name: __ixor__
+  variants:
+    - method
+    - function
+  return: argument 0
+  options:
+    - cname: bitxor
+      arguments:
+        - THTensor* self
+        - THTensor* self
+        - real value
+    - cname: cbitxor
+      arguments:
+        - THTensor* self
+        - arg: THTensor* self
+          broadcast: other inplace fallback
+        - THTensor* other
+]]
+[[
+  name: __lshift__
+  variants:
+    - method
+    - function
+  return: argument 0
+  options:
+    - cname: lshift
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - THTensor* self
+        - real value
+    - cname: clshift
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - arg: THTensor* self
+          broadcast: other fallback
+        - THTensor* other
+]]
+[[
+  name: __ilshift__
+  variants:
+    - method
+    - function
+  return: argument 0
+  options:
+    - cname: lshift
+      arguments:
+        - THTensor* self
+        - THTensor* self
+        - real value
+    - cname: clshift
+      arguments:
+        - THTensor* self
+        - arg: THTensor* self
+          broadcast: other inplace fallback
+        - THTensor* other
+]]
+[[
+  name: __rshift__
+  variants:
+    - method
+    - function
+  return: argument 0
+  options:
+    - cname: rshift
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - THTensor* self
+        - real value
+    - cname: crshift
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - arg: THTensor* self
+          broadcast: other fallback
+        - THTensor* other
+]]
+[[
+  name: __irshift__
+  variants:
+    - method
+    - function
+  return: argument 0
+  options:
+    - cname: rshift
+      arguments:
+        - THTensor* self
+        - THTensor* self
+        - real value
+    - cname: crshift
+      arguments:
+        - THTensor* self
+        - arg: THTensor* self
+          broadcast: other inplace fallback
+        - THTensor* other
+]]
+[[
+  name: THPTensor_(apply)
+  python_name: apply_
+  defined_if: "!IS_DISTRIBUTED"
+  backends:
+    - CPU
+  cpu_half: True
+  only_register: True
+  override_method_flags: METH_O
+]]
+[[
+  name: THPTensor_(map)
+  python_name: map_
+  defined_if: "!IS_DISTRIBUTED"
+  backends:
+    - CPU
+  cpu_half: True
+  only_register: True
+]]
+[[
+  name: THPTensor_(map2)
+  python_name: map2_
+  defined_if: "!IS_DISTRIBUTED"
+  backends:
+    - CPU
+  cpu_half: True
+  only_register: True
+]]
+[[
+  name: lt
+  variants:
+    - method
+  return: argument 0
+  options:
+    - cname: ltValue
+      arguments:
+        - arg: THBoolTensor* result
+          output: True
+        - THTensor* self
+        - real value
+    - cname: ltTensor
+      arguments:
+        - arg: THBoolTensor* result
+          output: True
+        - arg: THTensor* self
+          broadcast: other fallback
+        - THTensor* other
+]]
+[[
+  name: lt_
+  return: self
+  options:
+    - cname: ltValueT
+      arguments:
+        - THTensor* self
+        - THTensor* self
+        - real value
+    - cname: ltTensorT
+      arguments:
+        - THTensor* self
+        - arg: THTensor* self
+          broadcast: other inplace fallback
+        - arg: THTensor* other
+]]
+[[
+  name: lt
+  variants:
+    - function
+  return: argument 0
+  options:
+    - cname: ltValue
+      arguments:
+        - arg: THBoolTensor* result
+          output: True
+        - THTensor* tensor
+        - real value
+    - cname: ltTensor
+      arguments:
+        - arg: THBoolTensor* result
+          output: True
+        - arg: THTensor* tensor
+          broadcast: other fallback
+        - THTensor* other
+    - cname: ltValueT
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - THTensor* tensor
+        - real value
+    - cname: ltTensorT
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - arg: THTensor* tensor
+          broadcast: other fallback
+        - THTensor* other
+]]
+[[
+  name: gt
+  variants:
+    - method
+  return: argument 0
+  options:
+    - cname: gtValue
+      arguments:
+        - arg: THBoolTensor* result
+          output: True
+        - THTensor* self
+        - real value
+    - cname: gtTensor
+      arguments:
+        - arg: THBoolTensor* result
+          output: True
+        - arg: THTensor* self
+          broadcast: other fallback
+        - THTensor* other
+]]
+[[
+  name: gt_
+  return: self
+  options:
+    - cname: gtValueT
+      arguments:
+        - THTensor* self
+        - THTensor* self
+        - real value
+    - cname: gtTensorT
+      arguments:
+        - THTensor* self
+        - arg: THTensor* self
+          broadcast: other inplace fallback
+        - THTensor* other
+]]
+[[
+  name: gt
+  variants:
+    - function
+  return: argument 0
+  options:
+    - cname: gtValue
+      arguments:
+        - arg: THBoolTensor* result
+          output: True
+        - THTensor* tensor
+        - real value
+    - cname: gtTensor
+      arguments:
+        - arg: THBoolTensor* result
+          output: True
+        - arg: THTensor* tensor
+          broadcast: other fallback
+        - THTensor* other
+    - cname: gtValueT
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - THTensor* tensor
+        - real value
+    - cname: gtTensorT
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - arg: THTensor* tensor
+          broadcast: other fallback
+        - THTensor* other
+
+]]
+[[
+  name: le
+  variants:
+    - method
+  return: argument 0
+  options:
+    - cname: leValue
+      arguments:
+        - arg: THBoolTensor* result
+          output: True
+        - THTensor* self
+        - real value
+    - cname: leTensor
+      arguments:
+        - arg: THBoolTensor* result
+          output: True
+        - arg: THTensor* self
+          broadcast: other fallback
+        - THTensor* other
+]]
+[[
+  name: le_
+  return: self
+  options:
+    - cname: leValueT
+      arguments:
+        - THTensor* self
+        - THTensor* self
+        - real value
+    - cname: leTensorT
+      arguments:
+        - THTensor* self
+        - arg: THTensor* self
+          broadcast: other inplace fallback
+        - THTensor* other
+]]
+[[
+  name: le
+  variants:
+    - function
+  return: argument 0
+  options:
+    - cname: leValue
+      arguments:
+        - arg: THBoolTensor* result
+          output: True
+        - THTensor* tensor
+        - real value
+    - cname: leTensor
+      arguments:
+        - arg: THBoolTensor* result
+          output: True
+        - arg: THTensor* tensor
+          broadcast: other fallback
+        - THTensor* other
+    - cname: leValueT
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - THTensor* tensor
+        - real value
+    - cname: leTensorT
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - arg: THTensor* tensor
+          broadcast: other fallback
+        - THTensor* other
+]]
+[[
+  name: ge
+  variants:
+    - method
+  return: argument 0
+  options:
+    - cname: geValue
+      arguments:
+        - arg: THBoolTensor* result
+          output: True
+        - THTensor* self
+        - real value
+    - cname: geTensor
+      arguments:
+        - arg: THBoolTensor* result
+          output: True
+        - arg: THTensor* self
+          broadcast: other fallback
+        - THTensor* other
+]]
+[[
+  name: ge_
+  return: self
+  options:
+    - cname: geValueT
+      arguments:
+        - THTensor* self
+        - THTensor* self
+        - real value
+    - cname: geTensorT
+      arguments:
+        - THTensor* self
+        - arg: THTensor* self
+          broadcast: other inplace fallback
+        - THTensor* other
+]]
+[[
+  name: ge
+  variants:
+    - function
+  return: argument 0
+  options:
+    - cname: geValue
+      arguments:
+        - arg: THBoolTensor* result
+          output: True
+        - THTensor* tensor
+        - real value
+    - cname: geTensor
+      arguments:
+        - arg: THBoolTensor* result
+          output: True
+        - arg: THTensor* tensor
+          broadcast: other fallback
+        - THTensor* other
+    - cname: geValueT
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - THTensor* tensor
+        - real value
+    - cname: geTensorT
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - arg: THTensor* tensor
+          broadcast: other fallback
+        - THTensor* other
+]]
+[[
+  name: eq
+  variants:
+    - method
+  return: argument 0
+  options:
+    - cname: eqValue
+      arguments:
+        - arg: THBoolTensor* result
+          output: True
+        - THTensor* self
+        - real value
+    - cname: eqTensor
+      arguments:
+        - arg: THBoolTensor* result
+          output: True
+        - arg: THTensor* self
+          broadcast: other fallback
+        - THTensor* other
+]]
+[[
+  name: eq_
+  return: self
+  options:
+    - cname: eqValueT
+      arguments:
+        - THTensor* self
+        - THTensor* self
+        - real value
+    - cname: eqTensorT
+      arguments:
+        - THTensor* self
+        - arg: THTensor* self
+          broadcast: other inplace fallback
+        - THTensor* other
+]]
+[[
+  name: eq
+  variants:
+    - function
+  return: argument 0
+  options:
+    - cname: eqValue
+      arguments:
+        - arg: THBoolTensor* result
+          output: True
+        - THTensor* tensor
+        - real value
+    - cname: eqTensor
+      arguments:
+        - arg: THBoolTensor* result
+          output: True
+        - arg: THTensor* tensor
+          broadcast: other fallback
+        - THTensor* other
+    - cname: eqValueT
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - THTensor* tensor
+        - real value
+    - cname: eqTensorT
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - arg: THTensor* tensor
+          broadcast: other fallback
+        - THTensor* other
+]]
+[[
+  name: ne
+  variants:
+    - method
+  return: argument 0
+  options:
+    - cname: neValue
+      arguments:
+        - arg: THBoolTensor* result
+          output: True
+        - THTensor* self
+        - real value
+    - cname: neTensor
+      arguments:
+        - arg: THBoolTensor* result
+          output: True
+        - arg: THTensor* self
+          broadcast: other fallback
+        - THTensor* other
+]]
+[[
+  name: ne_
+  return: self
+  options:
+    - cname: neValueT
+      arguments:
+        - THTensor* self
+        - THTensor* self
+        - real value
+    - cname: neTensorT
+      arguments:
+        - THTensor* self
+        - arg: THTensor* self
+          broadcast: other inplace fallback
+        - THTensor* other
+]]
+[[
+  name: ne
+  variants:
+    - function
+  return: argument 0
+  options:
+    - cname: neValue
+      arguments:
+        - arg: THBoolTensor* result
+          output: True
+        - THTensor* tensor
+        - real value
+    - cname: neTensor
+      arguments:
+        - arg: THBoolTensor* result
+          output: True
+        - arg: THTensor* tensor
+          broadcast: other fallback
+        - THTensor* other
+    - cname: neValueT
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - THTensor* tensor
+        - real value
+    - cname: neTensorT
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - arg: THTensor* tensor
+          broadcast: other fallback
+        - THTensor* other
+]]
+[[
+  name: min
+  variants:
+    - method
+    - function
+  options:
+    - cname: minall
+      return: real
+      arguments:
+        - THTensor* self
+    - cname: cmin
+      return: argument 0
+      arguments:
+      - arg: THTensor* result
+        output: True
+      - arg: THTensor* self
+        broadcast: other fallback
+      - THTensor* other
+    - cname: min
+      return: argument 0,1
+      arguments:
+        - arg: THTensor* min
+          output: True
+        - arg: THIndexTensor* min_indices
+          output: True
+        - THTensor* self
+        - arg: long dim
+          wrap_dim: self
+        - arg: bool keepdim
+          default: "false"
+]]
+[[
+  name: max
+  variants:
+    - method
+    - function
+  options:
+    - cname: maxall
+      return: real
+      arguments:
+        - THTensor* self
+    - cname: cmax
+      return: argument 0
+      arguments:
+      - arg: THTensor* result
+        output: True
+      - arg: THTensor* self
+        broadcast: other fallback
+      - THTensor* other
+    - cname: max
+      return: argument 0,1
+      arguments:
+        - arg: THTensor* max
+          output: True
+        - arg: THIndexTensor* max_indices
+          output: True
+        - THTensor* self
+        - arg: long dim
+          wrap_dim: self
+        - arg: bool keepdim
+          default: "false"
+]]
+[[
+  name: kthvalue
+  backends:
+    - CPU
+  variants:
+    - method
+    - function
+  return: argument 0,1
+  arguments:
+    - arg: THTensor* values
+      output: True
+    - arg: THIndexTensor* indices
+      output: True
+    - THTensor* self
+    - long k
+    - arg: long dim
+      wrap_dim: self
+      default: __last_dim
+    - arg: bool keepdim
+      default: "false"
+]]
+[[
+  name: mode
+  variants:
+    - method
+    - function
+  return: argument 0,1
+  arguments:
+    - arg: THTensor* values
+      output: True
+    - arg: THIndexTensor* indices
+      output: True
+    - THTensor* self
+    - arg: long dim
+      wrap_dim: self
+      default: __last_dim
+    - arg: bool keepdim
+      default: "false"
+]]
+[[
+  name: median
+  variants:
+    - method
+    - function
+  return: argument 0,1
+  options:
+    - cname: medianall
+      return: real
+      arguments:
+        - THTensor* self
+    - cname: median
+      arguments:
+        - arg: THTensor* values
+          output: True
+        - arg: THIndexTensor* indices
+          output: True
+        - THTensor* self
+        - arg: long dim
+          wrap_dim: self
+        - arg: bool keepdim
+          default: "false"
+]]
+[[
+  name: sort
+  variants:
+    - method
+    - function
+  return: argument 0,1
+  arguments:
+    - arg: THTensor* values
+      output: True
+    - arg: THIndexTensor* indices
+      output: True
+    - THTensor* self
+    - arg: long dim
+      default: __last_dim
+      wrap_dim: self
+    - arg: bool descending
+      default: "false"
+]]
+[[
+  name: topk
+  variants:
+    - method
+    - function
+  return: argument 0,1
+  arguments:
+    - arg: THTensor* values
+      output: True
+    - arg: THIndexTensor* indices
+      output: True
+    - THTensor* self
+    - long k
+    - arg: long dim
+      default: __last_dim
+      wrap_dim: self
+    - arg: bool largest
+      default: "true"
+    - arg: bool sorted
+      default: "true"
+]]
+[[
+  name: all
+  types:
+    - Byte
+  backends:
+    - CPU
+    - CUDA
+  cname: logicalall
+  return: bool
+  arguments:
+    - THTensor* self
+]]
+[[
+  name: any
+  types:
+    - Byte
+  backends:
+    - CPU
+    - CUDA
+  cname: logicalany
+  return: bool
+  arguments:
+    - THTensor* self
+]]
+[[
+  name: getDevice
+  python_name: get_device
+  backends:
+    - CUDA
+  return: long
+  arguments:
+    - THTensor* self
+]]
+[[
+  name: THPTensor_(new)
+  python_name: new
+  method_flags: METH_KEYWORDS
+  backends:
+    - CUDA
+  only_register: True
+]]
+[[
+  name: THPTensor_(recordStream)
+  python_name: record_stream
+  override_method_flags: METH_O
+  backends:
+    - CUDA
+  only_register: True
+]]
+[[
+  name: abs
+  return: argument 0
+  types:
+    - floating_point
+    - Long
+    - Int
+    - Short
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  arguments:
+    - arg: THTensor* destination
+      output: True
+    - THTensor* self
+]]
+[[
+  name: abs_
+  cname: abs
+  return: self
+  types:
+    - floating_point
+    - Long
+    - Int
+    - Short
+  backends:
+    - CPU
+    - CUDA
+  arguments:
+    - THTensor* self
+    - THTensor* self
+]]
+[[
+  name: sigmoid_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  cname: sigmoid
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* self
+]]
+[[
+  name: sigmoid
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  cname: sigmoid
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+]]
+[[
+  name: log_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  cname: log
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* self
+]]
+[[
+  name: log
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+]]
+[[
+  name: log1p_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  cname: log1p
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* self
+]]
+[[
+  name: log1p
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+]]
+[[
+  name: lgamma
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+]]
+[[
+  name: lgamma_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  cname: lgamma
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* self
+]]
+[[
+  name: exp_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  cname: exp
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* self
+]]
+[[
+  name: exp
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+]]
+[[
+  name: cos_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  cname: cos
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* self
+]]
+[[
+  name: cos
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+]]
+[[
+  name: acos_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  cname: acos
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* self
+]]
+[[
+  name: acos
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+]]
+[[
+  name: cosh_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  cname: cosh
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* self
+]]
+[[
+  name: cosh
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+]]
+[[
+  name: sin_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  cname: sin
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* self
+]]
+[[
+  name: sin
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+]]
+[[
+  name: asin_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  cname: asin
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* self
+]]
+[[
+  name: asin
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+]]
+[[
+  name: sinh_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  cname: sinh
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* self
+]]
+[[
+  name: sinh
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+]]
+[[
+  name: tan_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  cname: tan
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* self
+]]
+[[
+  name: tan
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+]]
+[[
+  name: atan_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  cname: atan
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* self
+]]
+[[
+  name: atan
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+]]
+[[
+  name: tanh_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  cname: tanh
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* self
+]]
+[[
+  name: tanh
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+]]
+[[
+  name: sqrt_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  cname: sqrt
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* self
+]]
+[[
+  name: sqrt
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+]]
+[[
+  name: rsqrt_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  cname: rsqrt
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* self
+]]
+[[
+  name: rsqrt
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+]]
+[[
+  name: ceil_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  cname: ceil
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* self
+]]
+[[
+  name: ceil
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+]]
+[[
+  name: floor_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  cname: floor
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* self
+]]
+[[
+  name: floor
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+]]
+[[
+  name: round_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  cname: round
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* self
+]]
+[[
+  name: round
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+]]
+[[
+  name: trunc_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  cname: trunc
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* self
+]]
+[[
+  name: trunc
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+]]
+[[
+  name: frac_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  cname: frac
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* self
+]]
+[[
+  name: frac
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+]]
+[[
+  name: mean
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  options:
+    - cname: meanall
+      return: accreal
+      arguments:
+        - THTensor* self
+    - cname: mean
+      return: argument 0
+      arguments:
+        - arg: THTensor* destination
+          output: True
+        - THTensor* self
+        - arg: long dim
+          wrap_dim: self
+        - arg: bool keepdim
+          default: "false"
+]]
+[[
+  name: var
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  options:
+    - cname: varall
+      return: accreal
+      arguments:
+        - THTensor* self
+        - arg: bool unbiased
+          if_true: 0
+          if_false: 1
+          default: 0
+    - cname: var
+      return: argument 0
+      arguments:
+        - arg: THTensor* destination
+          output: True
+        - THTensor* self
+        - arg: long dim
+          wrap_dim: self
+        - arg: bool unbiased
+          if_true: 0
+          if_false: 1
+          default: 0
+        - arg: bool keepdim
+          default: "false"
+]]
+[[
+  name: std
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  options:
+    - cname: stdall
+      return: accreal
+      arguments:
+        - THTensor* self
+        - arg: bool unbiased
+          if_true: 0
+          if_false: 1
+          default: 0
+    - cname: std
+      return: argument 0
+      arguments:
+        - arg: THTensor* destination
+          output: True
+        - THTensor* self
+        - arg: long dim
+          wrap_dim: self
+        - arg: bool unbiased
+          if_true: 0
+          if_false: 1
+          default: 0
+        - arg: bool keepdim
+          default: "false"
+]]
+[[
+  name: norm
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  options:
+    - cname: normall
+      return: accreal
+      arguments:
+        - THTensor* self
+        - arg: real p
+          default: AS_REAL(2)
+    - cname: norm
+      return: argument 0
+      arguments:
+        - arg: THTensor* destination
+          output: True
+        - THTensor* self
+        - real p
+        - arg: long dim
+          wrap_dim: self
+        - arg: bool keepdim
+          default: "false"
+]]
+[[
+  name: renorm
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* destination
+      output: True
+    - THTensor* self
+    - real p
+    - arg: long dim
+      wrap_dim: self
+    - real maxnorm
+]]
+[[
+  name: renorm_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  cname: renorm
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* self
+    - real p
+    - arg: long dim
+      wrap_dim: self
+    - real maxnorm
+]]
+[[
+  name: dist
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  options:
+    - cname: dist
+      return: accreal
+      arguments:
+        - arg: THTensor* self
+          broadcast: other fallback
+        - THTensor* other
+        - arg: real p
+          default: AS_REAL(2)
+]]
+[[
+  name: reciprocal
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  options:
+    - cname: cinv
+      return: argument 0
+      arguments:
+        - arg: THTensor* destination
+          output: True
+        - THTensor* self
+]]
+[[
+  name: reciprocal_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  options:
+    - cname: cinv
+      return: self
+      arguments:
+        - THTensor* self
+        - THTensor* self
+]]
+[[
+  name: neg
+  types:
+    - floating_point
+    - Long
+    - Int
+    - Short
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  options:
+    - cname: neg
+      return: argument 0
+      arguments:
+        - arg: THTensor* destination
+          output: True
+        - THTensor* self
+]]
+[[
+  name: neg_
+  types:
+    - floating_point
+    - Long
+    - Int
+    - Short
+  backends:
+    - CPU
+    - CUDA
+  options:
+    - cname: neg
+      return: self
+      arguments:
+        - THTensor* self
+        - THTensor* self
+]]
+[[
+  name: atan2
+  types:
+    - Float
+    - Double
+  backends:
+    - CPU
+  backend_type_pairs: [[CPU,Float],[CPU,Double],[CUDA,Float]]
+
+  variants:
+    - method
+    - function
+  cname: atan2
+  return: argument 0
+  arguments:
+    - arg: THTensor* destination
+      output: True
+    - arg: THTensor* self
+      broadcast: other fallback
+    - THTensor* other
+]]
+[[
+  name: atan2_
+  backend_type_pairs: [[CPU,Float],[CPU,Double],[CUDA,Float]]
+  cname: atan2
+  return: argument 0
+  arguments:
+    - THTensor* self
+    - arg: THTensor* self
+      broadcast: other fallback inplace
+    - THTensor* other
+]]
+[[
+  name: pow
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  options:
+    - cname: pow
+      arguments:
+        - arg: THTensor* destination
+          output: True
+        - THTensor* self
+        - real exponent
+    - cname: cpow
+      arguments:
+        - arg: THTensor* destination
+          output: True
+        - arg: THTensor* self
+          broadcast: exponent fallback
+        - THTensor* exponent
+    - cname: tpow
+      arguments:
+        - arg: THTensor* destination
+          output: True
+        - real base
+        - THTensor* self
+]]
+[[
+  name: pow_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  return: argument 0
+  cname: pow
+  options:
+    - cname: pow
+      arguments:
+        - THTensor* self
+        - THTensor* self
+        - real exponent
+    - cname: cpow
+      arguments:
+        - THTensor* self
+        - arg: THTensor* self
+          broadcast: exponent inplace fallback
+        - THTensor* exponent
+]]
+[[
+  name: lerp
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  cname: lerp
+  arguments:
+    - arg: THTensor* destination
+      output: True
+    - arg: THTensor* self
+      broadcast: end fallback
+    - THTensor* end
+    - real weight
+]]
+[[
+  name: lerp_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  return: self
+  cname: lerp
+  arguments:
+    - THTensor* self
+    - arg: THTensor* self
+      broadcast: end fallback inplace
+    - THTensor* end
+    - real weight
+]]
+[[
+  name: linspace
+  types:
+    - Float
+    - Double
+  backends:
+    - CPU
+  variants:
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - real start
+    - real end
+    - arg: long steps
+      default: 100
+]]
+[[
+  name: logspace
+  types:
+    - Float
+    - Double
+  backends:
+    - CPU
+  variants:
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - real start
+    - real end
+    - arg: long steps
+      default: 100
+]]
+[[
+  name: histc
+  types:
+    - Float
+    - Double
+  backends:
+    - CPU
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* destination
+      output: True
+    - THTensor* self
+    - arg: long bins
+      default: 100
+    - arg: real min
+      default: 0
+    - arg: real max
+      default: 0
+]]
+[[
+  name: zero_
+  cname: zero
+  return: self
+  arguments:
+    - THTensor* self
+]]
+[[
+  name: sum
+  variants:
+    - method
+    - function
+  options:
+    - cname: sumall
+      return: accreal
+      arguments:
+        - THTensor* self
+    - cname: sum
+      return: argument 0
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - THTensor* self
+        - arg: long dim
+          wrap_dim: self
+        - arg: bool keepdim
+          default: "false"
+]]
+[[
+  name: prod
+  variants:
+    - method
+    - function
+  options:
+    - cname: prodall
+      return: accreal
+      arguments:
+        - THTensor* self
+    - cname: prod
+      return: argument 0
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - THTensor* self
+        - arg: long dim
+          wrap_dim: self
+        - arg: bool keepdim
+          default: "false"
+]]
+[[
+  name: cumsum
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+    - arg: long dim
+      wrap_dim: self
+]]
+[[
+  name: cumprod
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+    - arg: long dim
+      wrap_dim: self
+]]
+[[
+  name: sign
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+]]
+[[
+  name: sign_
+  cname: sign
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* self
+]]
+[[
+  name: trace
+  variants:
+    - method
+    - function
+  return: accreal
+  arguments:
+    - THTensor* self
+]]
+[[
+  name: add
+  variants:
+    - method
+    - function
+  return: argument 0
+  options:
+    - cname: add_scaled
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - THTensor* self
+        - real value
+        - arg: real alpha
+          default: AS_REAL(1)
+          kwarg_only: True
+    - cname: cadd
+      aten_sparse: True
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - arg: THTensor* self
+          broadcast: other fallback
+        - arg: real alpha
+          default: AS_REAL(1)
+          kwarg_only: True
+        - THTensor* other
+    - sparse: True
+      cname: spcadd
+      aten_dense_sparse: True
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - THTensor* self
+        - arg: real alpha
+          default: AS_REAL(1)
+          kwarg_only: True
+        - THSTensor* other
+]]
+[[
+  name: add_
+  return: argument 0
+  options:
+    - cname: add_scaled
+      arguments:
+        - THTensor* self
+        - THTensor* self
+        - real value
+        - arg: real alpha
+          default: AS_REAL(1)
+          kwarg_only: True
+    - cname: cadd
+      aten_sparse: True
+      arguments:
+        - THTensor* self
+        - arg: THTensor* self
+          broadcast: other inplace fallback
+        - arg: real alpha
+          default: AS_REAL(1)
+          kwarg_only: True
+        - THTensor* other
+    - sparse: True
+      cname: spcadd
+      aten_dense_sparse: True
+      arguments:
+        - THTensor* self
+        - THTensor* self
+        - arg: real alpha
+          default: AS_REAL(1)
+          kwarg_only: True
+        - THSTensor* other
+]]
+[[
+  name: sub
+  variants:
+    - method
+    - function
+  return: argument 0
+  options:
+    - cname: sub_scaled
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - THTensor* self
+        - real value
+        - arg: real alpha
+          default: AS_REAL(1)
+          kwarg_only: True
+    - cname: csub
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - arg: THTensor* self
+          broadcast: other fallback
+        - arg: real alpha
+          default: AS_REAL(1)
+          kwarg_only: True
+        - THTensor* other
+]]
+[[
+  name: sub_
+  return: argument 0
+  options:
+    - cname: sub_scaled
+      arguments:
+        - THTensor* self
+        - THTensor* self
+        - real value
+        - arg: real alpha
+          default: AS_REAL(1)
+          kwarg_only: True
+    - cname: csub
+      arguments:
+        - THTensor* self
+        - arg: THTensor* self
+          broadcast: other inplace fallback
+        - arg: real alpha
+          default: AS_REAL(1)
+          kwarg_only: True
+        - THTensor* other
+]]
+[[
+  name: mul
+  variants:
+    - method
+    - function
+  return: argument 0
+  options:
+    - cname: mul
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - THTensor* self
+        - real value
+    - cname: cmul
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - arg: THTensor* self
+          broadcast: other fallback
+        - THTensor* other
+]]
+[[
+  name: mul_
+  return: argument 0
+  options:
+    - cname: mul
+      arguments:
+        - THTensor* self
+        - THTensor* self
+        - real value
+    - cname: cmul
+      arguments:
+        - THTensor* self
+        - arg: THTensor* self
+          broadcast: other inplace fallback
+        - THTensor* other
+]]
+[[
+  name: div
+  variants:
+    - method
+    - function
+  return: argument 0
+  options:
+    - cname: div
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - THTensor* self
+        - real value
+    - cname: cdiv
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - arg: THTensor* self
+          broadcast: other fallback
+        - THTensor* other
+]]
+[[
+  name: div_
+  return: argument 0
+  options:
+    - cname: div
+      arguments:
+        - THTensor* self
+        - THTensor* self
+        - real value
+    - cname: cdiv
+      arguments:
+        - THTensor* self
+        - arg: THTensor* self
+          broadcast: other inplace fallback
+        - THTensor* other
+]]
+[[
+  name: fmod
+  return: argument 0
+  variants:
+    - method
+    - function
+  options:
+    - cname: fmod
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - THTensor* self
+        - real value
+    - cname: cfmod
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - arg: THTensor* self
+          broadcast: other fallback
+        - THTensor* other
+]]
+[[
+  name: fmod_
+  return: argument 0
+  options:
+    - cname: fmod
+      arguments:
+        - THTensor* self
+        - THTensor* self
+        - real value
+    - cname: cfmod
+      arguments:
+        - THTensor* self
+        - arg: THTensor* self
+          broadcast: other inplace fallback
+        - THTensor* other
+]]
+[[
+  name: remainder
+  return: argument 0
+  variants:
+    - method
+    - function
+  options:
+    - cname: remainder
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - THTensor* self
+        - real value
+    - cname: cremainder
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - arg: THTensor* self
+          broadcast: other fallback
+        - THTensor* other
+]]
+[[
+  name: remainder_
+  return: argument 0
+  options:
+    - cname: remainder
+      arguments:
+        - THTensor* self
+        - THTensor* self
+        - real value
+    - cname: cremainder
+      arguments:
+        - THTensor* self
+        - arg: THTensor* self
+          broadcast: other inplace fallback
+        - THTensor* other
+]]
+[[
+  name: clamp
+  variants:
+    - method
+    - function
+  return: argument 0
+  options:
+    - cname: clamp
+      arguments:
+        - arg: THTensor* destination
+          output: True
+        - THTensor* self
+        - real min
+        - real max
+    - cname: cmaxValue
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - THTensor* self
+        - arg: real min
+          kwarg_only: True
+    - cname: cminValue
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - THTensor* self
+        - arg: real max
+          kwarg_only: True
+]]
+[[
+  name: clamp_
+  cname: clamp
+  return: self
+  options:
+    - cname: clamp
+      arguments:
+        - THTensor* self
+        - THTensor* self
+        - real min
+        - real max
+    - cname: cmaxValue
+      arguments:
+        - THTensor* self
+        - THTensor* self
+        - arg: real min
+          kwarg_only: True
+    - cname: cminValue
+      arguments:
+        - THTensor* self
+        - THTensor* self
+        - arg: real max
+          kwarg_only: True
+]]
+[[
+  name: dot
+  backend_type_pairs: [[CUDA,floating_point], [CPU,all]]
+
+  variants:
+    - method
+    - function
+  return: accreal
+  arguments:
+    - arg: THTensor* self
+      assert_ndim: 1
+    - arg: THTensor* tensor
+      assert_ndim: 1
+]]
+[[
+  name: tril
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* destination
+      output: True
+    - THTensor* self
+    - arg: long diagonal
+      default: 0
+]]
+[[
+  name: tril_
+  cname: tril
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* self
+    - arg: long diagonal
+      default: 0
+]]
+[[
+  name: triu
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* destination
+      output: True
+    - THTensor* self
+    - arg: long diagonal
+      default: 0
+]]
+[[
+  name: triu_
+  cname: triu
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* self
+    - arg: long diagonal
+      default: 0
+]]
+[[
+  name: cross
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* destination
+      output: True
+    - THTensor* self
+    - THTensor* other
+    - arg: long dim
+      default: -1
+]]
+[[
+  name: eye
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - long n
+    - arg: long m
+      default: 1
+]]
+[[
+  name: diag
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+    - arg: long diagonal
+      default: 0
+]]
+[[
+  name: addmm
+  variants:
+    - method
+    - function
+  return: argument 0
+  options:
+    - arguments:
+      - arg: THTensor* result
+        output: True
+      - arg: real beta
+        default: AS_REAL(1)
+        kwarg_only: True
+      - arg: THTensor* self
+        broadcast: mat1,mat2 dims:mat1.dim0,mat2.dim1
+      - arg: real alpha
+        default: AS_REAL(1)
+        kwarg_only: True
+      - THTensor* mat1
+      - THTensor* mat2
+    - cname: spaddmm
+      sparse: yes
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - arg: real beta
+          default: AS_REAL(1)
+          kwarg_only: True
+        - THTensor* self
+        - arg: real alpha
+          default: AS_REAL(1)
+          kwarg_only: True
+        - THSTensor* mat1
+        - THTensor* mat2
+]]
+[[
+  name: addmm_
+  return: self
+  options:
+    - cname: addmm
+      arguments:
+      - THTensor* self
+      - arg: real beta
+        default: AS_REAL(1)
+        kwarg_only: True
+      - THTensor* self
+      - arg: real alpha
+        default: AS_REAL(1)
+        kwarg_only: True
+      - THTensor* mat1
+      - THTensor* mat2
+    - cname: spaddmm
+      sparse: yes
+      arguments:
+        - THTensor* self
+        - arg: real beta
+          default: AS_REAL(1)
+          kwarg_only: True
+        - THTensor* self
+        - arg: real alpha
+          default: AS_REAL(1)
+          kwarg_only: True
+        - THSTensor* mat1
+        - THTensor* mat2
+]]
+[[
+  name: addmv
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - arg: real beta
+      default: AS_REAL(1)
+      kwarg_only: True
+    - arg: THTensor* self
+      broadcast: mat,vec dims:mat.dim0
+    - arg: real alpha
+      default: AS_REAL(1)
+      kwarg_only: True
+    - THTensor* mat
+    - THTensor* vec
+]]
+[[
+  name: addmv_
+  cname: addmv
+  return: self
+  arguments:
+    - THTensor* self
+    - arg: real beta
+      default: AS_REAL(1)
+      kwarg_only: True
+    - THTensor* self
+    - arg: real alpha
+      default: AS_REAL(1)
+      kwarg_only: True
+    - THTensor* mat
+    - THTensor* vec
+]]
+[[
+  name: addr
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - arg: real beta
+      default: AS_REAL(1)
+      kwarg_only: True
+    - arg: THTensor* self
+      broadcast: vec1,vec2 dims:vec1.dim0,vec2.dim0
+    - arg: real alpha
+      default: AS_REAL(1)
+      kwarg_only: True
+    - THTensor* vec1
+    - THTensor* vec2
+]]
+[[
+  name: addr_
+  cname: addr
+  return: self
+  arguments:
+    - THTensor* self
+    - arg: real beta
+      default: AS_REAL(1)
+      kwarg_only: True
+    - THTensor* self
+    - arg: real alpha
+      default: AS_REAL(1)
+      kwarg_only: True
+    - THTensor* vec1
+    - THTensor* vec2
+]]
+[[
+  name: ger
+  cname: addr
+  variants:
+    - method
+    - function
+  return: argument 0
+  before_call: |
+    long s1 = THTensor_(size)(LIBRARY_STATE ((THPTensor*)$arg4)->cdata, 0);
+    long s2 = THTensor_(size)(LIBRARY_STATE ((THPTensor*)$arg5)->cdata, 0);
+    THTensor_(resize2d)(LIBRARY_STATE ((THPTensor*)$arg0)->cdata, s1, s2);
+  arguments:
+    - arg: THTensor* result
+      output: True
+      resize: [ [self,0], [vec2,0] ]
+    - CONSTANT AS_REAL(0)
+    - argument 0
+    - CONSTANT AS_REAL(1)
+    - THTensor* self
+    - THTensor* vec2
+]]
+[[
+  name: mv
+  cname: addmv
+  variants:
+    - method
+    - function
+  return: argument 0
+  before_call: |
+    long s = THTensor_(size)(LIBRARY_STATE ((THPTensor*)$arg4)->cdata, 0);
+    THTensor_(resize1d)(LIBRARY_STATE ((THPTensor*)$arg0)->cdata, s);
+    #if !IS_CUDA
+    THTensor_(zero)(LIBRARY_STATE ((THPTensor*)$arg0)->cdata);
+    #endif
+  arguments:
+    - arg: THTensor* result
+      output: True
+      resize: [ [self, 0] ]
+      cpu_zero: True
+    - CONSTANT AS_REAL(0)
+    - argument 0
+    - CONSTANT AS_REAL(1)
+    - THTensor* self
+    - THTensor* vec
+]]
+[[
+  name: mm
+  variants:
+    - method
+    - function
+  return: argument 0
+  options:
+    - cname: addmm
+      before_call: |
+        long s1 = THTensor_(size)(LIBRARY_STATE ((THPTensor*)$arg4)->cdata, 0);
+        long s2 = THTensor_(size)(LIBRARY_STATE ((THPTensor*)$arg5)->cdata, 1);
+        THTensor_(resize2d)(LIBRARY_STATE ((THPTensor*)$arg0)->cdata, s1, s2);
+        #if !IS_CUDA
+        THTensor_(zero)(LIBRARY_STATE ((THPTensor*)$arg0)->cdata);
+        #endif
+      arguments:
+        - arg: THTensor* result
+          output: True
+          resize: [ [self, 0], [mat2,1] ]
+          cpu_zero: True
+        - CONSTANT AS_REAL(0)
+        - argument 0
+        - CONSTANT AS_REAL(1)
+        - THTensor* self
+        - THTensor* mat2
+    - cname: spaddmm
+      sparse: True
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - CONSTANT AS_REAL(0)
+        - argument 0
+        - CONSTANT AS_REAL(1)
+        - THSTensor* self
+        - THTensor* mat2
+]]
+[[
+  name: bmm
+  cname: baddbmm
+  variants:
+    - method
+    - function
+  return: argument 0
+  before_call: |
+    long s1 = THTensor_(size)(LIBRARY_STATE ((THPTensor*)$arg4)->cdata, 0);
+    long s2 = THTensor_(size)(LIBRARY_STATE ((THPTensor*)$arg4)->cdata, 1);
+    long s3 = THTensor_(size)(LIBRARY_STATE ((THPTensor*)$arg5)->cdata, 2);
+    THTensor_(resize3d)(LIBRARY_STATE ((THPTensor*)$arg0)->cdata, s1, s2, s3);
+    #if !IS_CUDA
+    THTensor_(zero)(LIBRARY_STATE ((THPTensor*)$arg0)->cdata);
+    #endif
+  arguments:
+    - arg: THTensor* result
+      output: True
+      resize: [ [self,0], [self,1], [mat2,2] ]
+      cpu_zero: True
+    - CONSTANT AS_REAL(0)
+    - argument 0
+    - CONSTANT AS_REAL(1)
+    - THTensor* self
+    - THTensor* mat2
+]]
+[[
+  name: addbmm
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - arg: real beta
+      default: AS_REAL(1)
+      kwarg_only: True
+    - arg: THTensor* self
+      broadcast: batch1,batch2 dims:batch1.dim1,batch2.dim2
+    - arg: real alpha
+      default: AS_REAL(1)
+      kwarg_only: True
+    - THTensor* batch1
+    - THTensor* batch2
+]]
+[[
+  name: addbmm_
+  cname: addbmm
+  return: self
+  arguments:
+    - THTensor* self
+    - arg: real beta
+      default: AS_REAL(1)
+      kwarg_only: True
+    - THTensor* self
+    - arg: real alpha
+      default: AS_REAL(1)
+      kwarg_only: True
+    - THTensor* batch1
+    - THTensor* batch2
+]]
+[[
+  name: baddbmm
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - arg: real beta
+      default: AS_REAL(1)
+      kwarg_only: True
+    - arg: THTensor* self
+      broadcast: batch1,batch2 dims:batch1.dim0,batch1.dim1,batch2.dim2
+    - arg: real alpha
+      default: AS_REAL(1)
+      kwarg_only: True
+    - THTensor* batch1
+    - THTensor* batch2
+]]
+[[
+  name: baddbmm_
+  cname: baddbmm
+  return: argument 0
+  arguments:
+    - THTensor* self
+    - arg: real beta
+      default: AS_REAL(1)
+      kwarg_only: True
+    - THTensor* self
+    - arg: real alpha
+      default: AS_REAL(1)
+      kwarg_only: True
+    - THTensor* batch1
+    - THTensor* batch2
+]]
+[[
+  name: addcmul
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - arg: THTensor* self
+      broadcast: tensor1,tensor2 fallback
+    - arg: real value
+      default: AS_REAL(1)
+      kwarg_only: True
+    - THTensor* tensor1
+    - THTensor* tensor2
+]]
+[[
+  name: addcmul_
+  options:
+    - cname: addcmul
+      return: argument 0
+      arguments:
+        - THTensor* self
+        - arg: THTensor* self
+          broadcast: tensor1,tensor2 inplace fallback
+        - arg: real value
+          default: AS_REAL(1)
+          kwarg_only: True
+        - THTensor* tensor1
+        - THTensor* tensor2
+    - cname: spaddcmul
+      defined_if: "!IS_DISTRIBUTED"
+      return: argument 0
+      arguments:
+        - THTensor* self
+        - THTensor* self
+        - arg: real value
+          default: AS_REAL(1)
+          kwarg_only: True
+        - THSTensor* tensor1
+        - THSTensor* tensor2
+]]
+[[
+  name: addcdiv
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - arg: THTensor* self
+      broadcast: tensor1,tensor2 fallback
+    - arg: real value
+      default: AS_REAL(1)
+      kwarg_only: True
+    - THTensor* tensor1
+    - THTensor* tensor2
+]]
+[[
+  name: addcdiv_
+  cname: addcdiv
+  return: argument 0
+  arguments:
+    - THTensor* self
+    - arg: THTensor* self
+      broadcast: tensor1,tensor2 inplace fallback
+    - arg: real value
+      default: AS_REAL(1)
+      kwarg_only: True
+    - THTensor* tensor1
+    - THTensor* tensor2
+]]
+[[
+  name: gesv
+  types:
+    - Float
+    - Double
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0,1
+  arguments:
+    - arg: THTensor* solution
+      output: True
+    - arg: THTensor* lu
+      output: True
+    - THTensor* self
+    - THTensor* A
+]]
+[[
+  name: gels
+  types:
+    - Float
+    - Double
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0,1
+  arguments:
+    - arg: THTensor* res1
+      output: True
+    - arg: THTensor* res2
+      output: True
+    - THTensor* self
+    - THTensor* A
+]]
+[[
+  name: trtrs
+  types:
+    - Float
+    - Double
+  backends:
+    - CPU
+  variants:
+    - method
+    - function
+  return: argument 0,1
+  arguments:
+    - arg: THTensor* res1
+      output: True
+    - arg: THTensor* res2
+      output: True
+    - THTensor* self
+    - THTensor* A
+    - arg: bool upper
+      if_true: U
+      if_false: L
+      default: U
+    - arg: bool transpose
+      if_true: T
+      if_false: N
+      default: N
+    - arg: bool unitriangular
+      if_true: U
+      if_false: N
+      default: N
+]]
+[[
+  name: symeig
+  cname: syev
+  types:
+    - Float
+    - Double
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0,1
+  arguments:
+    - arg: THTensor* res1
+      output: True
+    - arg: THTensor* res2
+      output: True
+    - THTensor* self
+    - arg: bool eigenvectors
+      if_true: V
+      if_false: N
+      default: N
+    - arg: bool upper
+      if_true: U
+      if_false: L
+      default: U
+]]
+[[
+  name: eig
+  cname: geev
+  types:
+    - Float
+    - Double
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0,1
+  arguments:
+    - arg: THTensor* res1
+      output: True
+    - arg: THTensor* res2
+      output: True
+    - THTensor* self
+    - arg: bool eigenvectors
+      if_true: V
+      if_false: N
+      default: N
+]]
+[[
+  name: svd
+  cname: gesvd
+  types:
+    - Float
+    - Double
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0,1,2
+  arguments:
+    - arg: THTensor* res1
+      output: True
+    - arg: THTensor* res2
+      output: True
+    - arg: THTensor* res3
+      output: True
+    - THTensor* self
+    - arg: bool some
+      if_true: S
+      if_false: A
+      default: S
+]]
+[[
+  name: inverse
+  cname: getri
+  types:
+    - Float
+    - Double
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* output
+      output: True
+    - THTensor* self
+]]
+[[
+  name: potrf
+  types:
+    - Float
+    - Double
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* output
+      output: True
+    - THTensor* self
+    - arg: bool upper
+      if_true: U
+      if_false: L
+      default: U
+]]
+[[
+  name: potrs
+  types:
+    - Float
+    - Double
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+    - THTensor* input2
+    - arg: bool upper
+      if_true: U
+      if_false: L
+      default: U
+]]
+[[
+  name: potri
+  types:
+    - Float
+    - Double
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* output
+      output: True
+    - THTensor* self
+    - arg: bool upper
+      if_true: U
+      if_false: L
+      default: U
+]]
+[[
+  name: pstrf
+  types:
+    - Float
+    - Double
+  backends:
+    - CPU
+  variants:
+    - method
+    - function
+  return: argument 0,1
+  after_call:
+    THIntTensor_sub(((THPIntTensor*)$arg1)->cdata, ((THPIntTensor*)$arg1)->cdata, 1);
+  arguments:
+    - arg: THTensor* res1
+      output: True
+    - arg: THIntegerTensor* res2
+      output: True
+    - THTensor* self
+    - arg: bool upper
+      if_true: U
+      if_false: L
+      default: U
+    - arg: real tol
+      default: -1
+]]
+[[
+  name: qr
+  types:
+    - Float
+    - Double
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0,1
+  arguments:
+    - arg: THTensor* res1
+      output: True
+    - arg: THTensor* res2
+      output: True
+    - THTensor* self
+]]
+[[
+  name: geqrf
+  types:
+    - Float
+    - Double
+  backends:
+    - CPU
+  variants:
+    - method
+    - function
+  return: argument 0,1
+  arguments:
+    - arg: THTensor* res1
+      output: True
+    - arg: THTensor* res2
+      output: True
+    - THTensor* self
+]]
+[[
+  name: orgqr
+  types:
+    - Float
+    - Double
+  backends:
+    - CPU
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+    - THTensor* input2
+]]
+[[
+  name: ormqr
+  types:
+    - Float
+    - Double
+  backends:
+    - CPU
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+    - THTensor* input2
+    - THTensor* input3
+    - arg: bool left
+      if_true: L
+      if_false: R
+      default: L
+    - arg: bool transpose
+      if_true: T
+      if_false: N
+      default: N
+]]
+[[
+  name: btrifact
+  cname: btrifact
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0,1
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - arg: THIntegerTensor* pivots
+      output: True
+    - arg: THIntegerTensor* info
+      kwarg_only: True
+      default: NULL
+    - arg: bool pivot
+      kwarg_only: True
+      default: "true"
+    - THTensor* self
+]]
+[[
+  name: btrisolve
+  cname: btrisolve
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+    - THTensor* LU_data
+    - THIntegerTensor* LU_pivots
+]]
+[[
+  name: randperm
+  defined_if: "!IS_DISTRIBUTED"
+  backends:
+    - CPU
+  variants:
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - arg: THGenerator* generator
+      default: THPDefaultGenerator->cdata
+      kwarg_only: True
+    - long n
+]]
+[[
+  name: random_
+  defined_if: "!IS_DISTRIBUTED"
+  backends:
+    - CPU
+  return: self
+  options:
+    - cname: random
+      arguments:
+        - THTensor* self
+        - arg: THGenerator* generator
+          default: THPDefaultGenerator->cdata
+          kwarg_only: True
+    - cname: cappedRandom
+      arguments:
+        - THTensor* self
+        - arg: THGenerator* generator
+          default: THPDefaultGenerator->cdata
+          kwarg_only: True
+        - long to
+    - cname: clampedRandom
+      arguments:
+        - THTensor* self
+        - arg: THGenerator* generator
+          default: THPDefaultGenerator->cdata
+          kwarg_only: True
+        - long from
+        - long to
+]]
+[[
+  name: multinomial
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - method
+    - function
+  return: argument 0
+  arguments:
+    - arg: THIndexTensor* result
+      output: True
+    - arg: THGenerator* generator
+      default: THPDefaultGenerator->cdata
+      kwarg_only: True
+    - THTensor* self
+    - long num_samples
+    - arg: bool replacement
+      default: "false"
+]]
+[[
+  name: uniform_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  cname: uniform
+  return: self
+  arguments:
+    - THTensor* self
+    - arg: THGenerator* generator
+      default: THPDefaultGenerator->cdata
+      kwarg_only: True
+    - arg: double from
+      default: 0
+    - arg: double to
+      default: 1
+]]
+[[
+  name: normal
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  return: argument 0
+  variants:
+    - function
+  options:
+    - cname: normal_means
+      arguments:
+        - arg: THTensor* output
+          output: True
+        - arg: THGenerator* generator
+          default: THPDefaultGenerator->cdata
+          kwarg_only: True
+        - THTensor* means
+        - arg: double std
+          default: 1
+    - cname: normal_stddevs
+      arguments:
+        - arg: THTensor* output
+          output: True
+        - arg: THGenerator* generator
+          default: THPDefaultGenerator->cdata
+          kwarg_only: True
+        - arg: double mean
+        - THTensor* std
+    - cname: normal_means_stddevs
+      arguments:
+        - arg: THTensor* output
+          output: True
+        - arg: THGenerator* generator
+          default: THPDefaultGenerator->cdata
+          kwarg_only: True
+        - THTensor* means
+        - THTensor* std
+]]
+[[
+  name: normal_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  cname: normal
+  return: self
+  arguments:
+    - THTensor* self
+    - arg: THGenerator* generator
+      default: THPDefaultGenerator->cdata
+      kwarg_only: True
+    - arg: double mean
+      default: 0
+    - arg: double std
+      default: 1
+]]
+[[
+  name: cauchy_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  cname: cauchy
+  return: self
+  arguments:
+    - THTensor* self
+    - arg: THGenerator* generator
+      default: THPDefaultGenerator->cdata
+      kwarg_only: True
+    - arg: double median
+      default: 0
+    - arg: double sigma
+      default: 1
+]]
+[[
+  name: logNormal_
+  cname: logNormal
+  python_name: log_normal_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  return: self
+  arguments:
+    - THTensor* self
+    - arg: THGenerator* generator
+      default: THPDefaultGenerator->cdata
+      kwarg_only: True
+    - arg: double mean
+      default: 1
+    - arg: double std
+      default: 2
+]]
+[[
+  name: exponential_
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  cname: exponential
+  return: self
+  arguments:
+    - THTensor* self
+    - arg: THGenerator* generator
+      default: THPDefaultGenerator->cdata
+      kwarg_only: True
+    - arg: double lambd
+      default: 1
+]]
+[[
+  name: rand
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - arg: THGenerator* generator
+      default: THPDefaultGenerator->cdata
+      kwarg_only: True
+    - arg: THSize* size
+      long_args: True
+]]
+[[
+  name: randn
+  types:
+    - floating_point
+  backends:
+    - CPU
+    - CUDA
+  variants:
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - arg: THGenerator* generator
+      default: THPDefaultGenerator->cdata
+      kwarg_only: True
+    - arg: THSize* size
+      long_args: True
+]]
+[[
+  name: geometric_
+  defined_if: "!IS_DISTRIBUTED"
+  backends:
+    - CPU
+    - CUDA
+  cname: geometric
+  return: self
+  arguments:
+    - THTensor* self
+    - arg: THGenerator* generator
+      default: THPDefaultGenerator->cdata
+      kwarg_only: True
+    - double p
+]]
+[[
+  name: bernoulli
+  types:
+    - Float
+    - Double
+  backends:
+    - CPU
+  return: argument 0
+  variants:
+    - method
+    - function
+  before_call:
+    THTensor_(resizeAs)(LIBRARY_STATE ((THPTensor*)$arg0)->cdata, ((THPTensor*)$arg2)->cdata);
+  cname: BERNOULLI_TENSOR
+  arguments:
+    - arg: THTensor* output
+      output: True
+      resize: self
+    - arg: THGenerator* generator
+      default: THPDefaultGenerator->cdata
+      kwarg_only: True
+    - THTensor* self
+]]
+[[
+  name: bernoulli_
+  defined_if: "!IS_DISTRIBUTED"
+  backends:
+    - CPU
+    - CUDA
+  return: self
+  options:
+    - cname: bernoulli
+      arguments:
+        - THTensor* self
+        - arg: THGenerator* generator
+          default: THPDefaultGenerator->cdata
+          kwarg_only: True
+        - arg: double p
+          default: 0.5
+    - cname: bernoulli_FloatTensor
+      arguments:
+        - THTensor* self
+        - arg: THGenerator* generator
+          default: THPDefaultGenerator->cdata
+          kwarg_only: True
+        - BackendFloatTensor* float_p
+    - cname: bernoulli_DoubleTensor
+      arguments:
+        - THTensor* self
+        - arg: THGenerator* generator
+          default: THPDefaultGenerator->cdata
+          kwarg_only: True
+        - BackendDoubleTensor* float_p
+]]
diff --git a/Generator.h b/Generator.h
index e9a9d08..ec01212 100644
--- a/Generator.h
+++ b/Generator.h
@@ -1,5 +1,7 @@
 #pragma once
 
+#include <stdint.h>
+
 namespace at {
 
 struct Generator {
diff --git a/Half.h b/Half.h
index aac6db2..bfb1cc6 100644
--- a/Half.h
+++ b/Half.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include<stdint.h>
+#include <stdint.h>
 #ifdef AT_CUDA_ENABLED
 #include <cuda.h>
 #include <cuda_runtime.h>
diff --git a/Local.cwrap b/Local.cwrap
index f381056..ede79c4 100644
--- a/Local.cwrap
+++ b/Local.cwrap
@@ -30,8 +30,11 @@
     - cname: newWithSize
       arguments:
         - THSize* size
+        - CONSTANT NULL
+    - cname: newWithSize
+      arguments:
+        - THSize* size
         - arg: THStride* stride
-          default: NULL
     - cname: newWithStorage
       arguments:
         - THStorage* storage
diff --git a/Scalar.cpp b/Scalar.cpp
index 3529236..bd61f43 100644
--- a/Scalar.cpp
+++ b/Scalar.cpp
@@ -1,8 +1,30 @@
 #include "ATen/Scalar.h"
+
 #include <TH/TH.h>
 
+#include "ATen/Tensor.h"
+#include "ATen/Context.h"
+
 namespace at {
 
+Scalar::Scalar(const Tensor & t)
+: tag(Tag::HAS_t) {
+  v.t = t.get();
+  v.t->retain();
+  AT_ASSERT(t.dim() == 0,"Attempting to create a Scalar from a %d dim tensor",t.dim());
+}
+
+Tensor Scalar::toTensor() const {
+  if (Tag::HAS_t == tag) {
+    return Tensor(v.t, true);
+  } else if (Tag::HAS_d == tag) {
+    return CPU(kDouble).scalarTensor(*this);
+  } else {
+    assert(Tag::HAS_i == tag);
+    return CPU(kLong).scalarTensor(*this);
+  }
+}
+
 template<> Half convert(double f) {
   float t = static_cast<float>(f);
   Half h;
diff --git a/Scalar.h b/Scalar.h
index 94bb069..945260c 100644
--- a/Scalar.h
+++ b/Scalar.h
@@ -1,24 +1,32 @@
 #pragma once
 
+#include <assert.h>
 #include <stdint.h>
 #include <stdexcept>
 #include <string>
-#include "ATen/Context.h"
+
+#include "ATen/ATenGeneral.h"
 #include "ATen/Half.h"
-#include "ATen/Type.h"
-#include "ATen/Utils.h"
-#include "ATen/Tensor.h"
+#include "ATen/ScalarType.h"
+#include "ATen/TensorImpl.h"
+
 
 namespace at {
 
+struct TensorImpl;
+struct Tensor;
+
 class Scalar {
 public:
   Scalar() : Scalar(int64_t(0)) {}
-
-  explicit Scalar(const Tensor & t)
-  : tag(Tag::HAS_t), t(t) {
-    AT_ASSERT(t.dim() == 0,"Attempting to create a Scalar from a %d dim tensor",t.dim());
+  ~Scalar() {
+    if (Tag::HAS_t == tag) {
+      v.t->release();
+    }
   }
+
+  explicit Scalar(const Tensor & t);
+
 #define DEFINE_IMPLICIT_CTOR(type,name,member) \
   Scalar(type vv) \
   : tag(Tag::HAS_##member) { \
@@ -46,7 +54,7 @@
     if (Tag::HAS_t != tag) {
       return *this;
     }
-    return t.pImpl->localScalar();
+    return v.t->localScalar();
   }
 
 #define DEFINE_ACCESSOR(type,name,member) \
@@ -69,16 +77,7 @@
     } \
   }
 
-  Tensor toTensor() const {
-    if (Tag::HAS_t == tag) {
-      return t;
-    } else if (Tag::HAS_d == tag) {
-      return CPU(kDouble).scalarTensor(*this);
-    } else {
-      assert(Tag::HAS_i == tag);
-      return CPU(kLong).scalarTensor(*this);
-    }
-  }
+  Tensor toTensor() const;
 
   AT_FORALL_SCALAR_TYPES(DEFINE_ACCESSOR)
 
@@ -103,10 +102,8 @@
   union {
     double d;
     int64_t i;
+    TensorImpl* t;
   } v;
-  Tensor t; //Note: cannot be in union be cause of copy/destruct behavior
-            //ideally we try to pack this structure tighter if it becomes
-            //a performance problem.
   friend struct Type;
 };
 
diff --git a/ScalarType.h b/ScalarType.h
new file mode 100644
index 0000000..034c4d0
--- /dev/null
+++ b/ScalarType.h
@@ -0,0 +1,74 @@
+#pragma once
+
+#include <stdint.h>
+
+#include "ATen/ArrayRef.h"
+#include "ATen/ATenGeneral.h"
+#include "ATen/Half.h"
+
+namespace at {
+
+#define AT_FORALL_SCALAR_TYPES(_) \
+_(uint8_t,Byte,i) \
+_(int8_t,Char,i) \
+_(double,Double,d) \
+_(float,Float,d) \
+_(int,Int,i) \
+_(int64_t,Long,i) \
+_(int16_t,Short,i) \
+_(Half,Half,d)
+
+enum class ScalarType {
+#define DEFINE_ENUM(_1,n,_2) \
+  n,
+  AT_FORALL_SCALAR_TYPES(DEFINE_ENUM)
+#undef DEFINE_ENUM
+  NumOptions
+};
+
+enum class Backend {
+  CPU,
+  CUDA,
+  SparseCPU,
+  SparseCUDA,
+  NumOptions
+};
+
+constexpr Backend kCPU = Backend::CPU;
+constexpr Backend kCUDA = Backend::CUDA;
+constexpr Backend kSparseCPU = Backend::SparseCPU;
+constexpr Backend kSparseCUDA = Backend::SparseCUDA;
+
+static inline const char * toString(Backend b) {
+  switch(b) {
+    case Backend::CPU: return "CPU";
+    case Backend::CUDA: return "CUDA";
+    case Backend::SparseCPU: return "SparseCPU";
+    case Backend::SparseCUDA: return "SparseCUDA";
+    default: return "UNKNOWN_BACKEND";
+  }
+}
+
+#define DEFINE_CONSTANT(_,name,_2) \
+constexpr ScalarType k##name = ScalarType::name;
+
+AT_FORALL_SCALAR_TYPES(DEFINE_CONSTANT)
+#undef DEFINE_CONSTANT
+
+static inline const char * toString(ScalarType t) {
+#define DEFINE_CASE(_,name,_2) \
+  case ScalarType:: name : return #name;
+
+  switch(t) {
+    AT_FORALL_SCALAR_TYPES(DEFINE_CASE)
+    default:
+      return "UNKNOWN_SCALAR_TYPE";
+  }
+#undef DEFINE_CASE
+}
+
+struct Tensor;
+typedef ArrayRef<int64_t> IntList;
+typedef ArrayRef<Tensor> TensorList;
+
+} // namespace at
diff --git a/Storage.h b/Storage.h
index 5554f24..c6aa270 100644
--- a/Storage.h
+++ b/Storage.h
@@ -1,10 +1,11 @@
 #pragma once
 
 #include "ATen/Scalar.h"
-#include "ATen/Type.h"
 
 namespace at {
 
+struct Type;
+
 struct Storage {
   Storage() {}
   Storage(const Storage& other) = delete;
diff --git a/TensorAccessor.h b/TensorAccessor.h
index 6d9cd83..e51af27 100644
--- a/TensorAccessor.h
+++ b/TensorAccessor.h
@@ -1,8 +1,9 @@
 #pragma once
+
 #include <cstddef>
 #include <stdint.h>
 
-#include "ATen/Type.h"
+#include "ATen/ScalarType.h"
 
 namespace at {
 
diff --git a/TensorImpl.h b/TensorImpl.h
index ad038f5..021f0b5 100644
--- a/TensorImpl.h
+++ b/TensorImpl.h
@@ -1,13 +1,15 @@
 #pragma once
 
 #include <atomic>
-
-#include "ATen/Type.h"
 #include <iostream>
+
+#include "ATen/ScalarType.h"
+
 namespace at {
 
 struct Type;
 class Scalar;
+
 struct TensorImpl {
   explicit TensorImpl(Type * type)
   :  refcount(1), is_scalar(false), type_(type) {}
diff --git a/function_wrapper.py b/function_wrapper.py
index fb60797..16c843f 100644
--- a/function_wrapper.py
+++ b/function_wrapper.py
@@ -8,12 +8,12 @@
     string_type = basestring
 
 # temporary things we cannot handle
-EXCLUDE_PATTERN = "bernoulli.*|normal.*|exponential.*|random.*|arange.*"
+EXCLUDE_PATTERN = "bernoulli.*"
 # what has to be done to add a Operation ...
 # 1. if broadcasting or without the full list of arguments, add a non-virtual
 #    declaration under Type.h
 TYPE_METHOD_DECLARATION_NON_VIRTUAL = CodeTemplate("""\
-${return_type} ${method_prefix}${api_name}(${formals}) const;
+${return_type} ${method_prefix}${api_name}(${formals_with_defaults}) const;
 """)
 # 2. broadcasting functions are implemented in Type.cpp
 TYPE_METHOD_DEFINITION_BROADCAST = CodeTemplate("""\
@@ -23,46 +23,40 @@
     return ${method_prefix_derived}${api_name}(${broadcast_modified_actuals});
 }
 """)
-# 3. functions without the full list of arguments are implemented in TypeMethods.h
-TYPE_METHOD_INLINE = CodeTemplate("""\
-inline ${return_type} Type::${method_prefix}${api_name}(${formals}) const {
-    ${return_call}${method_prefix}${api_name}(${actuals_with_constants});
-}
-""")
-# 4. add virtual dispatch declaration to Type.h and default impl to Type.cpp
+# 3. add virtual dispatch declaration to Type.h and default impl to Type.cpp
 TYPE_METHOD_DECLARATION = CodeTemplate("""\
-virtual ${return_type} ${method_prefix}${api_name}(${formals}) const;
+virtual ${return_type} ${method_prefix}${api_name}(${formals_with_defaults}) const;
 """)
 TYPE_METHOD_DEFINITION = CodeTemplate("""\
 ${return_type} Type::${method_prefix}${api_name}(${formals}) const {
     throw std::runtime_error(std::string("${api_name} is not implemented for type ") + toString());
 }
 """)
-# 5. add virtual override to TypeDerived.h
+# 4. add virtual override to TypeDerived.h
 TYPE_DERIVED_DECLARATION = CodeTemplate("""\
 virtual ${return_type} ${method_prefix_derived}${api_name}(${formals}) const override;
 """)
-# 6. add override definition to TypeDerived.cpp
+# 5. add override definition to TypeDerived.cpp
 TYPE_DERIVED_DEFINITION = CodeTemplate("""\
 ${return_type} ${Type}::${method_prefix_derived}${api_name}(${formals}) const {
     ${type_definition_body}
 }
 """)
-# 7. add non-virtual declaration to Tensor.h
+# 6. add non-virtual declaration to Tensor.h
 TENSOR_METHOD_DECLARATION = CodeTemplate("""\
-${return_type} ${api_name}(${method_formals})${const_mark};
+${return_type} ${api_name}(${method_formals_with_defaults})${const_mark};
 """)
-# 8. add non-virtual declaration to Tensor.cpp
+# 7. add non-virtual declaration to Tensor.cpp
 TENSOR_METHOD_DEFINITION = CodeTemplate("""\
 inline ${return_type} Tensor::${api_name}(${method_formals})${const_mark} {
     return type().${method_prefix}${api_name}(${method_actuals});
 }
 """)
-# 9. add a method declaration in Functions.h
+# 8. add a method declaration in Functions.h
 FUNCTION_DECLARATION = CodeTemplate("""\
-static inline ${return_type} ${api_name}(${formals});
+static inline ${return_type} ${api_name}(${formals_with_defaults});
 """)
-# 10. add a method definition in Functions.cpp
+# 9. add method definition in Functions.h
 FUNCTION_DEFINITION = CodeTemplate("""\
 static inline ${return_type} ${api_name}(${formals}) {
     return ${inferred_type}.${api_name}(${actuals});
@@ -97,7 +91,7 @@
     'THIndexTensor*': 'Tensor &',
     'THIntegerTensor*': 'Tensor &',
     'THStorage*': 'Storage &',
-    'THGenerator*': 'Generator &',
+    'THGenerator*': 'Generator *',
     'THSize*': 'IntList',
     'THStride*': 'IntList',
     'accreal': 'Scalar',
@@ -111,7 +105,7 @@
     'THIndexTensor*': 'IndexTensor',
     'THIntegerTensor*': 'IntegerTensor',
     'THStorage*': 'Storage',
-    'THGenerator*': 'Generator',
+    'THGenerator*': 'Generator*',
     'THSize*': 'IntList',
     'THStride*': 'IntList',
     'accreal': 'accreal',
@@ -143,7 +137,7 @@
         CodeTemplate(
             'checked_cast<${Backend}IntTensor>(${arg_name}.pImpl,"${arg_name}",${arg_pos}, ${null_okay})'),
     'THStorage*': CodeTemplate('checked_cast<${Storage}>(&${arg_name},"${arg_name}",${arg_pos}, false)'),
-    'THGenerator*': CodeTemplate('check_generator<${Backend}Generator>(&${arg_name})'),
+    'THGenerator*': CodeTemplate('check_generator<${Backend}Generator>(${arg_name}, &context->defaultGenerator(backend()))'),
     'THSize*': CodeTemplate('THLongStorageView::make(${arg_name}, true)'),
     'THStride*': CodeTemplate('THLongStorageView::make(${arg_name}, false, true)'),
     'real': CodeTemplate('${arg_name}.to${ScalarName}()'),
@@ -176,17 +170,17 @@
 CONSTANT_REPLACEMENTS = [
     ('AS_REAL', '${AS_REAL}'),
     ('THPDefaultGenerator->cdata',
-     'dynamic_cast<${Generator}&>(context->defaultGenerator(backend())).generator'),
+     'dynamic_cast<${Generator}&>(context->defaultGenerator(backend())).generator'),
     ('__storage_size.get\\(\\)',
      'THLongStorageView::make(static_cast<int64_t>(storage.size()))'),
     ('__last_dim', 'self.ndimension()-1'),
 ]
 
-# Replacements for constants when calling other ATen functions
-INLINE_CONSTANT_REPLACEMENTS = [
+# Replacements for constants in header file function definitions
+HEADER_CONSTANT_REPLACEMENTS = [
     (r'AS_REAL\((.*)\)', r'\1'),
-    ('THPDefaultGenerator->cdata', 'context->defaultGenerator(backend())'),
-    ('__last_dim', 'self.ndimension()-1'),
+    ('THPDefaultGenerator->cdata', 'nullptr'),
+    ('__last_dim', '-1'),
 ]
 
 
@@ -249,12 +243,19 @@
 
     def get_formals(option, include_constants=False):
         seen = set()
-        result = []
+        pos_args = []
+        kwd_args = []
 
         def insert(argument):
             if argument['name'] not in seen:
                 seen.add(argument['name'])
-                result.append(argument)
+                if argument.get('kwarg_only', False):
+                    kwd_args.append(argument)
+                else:
+                    pos_args.append(argument)
+        for argument in option['arguments']:
+            if argument.get('output') and not argument.get('allocate', False):
+                insert(argument)
         for argument in option['arguments']:
             if argument['type'] == 'THSTensor*':
                 # only enable for a subset of Dense/Sparse ops
@@ -265,29 +266,10 @@
                 insert(argument)
             elif is_real_argument_to_wrapper(argument):
                 insert(argument)
-        for argument in option['arguments']:
-            if argument.get('output') and not argument.get('allocate', False):
-                insert(argument)
 
+        result = pos_args + kwd_args
         return [translate_formal(argument, option) for argument in result]
 
-    def get_actuals_with_constants(option):
-        actuals = []
-        for arg in get_formals(option, include_constants=True):
-            if arg['type'] != 'CONSTANT':
-                actuals.append(arg['name'])
-                continue
-            v = str(arg.get('default', arg['name']))
-            for pattern, replacement in INLINE_CONSTANT_REPLACEMENTS:
-                v = re.sub(pattern, replacement, v)
-            if v in {'NULL', 'nullptr'}:
-                if arg['name'] == 'stride':
-                    v = 'IntList()'
-                else:
-                    v = 'Tensor()'
-            actuals.append(v)
-        return actuals
-
     def get_return_types(option):
         ret = option['return']
         if ret['kind'] == 'arguments':
@@ -312,15 +294,29 @@
         return "std::tuple<{}>".format(','.join(r['type'] for r in return_types))
         return return_types
 
-    def find_first_tensor(formals):
+    def find_dispatch_tensor(formals):
+        # dispatch to self if it's a parameter
         for formal in formals:
-            if 'Tensor' == formal['dynamic_type'] or 'TensorList' == formal['dynamic_type']:
+            if formal['name'] == 'self' and formal['dynamic_type'] == 'Tensor':
+                return formal['name']
+        # otherwise dispatch to the first Tensor or TensorList
+        for formal in formals:
+            if 'TensorList' == formal['dynamic_type'] or formal['dynamic_type'] == 'Tensor':
                 return formal['name']
         return None
 
     def format_formal(f):
         return '{} {}'.format(f['type'], f['name'])
 
+    def formal_with_default(f):
+        s = format_formal(f)
+        v = f.get('default')
+        if v is None:
+            return s
+        for pattern, replacement in HEADER_CONSTANT_REPLACEMENTS:
+            v = re.sub(pattern, replacement, str(v))
+        return '{}={}'.format(s, v)
+
     def get_broadcast_argument(option):
         for argument in option['arguments']:
             if argument.get('broadcast'):
@@ -359,6 +355,7 @@
         formals = get_formals(option)
         option['formals_list'] = formals
         option['formals'] = [format_formal(f) for f in formals]
+        option['formals_with_defaults'] = [formal_with_default(f) for f in formals]
         option['returns'] = get_return_types(option)
         option['return_type'] = format_return_type(option['returns'])
         option['return_call'] = 'return ' if option['return_type'] != 'void' else ''
@@ -366,6 +363,8 @@
 
         option['method_formals'] = [format_formal(f) for f in formals
                                     if f['name'] != 'self']
+        option['method_formals_with_defaults'] = (
+            [formal_with_default(f) for f in formals if f['name'] != 'self'])
         option['method_actuals'] = [
             f['name'] if f['name'] != 'self' else '*this' for f in formals]
 
@@ -373,8 +372,8 @@
 
         is_method = 'method' in option['variants']
         is_function = 'function' in option['variants']
-        first_tensor = find_first_tensor(formals)
-        is_namespace_function = is_function and first_tensor is not None
+        dispatch_tensor = find_dispatch_tensor(formals)
+        is_namespace_function = is_function and dispatch_tensor is not None
 
         # method-only things are prefixed with m_ in Type so that
         # another function-only variant can exist without the name colliding
@@ -383,7 +382,7 @@
         env = nested_dict(option, top_env)
 
         broadcast_arg = get_broadcast_argument(option)
-        if broadcast_arg is None and option['has_full_argument_list']:
+        if broadcast_arg is None:
             top_env['type_method_declarations'].append(
                 TYPE_METHOD_DECLARATION.substitute(env))
             top_env['type_method_definitions'].append(
@@ -392,13 +391,6 @@
             top_env['type_method_declarations'].append(
                 TYPE_METHOD_DECLARATION_NON_VIRTUAL.substitute(env))
 
-        if not option['has_full_argument_list']:
-            # functions without the full list of arguments are implemented
-            # inline in TypeMethods.h
-            option['actuals_with_constants'] = get_actuals_with_constants(option)
-            top_env['type_method_inline_definitions'].append(
-                TYPE_METHOD_INLINE.substitute(env))
-        elif broadcast_arg is not None:
             # "s_" for "same size".
             option['method_prefix_derived'] = 's_' + option['method_prefix']
             same_size_option = option.copy()
@@ -434,7 +426,7 @@
             method_of.append('Tensor')
 
         if is_namespace_function:
-            option['inferred_type'] = 'infer_type({})'.format(first_tensor)
+            option['inferred_type'] = 'infer_type({})'.format(dispatch_tensor)
             top_env['function_declarations'].append(
                 FUNCTION_DECLARATION.substitute(env))
             top_env['function_definitions'].append(
@@ -448,7 +440,6 @@
             'method_of': method_of,
             'returns': option['returns'],
             'inplace': option['inplace'],
-            'has_full_argument_list': option['has_full_argument_list'],
         })
 
     output_declarations = []
@@ -476,8 +467,7 @@
         return argument['type'] in CHECKED_CAST
 
     def nullable_argument(argument):
-        return (argument['type'] in {'THIntegerTensor*', 'THTensor*'} and
-                argument.get('default', '') in {'NULL', 'nullptr'})
+        return argument.get('is_nullable', False)
 
     def bool_option_is_string(argument):
         return 'if_true' in argument and isinstance(argument['if_true'], string_type)
@@ -721,7 +711,7 @@
 
     for declaration in declarations:
         for option in declaration['options']:
-            if not option.get('skip', False) and option['has_full_argument_list']:
+            if not option.get('skip', False):
                 try:
                     process_option(option)
                 except NYIError:
diff --git a/gen.py b/gen.py
index 98f5cf6..1b3f885 100644
--- a/gen.py
+++ b/gen.py
@@ -32,7 +32,6 @@
 TYPE_DERIVED_CPP = CodeTemplate.from_file(TEMPLATE_PATH + "/TypeDerived.cpp")
 TYPE_DERIVED_H = CodeTemplate.from_file(TEMPLATE_PATH + "/TypeDerived.h")
 TYPE_H = CodeTemplate.from_file(TEMPLATE_PATH + "/Type.h")
-TYPE_METHODS_H = CodeTemplate.from_file(TEMPLATE_PATH + "/TypeMethods.h")
 TYPE_CPP = CodeTemplate.from_file(TEMPLATE_PATH + "/Type.cpp")
 
 TENSOR_DERIVED_CPP = CodeTemplate.from_file(
@@ -225,8 +224,6 @@
                 for d in cwrap_parser.parse(file)]
 declarations += nn_parse.run(nn_files)
 declarations = preprocess_declarations.run(declarations)
-# print(yaml.dump(declarations))
-
 for fname, env in generators.items():
     write(fname, GENERATOR_DERIVED.substitute(env))
 
@@ -250,7 +247,6 @@
                 backend, density, scalar_type, declarations))
 
 write('Type.h', TYPE_H.substitute(top_env))
-write('TypeMethods.h', TYPE_METHODS_H.substitute(top_env))
 write('Type.cpp', TYPE_CPP.substitute(top_env))
 
 write('Tensor.h', TENSOR_H.substitute(top_env))
diff --git a/nn_parse.py b/nn_parse.py
index f92b72d..d328fe8 100644
--- a/nn_parse.py
+++ b/nn_parse.py
@@ -14,7 +14,7 @@
         'arg': typ + ' ' + arg.name,
     }
     if arg.is_optional:
-        result['default'] = 'nullptr'
+        result['is_nullable'] = True
     return result
 
 
diff --git a/preprocess_declarations.py b/preprocess_declarations.py
index 6c620c8..23f5115 100644
--- a/preprocess_declarations.py
+++ b/preprocess_declarations.py
@@ -77,7 +77,17 @@
 
 def handle_outputs_taken_as_arguments(options):
     new_options = []
+
+    def is_nullable(arg):
+        return (arg['type'] in {'THIntegerTensor*', 'THTensor*'} and
+                arg.get('default', '') in {'NULL', 'nullptr'})
+
     for option in options:
+        for arg in option['arguments']:
+            # mark arguments which can be null
+            if is_nullable(arg):
+                arg['is_nullable'] = True
+
         if any('output' in arg for arg in option['arguments']):
             allocate_option = deepcopy(option)
             # the allocating option needs to be marked
@@ -196,8 +206,11 @@
     declarations = [d for d in declarations if not exclude(d)]
     for declaration in declarations:
         common_with_cwrap.set_declaration_defaults(declaration)
-        common_with_cwrap.enumerate_options_due_to_default(
-            declaration,
+        declaration['options'] = [deepcopy(o) for o in declaration['options']]
+        for option in declaration['options']:
+            option['has_full_argument_list'] = True
+        declaration['options'] = common_with_cwrap.filter_unique_options(
+            declaration['options'],
             allow_kwarg=False,
             type_to_signature=TYPE_FORMAL_GENERIC,
             remove_self=True)
diff --git a/templates/Functions.h b/templates/Functions.h
index d61bc29..fc0deba 100644
--- a/templates/Functions.h
+++ b/templates/Functions.h
@@ -19,6 +19,7 @@
 ${function_declarations}
 
 static inline Type & infer_type(const Tensor & t) {
+  AT_ASSERT(t.defined(), "undefined Tensor");
   return t.type();
 }
 static inline Type & infer_type(const TensorList & tl) {
diff --git a/templates/Tensor.h b/templates/Tensor.h
index e96054d..7a3ebe6 100644
--- a/templates/Tensor.h
+++ b/templates/Tensor.h
@@ -1,9 +1,13 @@
 #pragma once
 
-#include "ATen/Type.h"
-#include "ATen/TensorImpl.h"
-#include "ATen/Utils.h"
+#include "ATen/Generator.h"
+#include "ATen/Scalar.h"
+#include "ATen/ScalarType.h"
 #include "ATen/TensorAccessor.h"
+#include "ATen/TensorImpl.h"
+#include "ATen/Storage.h"
+#include "ATen/SparseTensorRef.h"
+#include "ATen/Utils.h"
 
 namespace at {
 struct Type;
@@ -106,22 +110,10 @@
   Type & type() const {
     return pImpl->type();
   }
-  Tensor toType(const Type & t) const {
-    if(type().ID() ==t.ID())
-      return *this;
-    return t.copy(*this);
-  }
-  Tensor & copy_(const Tensor & src) {
-    resize_(src.sizes());
-    type().copy(src,*this);
-    return *this;
-  }
-  Tensor toType(ScalarType t) const {
-    return toType(type().toScalarType(t));
-  }
-  Tensor toBackend(Backend b) const {
-    return toType(type().toBackend(b));
-  }
+  inline Tensor toType(const Type & t) const;
+  inline Tensor & copy_(const Tensor & src);
+  inline Tensor toType(ScalarType t) const;
+  inline Tensor toBackend(Backend b) const;
 
   template<typename T>
   T * data() const;
diff --git a/templates/TensorMethods.h b/templates/TensorMethods.h
index c12365d..cd3adfc 100644
--- a/templates/TensorMethods.h
+++ b/templates/TensorMethods.h
@@ -6,6 +6,27 @@
 
 namespace at {
 
+inline Tensor Tensor::toType(const Type & t) const {
+  if(type().ID() ==t.ID())
+    return *this;
+  return t.copy(*this);
+}
+
+inline Tensor & Tensor::copy_(const Tensor & src) {
+  resize_(src.sizes());
+  type().copy(src,*this);
+  return *this;
+}
+
+inline Tensor Tensor::toType(ScalarType t) const {
+  return toType(type().toScalarType(t));
+}
+
+inline Tensor Tensor::toBackend(Backend b) const {
+  return toType(type().toBackend(b));
+}
+
+
 // all static inline to allow for inlining of the non-dynamic part of dispatch
 ${tensor_method_definitions}
 
diff --git a/templates/Type.cpp b/templates/Type.cpp
index b7b2424..7903245 100644
--- a/templates/Type.cpp
+++ b/templates/Type.cpp
@@ -1,5 +1,4 @@
 #include "ATen/Type.h"
-#include "ATen/TypeMethods.h"
 #include "ATen/Tensor.h"
 #include "ATen/Storage.h"
 #include "ATen/Scalar.h"
@@ -53,7 +52,7 @@
 }
 Tensor Type::scalarTensor(Scalar s) const {
   if(s.isBackedByTensor())
-    return s.t.toType(*this);
+    return Tensor(s.v.t, false).toType(*this);
   return tensor({}).fill_(s);
 }
 
diff --git a/templates/Type.h b/templates/Type.h
index 41a095d..704a33b 100644
--- a/templates/Type.h
+++ b/templates/Type.h
@@ -5,8 +5,11 @@
 
 #include "ATen/ATenGeneral.h"
 #include "ATen/ArrayRef.h"
+#include "ATen/Generator.h"
 #include "ATen/Half.h"
 #include "ATen/SparseTensorRef.h"
+#include "ATen/ScalarType.h"
+#include "ATen/Scalar.h"
 
 // To solve the conflict of s_addr in inaddr.h
 #ifdef _MSC_VER
@@ -19,42 +22,8 @@
 
 class Context;
 struct Storage;
-struct Tensor;
-class Scalar;
 struct Generator;
 
-#define AT_FORALL_SCALAR_TYPES(_) \
-_(uint8_t,Byte,i) \
-_(int8_t,Char,i) \
-_(double,Double,d) \
-_(float,Float,d) \
-_(int,Int,i) \
-_(int64_t,Long,i) \
-_(int16_t,Short,i) \
-_(Half,Half,d)
-
-enum class ScalarType {
-#define DEFINE_ENUM(_1,n,_2) \
-  n,
-  AT_FORALL_SCALAR_TYPES(DEFINE_ENUM)
-#undef DEFINE_ENUM
-  NumOptions
-};
-
-enum class Backend {
-  CPU,
-  CUDA,
-  SparseCPU,
-  SparseCUDA,
-  NumOptions
-};
-
-
-constexpr Backend kCPU = Backend::CPU;
-constexpr Backend kCUDA = Backend::CUDA;
-constexpr Backend kSparseCPU = Backend::SparseCPU;
-constexpr Backend kSparseCUDA = Backend::SparseCUDA;
-
 // Note [Undefined-dim versus 0-dim]
 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 // Unlike Torch, ATen treats zero-dimension tensors as having ONE
@@ -67,43 +36,12 @@
 // situation.
 constexpr int64_t kUndefinedDimensions = std::numeric_limits<int64_t>::min();
 
-static inline const char * toString(Backend b) {
-  switch(b) {
-    case Backend::CPU: return "CPU";
-    case Backend::CUDA: return "CUDA";
-    case Backend::SparseCPU: return "SparseCPU";
-    case Backend::SparseCUDA: return "SparseCUDA";
-    default: return "UNKNOWN_BACKEND";
-  }
-}
-
-#define DEFINE_CONSTANT(_,name,_2) \
-constexpr ScalarType k##name = ScalarType::name;
-
-AT_FORALL_SCALAR_TYPES(DEFINE_CONSTANT)
-#undef DEFINE_CONSTANT
-
-static inline const char * toString(ScalarType t) {
-#define DEFINE_CASE(_,name,_2) \
-  case ScalarType:: name : return #name;
-
-  switch(t) {
-    AT_FORALL_SCALAR_TYPES(DEFINE_CASE)
-    default:
-      return "UNKNOWN_SCALAR_TYPE";
-  }
-#undef DEFINE_CASE
-}
-
 enum class TypeID {
   ${type_ids}
   NumOptions
 };
 
 
-typedef ArrayRef<int64_t> IntList;
-typedef ArrayRef<Tensor> TensorList;
-
 struct ATen_CLASS Type {
   explicit Type(Context * context)
   : context(context) {}
diff --git a/templates/TypeMethods.h b/templates/TypeMethods.h
deleted file mode 100644
index 80875d2..0000000
--- a/templates/TypeMethods.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#pragma once
-
-#include "ATen/Type.h"
-#include "ATen/Context.h"
-#include "ATen/Scalar.h"
-
-namespace at {
-
-// inline non-dynamic type method definitions
-${type_method_inline_definitions}
-
-} //namespace at
diff --git a/test/atest.cpp b/test/atest.cpp
index bfe9c41..d44ae12 100644
--- a/test/atest.cpp
+++ b/test/atest.cpp
@@ -33,7 +33,7 @@
   bool threw = false;
   try {
     Tensor no;
-    add_out(foo,foo,no);
+    add_out(no,foo,foo);
   } catch (std::runtime_error&) {
     threw = true;
   }
diff --git a/test/basic.cpp b/test/basic.cpp
index 81716aa..3d45729 100644
--- a/test/basic.cpp
+++ b/test/basic.cpp
@@ -93,7 +93,7 @@
     Tensor d = type.ones({3, 4});
     Tensor r = type.zeros({3,4});
     for(auto i = 0; i < 100000; i++) {
-      add_out(r, d, r);
+      add_out(r, r, d);
     }
     auto end = std::chrono::high_resolution_clock::now();
     std::cout << std::dec << "   " << std::chrono::duration_cast<std::chrono::milliseconds>(end-begin).count() << " ms" << std::endl;
@@ -132,7 +132,7 @@
     std::cout << a << std::endl;
     std::cout << b << std::endl;
     std::cout << c << std::endl;
-    ASSERT(c.equal(addmv(0, type.zeros({3}), 1, a,b)));
+    ASSERT(c.equal(addmv(type.zeros({3}), a, b, 0, 1)));
   }
 
   {