Move default arguments to function declarations

 * Make alpha and beta in addmm (and related functions) kwarg_only
 * Move kwarg_only arguments to the end of the argument list
 * _out variants now take their output arguments first (see the call-site sketch below)
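For illustration, the new call-site shape, using the calls from the test updates in this patch (a, b, d, r are tensors of a CPU type; in C++, kwarg_only arguments simply trail the positional ones):

    // _out variants: the output argument now comes first.
    Tensor r = type.zeros({3, 4});
    add_out(r, r, d);                        // previously add_out(r, d, r)

    // beta/alpha on addmv are kwarg_only, so they move to the end
    // of the signature, each defaulting to 1.
    Tensor c = addmv(type.zeros({3}), a, b, /*beta=*/0, /*alpha=*/1);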
diff --git a/aten/CMakeLists.txt b/aten/CMakeLists.txt
index 997d16c..79a4031 100644
--- a/aten/CMakeLists.txt
+++ b/aten/CMakeLists.txt
@@ -56,7 +56,7 @@
 endif()
 
 set(cwrap_files
-  ${CMAKE_CURRENT_SOURCE_DIR}/tools/Declarations.cwrap
+  ${CMAKE_CURRENT_SOURCE_DIR}/src/ATen/Declarations.cwrap
   ${CMAKE_CURRENT_SOURCE_DIR}/src/ATen/Local.cwrap
   ${CMAKE_CURRENT_SOURCE_DIR}/lib/THNN/generic/THNN.h
   ${CMAKE_CURRENT_SOURCE_DIR}/lib/THCUNN/generic/THCUNN.h
diff --git a/aten/src/ATen/ATen.h b/aten/src/ATen/ATen.h
index ba57b63..8792ad2 100644
--- a/aten/src/ATen/ATen.h
+++ b/aten/src/ATen/ATen.h
@@ -3,7 +3,6 @@
 #include "ATen/ATenGeneral.h"
 #include "ATen/Scalar.h"
 #include "ATen/Type.h"
-#include "ATen/TypeMethods.h"
 #include "ATen/Generator.h"
 #include "ATen/Context.h"
 #include "ATen/Storage.h"
diff --git a/aten/src/ATen/CheckGenerator.h b/aten/src/ATen/CheckGenerator.h
index d2013d5..ec7a23d 100644
--- a/aten/src/ATen/CheckGenerator.h
+++ b/aten/src/ATen/CheckGenerator.h
@@ -6,7 +6,9 @@
 namespace at {
 
 template <typename T>
-static inline T * check_generator(Generator* expr) {
+static inline T * check_generator(Generator * expr, Generator * defaultValue) {
+  if (!expr)
+    expr = defaultValue;
   if(auto result = dynamic_cast<T*>(expr))
     return result;
   runtime_error("Expected a '%s' but found '%s'", typeid(T).name(), typeid(expr).name());
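Callers may now pass a null generator to mean "use the default". A minimal sketch of how the generated code uses this (CPUGenerator is an assumption for the CPU backend, following the ${Backend}Generator pattern in function_wrapper.py):

    // Inside a generated CPU method (sketch):
    auto gen = check_generator<CPUGenerator>(
        generator, &context->defaultGenerator(backend()));
    // generator == nullptr (the new header-level default) resolves to the
    // context's default generator; a non-null argument is used as-is.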
diff --git a/aten/src/ATen/Context.cpp b/aten/src/ATen/Context.cpp
index a4a3093..63ff0a4 100644
--- a/aten/src/ATen/Context.cpp
+++ b/aten/src/ATen/Context.cpp
@@ -1,4 +1,5 @@
 #include "Context.h"
+
 #include <thread>
 #include <mutex>
 #include <sstream>
diff --git a/aten/tools/Declarations.cwrap b/aten/src/ATen/Declarations.cwrap
similarity index 86%
rename from aten/tools/Declarations.cwrap
rename to aten/src/ATen/Declarations.cwrap
index 54c34a0..c9deff3 100644
--- a/aten/tools/Declarations.cwrap
+++ b/aten/src/ATen/Declarations.cwrap
@@ -73,6 +73,10 @@
   return: self
   cname: resize
   cpu_half: True
+  before_call:
+    THPUtils_assert(arg_self->storage->flag & TH_STORAGE_RESIZABLE,
+      "calling resize_ on a tensor that has non-resizable storage. Clone it first "
+      "or create a new tensor instead.");
   arguments:
     - THTensor* self
     - arg: THSize* size
@@ -92,6 +96,17 @@
       long_args: True
 ]]
 [[
+  name: zeros_like
+  cname: zerosLike
+  variants:
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* input
+]]
+[[
   name: ones
   variants:
     - function
@@ -104,6 +119,17 @@
       long_args: True
 ]]
 [[
+  name: ones_like
+  cname: onesLike
+  variants:
+    - function
+  return: argument 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* input
+]]
+[[
   name: numel
   return: long
   cname: nElement
@@ -536,19 +562,13 @@
     - CPU
     - CUDA
   return: argument 0
-  options:
-      - arguments:
-        - arg: THTensor* result
-          output: True
-        - accreal start
-        - accreal end
-        - accreal step
-      - arguments:
-        - arg: THTensor* result
-          output: True
-        - accreal start
-        - accreal end
-        - CONSTANT 1
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - accreal start
+    - accreal end
+    - arg: accreal step
+      default: 1
 ]]
 [[
   name: scatter_
@@ -572,14 +592,13 @@
 [[
   name: scatter_add_
   return: argument 0
-  options:
-    - cname: scatterAdd
-      arguments:
-        - THTensor* self
-        - arg: long dim
-          wrap_dim: self
-        - THIndexTensor* index
-        - THTensor* src
+  cname: scatterAdd
+  arguments:
+    - THTensor* self
+    - arg: long dim
+      wrap_dim: self
+    - THIndexTensor* index
+    - THTensor* src
 ]]
 [[
   name: gather
@@ -652,7 +671,8 @@
       arguments:
         - arg: THTensor* result
           output: True
-        - THTensor* self
+        - arg: THTensor* self
+          broadcast: other fallback
         - THTensor* other
 ]]
 [[
@@ -670,7 +690,8 @@
     - cname: cbitand
       arguments:
         - THTensor* self
-        - THTensor* self
+        - arg: THTensor* self
+          broadcast: other inplace fallback
         - THTensor* other
 ]]
 [[
@@ -690,7 +711,8 @@
       arguments:
         - arg: THTensor* result
           output: True
-        - THTensor* self
+        - arg: THTensor* self
+          broadcast: other fallback
         - THTensor* other
 ]]
 [[
@@ -708,7 +730,8 @@
     - cname: cbitor
       arguments:
         - THTensor* self
-        - THTensor* self
+        - arg: THTensor* self
+          broadcast: other inplace fallback
         - THTensor* other
 ]]
 [[
@@ -728,7 +751,8 @@
       arguments:
         - arg: THTensor* result
           output: True
-        - THTensor* self
+        - arg: THTensor* self
+          broadcast: other fallback
         - THTensor* other
 ]]
 [[
@@ -746,7 +770,8 @@
     - cname: cbitxor
       arguments:
         - THTensor* self
-        - THTensor* self
+        - arg: THTensor* self
+          broadcast: other inplace fallback
         - THTensor* other
 ]]
 [[
@@ -766,7 +791,8 @@
       arguments:
         - arg: THTensor* result
           output: True
-        - THTensor* self
+        - arg: THTensor* self
+          broadcast: other fallback
         - THTensor* other
 ]]
 [[
@@ -784,7 +810,8 @@
     - cname: clshift
       arguments:
         - THTensor* self
-        - THTensor* self
+        - arg: THTensor* self
+          broadcast: other inplace fallback
         - THTensor* other
 ]]
 [[
@@ -804,7 +831,8 @@
       arguments:
         - arg: THTensor* result
           output: True
-        - THTensor* self
+        - arg: THTensor* self
+          broadcast: other fallback
         - THTensor* other
 ]]
 [[
@@ -822,7 +850,8 @@
     - cname: crshift
       arguments:
         - THTensor* self
-        - THTensor* self
+        - arg: THTensor* self
+          broadcast: other inplace fallback
         - THTensor* other
 ]]
 [[
@@ -1296,19 +1325,8 @@
         - THTensor* self
         - arg: long dim
           wrap_dim: self
-        - bool keepdim
-    - cname: min
-      return: argument 0,1
-      before_call: maybeThrowBackCompatKeepdimWarn("min");
-      arguments:
-        - arg: THTensor* min
-          output: True
-        - arg: THIndexTensor* min_indices
-          output: True
-        - THTensor* self
-        - arg: long dim
-          wrap_dim: self
-        - CONSTANT false
+        - arg: bool keepdim
+          default: "false"
 ]]
 [[
   name: max
@@ -1338,19 +1356,8 @@
         - THTensor* self
         - arg: long dim
           wrap_dim: self
-        - bool keepdim
-    - cname: max
-      return: argument 0,1
-      before_call: maybeThrowBackCompatKeepdimWarn("max");
-      arguments:
-        - arg: THTensor* max
-          output: True
-        - arg: THIndexTensor* max_indices
-          output: True
-        - THTensor* self
-        - arg: long dim
-          wrap_dim: self
-        - CONSTANT false
+        - arg: bool keepdim
+          default: "false"
 ]]
 [[
   name: kthvalue
@@ -1360,50 +1367,18 @@
     - method
     - function
   return: argument 0,1
-  options:
-    - before_call: long __last_dim = THTensor_(nDimension)(LIBRARY_STATE ((THPTensor*)$arg2)->cdata)-1;
-      arguments:
-        - arg: THTensor* values
-          output: True
-        - arg: THIndexTensor* indices
-          output: True
-        - THTensor* self
-        - long k
-        - CONSTANT __last_dim
-        - bool keepdim
-    - before_call: |
-        long __last_dim = THTensor_(nDimension)(LIBRARY_STATE ((THPTensor*)$arg2)->cdata)-1;
-        maybeThrowBackCompatKeepdimWarn("kthvalue");
-      arguments:
-        - arg: THTensor* values
-          output: True
-        - arg: THIndexTensor* indices
-          output: True
-        - THTensor* self
-        - long k
-        - CONSTANT __last_dim
-        - CONSTANT false
-    - arguments:
-        - arg: THTensor* values
-          output: True
-        - arg: THIndexTensor* indices
-          output: True
-        - THTensor* self
-        - long k
-        - arg: long dim
-          wrap_dim: self
-        - bool keepdim
-    - before_call: maybeThrowBackCompatKeepdimWarn("kthvalue");
-      arguments:
-        - arg: THTensor* values
-          output: True
-        - arg: THIndexTensor* indices
-          output: True
-        - THTensor* self
-        - long k
-        - arg: long dim
-          wrap_dim: self
-        - CONSTANT false
+  arguments:
+    - arg: THTensor* values
+      output: True
+    - arg: THIndexTensor* indices
+      output: True
+    - THTensor* self
+    - long k
+    - arg: long dim
+      wrap_dim: self
+      default: __last_dim
+    - arg: bool keepdim
+      default: "false"
 ]]
 [[
   name: mode
@@ -1411,46 +1386,17 @@
     - method
     - function
   return: argument 0,1
-  options:
-    - before_call: long __last_dim = THTensor_(nDimension)(LIBRARY_STATE ((THPTensor*)$arg2)->cdata)-1;
-      arguments:
-        - arg: THTensor* values
-          output: True
-        - arg: THIndexTensor* indices
-          output: True
-        - THTensor* self
-        - CONSTANT __last_dim
-        - bool keepdim
-    - before_call: |
-        long __last_dim = THTensor_(nDimension)(LIBRARY_STATE ((THPTensor*)$arg2)->cdata)-1;
-        maybeThrowBackCompatKeepdimWarn("mode");
-      arguments:
-        - arg: THTensor* values
-          output: True
-        - arg: THIndexTensor* indices
-          output: True
-        - THTensor* self
-        - CONSTANT __last_dim
-        - CONSTANT false
-    - arguments:
-        - arg: THTensor* values
-          output: True
-        - arg: THIndexTensor* indices
-          output: True
-        - THTensor* self
-        - arg: long dim
-          wrap_dim: self
-        - bool keepdim
-    - before_call: maybeThrowBackCompatKeepdimWarn("mode");
-      arguments:
-        - arg: THTensor* values
-          output: True
-        - arg: THIndexTensor* indices
-          output: True
-        - THTensor* self
-        - arg: long dim
-          wrap_dim: self
-        - CONSTANT false
+  arguments:
+    - arg: THTensor* values
+      output: True
+    - arg: THIndexTensor* indices
+      output: True
+    - THTensor* self
+    - arg: long dim
+      wrap_dim: self
+      default: __last_dim
+    - arg: bool keepdim
+      default: "false"
 ]]
 [[
   name: median
@@ -1464,16 +1410,6 @@
       arguments:
         - THTensor* self
     - cname: median
-      before_call: long __last_dim = THTensor_(nDimension)(LIBRARY_STATE ((THPTensor*)$arg2)->cdata)-1;
-      arguments:
-        - arg: THTensor* values
-          output: True
-        - arg: THIndexTensor* indices
-          output: True
-        - THTensor* self
-        - CONSTANT __last_dim
-        - bool keepdim
-    - before_call: maybeThrowBackCompatKeepdimWarn("median");
       arguments:
         - arg: THTensor* values
           output: True
@@ -1482,16 +1418,8 @@
         - THTensor* self
         - arg: long dim
           wrap_dim: self
-        - CONSTANT false
-    - arguments:
-        - arg: THTensor* values
-          output: True
-        - arg: THIndexTensor* indices
-          output: True
-        - THTensor* self
-        - arg: long dim
-          wrap_dim: self
-        - bool keepdim
+        - arg: bool keepdim
+          default: "false"
 ]]
 [[
   name: sort
@@ -1499,34 +1427,17 @@
     - method
     - function
   return: argument 0,1
-  options:
-    - before_call: long __last_dim = THTensor_(nDimension)(LIBRARY_STATE ((THPTensor*)$arg2)->cdata)-1;
-      arguments:
-        - arg: THTensor* values
-          output: True
-        - arg: THIndexTensor* indices
-          output: True
-        - THTensor* self
-        - CONSTANT __last_dim
-        - CONSTANT false
-    - arguments:
-        - arg: THTensor* values
-          output: True
-        - arg: THIndexTensor* indices
-          output: True
-        - THTensor* self
-        - arg: long dim
-          wrap_dim: self
-        - CONSTANT false
-    - arguments:
-        - arg: THTensor* values
-          output: True
-        - arg: THIndexTensor* indices
-          output: True
-        - THTensor* self
-        - arg: long dim
-          wrap_dim: self
-        - bool descending
+  arguments:
+    - arg: THTensor* values
+      output: True
+    - arg: THIndexTensor* indices
+      output: True
+    - THTensor* self
+    - arg: long dim
+      default: __last_dim
+      wrap_dim: self
+    - arg: bool descending
+      default: "false"
 ]]
 [[
   name: topk
@@ -1534,31 +1445,20 @@
     - method
     - function
   return: argument 0,1
-  options:
-    - before_call: long __last_dim = THTensor_(nDimension)(LIBRARY_STATE ((THPTensor*)$arg2)->cdata)-1;
-      arguments:
-        - arg: THTensor* values
-          output: True
-        - arg: THIndexTensor* indices
-          output: True
-        - THTensor* self
-        - long k
-        - CONSTANT __last_dim
-        - CONSTANT true
-        - CONSTANT true
-    - arguments:
-        - arg: THTensor* values
-          output: True
-        - arg: THIndexTensor* indices
-          output: True
-        - THTensor* self
-        - long k
-        - arg: long dim
-          wrap_dim: self
-        - arg: bool largest
-          default: "true"
-        - arg: bool sorted
-          default: "true"
+  arguments:
+    - arg: THTensor* values
+      output: True
+    - arg: THIndexTensor* indices
+      output: True
+    - THTensor* self
+    - long k
+    - arg: long dim
+      default: __last_dim
+      wrap_dim: self
+    - arg: bool largest
+      default: "true"
+    - arg: bool sorted
+      default: "true"
 ]]
 [[
   name: all
@@ -2277,17 +2177,8 @@
         - THTensor* self
         - arg: long dim
           wrap_dim: self
-        - bool keepdim
-    - cname: mean
-      return: argument 0
-      before_call: maybeThrowBackCompatKeepdimWarn("mean");
-      arguments:
-        - arg: THTensor* destination
-          output: True
-        - THTensor* self
-        - arg: long dim
-          wrap_dim: self
-        - CONSTANT false
+        - arg: bool keepdim
+          default: "false"
 ]]
 [[
   name: var
@@ -2320,21 +2211,8 @@
           if_true: 0
           if_false: 1
           default: 0
-        - bool keepdim
-    - cname: var
-      return: argument 0
-      before_call: maybeThrowBackCompatKeepdimWarn("var");
-      arguments:
-        - arg: THTensor* destination
-          output: True
-        - THTensor* self
-        - arg: long dim
-          wrap_dim: self
-        - arg: bool unbiased
-          if_true: 0
-          if_false: 1
-          default: 0
-        - CONSTANT false
+        - arg: bool keepdim
+          default: "false"
 ]]
 [[
   name: std
@@ -2367,21 +2245,8 @@
           if_true: 0
           if_false: 1
           default: 0
-        - bool keepdim
-    - cname: std
-      return: argument 0
-      before_call: maybeThrowBackCompatKeepdimWarn("std");
-      arguments:
-        - arg: THTensor* destination
-          output: True
-        - THTensor* self
-        - arg: long dim
-          wrap_dim: self
-        - arg: bool unbiased
-          if_true: 0
-          if_false: 1
-          default: 0
-        - CONSTANT false
+        - arg: bool keepdim
+          default: "false"
 ]]
 [[
   name: norm
@@ -2409,18 +2274,8 @@
         - real p
         - arg: long dim
           wrap_dim: self
-        - bool keepdim
-    - cname: norm
-      return: argument 0
-      before_call: maybeThrowBackCompatKeepdimWarn("norm");
-      arguments:
-        - arg: THTensor* destination
-          output: True
-        - THTensor* self
-        - real p
-        - arg: long dim
-          wrap_dim: self
-        - CONSTANT false
+        - arg: bool keepdim
+          default: "false"
 ]]
 [[
   name: renorm
@@ -2432,17 +2287,15 @@
   variants:
     - method
     - function
-  options:
-    - cname: renorm
-      return: argument 0
-      arguments:
-        - arg: THTensor* destination
-          output: True
-        - THTensor* self
-        - real p
-        - arg: long dim
-          wrap_dim: self
-        - real maxnorm
+  return: argument 0
+  arguments:
+    - arg: THTensor* destination
+      output: True
+    - THTensor* self
+    - real p
+    - arg: long dim
+      wrap_dim: self
+    - real maxnorm
 ]]
 [[
   name: renorm_
@@ -2451,16 +2304,15 @@
   backends:
     - CPU
     - CUDA
-  options:
-    - cname: renorm
-      return: self
-      arguments:
-        - THTensor* self
-        - THTensor* self
-        - real p
-        - arg: long dim
-          wrap_dim: self
-        - real maxnorm
+  cname: renorm
+  return: self
+  arguments:
+    - THTensor* self
+    - THTensor* self
+    - real p
+    - arg: long dim
+      wrap_dim: self
+    - real maxnorm
 ]]
 [[
   name: dist
@@ -2518,6 +2370,9 @@
   name: neg
   types:
     - floating_point
+    - Long
+    - Int
+    - Short
   backends:
     - CPU
     - CUDA
@@ -2536,6 +2391,9 @@
   name: neg_
   types:
     - floating_point
+    - Long
+    - Int
+    - Short
   backends:
     - CPU
     - CUDA
@@ -2715,35 +2573,16 @@
     - method
     - function
   return: argument 0
-  options:
-    - arguments:
-      - arg: THTensor* destination
-        output: True
-      - THTensor* self
-      - CONSTANT 100
-      - CONSTANT 0
-      - CONSTANT 0
-    - arguments:
-      - arg: THTensor* destination
-        output: True
-      - THTensor* self
-      - long bins
-      - CONSTANT 0
-      - CONSTANT 0
-    - arguments:
-      - arg: THTensor* destination
-        output: True
-      - THTensor* self
-      - long bins
-      - real min
-      - CONSTANT 0
-    - arguments:
-      - arg: THTensor* destination
-        output: True
-      - THTensor* self
-      - long bins
-      - real min
-      - real max
+  arguments:
+    - arg: THTensor* destination
+      output: True
+    - THTensor* self
+    - arg: long bins
+      default: 100
+    - arg: real min
+      default: 0
+    - arg: real max
+      default: 0
 ]]
 [[
   name: zero_
@@ -2770,17 +2609,8 @@
         - THTensor* self
         - arg: long dim
           wrap_dim: self
-        - bool keepdim
-    - cname: sum
-      return: argument 0
-      before_call: maybeThrowBackCompatKeepdimWarn("sum");
-      arguments:
-        - arg: THTensor* result
-          output: True
-        - THTensor* self
-        - arg: long dim
-          wrap_dim: self
-        - CONSTANT false
+        - arg: bool keepdim
+          default: "false"
 ]]
 [[
   name: prod
@@ -2800,17 +2630,8 @@
         - THTensor* self
         - arg: long dim
           wrap_dim: self
-        - bool keepdim
-    - cname: prod
-      return: argument 0
-      before_call: maybeThrowBackCompatKeepdimWarn("prod");
-      arguments:
-        - arg: THTensor* result
-          output: True
-        - THTensor* self
-        - arg: long dim
-          wrap_dim: self
-        - CONSTANT false
+        - arg: bool keepdim
+          default: "false"
 ]]
 [[
   name: cumsum
@@ -2873,12 +2694,15 @@
     - function
   return: argument 0
   options:
-    - cname: add
+    - cname: add_scaled
       arguments:
         - arg: THTensor* result
           output: True
         - THTensor* self
         - real value
+        - arg: real alpha
+          default: AS_REAL(1)
+          kwarg_only: True
     - cname: cadd
       aten_sparse: True
       arguments:
@@ -2886,8 +2710,9 @@
           output: True
         - arg: THTensor* self
           broadcast: other fallback
-        - arg: real value
+        - arg: real alpha
           default: AS_REAL(1)
+          kwarg_only: True
         - THTensor* other
     - sparse: True
       cname: spcadd
@@ -2896,27 +2721,32 @@
         - arg: THTensor* result
           output: True
         - THTensor* self
-        - arg: real value
+        - arg: real alpha
           default: AS_REAL(1)
+          kwarg_only: True
         - THSTensor* other
 ]]
 [[
   name: add_
   return: argument 0
   options:
-    - cname: add
+    - cname: add_scaled
       arguments:
         - THTensor* self
         - THTensor* self
         - real value
+        - arg: real alpha
+          default: AS_REAL(1)
+          kwarg_only: True
     - cname: cadd
       aten_sparse: True
       arguments:
         - THTensor* self
         - arg: THTensor* self
           broadcast: other inplace fallback
-        - arg: real value
+        - arg: real alpha
           default: AS_REAL(1)
+          kwarg_only: True
         - THTensor* other
     - sparse: True
       cname: spcadd
@@ -2924,8 +2754,9 @@
       arguments:
         - THTensor* self
         - THTensor* self
-        - arg: real value
+        - arg: real alpha
           default: AS_REAL(1)
+          kwarg_only: True
         - THSTensor* other
 ]]
 [[
@@ -2935,38 +2766,46 @@
     - function
   return: argument 0
   options:
-    - cname: sub
+    - cname: sub_scaled
       arguments:
         - arg: THTensor* result
           output: True
         - THTensor* self
         - real value
+        - arg: real alpha
+          default: AS_REAL(1)
+          kwarg_only: True
     - cname: csub
       arguments:
         - arg: THTensor* result
           output: True
         - arg: THTensor* self
           broadcast: other fallback
-        - arg: real value
+        - arg: real alpha
           default: AS_REAL(1)
+          kwarg_only: True
         - THTensor* other
 ]]
 [[
   name: sub_
   return: argument 0
   options:
-    - cname: sub
+    - cname: sub_scaled
       arguments:
         - THTensor* self
         - THTensor* self
         - real value
+        - arg: real alpha
+          default: AS_REAL(1)
+          kwarg_only: True
     - cname: csub
       arguments:
         - THTensor* self
         - arg: THTensor* self
           broadcast: other inplace fallback
-        - arg: real value
+        - arg: real alpha
           default: AS_REAL(1)
+          kwarg_only: True
         - THTensor* other
 ]]
 [[
@@ -3248,20 +3087,16 @@
   name: eye
   backends:
     - CPU
+    - CUDA
   variants:
     - function
   return: argument 0
-  options:
-    - arguments:
-      - arg: THTensor* result
-        output: True
-      - long n
-      - argument 1
-    - arguments:
-      - arg: THTensor* result
-        output: True
-      - long n
-      - long m
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - long n
+    - arg: long m
+      default: 1
 ]]
 [[
   name: diag
@@ -3269,13 +3104,12 @@
     - method
     - function
   return: argument 0
-  options:
-    - arguments:
-        - arg: THTensor* result
-          output: True
-        - THTensor* self
-        - arg: long diagonal
-          default: 0
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+    - arg: long diagonal
+      default: 0
 ]]
 [[
   name: addmm
@@ -3289,10 +3123,12 @@
         output: True
       - arg: real beta
         default: AS_REAL(1)
+        kwarg_only: True
       - arg: THTensor* self
         broadcast: mat1,mat2 dims:mat1.dim0,mat2.dim1
       - arg: real alpha
         default: AS_REAL(1)
+        kwarg_only: True
       - THTensor* mat1
       - THTensor* mat2
     - cname: spaddmm
@@ -3302,9 +3138,11 @@
           output: True
         - arg: real beta
           default: AS_REAL(1)
+          kwarg_only: True
         - THTensor* self
         - arg: real alpha
           default: AS_REAL(1)
+          kwarg_only: True
         - THSTensor* mat1
         - THTensor* mat2
 ]]
@@ -3317,9 +3155,11 @@
       - THTensor* self
       - arg: real beta
         default: AS_REAL(1)
+        kwarg_only: True
       - THTensor* self
       - arg: real alpha
         default: AS_REAL(1)
+        kwarg_only: True
       - THTensor* mat1
       - THTensor* mat2
     - cname: spaddmm
@@ -3328,9 +3168,11 @@
         - arg: THTensor* self
         - arg: real beta
           default: AS_REAL(1)
+          kwarg_only: True
         - THTensor* self
         - arg: real alpha
           default: AS_REAL(1)
+          kwarg_only: True
         - THSTensor* mat1
         - THTensor* mat2
 ]]
@@ -3345,10 +3187,12 @@
       output: True
     - arg: real beta
       default: AS_REAL(1)
+      kwarg_only: True
     - arg: THTensor* self
       broadcast: mat,vec dims:mat.dim0
     - arg: real alpha
       default: AS_REAL(1)
+      kwarg_only: True
     - THTensor* mat
     - THTensor* vec
 ]]
@@ -3360,9 +3204,11 @@
     - THTensor* self
     - arg: real beta
       default: AS_REAL(1)
+      kwarg_only: True
     - THTensor* self
     - arg: real alpha
       default: AS_REAL(1)
+      kwarg_only: True
     - THTensor* mat
     - THTensor* vec
 ]]
@@ -3377,10 +3223,12 @@
       output: True
     - arg: real beta
       default: AS_REAL(1)
+      kwarg_only: True
     - arg: THTensor* self
       broadcast: vec1,vec2 dims:vec1.dim0,vec2.dim0
     - arg: real alpha
       default: AS_REAL(1)
+      kwarg_only: True
     - THTensor* vec1
     - THTensor* vec2
 ]]
@@ -3392,9 +3240,11 @@
     - THTensor* self
     - arg: real beta
       default: AS_REAL(1)
+      kwarg_only: True
     - THTensor* self
     - arg: real alpha
       default: AS_REAL(1)
+      kwarg_only: True
     - THTensor* vec1
     - THTensor* vec2
 ]]
@@ -3516,10 +3366,12 @@
       output: True
     - arg: real beta
       default: AS_REAL(1)
+      kwarg_only: True
     - arg: THTensor* self
       broadcast: batch1,batch2 dims:batch1.dim1,batch2.dim2
     - arg: real alpha
       default: AS_REAL(1)
+      kwarg_only: True
     - THTensor* batch1
     - THTensor* batch2
 ]]
@@ -3531,9 +3383,11 @@
     - THTensor* self
     - arg: real beta
       default: AS_REAL(1)
+      kwarg_only: True
     - THTensor* self
     - arg: real alpha
       default: AS_REAL(1)
+      kwarg_only: True
     - THTensor* batch1
     - THTensor* batch2
 ]]
@@ -3548,10 +3402,12 @@
       output: True
     - arg: real beta
       default: AS_REAL(1)
+      kwarg_only: True
     - arg: THTensor* self
       broadcast: batch1,batch2 dims:batch1.dim0,batch1.dim1,batch2.dim2
     - arg: real alpha
       default: AS_REAL(1)
+      kwarg_only: True
     - THTensor* batch1
     - THTensor* batch2
 ]]
@@ -3563,9 +3419,11 @@
     - THTensor* self
     - arg: real beta
       default: AS_REAL(1)
+      kwarg_only: True
     - THTensor* self
     - arg: real alpha
       default: AS_REAL(1)
+      kwarg_only: True
     - THTensor* batch1
     - THTensor* batch2
 ]]
@@ -3582,6 +3440,7 @@
       broadcast: tensor1,tensor2 fallback
     - arg: real value
       default: AS_REAL(1)
+      kwarg_only: True
     - THTensor* tensor1
     - THTensor* tensor2
 ]]
@@ -3596,6 +3455,7 @@
           broadcast: tensor1,tensor2 inplace fallback
         - arg: real value
           default: AS_REAL(1)
+          kwarg_only: True
         - THTensor* tensor1
         - THTensor* tensor2
     - cname: spaddcmul
@@ -3606,6 +3466,7 @@
         - THTensor* self
         - arg: real value
           default: AS_REAL(1)
+          kwarg_only: True
         - THSTensor* tensor1
         - THSTensor* tensor2
 ]]
@@ -3622,6 +3483,7 @@
       broadcast: tensor1,tensor2 fallback
     - arg: real value
       default: AS_REAL(1)
+      kwarg_only: True
     - THTensor* tensor1
     - THTensor* tensor2
 ]]
@@ -3635,6 +3497,7 @@
       broadcast: tensor1,tensor2 inplace fallback
     - arg: real value
       default: AS_REAL(1)
+      kwarg_only: True
     - THTensor* tensor1
     - THTensor* tensor2
 ]]
@@ -4048,14 +3911,14 @@
         - arg: THGenerator* generator
           default: THPDefaultGenerator->cdata
           kwarg_only: True
-    - cname: random1__
+    - cname: cappedRandom
       arguments:
         - THTensor* self
         - arg: THGenerator* generator
           default: THPDefaultGenerator->cdata
           kwarg_only: True
         - long to
-    - cname: random2__
+    - cname: clampedRandom
       arguments:
         - THTensor* self
         - arg: THGenerator* generator
@@ -4134,7 +3997,6 @@
           default: THPDefaultGenerator->cdata
           kwarg_only: True
         - arg: double mean
-          default: 0
         - THTensor* std
     - cname: normal_means_stddevs
       arguments:
@@ -4302,6 +4164,7 @@
   defined_if: "!IS_DISTRIBUTED"
   backends:
     - CPU
+    - CUDA
   return: self
   options:
     - cname: bernoulli
@@ -4318,54 +4181,12 @@
         - arg: THGenerator* generator
           default: THPDefaultGenerator->cdata
           kwarg_only: True
-        - THFloatTensor* float_p
+        - BackendFloatTensor* float_p
     - cname: bernoulli_DoubleTensor
       arguments:
         - THTensor* self
         - arg: THGenerator* generator
           default: THPDefaultGenerator->cdata
           kwarg_only: True
-        - THDoubleTensor* float_p
-]]
-[[
-  name: bernoulli
-  types:
-    - Float
-    - Double
-  backends:
-    - CUDA
-  return: argument 0
-  variants:
-    - method
-    - function
-  cname: BERNOULLI_TENSOR
-  before_call:
-    THTensor_(resizeAs)(LIBRARY_STATE ((THPTensor*)$arg0)->cdata, ((THPTensor*)$arg1)->cdata);
-  arguments:
-    - arg: THTensor* output
-      output: True
-      resize: self
-    - THTensor* self
-]]
-[[
-  name: bernoulli_
-  types:
-    - floating_point
-  backends:
-    - CUDA
-  return: self
-  options:
-    - cname: bernoulli
-      arguments:
-        - THTensor* self
-        - arg: double p
-          default: 0.5
-    - cname: bernoulli_FloatTensor
-      arguments:
-        - THTensor* self
-        - THCudaTensor* float_p
-    - cname: bernoulli_DoubleTensor
-      arguments:
-        - THTensor* self
-        - THCudaDoubleTensor* float_p
+        - BackendDoubleTensor* float_p
 ]]
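Collapsing these option lists means one cwrap entry now produces one C++ declaration whose defaults live in the generated headers. A sketch of what the arange entry above should produce, assuming the type mappings in function_wrapper.py (Tensor & for THTensor*, Scalar for accreal); the exact generated text may differ:

    // Functions.h (sketch): output argument first, cwrap default inline.
    static inline Tensor & arange_out(Tensor & result, Scalar start,
                                      Scalar end, Scalar step=1);
    // __last_dim defaults (kthvalue, mode, sort, topk) are emitted as -1
    // in headers, per HEADER_CONSTANT_REPLACEMENTS below.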
diff --git a/aten/src/ATen/Generator.h b/aten/src/ATen/Generator.h
index e9a9d08..ec01212 100644
--- a/aten/src/ATen/Generator.h
+++ b/aten/src/ATen/Generator.h
@@ -1,5 +1,7 @@
 #pragma once
 
+#include <stdint.h>
+
 namespace at {
 
 struct Generator {
diff --git a/aten/src/ATen/Half.h b/aten/src/ATen/Half.h
index aac6db2..bfb1cc6 100644
--- a/aten/src/ATen/Half.h
+++ b/aten/src/ATen/Half.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include<stdint.h>
+#include <stdint.h>
 #ifdef AT_CUDA_ENABLED
 #include <cuda.h>
 #include <cuda_runtime.h>
diff --git a/aten/src/ATen/Local.cwrap b/aten/src/ATen/Local.cwrap
index f381056..ede79c4 100644
--- a/aten/src/ATen/Local.cwrap
+++ b/aten/src/ATen/Local.cwrap
@@ -30,8 +30,11 @@
     - cname: newWithSize
       arguments:
         - THSize* size
+        - CONSTANT NULL
+    - cname: newWithSize
+      arguments:
+        - THSize* size
         - arg: THStride* stride
-          default: NULL
     - cname: newWithStorage
       arguments:
         - THStorage* storage
diff --git a/aten/src/ATen/Scalar.cpp b/aten/src/ATen/Scalar.cpp
index 3529236..bd61f43 100644
--- a/aten/src/ATen/Scalar.cpp
+++ b/aten/src/ATen/Scalar.cpp
@@ -1,8 +1,30 @@
 #include "ATen/Scalar.h"
+
 #include <TH/TH.h>
 
+#include "ATen/Tensor.h"
+#include "ATen/Context.h"
+
 namespace at {
 
+Scalar::Scalar(const Tensor & t)
+: tag(Tag::HAS_t) {
+  AT_ASSERT(t.dim() == 0,"Attempting to create a Scalar from a %d dim tensor",t.dim());
+  v.t = t.get();
+  v.t->retain();
+}
+
+Tensor Scalar::toTensor() const {
+  if (Tag::HAS_t == tag) {
+    return Tensor(v.t, true);
+  } else if (Tag::HAS_d == tag) {
+    return CPU(kDouble).scalarTensor(*this);
+  } else {
+    assert(Tag::HAS_i == tag);
+    return CPU(kLong).scalarTensor(*this);
+  }
+}
+
 template<> Half convert(double f) {
   float t = static_cast<float>(f);
   Half h;
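A short sketch of the round-trip these definitions support (CPU backend assumed):

    Scalar s(3.14);           // tagged HAS_d; the value lives in the union
    Tensor t = s.toTensor();  // materialized via CPU(kDouble).scalarTensor
    Scalar back(t);           // retains t's TensorImpl*; asserts t.dim() == 0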
diff --git a/aten/src/ATen/Scalar.h b/aten/src/ATen/Scalar.h
index 94bb069..945260c 100644
--- a/aten/src/ATen/Scalar.h
+++ b/aten/src/ATen/Scalar.h
@@ -1,24 +1,32 @@
 #pragma once
 
+#include <assert.h>
 #include <stdint.h>
 #include <stdexcept>
 #include <string>
-#include "ATen/Context.h"
+
+#include "ATen/ATenGeneral.h"
 #include "ATen/Half.h"
-#include "ATen/Type.h"
-#include "ATen/Utils.h"
-#include "ATen/Tensor.h"
+#include "ATen/ScalarType.h"
+#include "ATen/TensorImpl.h"
+
 
 namespace at {
 
+struct TensorImpl;
+struct Tensor;
+
 class Scalar {
 public:
   Scalar() : Scalar(int64_t(0)) {}
-
-  explicit Scalar(const Tensor & t)
-  : tag(Tag::HAS_t), t(t) {
-    AT_ASSERT(t.dim() == 0,"Attempting to create a Scalar from a %d dim tensor",t.dim());
+  ~Scalar() {
+    if (Tag::HAS_t == tag) {
+      v.t->release();
+    }
   }
+
+  explicit Scalar(const Tensor & t);
+
 #define DEFINE_IMPLICIT_CTOR(type,name,member) \
   Scalar(type vv) \
   : tag(Tag::HAS_##member) { \
@@ -46,7 +54,7 @@
     if (Tag::HAS_t != tag) {
       return *this;
     }
-    return t.pImpl->localScalar();
+    return v.t->localScalar();
   }
 
 #define DEFINE_ACCESSOR(type,name,member) \
@@ -69,16 +77,7 @@
     } \
   }
 
-  Tensor toTensor() const {
-    if (Tag::HAS_t == tag) {
-      return t;
-    } else if (Tag::HAS_d == tag) {
-      return CPU(kDouble).scalarTensor(*this);
-    } else {
-      assert(Tag::HAS_i == tag);
-      return CPU(kLong).scalarTensor(*this);
-    }
-  }
+  Tensor toTensor() const;
 
   AT_FORALL_SCALAR_TYPES(DEFINE_ACCESSOR)
 
@@ -103,10 +102,8 @@
   union {
     double d;
     int64_t i;
+    TensorImpl* t;
   } v;
-  Tensor t; //Note: cannot be in union be cause of copy/destruct behavior
-            //ideally we try to pack this structure tighter if it becomes
-            //a performance problem.
   friend struct Type;
 };
 
diff --git a/aten/src/ATen/ScalarType.h b/aten/src/ATen/ScalarType.h
new file mode 100644
index 0000000..034c4d0
--- /dev/null
+++ b/aten/src/ATen/ScalarType.h
@@ -0,0 +1,74 @@
+#pragma once
+
+#include <stdint.h>
+
+#include "ATen/ArrayRef.h"
+#include "ATen/ATenGeneral.h"
+#include "ATen/Half.h"
+
+namespace at {
+
+#define AT_FORALL_SCALAR_TYPES(_) \
+_(uint8_t,Byte,i) \
+_(int8_t,Char,i) \
+_(double,Double,d) \
+_(float,Float,d) \
+_(int,Int,i) \
+_(int64_t,Long,i) \
+_(int16_t,Short,i) \
+_(Half,Half,d)
+
+enum class ScalarType {
+#define DEFINE_ENUM(_1,n,_2) \
+  n,
+  AT_FORALL_SCALAR_TYPES(DEFINE_ENUM)
+#undef DEFINE_ENUM
+  NumOptions
+};
+
+enum class Backend {
+  CPU,
+  CUDA,
+  SparseCPU,
+  SparseCUDA,
+  NumOptions
+};
+
+constexpr Backend kCPU = Backend::CPU;
+constexpr Backend kCUDA = Backend::CUDA;
+constexpr Backend kSparseCPU = Backend::SparseCPU;
+constexpr Backend kSparseCUDA = Backend::SparseCUDA;
+
+static inline const char * toString(Backend b) {
+  switch(b) {
+    case Backend::CPU: return "CPU";
+    case Backend::CUDA: return "CUDA";
+    case Backend::SparseCPU: return "SparseCPU";
+    case Backend::SparseCUDA: return "SparseCUDA";
+    default: return "UNKNOWN_BACKEND";
+  }
+}
+
+#define DEFINE_CONSTANT(_,name,_2) \
+constexpr ScalarType k##name = ScalarType::name;
+
+AT_FORALL_SCALAR_TYPES(DEFINE_CONSTANT)
+#undef DEFINE_CONSTANT
+
+static inline const char * toString(ScalarType t) {
+#define DEFINE_CASE(_,name,_2) \
+  case ScalarType:: name : return #name;
+
+  switch(t) {
+    AT_FORALL_SCALAR_TYPES(DEFINE_CASE)
+    default:
+      return "UNKNOWN_SCALAR_TYPE";
+  }
+#undef DEFINE_CASE
+}
+
+struct Tensor;
+typedef ArrayRef<int64_t> IntList;
+typedef ArrayRef<Tensor> TensorList;
+
+} // namespace at
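The X-macro keeps the enum, the k-prefixed constants, and toString in sync. For example:

    ScalarType st = kFloat;           // constexpr alias for ScalarType::Float
    const char * n = toString(st);    // "Float"
    const char * b = toString(kCPU);  // "CPU"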
diff --git a/aten/src/ATen/Storage.h b/aten/src/ATen/Storage.h
index 5554f24..c6aa270 100644
--- a/aten/src/ATen/Storage.h
+++ b/aten/src/ATen/Storage.h
@@ -1,10 +1,11 @@
 #pragma once
 
 #include "ATen/Scalar.h"
-#include "ATen/Type.h"
 
 namespace at {
 
+struct Type;
+
 struct Storage {
   Storage() {}
   Storage(const Storage& other) = delete;
diff --git a/aten/src/ATen/TensorAccessor.h b/aten/src/ATen/TensorAccessor.h
index 6d9cd83..e51af27 100644
--- a/aten/src/ATen/TensorAccessor.h
+++ b/aten/src/ATen/TensorAccessor.h
@@ -1,8 +1,9 @@
 #pragma once
+
 #include <cstddef>
 #include <stdint.h>
 
-#include "ATen/Type.h"
+#include "ATen/ScalarType.h"
 
 namespace at {
 
diff --git a/aten/src/ATen/TensorImpl.h b/aten/src/ATen/TensorImpl.h
index ad038f5..021f0b5 100644
--- a/aten/src/ATen/TensorImpl.h
+++ b/aten/src/ATen/TensorImpl.h
@@ -1,13 +1,15 @@
 #pragma once
 
 #include <atomic>
-
-#include "ATen/Type.h"
 #include <iostream>
+
+#include "ATen/ScalarType.h"
+
 namespace at {
 
 struct Type;
 class Scalar;
+
 struct TensorImpl {
   explicit TensorImpl(Type * type)
   :  refcount(1), is_scalar(false), type_(type) {}
diff --git a/aten/src/ATen/function_wrapper.py b/aten/src/ATen/function_wrapper.py
index fb60797..16c843f 100644
--- a/aten/src/ATen/function_wrapper.py
+++ b/aten/src/ATen/function_wrapper.py
@@ -8,12 +8,12 @@
     string_type = basestring
 
 # temporary things we cannot handle
-EXCLUDE_PATTERN = "bernoulli.*|normal.*|exponential.*|random.*|arange.*"
+EXCLUDE_PATTERN = "bernoulli.*"
 # what has to be done to add an Operation ...
 # 1. if broadcasting or without the full list of arguments, add a non-virtual
 #    declaration under Type.h
 TYPE_METHOD_DECLARATION_NON_VIRTUAL = CodeTemplate("""\
-${return_type} ${method_prefix}${api_name}(${formals}) const;
+${return_type} ${method_prefix}${api_name}(${formals_with_defaults}) const;
 """)
 # 2. broadcasting functions are implemented in Type.cpp
 TYPE_METHOD_DEFINITION_BROADCAST = CodeTemplate("""\
@@ -23,46 +23,40 @@
     return ${method_prefix_derived}${api_name}(${broadcast_modified_actuals});
 }
 """)
-# 3. functions without the full list of arguments are implemented in TypeMethods.h
-TYPE_METHOD_INLINE = CodeTemplate("""\
-inline ${return_type} Type::${method_prefix}${api_name}(${formals}) const {
-    ${return_call}${method_prefix}${api_name}(${actuals_with_constants});
-}
-""")
-# 4. add virtual dispatch declaration to Type.h and default impl to Type.cpp
+# 3. add virtual dispatch declaration to Type.h and default impl to Type.cpp
 TYPE_METHOD_DECLARATION = CodeTemplate("""\
-virtual ${return_type} ${method_prefix}${api_name}(${formals}) const;
+virtual ${return_type} ${method_prefix}${api_name}(${formals_with_defaults}) const;
 """)
 TYPE_METHOD_DEFINITION = CodeTemplate("""\
 ${return_type} Type::${method_prefix}${api_name}(${formals}) const {
     throw std::runtime_error(std::string("${api_name} is not implemented for type ") + toString());
 }
 """)
-# 5. add virtual override to TypeDerived.h
+# 4. add virtual override to TypeDerived.h
 TYPE_DERIVED_DECLARATION = CodeTemplate("""\
 virtual ${return_type} ${method_prefix_derived}${api_name}(${formals}) const override;
 """)
-# 6. add override definition to TypeDerived.cpp
+# 5. add override definition to TypeDerived.cpp
 TYPE_DERIVED_DEFINITION = CodeTemplate("""\
 ${return_type} ${Type}::${method_prefix_derived}${api_name}(${formals}) const {
     ${type_definition_body}
 }
 """)
-# 7. add non-virtual declaration to Tensor.h
+# 6. add non-virtual declaration to Tensor.h
 TENSOR_METHOD_DECLARATION = CodeTemplate("""\
-${return_type} ${api_name}(${method_formals})${const_mark};
+${return_type} ${api_name}(${method_formals_with_defaults})${const_mark};
 """)
-# 8. add non-virtual declaration to Tensor.cpp
+# 7. add non-virtual declaration to Tensor.cpp
 TENSOR_METHOD_DEFINITION = CodeTemplate("""\
 inline ${return_type} Tensor::${api_name}(${method_formals})${const_mark} {
     return type().${method_prefix}${api_name}(${method_actuals});
 }
 """)
-# 9. add a method declaration in Functions.h
+# 8. add a method declaration in Functions.h
 FUNCTION_DECLARATION = CodeTemplate("""\
-static inline ${return_type} ${api_name}(${formals});
+static inline ${return_type} ${api_name}(${formals_with_defaults});
 """)
-# 10. add a method definition in Functions.cpp
+# 9. add a method definition in Functions.h
 FUNCTION_DEFINITION = CodeTemplate("""\
 static inline ${return_type} ${api_name}(${formals}) {
     return ${inferred_type}.${api_name}(${actuals});
@@ -97,7 +91,7 @@
     'THIndexTensor*': 'Tensor &',
     'THIntegerTensor*': 'Tensor &',
     'THStorage*': 'Storage &',
-    'THGenerator*': 'Generator &',
+    'THGenerator*': 'Generator *',
     'THSize*': 'IntList',
     'THStride*': 'IntList',
     'accreal': 'Scalar',
@@ -111,7 +105,7 @@
     'THIndexTensor*': 'IndexTensor',
     'THIntegerTensor*': 'IntegerTensor',
     'THStorage*': 'Storage',
-    'THGenerator*': 'Generator',
+    'THGenerator*': 'Generator*',
     'THSize*': 'IntList',
     'THStride*': 'IntList',
     'accreal': 'accreal',
@@ -143,7 +137,7 @@
         CodeTemplate(
             'checked_cast<${Backend}IntTensor>(${arg_name}.pImpl,"${arg_name}",${arg_pos}, ${null_okay})'),
     'THStorage*': CodeTemplate('checked_cast<${Storage}>(&${arg_name},"${arg_name}",${arg_pos}, false)'),
-    'THGenerator*': CodeTemplate('check_generator<${Backend}Generator>(&${arg_name})'),
+    'THGenerator*': CodeTemplate('check_generator<${Backend}Generator>(${arg_name}, &context->defaultGenerator(backend()))'),
     'THSize*': CodeTemplate('THLongStorageView::make(${arg_name}, true)'),
     'THStride*': CodeTemplate('THLongStorageView::make(${arg_name}, false, true)'),
     'real': CodeTemplate('${arg_name}.to${ScalarName}()'),
@@ -176,17 +170,17 @@
 CONSTANT_REPLACEMENTS = [
     ('AS_REAL', '${AS_REAL}'),
     ('THPDefaultGenerator->cdata',
-     'dynamic_cast<${Generator}&>(context->defaultGenerator(backend())).generator'),
+     'dynamic_cast<${Generator}&>(context->defaultGenerator(backend())).generator'),
     ('__storage_size.get\\(\\)',
      'THLongStorageView::make(static_cast<int64_t>(storage.size()))'),
     ('__last_dim', 'self.ndimension()-1'),
 ]
 
-# Replacements for constants when calling other ATen functions
-INLINE_CONSTANT_REPLACEMENTS = [
+# Replacements for constants in header file function definitions
+HEADER_CONSTANT_REPLACEMENTS = [
     (r'AS_REAL\((.*)\)', r'\1'),
-    ('THPDefaultGenerator->cdata', 'context->defaultGenerator(backend())'),
-    ('__last_dim', 'self.ndimension()-1'),
+    ('THPDefaultGenerator->cdata', 'nullptr'),
+    ('__last_dim', '-1'),
 ]
 
 
@@ -249,12 +243,19 @@
 
     def get_formals(option, include_constants=False):
         seen = set()
-        result = []
+        pos_args = []
+        kwd_args = []
 
         def insert(argument):
             if argument['name'] not in seen:
                 seen.add(argument['name'])
-                result.append(argument)
+                if argument.get('kwarg_only', False):
+                    kwd_args.append(argument)
+                else:
+                    pos_args.append(argument)
+        for argument in option['arguments']:
+            if argument.get('output') and not argument.get('allocate', False):
+                insert(argument)
         for argument in option['arguments']:
             if argument['type'] == 'THSTensor*':
                 # only enable for a subset of Dense/Sparse ops
@@ -265,29 +266,10 @@
                 insert(argument)
             elif is_real_argument_to_wrapper(argument):
                 insert(argument)
-        for argument in option['arguments']:
-            if argument.get('output') and not argument.get('allocate', False):
-                insert(argument)
 
+        result = pos_args + kwd_args
         return [translate_formal(argument, option) for argument in result]
 
-    def get_actuals_with_constants(option):
-        actuals = []
-        for arg in get_formals(option, include_constants=True):
-            if arg['type'] != 'CONSTANT':
-                actuals.append(arg['name'])
-                continue
-            v = str(arg.get('default', arg['name']))
-            for pattern, replacement in INLINE_CONSTANT_REPLACEMENTS:
-                v = re.sub(pattern, replacement, v)
-            if v in {'NULL', 'nullptr'}:
-                if arg['name'] == 'stride':
-                    v = 'IntList()'
-                else:
-                    v = 'Tensor()'
-            actuals.append(v)
-        return actuals
-
     def get_return_types(option):
         ret = option['return']
         if ret['kind'] == 'arguments':
@@ -312,15 +294,29 @@
         return "std::tuple<{}>".format(','.join(r['type'] for r in return_types))
         return return_types
 
-    def find_first_tensor(formals):
+    def find_dispatch_tensor(formals):
+        # dispatch to self if it's a parameter
         for formal in formals:
-            if 'Tensor' == formal['dynamic_type'] or 'TensorList' == formal['dynamic_type']:
+            if formal['name'] == 'self' and formal['dynamic_type'] == 'Tensor':
+                return formal['name']
+        # otherwise dispatch to the first Tensor or TensorList
+        for formal in formals:
+            if 'TensorList' == formal['dynamic_type'] or formal['dynamic_type'] == 'Tensor':
                 return formal['name']
         return None
 
     def format_formal(f):
         return '{} {}'.format(f['type'], f['name'])
 
+    def formal_with_default(f):
+        s = format_formal(f)
+        v = f.get('default')
+        if v is None:
+            return s
+        for pattern, replacement in HEADER_CONSTANT_REPLACEMENTS:
+            v = re.sub(pattern, replacement, str(v))
+        return '{}={}'.format(s, v)
+
     def get_broadcast_argument(option):
         for argument in option['arguments']:
             if argument.get('broadcast'):
@@ -359,6 +355,7 @@
         formals = get_formals(option)
         option['formals_list'] = formals
         option['formals'] = [format_formal(f) for f in formals]
+        option['formals_with_defaults'] = [formal_with_default(f) for f in formals]
         option['returns'] = get_return_types(option)
         option['return_type'] = format_return_type(option['returns'])
         option['return_call'] = 'return ' if option['return_type'] != 'void' else ''
@@ -366,6 +363,8 @@
 
         option['method_formals'] = [format_formal(f) for f in formals
                                     if f['name'] != 'self']
+        option['method_formals_with_defaults'] = (
+            [formal_with_default(f) for f in formals if f['name'] != 'self'])
         option['method_actuals'] = [
             f['name'] if f['name'] != 'self' else '*this' for f in formals]
 
@@ -373,8 +372,8 @@
 
         is_method = 'method' in option['variants']
         is_function = 'function' in option['variants']
-        first_tensor = find_first_tensor(formals)
-        is_namespace_function = is_function and first_tensor is not None
+        dispatch_tensor = find_dispatch_tensor(formals)
+        is_namespace_function = is_function and dispatch_tensor is not None
 
         # method-only things are prefixed with m_ in Type so that
         # another function-only variant can exist without the name colliding
@@ -383,7 +382,7 @@
         env = nested_dict(option, top_env)
 
         broadcast_arg = get_broadcast_argument(option)
-        if broadcast_arg is None and option['has_full_argument_list']:
+        if broadcast_arg is None:
             top_env['type_method_declarations'].append(
                 TYPE_METHOD_DECLARATION.substitute(env))
             top_env['type_method_definitions'].append(
@@ -392,13 +391,6 @@
             top_env['type_method_declarations'].append(
                 TYPE_METHOD_DECLARATION_NON_VIRTUAL.substitute(env))
 
-        if not option['has_full_argument_list']:
-            # functions without the full list of arguments are implemented
-            # inline in TypeMethods.h
-            option['actuals_with_constants'] = get_actuals_with_constants(option)
-            top_env['type_method_inline_definitions'].append(
-                TYPE_METHOD_INLINE.substitute(env))
-        elif broadcast_arg is not None:
             # "s_" for "same size".
             option['method_prefix_derived'] = 's_' + option['method_prefix']
             same_size_option = option.copy()
@@ -434,7 +426,7 @@
             method_of.append('Tensor')
 
         if is_namespace_function:
-            option['inferred_type'] = 'infer_type({})'.format(first_tensor)
+            option['inferred_type'] = 'infer_type({})'.format(dispatch_tensor)
             top_env['function_declarations'].append(
                 FUNCTION_DECLARATION.substitute(env))
             top_env['function_definitions'].append(
@@ -448,7 +440,6 @@
             'method_of': method_of,
             'returns': option['returns'],
             'inplace': option['inplace'],
-            'has_full_argument_list': option['has_full_argument_list'],
         })
 
     output_declarations = []
@@ -476,8 +467,7 @@
         return argument['type'] in CHECKED_CAST
 
     def nullable_argument(argument):
-        return (argument['type'] in {'THIntegerTensor*', 'THTensor*'} and
-                argument.get('default', '') in {'NULL', 'nullptr'})
+        return argument.get('is_nullable', False)
 
     def bool_option_is_string(argument):
         return 'if_true' in argument and isinstance(argument['if_true'], string_type)
@@ -721,7 +711,7 @@
 
     for declaration in declarations:
         for option in declaration['options']:
-            if not option.get('skip', False) and option['has_full_argument_list']:
+            if not option.get('skip', False):
                 try:
                     process_option(option)
                 except NYIError:
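Putting these pieces together, formal_with_default plus HEADER_CONSTANT_REPLACEMENTS turn a cwrap default into a C++ default argument. A sketch for a defaulted generator argument (the normal_ name and surrounding arguments are illustrative, not copied from generated output):

    // cwrap:  arg: THGenerator* generator
    //         default: THPDefaultGenerator->cdata
    //         kwarg_only: True
    // Type.h (sketch): the default becomes nullptr, which check_generator
    // later resolves to the backend's default generator.
    virtual Tensor & normal_(Tensor & self, double mean, double std,
                             Generator * generator=nullptr) const;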
diff --git a/aten/src/ATen/gen.py b/aten/src/ATen/gen.py
index 98f5cf6..1b3f885 100644
--- a/aten/src/ATen/gen.py
+++ b/aten/src/ATen/gen.py
@@ -32,7 +32,6 @@
 TYPE_DERIVED_CPP = CodeTemplate.from_file(TEMPLATE_PATH + "/TypeDerived.cpp")
 TYPE_DERIVED_H = CodeTemplate.from_file(TEMPLATE_PATH + "/TypeDerived.h")
 TYPE_H = CodeTemplate.from_file(TEMPLATE_PATH + "/Type.h")
-TYPE_METHODS_H = CodeTemplate.from_file(TEMPLATE_PATH + "/TypeMethods.h")
 TYPE_CPP = CodeTemplate.from_file(TEMPLATE_PATH + "/Type.cpp")
 
 TENSOR_DERIVED_CPP = CodeTemplate.from_file(
@@ -225,8 +224,6 @@
                 for d in cwrap_parser.parse(file)]
 declarations += nn_parse.run(nn_files)
 declarations = preprocess_declarations.run(declarations)
-# print(yaml.dump(declarations))
-
 for fname, env in generators.items():
     write(fname, GENERATOR_DERIVED.substitute(env))
 
@@ -250,7 +247,6 @@
                 backend, density, scalar_type, declarations))
 
 write('Type.h', TYPE_H.substitute(top_env))
-write('TypeMethods.h', TYPE_METHODS_H.substitute(top_env))
 write('Type.cpp', TYPE_CPP.substitute(top_env))
 
 write('Tensor.h', TENSOR_H.substitute(top_env))
diff --git a/aten/src/ATen/nn_parse.py b/aten/src/ATen/nn_parse.py
index f92b72d..d328fe8 100644
--- a/aten/src/ATen/nn_parse.py
+++ b/aten/src/ATen/nn_parse.py
@@ -14,7 +14,7 @@
         'arg': typ + ' ' + arg.name,
     }
     if arg.is_optional:
-        result['default'] = 'nullptr'
+        result['is_nullable'] = True
     return result
 
 
diff --git a/aten/src/ATen/preprocess_declarations.py b/aten/src/ATen/preprocess_declarations.py
index 6c620c8..23f5115 100644
--- a/aten/src/ATen/preprocess_declarations.py
+++ b/aten/src/ATen/preprocess_declarations.py
@@ -77,7 +77,17 @@
 
 def handle_outputs_taken_as_arguments(options):
     new_options = []
+
+    def is_nullable(arg):
+        return (arg['type'] in {'THIntegerTensor*', 'THTensor*'} and
+                arg.get('default', '') in {'NULL', 'nullptr'})
+
     for option in options:
+        for arg in option['arguments']:
+            # mark arguments which can be null
+            if is_nullable(arg):
+                arg['is_nullable'] = True
+
         if any('output' in arg for arg in option['arguments']):
             allocate_option = deepcopy(option)
             # the allocating option needs to be marked
@@ -196,8 +206,11 @@
     declarations = [d for d in declarations if not exclude(d)]
     for declaration in declarations:
         common_with_cwrap.set_declaration_defaults(declaration)
-        common_with_cwrap.enumerate_options_due_to_default(
-            declaration,
+        declaration['options'] = [deepcopy(o) for o in declaration['options']]
+        for option in declaration['options']:
+            option['has_full_argument_list'] = True
+        declaration['options'] = common_with_cwrap.filter_unique_options(
+            declaration['options'],
             allow_kwarg=False,
             type_to_signature=TYPE_FORMAL_GENERIC,
             remove_self=True)
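Nullability is now an explicit is_nullable flag (set here for NULL-defaulted tensor arguments, and in nn_parse.py for optional THNN arguments) rather than being inferred from the default value downstream. It ultimately feeds the ${null_okay} slot of the checked casts; a sketch with illustrative names:

    // Generated cast for a nullable tensor argument (sketch):
    auto weight_ = checked_cast<CPUFloatTensor>(weight.pImpl, "weight", 3,
                                                /*null_okay=*/true);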
diff --git a/aten/src/ATen/templates/Functions.h b/aten/src/ATen/templates/Functions.h
index d61bc29..fc0deba 100644
--- a/aten/src/ATen/templates/Functions.h
+++ b/aten/src/ATen/templates/Functions.h
@@ -19,6 +19,7 @@
 ${function_declarations}
 
 static inline Type & infer_type(const Tensor & t) {
+  AT_ASSERT(t.defined(), "undefined Tensor");
   return t.type();
 }
 static inline Type & infer_type(const TensorList & tl) {
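The assert turns dispatch on an undefined tensor into a clear error instead of a null pImpl dereference; a sketch:

    Tensor undef;  // default-constructed: no TensorImpl
    // Any free function that dispatches on undef, e.g. add(undef, other),
    // now throws via AT_ASSERT(t.defined(), "undefined Tensor").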
diff --git a/aten/src/ATen/templates/Tensor.h b/aten/src/ATen/templates/Tensor.h
index e96054d..7a3ebe6 100644
--- a/aten/src/ATen/templates/Tensor.h
+++ b/aten/src/ATen/templates/Tensor.h
@@ -1,9 +1,13 @@
 #pragma once
 
-#include "ATen/Type.h"
-#include "ATen/TensorImpl.h"
-#include "ATen/Utils.h"
+#include "ATen/Generator.h"
+#include "ATen/Scalar.h"
+#include "ATen/ScalarType.h"
 #include "ATen/TensorAccessor.h"
+#include "ATen/TensorImpl.h"
+#include "ATen/Storage.h"
+#include "ATen/SparseTensorRef.h"
+#include "ATen/Utils.h"
 
 namespace at {
 struct Type;
@@ -106,22 +110,10 @@
   Type & type() const {
     return pImpl->type();
   }
-  Tensor toType(const Type & t) const {
-    if(type().ID() ==t.ID())
-      return *this;
-    return t.copy(*this);
-  }
-  Tensor & copy_(const Tensor & src) {
-    resize_(src.sizes());
-    type().copy(src,*this);
-    return *this;
-  }
-  Tensor toType(ScalarType t) const {
-    return toType(type().toScalarType(t));
-  }
-  Tensor toBackend(Backend b) const {
-    return toType(type().toBackend(b));
-  }
+  inline Tensor toType(const Type & t) const;
+  inline Tensor & copy_(const Tensor & src);
+  inline Tensor toType(ScalarType t) const;
+  inline Tensor toBackend(Backend b) const;
 
   template<typename T>
   T * data() const;
diff --git a/aten/src/ATen/templates/TensorMethods.h b/aten/src/ATen/templates/TensorMethods.h
index c12365d..cd3adfc 100644
--- a/aten/src/ATen/templates/TensorMethods.h
+++ b/aten/src/ATen/templates/TensorMethods.h
@@ -6,6 +6,27 @@
 
 namespace at {
 
+inline Tensor Tensor::toType(const Type & t) const {
+  if(type().ID() == t.ID())
+    return *this;
+  return t.copy(*this);
+}
+
+inline Tensor & Tensor::copy_(const Tensor & src) {
+  resize_(src.sizes());
+  type().copy(src, *this);
+  return *this;
+}
+
+inline Tensor Tensor::toType(ScalarType t) const {
+  return toType(type().toScalarType(t));
+}
+
+inline Tensor Tensor::toBackend(Backend b) const {
+  return toType(type().toBackend(b));
+}
+
+
 // all static inline to allow for inlining of the non-dynamic part of dispatch
 ${tensor_method_definitions}
 
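The moved definitions behave as before; for example (sketch, CPU backend):

    Tensor a = CPU(kFloat).ones({2, 2});
    Tensor d = a.toType(kDouble);  // type IDs differ, so t.copy(*this) runs
    Tensor e = d.toType(kDouble);  // same type ID: returns *this, no copy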
diff --git a/aten/src/ATen/templates/Type.cpp b/aten/src/ATen/templates/Type.cpp
index b7b2424..7903245 100644
--- a/aten/src/ATen/templates/Type.cpp
+++ b/aten/src/ATen/templates/Type.cpp
@@ -1,5 +1,4 @@
 #include "ATen/Type.h"
-#include "ATen/TypeMethods.h"
 #include "ATen/Tensor.h"
 #include "ATen/Storage.h"
 #include "ATen/Scalar.h"
@@ -53,7 +52,7 @@
 }
 Tensor Type::scalarTensor(Scalar s) const {
   if(s.isBackedByTensor())
-    return s.t.toType(*this);
+    return Tensor(s.v.t, true).toType(*this);
   return tensor({}).fill_(s);
 }
 
diff --git a/aten/src/ATen/templates/Type.h b/aten/src/ATen/templates/Type.h
index 41a095d..704a33b 100644
--- a/aten/src/ATen/templates/Type.h
+++ b/aten/src/ATen/templates/Type.h
@@ -5,8 +5,11 @@
 
 #include "ATen/ATenGeneral.h"
 #include "ATen/ArrayRef.h"
+#include "ATen/Generator.h"
 #include "ATen/Half.h"
 #include "ATen/SparseTensorRef.h"
+#include "ATen/ScalarType.h"
+#include "ATen/Scalar.h"
 
 // To solve the conflict of s_addr in inaddr.h
 #ifdef _MSC_VER
@@ -19,42 +22,8 @@
 
 class Context;
 struct Storage;
-struct Tensor;
-class Scalar;
 struct Generator;
 
-#define AT_FORALL_SCALAR_TYPES(_) \
-_(uint8_t,Byte,i) \
-_(int8_t,Char,i) \
-_(double,Double,d) \
-_(float,Float,d) \
-_(int,Int,i) \
-_(int64_t,Long,i) \
-_(int16_t,Short,i) \
-_(Half,Half,d)
-
-enum class ScalarType {
-#define DEFINE_ENUM(_1,n,_2) \
-  n,
-  AT_FORALL_SCALAR_TYPES(DEFINE_ENUM)
-#undef DEFINE_ENUM
-  NumOptions
-};
-
-enum class Backend {
-  CPU,
-  CUDA,
-  SparseCPU,
-  SparseCUDA,
-  NumOptions
-};
-
-
-constexpr Backend kCPU = Backend::CPU;
-constexpr Backend kCUDA = Backend::CUDA;
-constexpr Backend kSparseCPU = Backend::SparseCPU;
-constexpr Backend kSparseCUDA = Backend::SparseCUDA;
-
 // Note [Undefined-dim versus 0-dim]
 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 // Unlike Torch, ATen treats zero-dimension tensors as having ONE
@@ -67,43 +36,12 @@
 // situation.
 constexpr int64_t kUndefinedDimensions = std::numeric_limits<int64_t>::min();
 
-static inline const char * toString(Backend b) {
-  switch(b) {
-    case Backend::CPU: return "CPU";
-    case Backend::CUDA: return "CUDA";
-    case Backend::SparseCPU: return "SparseCPU";
-    case Backend::SparseCUDA: return "SparseCUDA";
-    default: return "UNKNOWN_BACKEND";
-  }
-}
-
-#define DEFINE_CONSTANT(_,name,_2) \
-constexpr ScalarType k##name = ScalarType::name;
-
-AT_FORALL_SCALAR_TYPES(DEFINE_CONSTANT)
-#undef DEFINE_CONSTANT
-
-static inline const char * toString(ScalarType t) {
-#define DEFINE_CASE(_,name,_2) \
-  case ScalarType:: name : return #name;
-
-  switch(t) {
-    AT_FORALL_SCALAR_TYPES(DEFINE_CASE)
-    default:
-      return "UNKNOWN_SCALAR_TYPE";
-  }
-#undef DEFINE_CASE
-}
-
 enum class TypeID {
   ${type_ids}
   NumOptions
 };
 
 
-typedef ArrayRef<int64_t> IntList;
-typedef ArrayRef<Tensor> TensorList;
-
 struct ATen_CLASS Type {
   explicit Type(Context * context)
   : context(context) {}
diff --git a/aten/src/ATen/templates/TypeMethods.h b/aten/src/ATen/templates/TypeMethods.h
deleted file mode 100644
index 80875d2..0000000
--- a/aten/src/ATen/templates/TypeMethods.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#pragma once
-
-#include "ATen/Type.h"
-#include "ATen/Context.h"
-#include "ATen/Scalar.h"
-
-namespace at {
-
-// inline non-dynamic type method definitions
-${type_method_inline_definitions}
-
-} //namespace at
diff --git a/aten/src/ATen/test/atest.cpp b/aten/src/ATen/test/atest.cpp
index bfe9c41..d44ae12 100644
--- a/aten/src/ATen/test/atest.cpp
+++ b/aten/src/ATen/test/atest.cpp
@@ -33,7 +33,7 @@
   bool threw = false;
   try {
     Tensor no;
-    add_out(foo,foo,no);
+    add_out(no,foo,foo);
   } catch (std::runtime_error&) {
     threw = true;
   }
diff --git a/aten/src/ATen/test/basic.cpp b/aten/src/ATen/test/basic.cpp
index 81716aa..3d45729 100644
--- a/aten/src/ATen/test/basic.cpp
+++ b/aten/src/ATen/test/basic.cpp
@@ -93,7 +93,7 @@
     Tensor d = type.ones({3, 4});
     Tensor r = type.zeros({3,4});
     for(auto i = 0; i < 100000; i++) {
-      add_out(r, d, r);
+      add_out(r, r, d);
     }
     auto end = std::chrono::high_resolution_clock::now();
     std::cout << std::dec << "   " << std::chrono::duration_cast<std::chrono::milliseconds>(end-begin).count() << " ms" << std::endl;
@@ -132,7 +132,7 @@
     std::cout << a << std::endl;
     std::cout << b << std::endl;
     std::cout << c << std::endl;
-    ASSERT(c.equal(addmv(0, type.zeros({3}), 1, a,b)));
+    ASSERT(c.equal(addmv(type.zeros({3}), a, b, 0, 1)));
   }
 
   {
diff --git a/aten/src/meter/ClassErrorMeter.cc b/aten/src/meter/ClassErrorMeter.cc
index 1412b42..bedfd22 100644
--- a/aten/src/meter/ClassErrorMeter.cc
+++ b/aten/src/meter/ClassErrorMeter.cc
@@ -17,7 +17,7 @@
 }
 
 void ClassErrorMeter::reset() {
-  range_out(1,numel(topkval_),topkval_);
+  range_out(topkval_, 1, numel(topkval_));
   sumval_.fill_(0.);
   n_ = 0;
 }