Add CUDA support for arange

Also enables CUDA support for torch.range, which is deprecated in favor of torch.arange.
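
Usage sketch (not part of the patch): with the CUDA backend enabled for arange, the out= tensor can be a CUDA tensor. This mirrors the new test below and assumes a CUDA-capable build.

    import torch

    cpu_out = torch.FloatTensor()
    torch.arange(0, 10, out=cpu_out)   # CPU path, unchanged

    gpu_out = torch.cuda.FloatTensor()
    torch.arange(0, 10, out=gpu_out)   # now dispatches to the CUDA backend

    # Same values; gpu_out is resident on the GPU.
    print(cpu_out)
    print(gpu_out)
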
diff --git a/test/test_cuda.py b/test/test_cuda.py
index 4c3a0d4..814a7d6 100644
--- a/test/test_cuda.py
+++ b/test/test_cuda.py
@@ -880,6 +880,14 @@
     def test_tensor_scatterFill(self):
         TestTorch._test_scatter_base(self, lambda t: t.cuda(), 'scatter_', True, test_bounds=False)
 
+    def test_arange(self):
+        for t in ['IntTensor', 'LongTensor', 'FloatTensor', 'DoubleTensor']:
+            a = torch.cuda.__dict__[t]()
+            torch.arange(0, 10, out=a)
+            b = torch.__dict__[t]()
+            torch.arange(0, 10, out=b)
+            self.assertEqual(a, b.cuda())
+
     def test_nvtx(self):
         # Just making sure we can see the symbols
         torch.cuda.nvtx.range_push("foo")
diff --git a/torch/csrc/generic/methods/Tensor.cwrap b/torch/csrc/generic/methods/Tensor.cwrap
index c5f8d16..e578cb7 100644
--- a/torch/csrc/generic/methods/Tensor.cwrap
+++ b/torch/csrc/generic/methods/Tensor.cwrap
@@ -670,6 +670,7 @@
     - function
   backends:
     - CPU
+    - CUDA
   return: argument 0
   before_arg_assign: |
     PyErr_WarnEx(PyExc_UserWarning, "torch.range is deprecated in favor of torch.arange "
@@ -690,6 +691,7 @@
     - function
   backends:
     - CPU
+    - CUDA
   return: argument 0
   options:
       - arguments: