Add ONNX symbolics for several ops (#3956)
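
This adds ONNX symbolics for Selu, Softplus, AveragePool (avg_pool3d), Abs and Pow,
simplifies prelu (previously marked as dead) to always emit PRelu, and drops the
unused inplace argument from the threshold symbolic.

A minimal sketch (illustrative, not part of this patch) of a module that should
exercise the new symbolics when exported; the module, input shape and file name
are assumptions:

    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    from torch.autograd import Variable

    class Toy(nn.Module):
        def forward(self, x):
            x = F.selu(x)                      # should hit the new Selu symbolic
            x = F.softplus(x)                  # Softplus; beta left at its default of 1
            x = F.avg_pool3d(x, 2)             # AveragePool via avg_pool3d/_triple
            return torch.pow(torch.abs(x), x)  # Abs and Pow

    dummy = Variable(torch.randn(1, 3, 8, 8, 8))
    torch.onnx.export(Toy(), dummy, "toy.onnx", verbose=True)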

diff --git a/torch/nn/_functions/thnn/activation.py b/torch/nn/_functions/thnn/activation.py
index 054476c..358360c 100644
--- a/torch/nn/_functions/thnn/activation.py
+++ b/torch/nn/_functions/thnn/activation.py
@@ -85,6 +85,10 @@
     scale = 1.0507009873554804934193349852946
 
     @staticmethod
+    def symbolic(g, input, inplace):
+        return g.op("Selu", input)
+
+    @staticmethod
     def forward(ctx, input, inplace):
         backend = type2backend[type(input)]
         if inplace:
diff --git a/torch/onnx/symbolic.py b/torch/onnx/symbolic.py
index b4b9df6..2f983b0 100644
--- a/torch/onnx/symbolic.py
+++ b/torch/onnx/symbolic.py
@@ -1,6 +1,6 @@
 import torch
 from torch.autograd._functions.utils import check_onnx_broadcast  # TODO: move me
-from torch.nn.modules.utils import _pair
+from torch.nn.modules.utils import _pair, _triple
 import warnings
 
 # EDITING THIS FILE? READ THIS FIRST!
@@ -205,20 +205,17 @@
     return g.op("Squeeze", self, axes_i=dims)
 
 
-# NB: This appears to be dead at the moment
-def prelu(g, input, weight):
-    if all(s == 1 for s in weight.type().sizes()):
-        return _unimplemented("prelu", "single weight shared among input channels")
-    return g.op("PRelu", input, weight)
+def prelu(g, self, weight):
+    return g.op("PRelu", self, weight)
 
 
-def threshold(g, input, threshold, value, inplace=False):
+def threshold(g, self, threshold, value):
     # See Note [Export inplace]
     if _scalar(threshold) != 0:
         return _unimplemented("threshold", "non-zero threshold")
     if _scalar(value) != 0:
         return _unimplemented("threshold", "non-zero value")
-    return g.op("Relu", input)
+    return g.op("Relu", self)
 
 
 def leaky_relu(g, input, negative_slope, inplace=False):
@@ -238,6 +235,12 @@
     return g.op('Softmax', input, axis_i=dim)
 
 
+def softplus(g, self, beta, threshold):
+    if beta != 1:
+        return _unimplemented("beta", "has to be 1")
+    return g.op('Softplus', self)
+
+
 def max_pool2d(g, input, kernel_size, stride, padding, dilation, ceil_mode):
     if ceil_mode:
         return _unimplemented("max_pool2d", "ceil_mode")
@@ -264,6 +267,18 @@
                 pads_i=_pair(padding))
 
 
+def avg_pool3d(g, input, kernel_size, stride, padding, ceil_mode, count_include_pad):
+    if ceil_mode:
+        return _unimplemented("avg_pool3d", "ceil_mode")
+    if not stride:
+        stride = kernel_size
+    # TODO: What about count_include_pad?!
+    return g.op("AveragePool", input,
+                kernel_shape_i=_triple(kernel_size),
+                strides_i=_triple(stride),
+                pads_i=_triple(padding))
+
+
 def log_softmax(g, input, dim=None):
     return g.op("Log", g.op('Softmax', input, axis_i=dim).setTypeAs(input))
 
@@ -280,3 +295,11 @@
 # ignore clone operators that are inserted by PyTorch autograd
 def clone(g, input):
     return input
+
+
+def abs(g, self):
+    return g.op("Abs", self)
+
+
+def pow(g, self, exponent):
+    return g.op("Pow", self, exponent)