Gate engine=NNPACK with nnp_initialize

Summary:
We're observing unexplained failures in some NNPACK-related tests on Travis, and only with gcc5: https://travis-ci.org/caffe2/caffe2/jobs/288804879

Marat suggested that the machine may not have AVX2 support.

Right now the gating happens only for FB-internal builds. I think it makes sense to make the gating generic. Calling `nnp_initialize` seems like the right way to do so: it returns failure if the hardware is not supported and is a noop after the first call.

Reviewed By: Maratyszcza

Differential Revision: D6073808

fbshipit-source-id: e684668628b5c635368351114b6c502d2cc81fe4
diff --git a/caffe2/contrib/nnpack/nnpack_ops.cc b/caffe2/contrib/nnpack/nnpack_ops.cc
index 611238e..dba966b 100644
--- a/caffe2/contrib/nnpack/nnpack_ops.cc
+++ b/caffe2/contrib/nnpack/nnpack_ops.cc
@@ -44,6 +44,13 @@
 
 namespace {
 
+bool has_nnpack() {
+  // nnp_initialize reports failure on unsupported hardware and is a noop after
+  // the first call, so it's safe to invoke it repeatedly.
+  auto nnpack_status = nnp_initialize();
+  return nnpack_status == nnp_status_success;
+}
+
 nnp_convolution_algorithm get_nnp_convolution_algorithm(
     const std::string& algo) {
   if (algo == "AUTO") {
@@ -117,11 +124,9 @@
     OPERATOR_NEEDS_FEATURE(
         dilation_h() == 1 && dilation_w() == 1,
         "The NNPack convolution does not support dilation yet.");
-#ifdef CAFFE2_USE_FBCODE
-    // Facebook's nnpack build assumes existence of avx2, so we explicitly
-    // check if the machine has avx2 support.
-    OPERATOR_NEEDS_FEATURE(GetCpuId().avx2(), "NNPack requires AVX2");
-#endif
+    // NNPACK can be built with avx2 support only and might not be able to run
+    // on a given machine.
+    OPERATOR_NEEDS_FEATURE(has_nnpack(), "NNPack can't run here. No AVX2?");
   }
 
   bool RunOnDeviceWithOrderNCHW() override {
@@ -252,11 +257,9 @@
     OPERATOR_NEEDS_FEATURE(
         this->pad_b() == 0,
         "NNPack Pooling differs from Caffe2 Pooling when pad > 0!");
-#ifdef CAFFE2_USE_FBCODE
-    // Facebook's nnpack build assumes existence of avx2, so we explicitly
-    // check if the machine has avx2 support.
-    OPERATOR_NEEDS_FEATURE(GetCpuId().avx2(), "NNPack requires AVX2");
-#endif
+    // NNPACK can be built with avx2 support only and might not be able to run
+    // on a given machine.
+    OPERATOR_NEEDS_FEATURE(has_nnpack(), "NNPack can't run here. No AVX2?");
   }
 
   bool RunOnDeviceWithOrderNCHW() override {
@@ -307,11 +310,9 @@
  public:
   NNPACKReluOp(const OperatorDef& operator_def, Workspace* ws)
       : Operator<CPUContext>(operator_def, ws) {
-#ifdef CAFFE2_USE_FBCODE
-    // Facebook's nnpack build assumes existence of avx2, so we explicitly
-    // check if the machine has avx2 support.
-    OPERATOR_NEEDS_FEATURE(GetCpuId().avx2(), "NNPack requires AVX2");
-#endif
+    // NNPACK can be built with avx2 support only and might not be able to run
+    // on a given machine.
+    OPERATOR_NEEDS_FEATURE(has_nnpack(), "NNPack can't run here. No AVX2?");
   }
 
   bool RunOnDevice() override {
@@ -335,11 +336,9 @@
  public:
   NNPACKLeakyReluOp(const OperatorDef& operator_def, Workspace* ws)
       : LeakyReluOp<float, CPUContext>(operator_def, ws) {
-#ifdef CAFFE2_USE_FBCODE
-    // Facebook's nnpack build assumes existence of avx2, so we explicitly
-    // check if the machine has avx2 support.
-    OPERATOR_NEEDS_FEATURE(GetCpuId().avx2(), "NNPack requires AVX2");
-#endif
+    // NNPACK can be built with avx2 support only and might not be able to run
+    // on a given machine.
+    OPERATOR_NEEDS_FEATURE(has_nnpack(), "NNPack can't run here. No AVX2?");
   }
 
   bool RunOnDevice() override {