Enable building of FakeLowP ops (#36170)

Summary:
We open-sourced the FakeLowP ops as a reference implementation of fp16 ops. This PR makes them buildable.

```
USE_CUDA=0 USE_ROCM=0 USE_FAKELOWP=ON python setup.py install
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/36170

Test Plan:
Build Onnxifi library in Glow.
```
cp ${GLOW}/build/lib/Onnxifi/libonnxifi-glow.so ${MY_PATH}/libonnxifi.so
LD_LIBRARY_PATH=${MY_PATH} python pytorch/caffe2/contrib/fakelowp/test/test_sls_nnpi_fp16.py
```

It doesn't run successfully right now because we need to open source the glow gflags and some other ops like `FbgemmPack`.

Reviewed By: houseroad

Differential Revision: D20980681

Pulled By: yinghai

fbshipit-source-id: 6dd31883a985850a77261bcc527029479bbc303f
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1068684..e7ee06b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -147,6 +147,7 @@
     USE_STATIC_CUDNN "Use cuDNN static libraries" OFF
     "USE_CUDNN" OFF)
 option(USE_FBGEMM "Use FBGEMM (quantized 8-bit server operators)" ON)
+option(USE_FAKELOWP "Use FakeLowp operators" OFF)
 option(USE_FFMPEG "Use ffmpeg" OFF)
 option(USE_GFLAGS "Use GFLAGS" OFF)
 option(USE_GLOG "Use GLOG" OFF)
diff --git a/caffe2/contrib/CMakeLists.txt b/caffe2/contrib/CMakeLists.txt
index 9122305..f7e7521 100644
--- a/caffe2/contrib/CMakeLists.txt
+++ b/caffe2/contrib/CMakeLists.txt
@@ -3,6 +3,7 @@
 add_subdirectory(opencl)
 add_subdirectory(prof)
 add_subdirectory(shm_mutex)
+add_subdirectory(fakelowp)
 if(USE_TENSORRT)
 add_subdirectory(tensorrt)
 endif()
diff --git a/caffe2/contrib/fakelowp/CMakeLists.txt b/caffe2/contrib/fakelowp/CMakeLists.txt
new file mode 100644
index 0000000..9a83ee6
--- /dev/null
+++ b/caffe2/contrib/fakelowp/CMakeLists.txt
@@ -0,0 +1,53 @@
+# Builds the FakeLowP operators as a static library and registers its
+# interface library for whole-linking when USE_FAKELOWP is enabled.
+if(USE_FAKELOWP)
+  message(STATUS "Including FAKELOWP operators")
+
+  # ---[ CPU files.
+  file(GLOB_RECURSE tmp *.cc)
+  set(FAKELOWP_CPU_SRCS ${FAKELOWP_CPU_SRCS} ${tmp})
+  # Exclude test files.
+  file(GLOB_RECURSE tmp *_test.cc)
+  exclude(FAKELOWP_CPU_SRCS "${FAKELOWP_CPU_SRCS}" ${tmp})
+
+  # The FakeLowP ops are compiled with AVX2/FMA/F16C flags, so they are only
+  # built if the compiler supports AVX2 extensions.
+  if(CAFFE2_COMPILER_SUPPORTS_AVX2_EXTENSIONS)
+    add_library(Caffe2_fakelowp_ops STATIC ${FAKELOWP_CPU_SRCS})
+    # add_dependencies only orders the build (it does not link): these
+    # targets must be built before the FakeLowP sources are compiled.
+    add_dependencies(Caffe2_fakelowp_ops fbgemm Caffe2_PROTO c10)
+    target_include_directories(Caffe2_fakelowp_ops BEFORE
+      PRIVATE $<BUILD_INTERFACE:${FBGEMM_SOURCE_DIR}/include>)
+    if(MSVC AND NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
+      target_compile_options(Caffe2_fakelowp_ops
+          PRIVATE "/arch:AVX2"
+          PRIVATE "/D__FMA__"
+          PRIVATE "/D__F16C__")
+    else()
+      target_compile_options(Caffe2_fakelowp_ops
+          PRIVATE "-mavx2"
+          PRIVATE "-mfma"
+          PRIVATE "-mavx"
+          PRIVATE "-mf16c")
+    endif()
+    caffe2_interface_library(
+      Caffe2_fakelowp_ops Caffe2_fakelowp_ops_interface)
+    list(APPEND
+      Caffe2_DEPENDENCY_WHOLE_LINK_LIBS
+      "Caffe2_fakelowp_ops_interface")
+  else()
+    # Do not skip silently: the option was requested but cannot be honored.
+    message(WARNING
+      "USE_FAKELOWP is ON but the compiler does not support AVX2 extensions. "
+      "FakeLowP operators will not be built.")
+  endif()
+else()
+  message(STATUS "Excluding FakeLowP operators")
+endif()
+
+# Propagate the (possibly extended) whole-link library list to the parent
+# directory scope.
+set(Caffe2_DEPENDENCY_WHOLE_LINK_LIBS
+    ${Caffe2_DEPENDENCY_WHOLE_LINK_LIBS}
+    PARENT_SCOPE)
diff --git a/caffe2/python/fakelowp/test_batchmatmul_nnpi_fp16.py b/caffe2/contrib/fakelowp/test/test_batchmatmul_nnpi_fp16.py
similarity index 100%
rename from caffe2/python/fakelowp/test_batchmatmul_nnpi_fp16.py
rename to caffe2/contrib/fakelowp/test/test_batchmatmul_nnpi_fp16.py
diff --git a/caffe2/python/fakelowp/test_batchnorm_nnpi_fp16.py b/caffe2/contrib/fakelowp/test/test_batchnorm_nnpi_fp16.py
similarity index 100%
rename from caffe2/python/fakelowp/test_batchnorm_nnpi_fp16.py
rename to caffe2/contrib/fakelowp/test/test_batchnorm_nnpi_fp16.py
diff --git a/caffe2/python/fakelowp/test_fc_nnpi_fp16.py b/caffe2/contrib/fakelowp/test/test_fc_nnpi_fp16.py
similarity index 100%
rename from caffe2/python/fakelowp/test_fc_nnpi_fp16.py
rename to caffe2/contrib/fakelowp/test/test_fc_nnpi_fp16.py
diff --git a/caffe2/python/fakelowp/test_op_nnpi_fp16.py b/caffe2/contrib/fakelowp/test/test_op_nnpi_fp16.py
similarity index 100%
rename from caffe2/python/fakelowp/test_op_nnpi_fp16.py
rename to caffe2/contrib/fakelowp/test/test_op_nnpi_fp16.py
diff --git a/caffe2/python/fakelowp/test_sls_4bit_nnpi_fp16.py b/caffe2/contrib/fakelowp/test/test_sls_4bit_nnpi_fp16.py
similarity index 100%
rename from caffe2/python/fakelowp/test_sls_4bit_nnpi_fp16.py
rename to caffe2/contrib/fakelowp/test/test_sls_4bit_nnpi_fp16.py
diff --git a/caffe2/python/fakelowp/test_sls_nnpi_fp16.py b/caffe2/contrib/fakelowp/test/test_sls_nnpi_fp16.py
similarity index 100%
rename from caffe2/python/fakelowp/test_sls_nnpi_fp16.py
rename to caffe2/contrib/fakelowp/test/test_sls_nnpi_fp16.py
diff --git a/caffe2/python/fakelowp/__init__.py b/caffe2/python/fakelowp/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/caffe2/python/fakelowp/__init__.py
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 64a9cde..51c4bb7 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -591,6 +591,13 @@
   caffe2_update_option(USE_FBGEMM ON)
 else()
   caffe2_update_option(USE_FBGEMM OFF)
+  # FakeLowP depends on FBGEMM. Only warn when FakeLowP was actually requested;
+  # otherwise every build that reaches this branch emits a spurious warning.
+  if(USE_FAKELOWP)
+    message(WARNING
+      "Turning USE_FAKELOWP off as it depends on USE_FBGEMM.")
+  endif()
+  caffe2_update_option(USE_FAKELOWP OFF)
 endif()
 
 
diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake
index 55c13a8..ec2cc87 100644
--- a/cmake/Summary.cmake
+++ b/cmake/Summary.cmake
@@ -88,6 +88,7 @@
   message(STATUS "  USE_ROCM              : ${USE_ROCM}")
   message(STATUS "  USE_EIGEN_FOR_BLAS    : ${CAFFE2_USE_EIGEN_FOR_BLAS}")
   message(STATUS "  USE_FBGEMM            : ${USE_FBGEMM}")
+  message(STATUS "    USE_FAKELOWP          : ${USE_FAKELOWP}")
   message(STATUS "  USE_FFMPEG            : ${USE_FFMPEG}")
   message(STATUS "  USE_GFLAGS            : ${USE_GFLAGS}")
   message(STATUS "  USE_GLOG              : ${USE_GLOG}")