First commit for dynamic CPU dispatch: general framework in place (need to create dispatch tables and stubs for all functions and make impls have hidden linkage)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5fe6c89..7a55568 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -399,6 +399,8 @@
   generic/THTensorRandom.h
   generic/THVectorDispatch.c
   generic/THVector.h
+  generic/THTensorMathDispatch.c
+  generic/THTensorMathDispatch.h
   DESTINATION "${TH_INSTALL_INCLUDE_SUBDIR}/TH/generic")
 
 
diff --git a/THTensor.c b/THTensor.c
index 37071df..071cdb9 100644
--- a/THTensor.c
+++ b/THTensor.c
@@ -26,6 +26,9 @@
 #include "generic/THTensorMath.c"
 #include "THGenerateAllTypes.h"
 
+#include "generic/THTensorMathDispatch.c"
+#include "THGenerateAllTypes.h"
+
 #include "generic/THTensorConv.c"
 #include "THGenerateAllTypes.h"
 
diff --git a/THTensor.h b/THTensor.h
index d2a1c57..05a52d8 100644
--- a/THTensor.h
+++ b/THTensor.h
@@ -31,6 +31,9 @@
 #include "generic/THTensorMath.h"
 #include "THGenerateAllTypes.h"
 
+#include "generic/THTensorMathDispatch.h"
+#include "THGenerateAllTypes.h"
+
 /* convolutions */
 #include "generic/THTensorConv.h"
 #include "THGenerateAllTypes.h"
diff --git a/generic/THTensorMathDispatch.c b/generic/THTensorMathDispatch.c
new file mode 100644
index 0000000..174ebc5
--- /dev/null
+++ b/generic/THTensorMathDispatch.c
@@ -0,0 +1,50 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/THTensorMathDispatch.c"
+#else
+
+#include "simd/simd.h"
+
+// NOTE: This file holds the static dispatch function pointers, which are set by
+// the initialization call, THTensor_(cpuDispatchInit).  It also defines the
+// externally linked dispatch stubs, which delegate to those pointers.  The
+// dispatch stubs are the symbols that clients call.
+//
+// TODO: this file still needs a way to reach every SIMD implementation.
+
+// Dispatch pointers.  THTensor_(cpuDispatchInit) repoints each one at the implementation
+// selected for the host; each starts at the default so its stub is usable before that call.
+static void (*THTensor_(dispatchPtrAdd))(THTensor*, THTensor*, real) = THTensor_(add_Default);
+
+// Dispatch stub for add: forwards r_, t and value to whatever the add dispatch pointer targets.
+TH_API void THTensor_(add)(THTensor *r_, THTensor *t, real value)
+{
+  (*THTensor_(dispatchPtrAdd))(r_, t, value);
+}
+
+
+// Dispatch table for add: one entry per implementation, paired with the SIMD extension tag
+// that THTensor_(cpuDispatchInit) tests against the host.  The first supported entry wins and
+// the last entry is used when none match, so keep DEFAULT last.  SIMD entries are disabled.
+FunctionDescription THTensor_(dispatchTblAdd)[] = {
+  //FUNCTION_IMPL(THTensor_(add_AVX2), AVX2),
+  //FUNCTION_IMPL(THTensor_(add_AVX), AVX),
+  //FUNCTION_IMPL(THTensor_(add_SSE), SSE),
+  FUNCTION_IMPL((void *)THTensor_(add_Default), DEFAULT)
+};
+
+
+// Selects the add implementation for this host from its dispatch table.  Always returns 0.
+int THTensor_(cpuDispatchInit)(void)
+{
+  const uint32_t hostSimdExts = detectHostSIMDExtensions();
+  const FunctionDescription *impl = THTensor_(dispatchTblAdd);
+  const FunctionDescription *const last = impl + sizeof(THTensor_(dispatchTblAdd)) / sizeof(*impl) - 1;
+  // Skip entries the host lacks support for; the last entry is the fallback and is never tested.
+  while (impl != last && !(impl->supportedSimdExt & hostSimdExts)) {
+    ++impl;
+  }
+  THTensor_(dispatchPtrAdd) = impl->function;
+  return 0;
+}
+
+#endif
diff --git a/generic/THTensorMathDispatch.h b/generic/THTensorMathDispatch.h
new file mode 100644
index 0000000..e9e350c
--- /dev/null
+++ b/generic/THTensorMathDispatch.h
@@ -0,0 +1,12 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/THTensorMathDispatch.h"
+#else
+
+// NOTE: This header declares the dispatch stubs that clients actually call; they are
+// defined in generic/THTensorMathDispatch.c
+
+TH_API int THTensor_(cpuDispatchInit)(void);  // sets up dispatch for the host CPU; returns 0
+// Dispatch stub for add; forwards r_, t and value to the implementation chosen for the host.
+TH_API void THTensor_(add)(THTensor *r_, THTensor *t, real value);
+
+#endif