| #ifndef CAFFE2_UTILS_MATH_H_ |
| #define CAFFE2_UTILS_MATH_H_ |
| // This is a simple translation from the old Caffe math interfaces. We aim to |
| // still keep it simple, so all platforms would be able to support it fairly |
| // easily. |
| |
| // We include the cblas header here so that we can obtain the macros from cblas. |
| extern "C" { |
| #include "caffe2/utils/cblas.h" |
| } |
| |
| #ifdef CAFFE2_USE_ACCELERATE |
| #include <Accelerate/Accelerate.h> |
| #endif // CAFFE2_USE_ACCELERATE |
| |
| #include "caffe2/core/common.h" |
| #include "caffe2/core/types.h" |
| #include "Eigen/Core" |
| #include "Eigen/Dense" |
| |
| namespace caffe2 { |
| |
// Tag type with no members. It is the default argument of the `Engine`
// template parameter on the math routines declared further down (Gemm,
// GemmEx, Gemv) and stands for "no specific engine requested".
class DefaultEngine {};
| |
| // Common Eigen types that we will often use |
| template <typename T> |
| using EigenMatrixMap = |
| Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> >; |
| template <typename T> |
| using EigenArrayMap = |
| Eigen::Map<Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic> >; |
| template <typename T> |
| using EigenVectorMap = Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, 1> >; |
| template <typename T> |
| using EigenVectorArrayMap = Eigen::Map<Eigen::Array<T, Eigen::Dynamic, 1> >; |
| template <typename T> |
| using ConstEigenMatrixMap = |
| Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> >; |
| template <typename T> |
| using ConstEigenArrayMap = |
| Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic> >; |
| template <typename T> |
| using ConstEigenVectorMap = |
| Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1> >; |
| template <typename T> |
| using ConstEigenVectorArrayMap = |
| Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, 1> >; |
| |
| namespace math { |
| |
// Unary routines over N-element buffers: each one reads from x, writes to y,
// and receives the Context it should run with. Only the declarations live
// here; going by the names these are the element-wise exponential, logarithm
// and square -- confirm against the per-context definitions.
template <typename T, class Context>
void Exp(const int N, const T* x, T* y, Context* context);

template <typename T, class Context>
void Log(const int N, const T* x, T* y, Context* context);

template <typename T, class Context>
void Sqr(const int N, const T* x, T* y, Context* context);
| |
// Unary routine taking an N-element input x and an output buffer y. Note that
// the output is T*, not bool*. Presumably an element-wise logical negation --
// confirm against the definition, which is not part of this header.
template <typename T, class Context>
void Not(const int N, const T* x, T* y, Context* context);
| |
// Takes an N-element input a, a single scalar b and an output buffer y.
// Presumably y[i] = a[i] raised to the power b -- confirm against the
// definition, which is not part of this header.
template <typename T, class Context>
void Powx(const int N, const T* a, const T b, T* y, Context* context);
| |
// Declares the binary operators whose output buffer is bool. For every
// operator Op listed below the macro emits two function templates:
//   Op(N, a, b, y, context)          - inputs a and b of type T, output y.
//   OpToRow(M, N, a, b, y, context)  - same pointers, with sizes M and N.
// NOTE(review): the ToRow form presumably broadcasts b over the rows of an
// M x N input; the declarations alone do not establish that -- confirm in the
// definitions. The macro is #undef'd right after use so it does not leak into
// files that include this header.
#define CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(OpName) \
  template <typename T, class Context> \
  void OpName( \
      const int N, const T* a, const T* b, bool* y, Context* context); \
  template <typename T, class Context> \
  void OpName##ToRow( \
      const int M, \
      const int N, \
      const T* a, \
      const T* b, \
      bool* y, \
      Context* context);

// Comparison operators.
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(LT);
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(LE);
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(GT);
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(GE);

// Logical operators.
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(And);
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(Or);
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(Xor);

#undef CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT
| |
// Declares the arithmetic binary operators whose output has the input type T.
// For every operator Op listed below the macro emits four function templates:
//   Op(N, a, b, y, context)          - inputs a and b, output y.
//   OpToRow(M, N, a, b, y, context)  - two inputs, with sizes M and N.
//   OpToRow(M, N, x, y, context)     - overload with a single input x.
//   OpToCol(M, N, x, y, context)     - column counterpart of the overload
//                                      above; there is no two-input ToCol.
// NOTE(review): the single-input forms presumably update y in place and the
// ToRow/ToCol suffix presumably selects the broadcast direction over an
// M x N matrix -- neither is visible from the declarations, so confirm in the
// definitions. The macro is #undef'd right after use.
#define CAFFE2_DECLARE_BINARY_OP(OpName) \
  template <typename T, class Context> \
  void OpName(const int N, const T* a, const T* b, T* y, Context* context); \
  template <typename T, class Context> \
  void OpName##ToRow( \
      const int M, \
      const int N, \
      const T* a, \
      const T* b, \
      T* y, \
      Context* context); \
  template <typename T, class Context> \
  void OpName##ToRow( \
      const int M, const int N, const T* x, T* y, Context* context); \
  template <typename T, class Context> \
  void OpName##ToCol( \
      const int M, const int N, const T* x, T* y, Context* context);

CAFFE2_DECLARE_BINARY_OP(Add);
CAFFE2_DECLARE_BINARY_OP(Sub);
CAFFE2_DECLARE_BINARY_OP(Mul);
CAFFE2_DECLARE_BINARY_OP(Div);

#undef CAFFE2_DECLARE_BINARY_OP
| |
// Computes the row-wise max of an N*D matrix x and writes it to the
// N-dimensional vector y.
template <typename T, class Context>
void RowwiseMax(const int N, const int D, const T* x, T* y, Context* context);
| |
// Computes the column-wise max of an N*D matrix x and writes it to the
// D-dimensional vector y.
template <typename T, class Context>
void ColwiseMax(const int N, const int D, const T* x, T* y, Context* context);
| |
| // Decaf gemm provides a simpler interface to the gemm functions, with the |
| // limitation that the data has to be contiguous in memory. |
| template <typename T, class Context, class Engine=DefaultEngine> |
| void Gemm(const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB, |
| const int M, const int N, const int K, const T alpha, const T* A, |
| const T* B, const T beta, T* C, Context* context); |
| |
| // We also provide a gemm that has explicit lda, ldb and ldc specified. |
| // In most cases you probably want to use the function above, though. |
| template <typename T, class Context, class Engine = DefaultEngine> |
| void GemmEx( |
| const CBLAS_TRANSPOSE TransA, |
| const CBLAS_TRANSPOSE TransB, |
| const int M, |
| const int N, |
| const int K, |
| const T alpha, |
| const T* A, |
| const int lda, |
| const T* B, |
| const int ldb, |
| const T beta, |
| T* C, |
| const int ldc, |
| Context* context); |
| |
| // Gemv always takes in a M*N matrix A, and depending on whether we set TransA |
| // to Trans, the output is: |
| // CblasNoTrans: x is an N dim vector and y is an M dim vector. |
| // CblasTrans: x is an M dim vector and y is an N dim vector. |
| template <typename T, class Context, class Engine=DefaultEngine> |
| void Gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N, |
| const T alpha, const T* A, const T* x, const T beta, |
| T* y, Context* context); |
| |
| template <typename T, class Context> |
| void Set(const TIndex N, const T alpha, T* X, Context* context); |
| |
// Takes a count n, two bounds a and b, and an output buffer r.
// Presumably fills r with n samples drawn uniformly between a and b; whether
// the bounds are inclusive is not visible here -- confirm in the definition.
template <typename T, class Context>
void RandUniform(const int n, const T a, const T b, T* r, Context* context);
| |
// Like RandUniform, but with size_t counts and an extra input buffer `avoid`
// of length m.
// NOTE(review): going by the names, this draws n distinct values between a
// and b that also differ from the m values in `avoid` -- confirm against the
// definition.
template <typename T, class Context>
void RandUniformUnique(
    const size_t n,
    const T a,
    const T b,
    T* r,
    const size_t m,
    const T* avoid,
    Context* context);
| |
// Takes a count n, the parameters `mean` and `std`, and an output buffer r.
// Presumably fills r with n samples of a normal distribution with that mean
// and standard deviation -- confirm against the definition.
template <typename T, class Context>
void RandGaussian(
    const int n,
    const T mean,
    const T std,
    T* r,
    Context* context);
| |
// Dot product of the N-element vectors a and b; the result is written to the
// single value y.
template <typename T, class Context>
void Dot(const int N, const T* a, const T* b, T* y, Context* context);
| |
// Sum of the N-element vector x; the result is written to the single value y.
template <typename T, class Context>
void Sum(const int N, const T* x, T* y, Context* context);
| |
// Select does index selection on the rows of an N*D matrix x and produces the
// N-dimensional vector y that contains the selected data.
// NOTE(review): presumably y[i] = x[i * D + idx[i]], i.e. one element picked
// per row -- confirm against the definition.
template <typename T, class Context>
void Select(
    const int N,
    const int D,
    const T* x,
    const int* idx,
    T* y,
    Context* context);
| |
// Scale with alpha passed by value.
// For small FixedSize values (like FixedSize=1) the function might provide a
// more efficient implementation that is hard-coded statically for that size.
// The default FixedSize of -1 leaves the size unspecified at compile time.
// NOTE(review): presumably y[i] = alpha * x[i] for i in [0, N) -- confirm
// against the definition.
template <typename T, class Context, int FixedSize = -1>
void Scale(const int N, const T alpha, const T* x, T* y, Context* context);
| |
// Scale with alpha passed as a pointer. Unlike the by-value overload, a
// pointer alpha is assumed to live on the Context device, for example on the
// GPU.
template <typename T, class Context>
void Scale(const int N, const T* alpha, const T* x, T* y, Context* context);
| |
// Axpy with alpha passed by value.
// For small FixedSize values (like FixedSize=1) the function might provide a
// more efficient implementation that is hard-coded statically for that size.
// The default FixedSize of -1 leaves the size unspecified at compile time.
// NOTE(review): by BLAS convention axpy computes y[i] += alpha * x[i] --
// confirm against the definition.
template <typename T, class Context, int FixedSize = -1>
void Axpy(const int N, const T alpha, const T* x, T* y, Context* context);
| |
// Axpy with alpha passed as a pointer. Unlike the by-value overload, a
// pointer alpha is assumed to live on the Context device, for example on the
// GPU.
template <typename T, class Context>
void Axpy(const int N, const T* alpha, const T* x, T* y, Context* context);
| |
// Takes two scalars (alpha and b), an N-element input x and a buffer y.
// NOTE(review): presumably y[i] = alpha * x[i] + b * y[i] -- confirm against
// the definition.
template <typename T, class Context>
void Axpby(
    const int N,
    const T alpha,
    const T* x,
    const T b,
    T* y,
    Context* context);
| |
// Image-to-column transform: reads data_im and writes data_col.
// The parameters describe the image (channels, height, width), the kernel
// (kernel_h, kernel_w), the dilation, the padding on each side (pad_t, pad_l,
// pad_b, pad_r) and the stride.
// NOTE(review): the `order` template argument presumably selects the storage
// order of the image (e.g. NCHW vs NHWC) -- confirm against the definitions.
template <typename T, class Context, int order>
void Im2col(
    const T* data_im,
    const int channels,
    const int height,
    const int width,
    const int kernel_h,
    const int kernel_w,
    const int dilation_h,
    const int dilation_w,
    const int pad_t,
    const int pad_l,
    const int pad_b,
    const int pad_r,
    const int stride_h,
    const int stride_w,
    T* data_col,
    Context* context);
| |
// Column-to-image transform, the counterpart of Im2col: reads data_col and
// writes data_im.
// The parameters describe the image (channels, height, width), the patch
// (patch_h, patch_w), the dilation, the padding on each side (pad_t, pad_l,
// pad_b, pad_r) and the stride.
// NOTE(review): the `order` template argument presumably selects the storage
// order of the image (e.g. NCHW vs NHWC) -- confirm against the definitions.
template <typename T, class Context, int order>
void Col2im(
    const T* data_col,
    const int channels,
    const int height,
    const int width,
    const int patch_h,
    const int patch_w,
    const int dilation_h,
    const int dilation_w,
    const int pad_t,
    const int pad_l,
    const int pad_b,
    const int pad_r,
    const int stride_h,
    const int stride_w,
    T* data_im,
    Context* context);
| |
// Applies a per-channel bias value to each channel of the input image.
//   bias          - the bias values, bias_channels of them.
//   image_size    - H * W, the number of elements in one channel.
//   image         - the image buffer; it is passed as non-const T*.
template <typename T, class Context>
void BiasCHW(
    const T* bias,
    const int bias_channels,
    const int image_size,
    T* image,
    Context* context);
| |
// Type-erased matrix copy: the buffers are void pointers and the element size
// is given in item_size.
// NOTE(review): presumably copies an M x N matrix from A (leading dimension
// lda) to B (leading dimension ldb), with item_size in bytes -- confirm
// against the definitions.
template <class Context>
void CopyMatrix(
    const size_t item_size,
    const int M,
    const int N,
    const void* A,
    const int lda,
    void* B,
    const int ldb,
    Context* context);
| |
| |
// Returns a 32-bit value; going by the name, a seed for random number
// generators. The definition is not part of this header, so how the seed is
// produced cannot be stated here.
uint32_t randomNumberSeed();
| |
// Range check 0 <= a < b done with one comparison instead of two.
// Both operands are converted to unsigned before comparing. b is a signed
// value that is expected to be positive, so after conversion it stays below
// 0x800...; a negative a converts to a value above 0x800... and therefore
// fails the comparison, exactly like an a that is >= b.
inline bool is_a_ge_zero_and_a_lt_b(int a, int b) {
  const unsigned value = static_cast<unsigned>(a);
  const unsigned limit = static_cast<unsigned>(b);
  return value < limit;
}
| |
// Calculates ceil(a / b) for integer types as (a + b - 1) / b.
// The formula yields the ceiling for non-negative a and positive b. The
// caller must ensure that a + b - 1 neither overflows nor underflows T.
// constexpr so that the result can be used in constant expressions (array
// bounds, static_assert, template arguments); runtime calls are unaffected.
template <typename T>
inline constexpr T divUp(T a, T b) {
  return (a + b - static_cast<T>(1)) / b;
}

// Rounds a up to the next multiple of b (a itself if it already is one).
// Built on divUp, so the same restrictions apply: the caller must ensure that
// there is no overflow or underflow in the calculation of divUp, nor in the
// final multiplication by b.
template <typename T>
inline constexpr T roundUp(T a, T b) {
  return divUp<T>(a, b) * b;
}
| |
| |
| } // namespace math |
| } // namespace caffe2 |
| |
| #include "caffe2/utils/math-detail.h" |
| #endif // CAFFE2_UTILS_MATH_H_ |