arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp - platform/external/ComputeLibrary - Git at Google

 /*
  * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to
  * deal in the Software without restriction, including without limitation the
  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  * sell copies of the Software, and to permit persons to whom the Software is
  * furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice shall be included in all
  * copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
 #pragma once

 #include <cstdint>
 #include <cstring>
 #include <memory>
 #include <string>
 #include <utility>
 #include <vector>

 #include "arm_gemm_local.hpp"
 #include "gemm_common.hpp"

 namespace arm_gemm {

 /* Selects which method is used to implement a GEMM.
  * DEFAULT is the initial value of KernelDescription::method and
  * GemmConfig::method.  The explicit values below are exactly the ones the
  * compiler would assign implicitly (0, 1, 2, ... in declaration order). */
 enum class GemmMethod
 {
     DEFAULT                = 0,
     GEMV_BATCHED           = 1,
     GEMV_PRETRANSPOSED     = 2,
     GEMV_NATIVE_TRANSPOSED = 3,
     GEMM_NATIVE            = 4,
     GEMM_HYBRID            = 5,
     GEMM_INTERLEAVED       = 6,
     QUANTIZE_WRAPPER       = 7,
     GEMM_HYBRID_QUANTIZED  = 8
 };

 /* Describes one kernel: the GemmMethod it belongs to, its name, and an
  * is_default flag (presumably marks the kernel that would be selected by
  * default - confirm against the selection code). */
 struct KernelDescription
 {
     GemmMethod   method      = GemmMethod::DEFAULT;
     std::string  name        = "";
     bool         is_default  = false;

     /* m: method, n: kernel name, d: default flag (false when omitted).
      * 'n' is a by-value sink parameter, so it is moved into 'name' rather
      * than copied a second time. */
     KernelDescription(GemmMethod m, std::string n, bool d=false) : method(m), name(std::move(n)), is_default(d) { }

     /* Leaves every member at its in-class initializer. */
     KernelDescription() { }
 };

 struct GemmConfig
 {
     GemmMethod   method           = GemmMethod::DEFAULT;
     std::string  filter           = "";
     unsigned int inner_block_size = 0;
     unsigned int outer_block_size = 0;

     GemmConfig(GemmMethod method) : method(method) { }
     GemmConfig() { }
 };

 /* Parameter bundle handed to get_gemm_method(), gemm() and
  * get_compatible_kernels().  T is the type of the alpha/beta scalars.
  * The constructor copies each argument into the like-named member; _ci and
  * _cfg are stored as raw pointers and are never freed by this struct. */
 template<typename T>
 struct GemmArgs
 {
     const CPUInfo    *_ci;
     unsigned int      _Msize;
     unsigned int      _Nsize;
     unsigned int      _Ksize;
     unsigned int      _nbatches;
     unsigned int      _nmulti;
     bool              _trA;
     bool              _trB;
     T                 _alpha;
     T                 _beta;
     int               _maxthreads;
     bool              _pretransposed_hint;
     const GemmConfig *_cfg;

     /* cfg may be omitted, in which case _cfg is nullptr. */
     GemmArgs(const CPUInfo *ci,
              unsigned int M, unsigned int N, unsigned int K,
              unsigned int nbatches, unsigned int nmulti,
              bool trA, bool trB,
              T alpha, T beta,
              int maxthreads, bool pretransposed_hint,
              const GemmConfig *cfg = nullptr)
         : _ci(ci),
           _Msize(M),
           _Nsize(N),
           _Ksize(K),
           _nbatches(nbatches),
           _nmulti(nmulti),
           _trA(trA),
           _trB(trB),
           _alpha(alpha),
           _beta(beta),
           _maxthreads(maxthreads),
           _pretransposed_hint(pretransposed_hint),
           _cfg(cfg)
     {
     }
 };

 /* Output-stage parameter block for requantization.
  * NOTE(review): the field meanings are inferred from their names
  * (offsets for A/B/C, a multiplier and shift, and a clamp range) - confirm
  * against the kernels that consume this struct.
  *
  * Every member has an in-class initializer so that a default-constructed
  * object holds a null bias pointer and zeros instead of indeterminate values. */
 struct ARequantizeLayer32
 {
 public:
     const int32_t  *bias          = nullptr; /* Stored as-is; never freed by this struct. */
     int32_t         a_offset      = 0;
     int32_t         b_offset      = 0;
     int32_t         c_offset      = 0;
     int32_t         requant_shift = 0;
     int32_t         requant_mul   = 0;
     int32_t         minval        = 0;
     int32_t         maxval        = 0;

     ARequantizeLayer32() = default;

     /* The bias pointer is taken as pointer-to-const to match the member, so
      * callers holding a read-only bias buffer can construct this directly;
      * a plain 'int32_t *' still converts implicitly. */
     ARequantizeLayer32(const int32_t *b, int32_t ao, int32_t bo, int32_t co, int32_t rs, int32_t rm, int32_t minv, int32_t maxv) :
         bias(b), a_offset(ao), b_offset(bo), c_offset(co), requant_shift(rs), requant_mul(rm), minval(minv), maxval(maxv)
     {
     }
 };

 /* Empty placeholder type; it is the default OutputStage template argument
  * of the API declarations in this header. */
 struct Nothing {};

 /* Owning pointer to a GemmCommon<Top, Tret>; this is the type returned by gemm(). */
 template<class Top, class Tret>
 using UniqueGemmCommon = std::unique_ptr<GemmCommon<Top, Tret>>;

 /* Low level API calls.
  * These are implemented as 'GemmArgs' versions, or with the arguments explicitly listed. */

 /* get_gemm_method(): for the given operand/result types and the problem
  * described by 'args', report which method is preferred to implement this
  * GEMM.  The output-stage argument defaults to an empty-braces-initialized
  * OutputStage (Nothing unless specified). */
 template<typename Top, typename Tret, typename OutputStage = Nothing>
 KernelDescription get_gemm_method(const GemmArgs<Tret> &args,
                                   const OutputStage &output_stage = {});

 /* gemm(): returns an owning pointer to a GemmCommon<Top, Tret> for the
  * problem described by 'args'.
  * NOTE(review): whether the result can be null when no kernel fits is not
  * visible from this header - confirm against the implementation. */
 template<typename Top, typename Tret, typename OutputStage = Nothing>
 UniqueGemmCommon<Top, Tret> gemm(const GemmArgs<Tret> &args,
                                  const OutputStage &output_stage = {});

 /* get_compatible_kernels(): returns a list of KernelDescription entries for
  * the problem described by 'args' (presumably every kernel able to handle
  * it - confirm against the implementation). */
 template<typename Top, typename Tret, typename OutputStage = Nothing>
 std::vector<KernelDescription> get_compatible_kernels(const GemmArgs<Tret> &args,
                                                       const OutputStage &output_stage = {});

 } // namespace arm_gemm
	/*
	* Copyright (c) 2018-2019 ARM Limited.
	*
	* SPDX-License-Identifier: MIT
	*
	* Permission is hereby granted, free of charge, to any person obtaining a copy
	* of this software and associated documentation files (the "Software"), to
	* deal in the Software without restriction, including without limitation the
	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
	* sell copies of the Software, and to permit persons to whom the Software is
	* furnished to do so, subject to the following conditions:
	*
	* The above copyright notice and this permission notice shall be included in all
	* copies or substantial portions of the Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	* SOFTWARE.
	*/
	#pragma once

	#include <memory>
	#include <cstring>

	#include "arm_gemm_local.hpp"
	#include "gemm_common.hpp"

	namespace arm_gemm {

	/* Selects which method is used to implement a GEMM.
	 * DEFAULT is the initial value of KernelDescription::method and
	 * GemmConfig::method.  The explicit values below are exactly the ones the
	 * compiler would assign implicitly (0, 1, 2, ... in declaration order). */
	enum class GemmMethod
	{
	    DEFAULT                = 0,
	    GEMV_BATCHED           = 1,
	    GEMV_PRETRANSPOSED     = 2,
	    GEMV_NATIVE_TRANSPOSED = 3,
	    GEMM_NATIVE            = 4,
	    GEMM_HYBRID            = 5,
	    GEMM_INTERLEAVED       = 6,
	    QUANTIZE_WRAPPER       = 7,
	    GEMM_HYBRID_QUANTIZED  = 8
	};

	/* Describes one kernel: the GemmMethod it belongs to, its name, and an
	 * is_default flag (presumably marks the kernel that would be selected by
	 * default - confirm against the selection code). */
	struct KernelDescription
	{
	    GemmMethod  method     = GemmMethod::DEFAULT;
	    std::string name       = "";
	    bool        is_default = false;

	    /* m: method, n: kernel name, d: default flag (false when omitted).
	     * 'n' is a by-value sink parameter, so it is moved into 'name' rather
	     * than copied a second time. */
	    KernelDescription(GemmMethod m, std::string n, bool d=false) : method(m), name(std::move(n)), is_default(d) { }

	    /* Leaves every member at its in-class initializer. */
	    KernelDescription() { }
	};

	struct GemmConfig
	{
	GemmMethod method = GemmMethod::DEFAULT;
	std::string filter = "";
	unsigned int inner_block_size = 0;
	unsigned int outer_block_size = 0;

	GemmConfig(GemmMethod method) : method(method) { }
	GemmConfig() { }
	};

	/* Parameter bundle handed to get_gemm_method(), gemm() and
	 * get_compatible_kernels().  T is the type of the alpha/beta scalars.
	 * The constructor copies each argument into the like-named member; _ci and
	 * _cfg are stored as raw pointers and are never freed by this struct. */
	template<typename T>
	struct GemmArgs
	{
	    const CPUInfo    *_ci;
	    unsigned int      _Msize;
	    unsigned int      _Nsize;
	    unsigned int      _Ksize;
	    unsigned int      _nbatches;
	    unsigned int      _nmulti;
	    bool              _trA;
	    bool              _trB;
	    T                 _alpha;
	    T                 _beta;
	    int               _maxthreads;
	    bool              _pretransposed_hint;
	    const GemmConfig *_cfg;

	    /* cfg may be omitted, in which case _cfg is nullptr. */
	    GemmArgs(const CPUInfo *ci,
	             unsigned int M, unsigned int N, unsigned int K,
	             unsigned int nbatches, unsigned int nmulti,
	             bool trA, bool trB,
	             T alpha, T beta,
	             int maxthreads, bool pretransposed_hint,
	             const GemmConfig *cfg = nullptr)
	        : _ci(ci),
	          _Msize(M),
	          _Nsize(N),
	          _Ksize(K),
	          _nbatches(nbatches),
	          _nmulti(nmulti),
	          _trA(trA),
	          _trB(trB),
	          _alpha(alpha),
	          _beta(beta),
	          _maxthreads(maxthreads),
	          _pretransposed_hint(pretransposed_hint),
	          _cfg(cfg)
	    {
	    }
	};

	/* Output-stage parameter block for requantization.
	 * NOTE(review): the field meanings are inferred from their names
	 * (offsets for A/B/C, a multiplier and shift, and a clamp range) - confirm
	 * against the kernels that consume this struct.
	 *
	 * Every member has an in-class initializer so that a default-constructed
	 * object holds a null bias pointer and zeros instead of indeterminate values. */
	struct ARequantizeLayer32
	{
	public:
	    const int32_t *bias          = nullptr; /* Stored as-is; never freed by this struct. */
	    int32_t        a_offset      = 0;
	    int32_t        b_offset      = 0;
	    int32_t        c_offset      = 0;
	    int32_t        requant_shift = 0;
	    int32_t        requant_mul   = 0;
	    int32_t        minval        = 0;
	    int32_t        maxval        = 0;

	    ARequantizeLayer32() = default;

	    /* The bias pointer is taken as pointer-to-const to match the member, so
	     * callers holding a read-only bias buffer can construct this directly;
	     * a plain 'int32_t *' still converts implicitly. */
	    ARequantizeLayer32(const int32_t *b, int32_t ao, int32_t bo, int32_t co, int32_t rs, int32_t rm, int32_t minv, int32_t maxv) :
	        bias(b), a_offset(ao), b_offset(bo), c_offset(co), requant_shift(rs), requant_mul(rm), minval(minv), maxval(maxv)
	    {
	    }
	};

	/* Empty placeholder type; it is the default OutputStage template argument
	 * of the API declarations in this header. */
	struct Nothing {};

	/* Owning pointer to a GemmCommon<Top, Tret>; this is the type returned by gemm(). */
	template<class Top, class Tret>
	using UniqueGemmCommon = std::unique_ptr<GemmCommon<Top, Tret>>;

	/* Low level API calls.
	* These are implemented as 'GemmArgs' versions, or with the arguments explicitly listed. */

	/* get_gemm_method(): for the given operand/result types and the problem
	 * described by 'args', report which method is preferred to implement this
	 * GEMM.  The output-stage argument defaults to an empty-braces-initialized
	 * OutputStage (Nothing unless specified). */
	template<typename Top, typename Tret, typename OutputStage = Nothing>
	KernelDescription get_gemm_method(const GemmArgs<Tret> &args,
	                                  const OutputStage &output_stage = {});

	/* gemm(): returns an owning pointer to a GemmCommon<Top, Tret> for the
	 * problem described by 'args'.
	 * NOTE(review): whether the result can be null when no kernel fits is not
	 * visible from this header - confirm against the implementation. */
	template<typename Top, typename Tret, typename OutputStage = Nothing>
	UniqueGemmCommon<Top, Tret> gemm(const GemmArgs<Tret> &args,
	                                 const OutputStage &output_stage = {});

	/* get_compatible_kernels(): returns a list of KernelDescription entries for
	 * the problem described by 'args' (presumably every kernel able to handle
	 * it - confirm against the implementation). */
	template<typename Top, typename Tret, typename OutputStage = Nothing>
	std::vector<KernelDescription> get_compatible_kernels(const GemmArgs<Tret> &args,
	                                                      const OutputStage &output_stage = {});

	} // namespace arm_gemm