// ruy/ruy.h - platform/external/ruy - Git at Google

 /* Copyright 2019 Google LLC. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

 // This is the main Ruy public header.

 #ifndef RUY_RUY_RUY_H_
 #define RUY_RUY_RUY_H_

 #include "ruy/context.h"
 #include "ruy/context_get_ctx.h"
 #include "ruy/frontend.h"
 #include "ruy/mat.h"
 #include "ruy/matrix.h"
 #include "ruy/mul_params.h"
 #include "ruy/path.h"

 namespace ruy {

 // Entry point that lets the caller specify a custom OR-ed set of Paths to
 // compile. See the comments in path.h for more details about that.
 // Most users should use the other ruy::Mul overload not taking a Path template
 // parameter, and the main documentation comment is on that overload.
 template <Path CompiledPaths, typename LhsScalar, typename RhsScalar,
           typename AccumScalar, typename DstScalar>
 void Mul(const Matrix<LhsScalar>& lhs, const Matrix<RhsScalar>& rhs,
          const MulParams<AccumScalar, DstScalar>& mul_params, Context* context,
          Matrix<DstScalar>* dst) {
   Mat<LhsScalar> internal_lhs = ToInternal(lhs);
   Mat<RhsScalar> internal_rhs = ToInternal(rhs);
   Mat<DstScalar> internal_dst = ToInternal(*dst);
   MulFrontEnd<CompiledPaths>(internal_lhs, internal_rhs, mul_params,
                              get_ctx(context), &internal_dst);
 }

 // Performs a multiplication of matrices, with some extra features for
 // neural network applications. The basic operation is:
 //
 //   dst = lhs * rhs    // matrix multiplication
 //
 // The `mul_params` argument conveys additional parameters that are not
 // naturally associated with lhs, rhs, dst. That includes typical neural network
 // application domain specific features such as a bias-vector and clamp bounds,
 // as well as integer quantization parameters.
 //
 // A simple reference implementation of the operation performed by ruy::Mul
 // is provided by the ruy::ReferenceMul function in reference_mul.h.
 //
 // The `context` argument can be any ruy::Context object as long as no other
 // thread is going to concurrently access that ruy::Context. The simplest
 // correct (but not efficient) calling pattern is
 //
 //   ruy::Context context;
 //   ruy::Mul(lhs, rhs, mul_params, &context, dst);
 //
 // However, creating and destroying a new context every time is inefficient
 // because it doesn't allow for resources to persist across ruy calls. Such
 // resources may include heap allocations, a thread pool, and hardware detection
 // results, and can be expensive to obtain. So the recommended usage pattern is
 // more like this:
 //
 //   // Once during initialization:
 //   ruy::Context* context = new ruy::Context;
 //
 //   // Many times
 //   ruy::Mul(lhs, rhs, mul_params, context, dst);
 //
 // If multiple threads may concurrently be calling ruy::Mul, they must either
 // use separate Contexts, or use a lock to ensure that no two threads are
 // concurrently accessing the Context object. There is no lock inside Context;
 // nothing is done to ensure reentrancy with shared Context objects.
 //
 // Ruy defaults to using only 1 thread. Multi-threading is always opted in to,
 // by calling Context::set_max_num_threads() with an explicit thread count.
 // If multiple threads may concurrently be calling ruy::Mul, it is advisable
 // to set up their respective Context objects with set_max_num_threads so that
 // the overall number of threads doesn't exceed the overall number of threads
 // that the system can usefully execute concurrently
 // (e.g. the number of CPU cores in typical scenarios). At least ruy forces
 // each invocation to make an explicit decision here, there is no automatic
 // detection of the best number of threads to use in ruy.
 template <typename LhsScalar, typename RhsScalar, typename AccumScalar,
           typename DstScalar>
 void Mul(const Matrix<LhsScalar>& lhs, const Matrix<RhsScalar>& rhs,
          const MulParams<AccumScalar, DstScalar>& mul_params, Context* context,
          Matrix<DstScalar>* dst) {
   // Forward to the Path-templated overload, selecting kDefaultPaths and
   // spelling out the scalar types instead of relying on deduction.
   Mul<kDefaultPaths, LhsScalar, RhsScalar, AccumScalar, DstScalar>(
       lhs, rhs, mul_params, context, dst);
 }

 }  // namespace ruy

 #endif  // RUY_RUY_RUY_H_
	/* Copyright 2019 Google LLC. All Rights Reserved.

	Licensed under the Apache License, Version 2.0 (the "License");
	you may not use this file except in compliance with the License.
	You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

	Unless required by applicable law or agreed to in writing, software
	distributed under the License is distributed on an "AS IS" BASIS,
	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	See the License for the specific language governing permissions and
	limitations under the License.
	==============================================================================*/

	// This is the main Ruy public header.

	#ifndef RUY_RUY_RUY_H_
	#define RUY_RUY_RUY_H_

	#include "ruy/context.h"
	#include "ruy/context_get_ctx.h"
	#include "ruy/frontend.h"
	#include "ruy/mat.h"
	#include "ruy/matrix.h"
	#include "ruy/mul_params.h"
	#include "ruy/path.h"

	namespace ruy {

	// Entry point that lets the caller specify a custom OR-ed set of Paths to
	// compile. See the comments in path.h for more details about that.
	// Most users should use the other ruy::Mul overload not taking a Path template
	// parameter, and the main documentation comment is on that overload.
	template <Path CompiledPaths, typename LhsScalar, typename RhsScalar,
	typename AccumScalar, typename DstScalar>
	void Mul(const Matrix<LhsScalar>& lhs, const Matrix<RhsScalar>& rhs,
	const MulParams<AccumScalar, DstScalar>& mul_params, Context* context,
	Matrix<DstScalar>* dst) {
	Mat<LhsScalar> internal_lhs = ToInternal(lhs);
	Mat<RhsScalar> internal_rhs = ToInternal(rhs);
	Mat<DstScalar> internal_dst = ToInternal(*dst);
	MulFrontEnd<CompiledPaths>(internal_lhs, internal_rhs, mul_params,
	get_ctx(context), &internal_dst);
	}

	// Performs a multiplication of matrices, with some extra features for
	// neural network applications. The basic operation is:
	//
	// dst = lhs * rhs // matrix multiplication
	//
	// The `mul_params` argument conveys additional parameters that are not
	// naturally associated with lhs, rhs, dst. That includes typical neural network
	// application domain specific features such as a bias-vector and clamp bounds,
	// as well as integer quantization parameters.
	//
	// A simple reference implementation of the operation performed by ruy::Mul
	// is provided by the ruy::ReferenceMul function in reference_mul.h.
	//
	// The `context` argument can be any ruy::Context object as long as no other
	// thread is going to concurrently access that ruy::Context. The simplest
	// correct (but not efficient) calling pattern is
	//
	// ruy::Context context;
	// ruy::Mul(lhs, rhs, mul_params, &context, dst);
	//
	// However, creating and destroying a new context every time is inefficient
	// because it doesn't allow for resources to persist across ruy calls. Such
	// resources may include heap allocations, a thread pool, and hardware detection
	// results, and can be expensive to obtain. So the recommended usage pattern is
	// more like this:
	//
	// // Once during initialization:
	// ruy::Context* context = new ruy::Context;
	//
	// // Many times
	// ruy::Mul(lhs, rhs, mul_params, context, dst);
	//
	// If multiple threads may concurrently be calling ruy::Mul, they must either
	// use separate Contexts, or use a lock to ensure that no two threads are
	// concurrently accessing the Context object. There is no lock inside Context;
	// nothing is done to ensure reentrancy with shared Context objects.
	//
	// Ruy defaults to using only 1 thread. Multi-threading is always opted in to,
	// by calling Context::set_max_num_threads() with an explicit thread count.
	// If multiple threads may concurrently be calling ruy::Mul, it is advisable
	// to set up their respective Context objects with set_max_num_threads so that
	// the overall number of threads doesn't exceed the overall number of threads
	// that the system can usefully execute concurrently
	// (e.g. the number of CPU cores in typical scenarios). At least ruy forces
	// each invocation to make an explicit decision here, there is no automatic
	// detection of the best number of threads to use in ruy.
	template <typename LhsScalar, typename RhsScalar, typename AccumScalar,
	          typename DstScalar>
	void Mul(const Matrix<LhsScalar>& lhs, const Matrix<RhsScalar>& rhs,
	         const MulParams<AccumScalar, DstScalar>& mul_params, Context* context,
	         Matrix<DstScalar>* dst) {
	  // Forward to the Path-templated overload, selecting kDefaultPaths and
	  // spelling out the scalar types instead of relying on deduction.
	  Mul<kDefaultPaths, LhsScalar, RhsScalar, AccumScalar, DstScalar>(
	      lhs, rhs, mul_params, context, dst);
	}

	} // namespace ruy

	#endif // RUY_RUY_RUY_H_