// caffe2/core/context.h - platform/external/pytorch - Git at Google

 #ifndef CAFFE2_CORE_CONTEXT_H_
 #define CAFFE2_CORE_CONTEXT_H_

 #include <cstddef>
 #include <cstdlib>
 #include <cstring>
 #include <ctime>
 #include <memory>
 #include <random>
 #include <type_traits>
 #include <unordered_map>
 #include <utility>

 #include "caffe2/core/allocator.h"
 #include "caffe2/core/event.h"
 #include "caffe2/core/logging.h"
 #include "caffe2/core/typeid.h"
 #include "caffe2/proto/caffe2.pb.h"
 #include "caffe2/utils/math.h"

 // Declares the boolean flag caffe2_report_cpu_memory_usage; presumably this is
 // what makes FLAGS_caffe2_report_cpu_memory_usage, read by CPUContext::New()
 // below, visible in this header (confirm against the macro's definition).
 CAFFE2_DECLARE_bool(caffe2_report_cpu_memory_usage);

 namespace caffe2 {

 /**
  * The CPU Context, representing the bare minimum of what a Context class in
  * Caffe2 should implement.
  *
  * See operator.h, especially Operator<Context>, for how Context are used in
  * actual operator implementations that are associated with specific devices.
  * In general, the Context class is passed in as a template argument, and
  * the operator can use the functions defined in the context to execute whatever
  * computation it has.
  *
  * A Context defines all the necessities to run an operator on a specific
  * device. Specific Context classes have the freedom to choose what functions it
  * implements, but there are a few functions that you should consider
  * implementing if you want to write your own context class:
  * - void SwitchToDevice(): any necessary code to switch to the device before
  *     running anything.
  * - void WaitEvent(const Event& ev): make the current context to wait on
  *     an event. For example, for cuda, this is the equivalent of
  *     cudaStreamWaitEvent. For CPU context, it essentially synchronizes the
  *     event.
  * - void Record(Event* ev): record the async activities on the current context
  *     to the event. For example, for cuda, this is the equivalent of
  *     cudaEventRecord on the current stream. For CPU context, it is always
  *     synchronous.
  * - void FinishDeviceComputation(): any wrapping-up work after all the
  *     computation of the operator is done. If there are errors during the
  *     execution, throw exception. For example, in a CUDAContext, this function
  *     carries out a stream synchronization and spots potential errors for
  *     the cuda kernel calls.
  * - static std::pair<void*, MemoryDeleter> New(size_t nbytes): allocates
  *     memory and returns a deleter.
  * - template <class SrcContext, class DstContext> void CopyBytes(...): does
  *     cross context memory copy.
  * - template <typename T, class SrcContext, class DstContext> void Copy(...):
  *     usually a simple wrapper around the above CopyBytes function.
  *
  * We intentionally did not create a base class for the various possible Context
  * classes there might be, since they are intended to be specified during
  * compile time using templates rather than via polymorphism. You should also
  * not have classes derived from existing context classes.
  */
 class CPUContext final {
  public:
   // Random engine type returned by RandGenerator().
   typedef std::mt19937 rand_gen_type;

   // Default construction takes its seed from math::randomNumberSeed().
   CPUContext() : random_seed_(math::randomNumberSeed()) {}

   // Takes the seed from `option` when it carries one, otherwise from
   // math::randomNumberSeed(). Enforces that `option` names the CPU device.
   explicit CPUContext(const DeviceOption& option)
       : random_seed_(
             option.has_random_seed() ? option.random_seed()
                                      : math::randomNumberSeed()) {
     CAFFE_ENFORCE_EQ(option.device_type(), CPU);
   }

   ~CPUContext() noexcept {}

   // Both overloads have empty effect here: the one-argument form ignores its
   // stream id and the zero-argument form simply forwards stream 0 to it.
   inline void SwitchToDevice(int /*stream_id*/) {}
   inline void SwitchToDevice() {
     SwitchToDevice(0);
   }

   // Forwards to ev.Wait(CPU, this).
   inline void WaitEvent(const Event& ev) {
     ev.Wait(CPU, this);
   }

   // Forwards to ev->Record(CPU, this) after enforcing that `ev` is non-null.
   inline void Record(Event* ev) const {
     CAFFE_ENFORCE(ev, "Event must not be null.");
     ev->Record(CPU, this);
   }

   // Intentionally empty for this context.
   inline void FinishDeviceComputation() {}

   // Returns this context's generator, constructing it from random_seed_ on
   // the first call.
   inline rand_gen_type& RandGenerator() {
     if (!random_generator_) {
       random_generator_.reset(new rand_gen_type(random_seed_));
     }
     return *random_generator_;
   }

   // Allocates nbytes through GetCPUAllocator() and returns the pointer together
   // with its deleter. When FLAGS_caffe2_report_cpu_memory_usage is set, the
   // allocation is also registered with reporter_ and the returned deleter is
   // replaced by ReportAndDelete so that the release is reported as well.
   static std::pair<void*, MemoryDeleter> New(size_t nbytes) {
     auto data_and_deleter = GetCPUAllocator()->New(nbytes);
     if (FLAGS_caffe2_report_cpu_memory_usage) {
       reporter_.New(data_and_deleter.first, nbytes);
       data_and_deleter.second = ReportAndDelete;
     }
     return data_and_deleter;
   }

   // Two copy functions that deals with cross-device copies.
   // CopyBytes is only declared here; each (SrcContext, DstContext) pair is
   // provided as an explicit specialization.
   template <class SrcContext, class DstContext>
   inline void CopyBytes(size_t nbytes, const void* src, void* dst);

   // Copies n elements of type T from src to dst. Fundamental types go through
   // CopyBytes as a single byte copy of n * sizeof(T) bytes; every other type
   // is assigned element by element.
   template <typename T, class SrcContext, class DstContext>
   inline void Copy(size_t n, const T* src, T* dst) {
     if (std::is_fundamental<T>::value) {
       CopyBytes<SrcContext, DstContext>(
           n * sizeof(T),
           static_cast<const void*>(src),
           static_cast<void*>(dst));
     } else {
       // The index shares n's type: an int index would be compared
       // signed-vs-unsigned and would overflow once n exceeds INT_MAX.
       for (size_t i = 0; i < n; ++i) {
         dst[i] = src[i];
       }
     }
   }

   // Copies n items described by `meta` from src to dst. Uses meta.copy() when
   // the type supplies one; otherwise falls back to CopyBytes over
   // n * meta.itemsize() bytes.
   template <class SrcContext, class DstContext>
   inline void
   CopyItems(const TypeMeta& meta, size_t n, const void* src, void* dst) {
     if (meta.copy()) {
       meta.copy()(src, dst, n);
     } else {
       CopyBytes<SrcContext, DstContext>(n * meta.itemsize(), src, dst);
     }
   }

  protected:
   // TODO(jiayq): instead of hard-coding a generator, make it more flexible.
   // Seed used to construct random_generator_; both constructors overwrite the
   // in-class default.
   int random_seed_{1701};
   // Created lazily by RandGenerator().
   std::unique_ptr<rand_gen_type> random_generator_;
   // Shared by all CPU contexts; only touched by New() when the reporting flag
   // is set, and by ReportAndDelete().
   static MemoryAllocationReporter reporter_;

  private:
   // Deleter installed by New() when reporting is enabled: tells reporter_
   // about the release, then frees ptr with the CPU allocator's own deleter.
   static void ReportAndDelete(void* ptr) {
     reporter_.Delete(ptr);
     GetCPUAllocator()->GetDeleter()(ptr);
   }
 };

 /**
  * CPU-to-CPU specialization of CopyBytes: copies nbytes bytes from src to dst
  * with std::memcpy. A zero-length copy returns immediately without looking at
  * the pointers; otherwise both src and dst are enforced to be non-null.
  */
 template <>
 inline void CPUContext::CopyBytes<CPUContext, CPUContext>(
     size_t nbytes, const void* src, void* dst) {
   if (nbytes == 0) {
     return;
   }
   CAFFE_ENFORCE(src);
   CAFFE_ENFORCE(dst);
   // Use the qualified name from <cstring> instead of relying on ::memcpy
   // leaking in through some other header.
   std::memcpy(dst, src, nbytes);
 }

 }  // namespace caffe2

 #endif  // CAFFE2_CORE_CONTEXT_H_
	#ifndef CAFFE2_CORE_CONTEXT_H_
	#define CAFFE2_CORE_CONTEXT_H_

	#include <cstdlib>
	#include <ctime>
	#include <random>
	#include <unordered_map>

	#include "caffe2/core/allocator.h"
	#include "caffe2/core/event.h"
	#include "caffe2/core/logging.h"
	#include "caffe2/core/typeid.h"
	#include "caffe2/proto/caffe2.pb.h"
	#include "caffe2/utils/math.h"

	// Declares the boolean flag caffe2_report_cpu_memory_usage; presumably this is
	// what makes FLAGS_caffe2_report_cpu_memory_usage, read by CPUContext::New()
	// below, visible in this header (confirm against the macro's definition).
	CAFFE2_DECLARE_bool(caffe2_report_cpu_memory_usage);

	namespace caffe2 {

	/**
	* The CPU Context, representing the bare minimum of what a Context class in
	* Caffe2 should implement.
	*
	* See operator.h, especially Operator<Context>, for how Context are used in
	* actual operator implementations that are associated with specific devices.
	* In general, the Context class is passed in as a template argument, and
	* the operator can use the functions defined in the context to execute whatever
	* computation it has.
	*
	* A Context defines all the necessities to run an operator on a specific
	* device. Specific Context classes have the freedom to choose what functions it
	* implements, but there are a few functions that you should consider
	* implementing if you want to write your own context class:
	* - void SwitchToDevice(): any necessary code to switch to the device before
	* running anything.
	* - void WaitEvent(const Event& ev): make the current context to wait on
	* an event. For example, for cuda, this is the equivalent of
	* cudaStreamWaitEvent. For CPU context, it essentially synchronizes the
	* event.
	* - void Record(Event* ev): record the async activities on the current context
	* to the event. For example, for cuda, this is the equivalent of
	* cudaEventRecord on the current stream. For CPU context, it is always
	* synchronous.
	* - void FinishDeviceComputation(): any wrapping-up work after all the
	* computation of the operator is done. If there are errors during the
	* execution, throw exception. For example, in a CUDAContext, this function
	* carries out a stream synchronization and spots potential errors for
	* the cuda kernel calls.
	* - static std::pair<void*, MemoryDeleter> New(size_t nbytes): allocates
	* memory and returns a deleter.
	* - template <class SrcContext, class DstContext> void CopyBytes(...): does
	* cross context memory copy.
	* - template <typename T, class SrcContext, class DstContext> void Copy(...):
	* usually a simple wrapper around the above CopyBytes function.
	*
	* We intentionally did not create a base class for the various possible Context
	* classes there might be, since they are intended to be specified during
	* compile time using templates rather than via polymorphism. You should also
	* not have classes derived from existing context classes.
	*/
	class CPUContext final {
	 public:
	  // Random engine type returned by RandGenerator().
	  typedef std::mt19937 rand_gen_type;

	  // Default construction takes its seed from math::randomNumberSeed().
	  CPUContext() : random_seed_(math::randomNumberSeed()) {}

	  // Takes the seed from `option` when it carries one, otherwise from
	  // math::randomNumberSeed(). Enforces that `option` names the CPU device.
	  explicit CPUContext(const DeviceOption& option)
	      : random_seed_(
	            option.has_random_seed() ? option.random_seed()
	                                     : math::randomNumberSeed()) {
	    CAFFE_ENFORCE_EQ(option.device_type(), CPU);
	  }

	  ~CPUContext() noexcept {}

	  // Both overloads have empty effect here: the one-argument form ignores its
	  // stream id and the zero-argument form simply forwards stream 0 to it.
	  // The parameter name is kept as a block comment; the bare slashes that
	  // stood here before were not valid C++.
	  inline void SwitchToDevice(int /*stream_id*/) {}
	  inline void SwitchToDevice() {
	    SwitchToDevice(0);
	  }

	  // Forwards to ev.Wait(CPU, this).
	  inline void WaitEvent(const Event& ev) {
	    ev.Wait(CPU, this);
	  }

	  // Forwards to ev->Record(CPU, this) after enforcing that `ev` is non-null.
	  inline void Record(Event* ev) const {
	    CAFFE_ENFORCE(ev, "Event must not be null.");
	    ev->Record(CPU, this);
	  }

	  // Intentionally empty for this context.
	  inline void FinishDeviceComputation() {}

	  // Returns this context's generator, constructing it from random_seed_ on
	  // the first call.
	  inline rand_gen_type& RandGenerator() {
	    if (!random_generator_) {
	      random_generator_.reset(new rand_gen_type(random_seed_));
	    }
	    return *random_generator_;
	  }

	  // Allocates nbytes through GetCPUAllocator() and returns the pointer
	  // together with its deleter. When FLAGS_caffe2_report_cpu_memory_usage is
	  // set, the allocation is also registered with reporter_ and the returned
	  // deleter is replaced by ReportAndDelete so the release is reported too.
	  static std::pair<void*, MemoryDeleter> New(size_t nbytes) {
	    auto data_and_deleter = GetCPUAllocator()->New(nbytes);
	    if (FLAGS_caffe2_report_cpu_memory_usage) {
	      reporter_.New(data_and_deleter.first, nbytes);
	      data_and_deleter.second = ReportAndDelete;
	    }
	    return data_and_deleter;
	  }

	  // Two copy functions that deals with cross-device copies.
	  // CopyBytes is only declared here; each (SrcContext, DstContext) pair is
	  // provided as an explicit specialization.
	  template <class SrcContext, class DstContext>
	  inline void CopyBytes(size_t nbytes, const void* src, void* dst);

	  // Copies n elements of type T from src to dst. Fundamental types go
	  // through CopyBytes as a single byte copy of n * sizeof(T) bytes; every
	  // other type is assigned element by element.
	  template <typename T, class SrcContext, class DstContext>
	  inline void Copy(size_t n, const T* src, T* dst) {
	    if (std::is_fundamental<T>::value) {
	      CopyBytes<SrcContext, DstContext>(
	          n * sizeof(T),
	          static_cast<const void*>(src),
	          static_cast<void*>(dst));
	    } else {
	      // The index shares n's type: an int index would be compared
	      // signed-vs-unsigned and would overflow once n exceeds INT_MAX.
	      for (size_t i = 0; i < n; ++i) {
	        dst[i] = src[i];
	      }
	    }
	  }

	  // Copies n items described by `meta` from src to dst. Uses meta.copy()
	  // when the type supplies one; otherwise falls back to CopyBytes over
	  // n * meta.itemsize() bytes.
	  template <class SrcContext, class DstContext>
	  inline void
	  CopyItems(const TypeMeta& meta, size_t n, const void* src, void* dst) {
	    if (meta.copy()) {
	      meta.copy()(src, dst, n);
	    } else {
	      CopyBytes<SrcContext, DstContext>(n * meta.itemsize(), src, dst);
	    }
	  }

	 protected:
	  // TODO(jiayq): instead of hard-coding a generator, make it more flexible.
	  // Seed used to construct random_generator_; both constructors overwrite
	  // the in-class default.
	  int random_seed_{1701};
	  // Created lazily by RandGenerator().
	  std::unique_ptr<rand_gen_type> random_generator_;
	  // Shared by all CPU contexts; only touched by New() when the reporting
	  // flag is set, and by ReportAndDelete().
	  static MemoryAllocationReporter reporter_;

	 private:
	  // Deleter installed by New() when reporting is enabled: tells reporter_
	  // about the release, then frees ptr with the CPU allocator's own deleter.
	  static void ReportAndDelete(void* ptr) {
	    reporter_.Delete(ptr);
	    GetCPUAllocator()->GetDeleter()(ptr);
	  }
	};

	/**
	 * CPU-to-CPU specialization of CopyBytes: copies nbytes bytes from src to dst
	 * with std::memcpy. A zero-length copy returns immediately without looking at
	 * the pointers; otherwise both src and dst are enforced to be non-null.
	 */
	template <>
	inline void CPUContext::CopyBytes<CPUContext, CPUContext>(
	    size_t nbytes, const void* src, void* dst) {
	  if (nbytes == 0) {
	    return;
	  }
	  CAFFE_ENFORCE(src);
	  CAFFE_ENFORCE(dst);
	  // Use the qualified name from <cstring> instead of relying on ::memcpy
	  // leaking in through some other header.
	  std::memcpy(dst, src, nbytes);
	}

	} // namespace caffe2

	#endif // CAFFE2_CORE_CONTEXT_H_