tensorflow/core/common_runtime/process_state.h - platform/external/tensorflow - Git at Google

 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

 #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_PROCESS_STATE_H_
 #define TENSORFLOW_CORE_COMMON_RUNTIME_PROCESS_STATE_H_

 #include <functional>
 #include <map>
 #include <unordered_map>
 #include <vector>

 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/thread_annotations.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/protobuf/config.pb.h"

 namespace tensorflow {

 class Allocator;
 class VisitableAllocator;
 class PoolAllocator;

 // Singleton that manages per-process state, e.g. allocation of
 // shared resources.
 class ProcessState {
  public:
   static ProcessState* singleton();

   // Descriptor for memory allocation attributes, used by optional
   // runtime correctness analysis logic.
   struct MemDesc {
     enum MemLoc { CPU, GPU };
     MemLoc loc;
     int dev_index;
     bool gpu_registered;
     bool nic_registered;
     MemDesc()
         : loc(CPU),
           dev_index(0),
           gpu_registered(false),
           nic_registered(false) {}
     string DebugString();
   };

   // If NUMA Allocators are desired, call this before calling any
   // Allocator accessor.
   void EnableNUMA() { numa_enabled_ = true; }

   // Returns what we know about the memory at ptr.
   // If we know nothing, it's called CPU 0 with no other attributes.
   MemDesc PtrType(const void* ptr);

   // Returns the one CPUAllocator used for the given numa_node.
   // TEMPORARY: ignores numa_node.
   VisitableAllocator* GetCPUAllocator(int numa_node);

   typedef std::unordered_map<const void*, MemDesc> MDMap;

  protected:
   ProcessState();
   friend class GPUProcessState;

   // If these flags need to be runtime configurable consider adding
   // them to ConfigProto.
   static const bool FLAGS_brain_mem_reg_cuda_dma = true;
   static const bool FLAGS_brain_gpu_record_mem_types = false;

   // Helper method for unit tests to reset the ProcessState singleton by
   // cleaning up everything. Never use in production.
   virtual void TestOnlyReset();

   static ProcessState* instance_;
   bool numa_enabled_;

   mutex mu_;

   std::vector<VisitableAllocator*> cpu_allocators_ GUARDED_BY(mu_);

   virtual ~ProcessState();

   // Optional RecordingAllocators that wrap the corresponding
   // Allocators for runtime attribute use analysis.
   MDMap mem_desc_map_;
   std::vector<Allocator*> cpu_al_ GUARDED_BY(mu_);
 };

 namespace internal {
 class RecordingAllocator : public Allocator {
  public:
   RecordingAllocator(ProcessState::MDMap* mm, Allocator* a,
                      ProcessState::MemDesc md, mutex* mu)
       : mm_(mm), a_(a), md_(md), mu_(mu) {}

   string Name() override { return a_->Name(); }
   void* AllocateRaw(size_t alignment, size_t num_bytes) override {
     void* p = a_->AllocateRaw(alignment, num_bytes);
     mutex_lock l(*mu_);
     (*mm_)[p] = md_;
     return p;
   }
   void DeallocateRaw(void* p) override {
     mutex_lock l(*mu_);
     auto iter = mm_->find(p);
     mm_->erase(iter);
     a_->DeallocateRaw(p);
   }
   bool TracksAllocationSizes() override { return a_->TracksAllocationSizes(); }
   size_t RequestedSize(const void* p) override { return a_->RequestedSize(p); }
   size_t AllocatedSize(const void* p) override { return a_->AllocatedSize(p); }
   void GetStats(AllocatorStats* stats) override { a_->GetStats(stats); }
   void ClearStats() override { a_->ClearStats(); }
   ProcessState::MDMap* mm_;  // not owned
   Allocator* a_;             // not owned
   ProcessState::MemDesc md_;
   mutex* mu_;
 };
 }  // namespace internal
 }  // namespace tensorflow
 #endif  // TENSORFLOW_CORE_COMMON_RUNTIME_PROCESS_STATE_H_
	/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

	Licensed under the Apache License, Version 2.0 (the "License");
	you may not use this file except in compliance with the License.
	You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

	Unless required by applicable law or agreed to in writing, software
	distributed under the License is distributed on an "AS IS" BASIS,
	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	See the License for the specific language governing permissions and
	limitations under the License.
	==============================================================================*/

	#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_PROCESS_STATE_H_
	#define TENSORFLOW_CORE_COMMON_RUNTIME_PROCESS_STATE_H_

	#include <functional>
	#include <map>
	#include <unordered_map>
	#include <vector>

	#include "tensorflow/core/framework/allocator.h"
	#include "tensorflow/core/platform/mutex.h"
	#include "tensorflow/core/platform/thread_annotations.h"
	#include "tensorflow/core/platform/types.h"
	#include "tensorflow/core/protobuf/config.pb.h"

	namespace tensorflow {

	class Allocator;
	class VisitableAllocator;
	class PoolAllocator;

	// Singleton that manages per-process state, e.g. allocation of
	// shared resources.
	class ProcessState {
	public:
	static ProcessState* singleton();

	// Descriptor for memory allocation attributes, used by optional
	// runtime correctness analysis logic.
	struct MemDesc {
	enum MemLoc { CPU, GPU };
	MemLoc loc;
	int dev_index;
	bool gpu_registered;
	bool nic_registered;
	MemDesc()
	: loc(CPU),
	dev_index(0),
	gpu_registered(false),
	nic_registered(false) {}
	string DebugString();
	};

	// If NUMA Allocators are desired, call this before calling any
	// Allocator accessor.
	void EnableNUMA() { numa_enabled_ = true; }

	// Returns what we know about the memory at ptr.
	// If we know nothing, it's called CPU 0 with no other attributes.
	MemDesc PtrType(const void* ptr);

	// Returns the one CPUAllocator used for the given numa_node.
	// TEMPORARY: ignores numa_node.
	VisitableAllocator* GetCPUAllocator(int numa_node);

	typedef std::unordered_map<const void*, MemDesc> MDMap;

	protected:
	ProcessState();
	friend class GPUProcessState;

	// If these flags need to be runtime configurable consider adding
	// them to ConfigProto.
	static const bool FLAGS_brain_mem_reg_cuda_dma = true;
	static const bool FLAGS_brain_gpu_record_mem_types = false;

	// Helper method for unit tests to reset the ProcessState singleton by
	// cleaning up everything. Never use in production.
	virtual void TestOnlyReset();

	static ProcessState* instance_;
	bool numa_enabled_;

	mutex mu_;

	std::vector<VisitableAllocator*> cpu_allocators_ GUARDED_BY(mu_);

	virtual ~ProcessState();

	// Optional RecordingAllocators that wrap the corresponding
	// Allocators for runtime attribute use analysis.
	MDMap mem_desc_map_;
	std::vector<Allocator*> cpu_al_ GUARDED_BY(mu_);
	};

	namespace internal {
	class RecordingAllocator : public Allocator {
	public:
	RecordingAllocator(ProcessState::MDMap* mm, Allocator* a,
	ProcessState::MemDesc md, mutex* mu)
	: mm_(mm), a_(a), md_(md), mu_(mu) {}

	string Name() override { return a_->Name(); }
	void* AllocateRaw(size_t alignment, size_t num_bytes) override {
	void* p = a_->AllocateRaw(alignment, num_bytes);
	mutex_lock l(*mu_);
	(*mm_)[p] = md_;
	return p;
	}
	void DeallocateRaw(void* p) override {
	mutex_lock l(*mu_);
	auto iter = mm_->find(p);
	mm_->erase(iter);
	a_->DeallocateRaw(p);
	}
	bool TracksAllocationSizes() override { return a_->TracksAllocationSizes(); }
	size_t RequestedSize(const void* p) override { return a_->RequestedSize(p); }
	size_t AllocatedSize(const void* p) override { return a_->AllocatedSize(p); }
	void GetStats(AllocatorStats* stats) override { a_->GetStats(stats); }
	void ClearStats() override { a_->ClearStats(); }
	ProcessState::MDMap* mm_; // not owned
	Allocator* a_; // not owned
	ProcessState::MemDesc md_;
	mutex* mu_;
	};
	} // namespace internal
	} // namespace tensorflow
	#endif // TENSORFLOW_CORE_COMMON_RUNTIME_PROCESS_STATE_H_