// tensorflow/stream_executor/tf_allocator_adapter.h - platform/external/tensorflow - Git at Google

 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

 #ifndef TENSORFLOW_STREAM_EXECUTOR_TF_ALLOCATOR_ADAPTER_H_
 #define TENSORFLOW_STREAM_EXECUTOR_TF_ALLOCATOR_ADAPTER_H_

 #include <memory>
 #include <utility>
 #include <vector>

 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/stream_executor/device_memory.h"
 #include "tensorflow/stream_executor/device_memory_allocator.h"
 #include "tensorflow/stream_executor/lib/statusor.h"
 #include "tensorflow/stream_executor/platform.h"

 namespace stream_executor {

 // Adapter class that exposes a single Tensorflow allocator through the
 // stream_executor `DeviceMemoryAllocator` interface.
 //
 // The adapter keeps only a raw pointer to the wrapped allocator. Ownership
 // presumably stays with the caller (the member definitions are not in this
 // header); `MultiDeviceAdapter` below, for example, keeps the allocators it
 // wraps alive in its own `std::unique_ptr`s.
 //
 // Assumes that the Tensorflow allocator permits asynchronous deallocation:
 // see comment on `AllowsAsynchronousDeallocation()`.
 class TfAllocatorAdapter : public DeviceMemoryAllocator {
  public:
   // `platform` is the platform this allocator is associated with and
   // `wrapped` is the Tensorflow allocator that requests are adapted onto.
   // Both are passed as non-owning pointers.
   // NOTE(review): the constructor is defined out of line; presumably
   // `wrapped` must outlive this adapter -- confirm against the definition.
   TfAllocatorAdapter(const Platform *platform, tensorflow::Allocator *wrapped);
   ~TfAllocatorAdapter() override;

   // Requests `size` (presumably bytes -- TODO confirm) of memory for the
   // device identified by `device_ordinal`. Returns the owning handle on
   // success or an error status otherwise.
   // NOTE(review): how `retry_on_failure` is honoured is decided by the
   // out-of-line definition, which is not visible here.
   port::StatusOr<OwningDeviceMemory> Allocate(int device_ordinal, uint64 size,
                                               bool retry_on_failure) override;

   // Releases `mem` for the device identified by `device_ordinal` and reports
   // the outcome as a status. Defined out of line.
   port::Status Deallocate(int device_ordinal, DeviceMemoryBase mem) override;

   // Always reports that asynchronous deallocation is allowed.
   //
   // The Tensorflow BFC allocator used on GPU allows host-side deallocation
   // before GPU execution takes place. Tensorflow uses the ordering of the main
   // compute stream to enforce a happens-before relationship between a memory
   // allocation and code that reuses the same memory. If Tensorflow adds
   // support for multiple GPU streams or allocators with different ordering
   // requirements, this code may need to change.
   // (This attribute has no effect on CPU.)
   bool AllowsAsynchronousDeallocation() const override { return true; }

  private:
   // Non-owning pointer to the wrapped Tensorflow allocator.
   tensorflow::Allocator *wrapped_;
 };

 // Adapter class that wraps per-device TF allocators as an XLA allocator.
 // Assumes that the Tensorflow allocator permits asynchronous deallocation;
 // see comment on `AllowsAsynchronousDeallocation()`.
 class MultiDeviceAdapter : public DeviceMemoryAllocator {
  public:
   MultiDeviceAdapter(
       const Platform *platform,
       std::vector<std::unique_ptr<tensorflow::Allocator>> tf_allocators)
       : DeviceMemoryAllocator(platform),
         tf_allocators_(std::move(tf_allocators)) {
     for (const auto &tf_allocator : tf_allocators_) {
       per_device_allocators_.emplace_back(platform, tf_allocator.get());
     }
   }

   port::StatusOr<OwningDeviceMemory> Allocate(int device_ordinal, uint64 size,
                                               bool retry_on_failure) override {
     CHECK_LT(device_ordinal, per_device_allocators_.size());
     return per_device_allocators_[device_ordinal].Allocate(device_ordinal, size,
                                                            retry_on_failure);
   }

   port::Status Deallocate(int device_ordinal, DeviceMemoryBase mem) override {
     CHECK_LT(device_ordinal, per_device_allocators_.size());
     return per_device_allocators_[device_ordinal].Deallocate(device_ordinal,
                                                              mem);
   }

   // The Tensorflow BFC allocator used on GPU allows host-side deallocation
   // before GPU execution takes place. Tensorflow uses the ordering of the main
   // compute stream to enforce a happens-before relationship between a memory
   // allocation and code that reuses the same memory. If Tensorflow adds
   // support for multiple GPU streams or allocators with different ordering
   // requirements, this code may need to change.
   // (This attribute has no effect on CPU.)
   bool AllowsAsynchronousDeallocation() const override { return true; }

  private:
   std::vector<TfAllocatorAdapter> per_device_allocators_;
   // The wrapped TF allocators backing per_device_allocators_ (XlaAllocator does
   // not take ownership of its underlying Allocator).
   std::vector<std::unique_ptr<tensorflow::Allocator>> tf_allocators_;
 };

 }  // namespace stream_executor

 #endif  // TENSORFLOW_STREAM_EXECUTOR_TF_ALLOCATOR_ADAPTER_H_
	/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

	Licensed under the Apache License, Version 2.0 (the "License");
	you may not use this file except in compliance with the License.
	You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

	Unless required by applicable law or agreed to in writing, software
	distributed under the License is distributed on an "AS IS" BASIS,
	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	See the License for the specific language governing permissions and
	limitations under the License.
	==============================================================================*/

	#ifndef TENSORFLOW_STREAM_EXECUTOR_TF_ALLOCATOR_ADAPTER_H_
	#define TENSORFLOW_STREAM_EXECUTOR_TF_ALLOCATOR_ADAPTER_H_

	#include "tensorflow/core/framework/allocator.h"
	#include "tensorflow/stream_executor/device_memory.h"
	#include "tensorflow/stream_executor/device_memory_allocator.h"
	#include "tensorflow/stream_executor/lib/statusor.h"
	#include "tensorflow/stream_executor/platform.h"

	namespace stream_executor {

	// Adapter class that wraps a Tensorflow allocator.
	//
	// Assumes that the Tensorflow allocator permits asynchronous deallocation:
	// see comment on `AllowsAsynchronousDeallocation()`.
	class TfAllocatorAdapter : public DeviceMemoryAllocator {
	public:
	TfAllocatorAdapter(const Platform platform, tensorflow::Allocator wrapped);
	~TfAllocatorAdapter() override;

	port::StatusOr<OwningDeviceMemory> Allocate(int device_ordinal, uint64 size,
	bool retry_on_failure) override;

	port::Status Deallocate(int device_ordinal, DeviceMemoryBase mem) override;

	// The Tensorflow BFC allocator used on GPU allows host-side deallocation
	// before GPU execution takes place. Tensorflow uses the ordering of the main
	// compute stream to enforce a happens-before relationship between a memory
	// allocation and code that reuses the same memory. If Tensorflow adds
	// support for multiple GPU streams or allocators with different ordering
	// requirements, this code may need to change.
	// (This attribute has no effect on CPU.)
	bool AllowsAsynchronousDeallocation() const override { return true; }

	private:
	tensorflow::Allocator *wrapped_;
	};

	// Adapter class that wraps per-device TF allocators as an XLA allocator.
	// Assumes that the Tensorflow allocator permits asynchronous deallocation;
	// see comment on `AllowsAsynchronousDeallocation()`.
	class MultiDeviceAdapter : public DeviceMemoryAllocator {
	public:
	MultiDeviceAdapter(
	const Platform *platform,
	std::vector<std::unique_ptr<tensorflow::Allocator>> tf_allocators)
	: DeviceMemoryAllocator(platform),
	tf_allocators_(std::move(tf_allocators)) {
	for (const auto &tf_allocator : tf_allocators_) {
	per_device_allocators_.emplace_back(platform, tf_allocator.get());
	}
	}

	port::StatusOr<OwningDeviceMemory> Allocate(int device_ordinal, uint64 size,
	bool retry_on_failure) override {
	CHECK_LT(device_ordinal, per_device_allocators_.size());
	return per_device_allocators_[device_ordinal].Allocate(device_ordinal, size,
	retry_on_failure);
	}

	port::Status Deallocate(int device_ordinal, DeviceMemoryBase mem) override {
	CHECK_LT(device_ordinal, per_device_allocators_.size());
	return per_device_allocators_[device_ordinal].Deallocate(device_ordinal,
	mem);
	}

	// The Tensorflow BFC allocator used on GPU allows host-side deallocation
	// before GPU execution takes place. Tensorflow uses the ordering of the main
	// compute stream to enforce a happens-before relationship between a memory
	// allocation and code that reuses the same memory. If Tensorflow adds
	// support for multiple GPU streams or allocators with different ordering
	// requirements, this code may need to change.
	// (This attribute has no effect on CPU.)
	bool AllowsAsynchronousDeallocation() const override { return true; }

	private:
	std::vector<TfAllocatorAdapter> per_device_allocators_;
	// The wrapped TF allocators backing per_device_allocators_ (XlaAllocator does
	// not take ownership of its underlying Allocator).
	std::vector<std::unique_ptr<tensorflow::Allocator>> tf_allocators_;
	};

	} // namespace stream_executor

	#endif // TENSORFLOW_STREAM_EXECUTOR_TF_ALLOCATOR_ADAPTER_H_