// torch/csrc/cuda/comm.h - platform/external/pytorch - Git at Google

 #pragma once

 #include <ATen/ATen.h>
 #include <torch/csrc/WindowsTorchApiMacro.h>
 #include <ATen/cuda/ATenCUDAGeneral.h>
 #include <ATen/cuda/CUDAContext.h>
 #include <c10/util/Optional.h>

 #include <cstddef>
 #include <vector>

 // Declarations of the tensor communication helpers exported from
 // torch::cuda. Only the signatures are visible in this header; the notes
 // below that go beyond the signatures are marked as assumptions.
 namespace torch {
 namespace cuda {

 // A vector of tensor vectors.
 // NOTE(review): presumably the outer index selects a device and the inner
 // index selects a tensor -- confirm against the implementation.
 using tensor_list2d = std::vector<std::vector<at::Tensor>>;

 // Takes one tensor plus an integer list named `devices` and returns a list
 // of tensors.
 // NOTE(review): presumably one result per entry of `devices` -- confirm.
 TORCH_CUDA_API std::vector<at::Tensor> broadcast(
     const at::Tensor& tensor,
     at::IntArrayRef devices);

 // List-of-tensors variant of broadcast() that additionally takes a
 // `buffer_size` and returns a tensor_list2d.
 // NOTE(review): the unit of `buffer_size` (bytes vs. elements) is not
 // visible here -- confirm.
 TORCH_CUDA_API tensor_list2d broadcast_coalesced(
     at::TensorList tensors,
     at::IntArrayRef devices,
     size_t buffer_size);

 // Takes one tensor and an integer list named `devices` and returns a list
 // of tensors.
 //   chunk_sizes - optional list of int64_t sizes; defaults to c10::nullopt.
 //   dim         - defaults to 0.
 //   streams     - optional list of optional CUDA streams; defaults to
 //                 c10::nullopt.
 // NOTE(review): presumably `tensor` is split along `dim` into one chunk per
 // device -- confirm against the implementation.
 TORCH_CUDA_API std::vector<at::Tensor> scatter(
     const at::Tensor& tensor,
     at::IntArrayRef devices,
     const c10::optional<std::vector<int64_t>>& chunk_sizes = c10::nullopt,
     int64_t dim = 0,
     const c10::optional<std::vector<c10::optional<at::cuda::CUDAStream>>>&
         streams = c10::nullopt);

 // Takes a list of tensors, a dimension and an optional 32-bit
 // `destination_index`, and returns a single tensor.
 // NOTE(review): the meaning of an empty `destination_index` is not visible
 // here -- confirm.
 TORCH_CUDA_API at::Tensor gather(
     at::TensorList tensors,
     int64_t dim,
     c10::optional<int32_t> destination_index);

 } // namespace cuda
 } // namespace torch
	#pragma once

	#include <ATen/ATen.h>
	#include <torch/csrc/WindowsTorchApiMacro.h>
	#include <ATen/cuda/ATenCUDAGeneral.h>
	#include <ATen/cuda/CUDAContext.h>
	#include <c10/util/Optional.h>

	#include <cstddef>
	#include <vector>

	// Second copy of the torch::cuda communication declarations in this file.
	// These redeclare the functions already declared earlier in the file, so
	// they must stay compatible with those earlier declarations.
	namespace torch { namespace cuda {

	// A vector of tensor vectors (same alias as the earlier declaration).
	using tensor_list2d = std::vector<std::vector<at::Tensor>>;

	// Takes one tensor plus an integer list named `devices` and returns a list
	// of tensors.
	TORCH_CUDA_API std::vector<at::Tensor> broadcast(const at::Tensor& tensor, at::IntArrayRef devices);

	// List-of-tensors variant of broadcast() that additionally takes a
	// `buffer_size` and returns a tensor_list2d.
	TORCH_CUDA_API tensor_list2d broadcast_coalesced(at::TensorList tensors, at::IntArrayRef devices,
	                                   size_t buffer_size);

	// Takes one tensor and an integer list named `devices` and returns a list
	// of tensors.
	//
	// The default arguments are deliberately NOT repeated here. The earlier
	// declaration of scatter() in this file already supplies
	// `chunk_sizes = c10::nullopt`, `dim = 0` and `streams = c10::nullopt`,
	// and C++ does not allow a later declaration in the same scope to
	// redefine a default argument (not even to the same value). Callers still
	// get the defaults from the earlier declaration.
	TORCH_CUDA_API std::vector<at::Tensor> scatter(
	    const at::Tensor& tensor,
	    at::IntArrayRef devices,
	    const c10::optional<std::vector<int64_t>>& chunk_sizes,
	    int64_t dim,
	    const c10::optional<std::vector<c10::optional<at::cuda::CUDAStream>>>& streams);

	// Takes a list of tensors, a dimension and an optional 32-bit
	// `destination_index`, and returns a single tensor.
	TORCH_CUDA_API at::Tensor gather(
	    at::TensorList tensors,
	    int64_t dim,
	    c10::optional<int32_t> destination_index);
	}}