// caffe2/operators/pack_segments.cc - platform/external/pytorch - Git at Google

 #include "caffe2/operators/pack_segments.h"

 namespace caffe2 {

 // Dispatches on the runtime element type of DATA over the supported data
 // types listed below; T (the LENGTHS element type) is forwarded unchanged.
 // Presumably this resolves to DoRunWithType2<T, Data_T> -- confirm against
 // DispatchHelper.
 template <>
 template <typename T>
 bool PackSegmentsOp<CPUContext>::DoRunWithType() {
   using SupportedDataTypes =
       TensorTypes2<char, int32_t, int64_t, float, std::string>;
   const auto& data = Input(DATA);
   return DispatchHelper<SupportedDataTypes, T>::call(this, data);
 }

 /**
  * Packs the rows of DATA into one padded segment per entry of LENGTHS.
  *
  * T is the element type of LENGTHS. Data_T is not referenced in the body:
  * rows are moved as raw bytes using the runtime dtype of DATA.
  *
  * Output 0 has shape [lengths.numel(), max_length, data.sizes()[1:]...],
  * where max_length is the max_length_ member unless that is -1, in which
  * case the largest entry of LENGTHS is used. Segments longer than max_length
  * are truncated. Unused slots are filled with padding_ for float data and
  * with 0 for int32/int64/char data; other element types are not explicitly
  * padded. When return_presence_mask_ is set, output 1 is a
  * [lengths.numel(), max_length] bool tensor that is true exactly where a row
  * was copied.
  *
  * Enforces that DATA is at least 1-D, LENGTHS is 1-D, every length is
  * non-negative and the lengths sum to data.size(0).
  *
  * @return true on success; violated preconditions go through CAFFE_ENFORCE.
  */
 template <>
 template <typename T, typename Data_T>
 bool PackSegmentsOp<CPUContext>::DoRunWithType2() {
   const auto& data = Input(DATA);
   const auto& lengths = Input(LENGTHS);

   Tensor* presence_mask = nullptr;
   if (return_presence_mask_) {
     presence_mask = Output(1);
   }

   CAFFE_ENFORCE_GE(data.dim(), 1, "DATA should be at least 1-D");
   CAFFE_ENFORCE_EQ(lengths.dim(), 1, "LENGTH should be 1-D");

   // Find the length of the longest sequence and the total row count.
   const T* l = lengths.template data<T>();
   const int64_t num_segments = lengths.size(0);
   T max_length = 0;
   int64_t total_length = 0;
   // The counter is int64_t (not T) so it matches the type of the size it is
   // compared against even when LENGTHS holds a narrower integer type.
   for (int64_t i = 0; i < num_segments; ++i) {
     // A negative length would later be used as a copy/memset element count.
     CAFFE_ENFORCE_GE(
         static_cast<int64_t>(l[i]),
         0,
         "LENGTH entries should be non-negative");
     max_length = std::max(max_length, l[i]);
     total_length += l[i];
   }
   if (max_length_ != -1) {
     max_length = max_length_;
   }

   // Total lengths must be the same as data.dims(0)
   CAFFE_ENFORCE_EQ(
       data.size(0),
       total_length,
       " PackSegments requires that the sum of the lengths ",
       total_length,
       " is equal to the first data dimension ",
       data.size(0));

   auto shape =
       data.sizes().vec(); // Shape of output is batch_size x max_len x ...
   shape[0] = max_length;
   shape.insert(shape.begin(), lengths.numel());
   auto* output = Output(0, shape, at::dtype(data.dtype()));

   // create output tensor
   auto* out = static_cast<char*>(output->raw_mutable_data(data.dtype()));

   bool* presence_mask_data = nullptr;
   if (return_presence_mask_) {
     // Shape of presence is batch_size x max_len
     std::vector<int64_t> presence_shape{lengths.numel(), max_length};
     // NOLINTNEXTLINE(clang-analyzer-core.CallAndMessage)
     presence_mask->Resize(presence_shape);
     presence_mask_data = presence_mask->template mutable_data<bool>();
   }

   // Do padding
   // Ignore string since math::Set does not support string.
   // For all other cases, the behavior should mimic the GPU version where the
   // padding is always zero for types other than float.
   // The padding runs before the empty-DATA early return below: with an
   // explicit max_length the output can be non-empty even though DATA has no
   // rows, and it must not be handed back uninitialized.
   // TODO(xinyizhang): potentially restructure to clean up the logic here.
   if (output->numel() > 0) {
     if (output->template IsType<float>()) {
       math::Set<float, CPUContext>(
           output->numel(),
           padding_,
           output->template mutable_data<float>(),
           &context_);
     } else if (output->template IsType<int32_t>()) {
       math::Set<int32_t, CPUContext>(
           output->numel(),
           0,
           output->template mutable_data<int32_t>(),
           &context_);
     } else if (output->template IsType<int64_t>()) {
       math::Set<int64_t, CPUContext>(
           output->numel(),
           0,
           output->template mutable_data<int64_t>(),
           &context_);
     } else if (output->template IsType<char>()) {
       math::Set<char, CPUContext>(
           output->numel(),
           0,
           output->template mutable_data<char>(),
           &context_);
     }
   }
   // Skip the call for an empty mask so memset never sees a possibly-null
   // pointer.
   if (return_presence_mask_ && presence_mask->numel() > 0) {
     // NOLINTNEXTLINE(clang-analyzer-core.CallAndMessage)
     memset(presence_mask_data, static_cast<int>(false), presence_mask->numel());
   }

   if (!data.size(0)) {
     // Nothing to copy: return the (already padded) output.
     return true;
   }

   const auto block_size = data.size_from_dim(1);
   const auto block_bytesize = data.itemsize() * block_size;
   const auto* d = static_cast<const char*>(data.raw_data());
   int64_t start = 0;
   for (int64_t i = 0; i < num_segments; ++i) {
     // Copy at most max_length rows of this segment ...
     auto len = l[i] <= max_length ? l[i] : max_length;
     context_.CopyItemsSameDevice(
         data.dtype(),
         len * block_size,
         d + block_bytesize * start,
         out + block_bytesize * max_length * i);
     if (return_presence_mask_) {
       // NOLINTNEXTLINE(clang-analyzer-unix.cstring.NullArg)
       memset(presence_mask_data + max_length * i, static_cast<int>(true), len);
     }
     // ... but always advance past the whole segment in DATA.
     start += l[i];
   }

   return true;
 }

 // Dispatches on the runtime element type of DATA over the supported data
 // types listed below; T (the LENGTHS element type) is forwarded unchanged.
 // Presumably this resolves to DoRunWithType2<T, Data_T> -- confirm against
 // DispatchHelper.
 template <>
 template <typename T>
 bool UnpackSegmentsOp<CPUContext>::DoRunWithType() {
   using SupportedDataTypes =
       TensorTypes2<char, int32_t, int64_t, float, std::string>;
   const auto& data = Input(DATA);
   return DispatchHelper<SupportedDataTypes, T>::call(this, data);
 }

 /**
  * Concatenates the leading rows of every packed segment into one tensor.
  *
  * T is the element type of LENGTHS. Data_T is not referenced in the body:
  * rows are moved as raw bytes using the runtime dtype of DATA.
  *
  * For segment i the first lengths[i] rows of data[i] are copied; when
  * max_length_ is not -1, lengths above max_length_ are clamped to it. The
  * output has shape [sum of copied lengths, data.sizes()[2:]...].
  *
  * Enforces that DATA is at least 2-D, LENGTHS is 1-D with data.size(0)
  * entries, max_length_ (if given) equals data.size(1), and every length is
  * non-negative and, after clamping, no larger than data.size(1) so that no
  * copy reads past the end of its segment.
  *
  * @return true on success; violated preconditions go through CAFFE_ENFORCE.
  */
 template <>
 template <typename T, typename Data_T>
 bool UnpackSegmentsOp<CPUContext>::DoRunWithType2() {
   const auto& data = Input(DATA);
   const auto& lengths = Input(LENGTHS);
   auto* output = Output(0);

   CAFFE_ENFORCE_GE(data.dim(), 2, "DATA should be at least 2-D");
   CAFFE_ENFORCE_EQ(lengths.dim(), 1, "LENGTH should be 1-D");
   if (max_length_ != -1) {
     CAFFE_ENFORCE_EQ(
         max_length_,
         data.size(1),
         "max_length should be equal to the second dimension of the packed segments");
   }

   auto shape = data.sizes().vec();
   CAFFE_ENFORCE_EQ(
       shape[0], lengths.size(0), "LENGTH should match DATA in dimension 0");

   const T* l = lengths.template data<T>();
   const int64_t num_segments = lengths.size(0);
   const int64_t packed_length = data.size(1);

   // Number of rows actually taken from segment i (clamped to max_length_).
   const auto segment_len = [&](int64_t i) -> int64_t {
     const int64_t len = l[i];
     return (max_length_ != -1 && len > max_length_) ? max_length_ : len;
   };

   int64_t total_l = 0;
   for (int64_t i = 0; i < num_segments; ++i) {
     const int64_t len = segment_len(i);
     // Both checks protect the raw byte copies below from negative counts and
     // from reading beyond the rows that segment i actually holds.
     CAFFE_ENFORCE_GE(len, 0, "LENGTH entries should be non-negative");
     CAFFE_ENFORCE_GE(
         packed_length,
         len,
         "LENGTH entries should not exceed the second dimension of the packed segments");
     total_l += len;
   }

   shape.erase(shape.begin());
   shape[0] = total_l;
   output->Resize(shape);
   // create output tensor
   auto* out = static_cast<char*>(output->raw_mutable_data(data.dtype()));
   if (!(data.size(0) && data.size(1))) {
     // No packed rows exist, so there is nothing to copy.
     return true;
   }
   const auto block_size = data.size_from_dim(2);
   const auto block_bytesize = data.itemsize() * block_size;
   const auto* d = static_cast<const char*>(data.raw_data());
   int64_t start = 0;
   for (int64_t i = 0; i < num_segments; ++i) {
     const int64_t len = segment_len(i);
     context_.CopyItemsSameDevice(
         data.dtype(),
         len * block_size,
         d + block_bytesize * data.size(1) * i,
         out + block_bytesize * start);
     start += len;
   }
   return true;
 }

 // Associate the operator names PackSegments / UnpackSegments with their
 // CPUContext implementations.
 REGISTER_CPU_OPERATOR(PackSegments, PackSegmentsOp<CPUContext>);
 REGISTER_CPU_OPERATOR(UnpackSegments, UnpackSegmentsOp<CPUContext>);

 // Schema for PackSegments: two inputs (lengths, tensor) and one or two
 // outputs (packed_tensor and, optionally, presence_mask).
 // Multi-line descriptions use adjacent string literals rather than a
 // backslash line continuation, which would embed the source indentation in
 // the documentation text.
 OPERATOR_SCHEMA(PackSegments)
     .NumInputs(2)
     .NumOutputs(1, 2)
     .SetDoc(
         "Map N dim tensor to N+1 dim based on length blob. Sequences that "
         "are shorter than the longest sequence are padded with zeros.")
     .Input(
         0,
         "lengths",
         "1-d int/long tensor contains the length in each of the output.")
     .Input(1, "tensor", "N dim Tensor.")
     .Output(
         0,
         "packed_tensor",
         "N + 1 dim Tensor "
         "where dim(1) is the max length"
         ", dim(0) is the batch size.")
     .Output(
         1,
         "presence_mask",
         "2 dim boolean tensor"
         ", false where packed_tensor is padded, true otherwise.")
     .Arg("max_length", "The pre-defined max_length for the packed segments")
     .Arg(
         "pad_minf",
         "Padding number in the packed segments. Use true to pad "
         "-infinity, otherwise pad zeros")
     .Arg(
         "return_presence_mask",
         "bool whether to return presence mask, false by default");
 // Schema for UnpackSegments: two inputs (lengths, tensor) and exactly one
 // output.
 // NOTE(review): the output is named "packed_tensor" although it is described
 // as the N dim (unpacked) tensor -- confirm whether the name is intentional.
 OPERATOR_SCHEMA(UnpackSegments)
     .NumInputs(2)
     .NumOutputs(1)
     .SetDoc("Map N+1 dim tensor to N dim based on length blob")
     .Input(
         0,
         "lengths",
         "1-d int/long tensor contains the length in each of the input.")
     .Input(1, "tensor", "N+1 dim Tensor.")
     .Output(0, "packed_tensor", "N dim Tensor")
     .Arg("max_length", "The pre-defined max_length for the packed segments");

 // Gradient maker for PackSegments: emits a single UnpackSegments op that
 // consumes I(0) and GO(0) and produces GI(1).
 class GetPackSegmentsGradient : public GradientMakerBase {
   using GradientMakerBase::GradientMakerBase;
   vector<OperatorDef> GetGradientDefs() override {
     const vector<string> grad_op_inputs{I(0), GO(0)};
     const vector<string> grad_op_outputs{GI(1)};
     return SingleGradientDef(
         "UnpackSegments", "", grad_op_inputs, grad_op_outputs);
   }
 };
 REGISTER_GRADIENT(PackSegments, GetPackSegmentsGradient);

 // Gradient maker for UnpackSegments: emits a single PackSegments op that
 // consumes I(0) and GO(0) and produces GI(1).
 class GetUnpackSegmentsGradient : public GradientMakerBase {
   using GradientMakerBase::GradientMakerBase;
   vector<OperatorDef> GetGradientDefs() override {
     const vector<string> grad_op_inputs{I(0), GO(0)};
     const vector<string> grad_op_outputs{GI(1)};
     return SingleGradientDef(
         "PackSegments", "", grad_op_inputs, grad_op_outputs);
   }
 };
 REGISTER_GRADIENT(UnpackSegments, GetUnpackSegmentsGradient);
 } // namespace caffe2

 // Exports the CPU PackSegments operator with the function schema below.
 // The schema string declares the defaults max_length = -1, pad_minf = False
 // and return_presence_mask = False, and two tensor results.
 // NOTE(review): presumably this makes the op callable through the c10
 // dispatcher -- confirm against the macro definition.
 C10_EXPORT_CAFFE2_OP_TO_C10_CPU(
   PackSegments,
   "_caffe2::PackSegments("
     "Tensor lengths, "
     "Tensor tensor, "
     "int max_length = -1, "
     "bool pad_minf = False, "
     "bool return_presence_mask = False"
   ") -> (Tensor packed_tensor, Tensor presence_mask)",
   caffe2::PackSegmentsOp<caffe2::CPUContext>);

 // Exports the CPU UnpackSegments operator with the function schema below.
 // The schema string declares the default max_length = -1 and a single tensor
 // result.
 // NOTE(review): presumably this makes the op callable through the c10
 // dispatcher -- confirm against the macro definition.
 C10_EXPORT_CAFFE2_OP_TO_C10_CPU(
   UnpackSegments,
   "_caffe2::UnpackSegments("
     "Tensor lengths, "
     "Tensor tensor, "
     "int max_length = -1"
   ") -> (Tensor packed_tensor)",
   caffe2::UnpackSegmentsOp<caffe2::CPUContext>);
	#include "caffe2/operators/pack_segments.h"

	namespace caffe2 {

	// Dispatches on the runtime element type of DATA over the supported data
	// types listed below; T (the LENGTHS element type) is forwarded unchanged.
	// Presumably this resolves to DoRunWithType2<T, Data_T> -- confirm against
	// DispatchHelper.
	template <>
	template <typename T>
	bool PackSegmentsOp<CPUContext>::DoRunWithType() {
	  using SupportedDataTypes =
	      TensorTypes2<char, int32_t, int64_t, float, std::string>;
	  const auto& data = Input(DATA);
	  return DispatchHelper<SupportedDataTypes, T>::call(this, data);
	}

	/**
	 * Packs the rows of DATA into one padded segment per entry of LENGTHS.
	 *
	 * T is the element type of LENGTHS. Data_T is not referenced in the body:
	 * rows are moved as raw bytes using the runtime dtype of DATA.
	 *
	 * Output 0 has shape [lengths.numel(), max_length, data.sizes()[1:]...],
	 * where max_length is the max_length_ member unless that is -1, in which
	 * case the largest entry of LENGTHS is used. Segments longer than max_length
	 * are truncated. Unused slots are filled with padding_ for float data and
	 * with 0 for int32/int64/char data; other element types are not explicitly
	 * padded. When return_presence_mask_ is set, output 1 is a
	 * [lengths.numel(), max_length] bool tensor that is true exactly where a row
	 * was copied.
	 *
	 * Enforces that DATA is at least 1-D, LENGTHS is 1-D, every length is
	 * non-negative and the lengths sum to data.size(0).
	 *
	 * @return true on success; violated preconditions go through CAFFE_ENFORCE.
	 */
	template <>
	template <typename T, typename Data_T>
	bool PackSegmentsOp<CPUContext>::DoRunWithType2() {
	  const auto& data = Input(DATA);
	  const auto& lengths = Input(LENGTHS);

	  Tensor* presence_mask = nullptr;
	  if (return_presence_mask_) {
	    presence_mask = Output(1);
	  }

	  CAFFE_ENFORCE_GE(data.dim(), 1, "DATA should be at least 1-D");
	  CAFFE_ENFORCE_EQ(lengths.dim(), 1, "LENGTH should be 1-D");

	  // Find the length of the longest sequence and the total row count.
	  const T* l = lengths.template data<T>();
	  const int64_t num_segments = lengths.size(0);
	  T max_length = 0;
	  int64_t total_length = 0;
	  // The counter is int64_t (not T) so it matches the type of the size it is
	  // compared against even when LENGTHS holds a narrower integer type.
	  for (int64_t i = 0; i < num_segments; ++i) {
	    // A negative length would later be used as a copy/memset element count.
	    CAFFE_ENFORCE_GE(
	        static_cast<int64_t>(l[i]),
	        0,
	        "LENGTH entries should be non-negative");
	    max_length = std::max(max_length, l[i]);
	    total_length += l[i];
	  }
	  if (max_length_ != -1) {
	    max_length = max_length_;
	  }

	  // Total lengths must be the same as data.dims(0)
	  CAFFE_ENFORCE_EQ(
	      data.size(0),
	      total_length,
	      " PackSegments requires that the sum of the lengths ",
	      total_length,
	      " is equal to the first data dimension ",
	      data.size(0));

	  auto shape =
	      data.sizes().vec(); // Shape of output is batch_size x max_len x ...
	  shape[0] = max_length;
	  shape.insert(shape.begin(), lengths.numel());
	  auto* output = Output(0, shape, at::dtype(data.dtype()));

	  // create output tensor
	  auto* out = static_cast<char*>(output->raw_mutable_data(data.dtype()));

	  bool* presence_mask_data = nullptr;
	  if (return_presence_mask_) {
	    // Shape of presence is batch_size x max_len
	    std::vector<int64_t> presence_shape{lengths.numel(), max_length};
	    // NOLINTNEXTLINE(clang-analyzer-core.CallAndMessage)
	    presence_mask->Resize(presence_shape);
	    presence_mask_data = presence_mask->template mutable_data<bool>();
	  }

	  // Do padding
	  // Ignore string since math::Set does not support string.
	  // For all other cases, the behavior should mimic the GPU version where the
	  // padding is always zero for types other than float.
	  // The padding runs before the empty-DATA early return below: with an
	  // explicit max_length the output can be non-empty even though DATA has no
	  // rows, and it must not be handed back uninitialized.
	  // TODO(xinyizhang): potentially restructure to clean up the logic here.
	  if (output->numel() > 0) {
	    if (output->template IsType<float>()) {
	      math::Set<float, CPUContext>(
	          output->numel(),
	          padding_,
	          output->template mutable_data<float>(),
	          &context_);
	    } else if (output->template IsType<int32_t>()) {
	      math::Set<int32_t, CPUContext>(
	          output->numel(),
	          0,
	          output->template mutable_data<int32_t>(),
	          &context_);
	    } else if (output->template IsType<int64_t>()) {
	      math::Set<int64_t, CPUContext>(
	          output->numel(),
	          0,
	          output->template mutable_data<int64_t>(),
	          &context_);
	    } else if (output->template IsType<char>()) {
	      math::Set<char, CPUContext>(
	          output->numel(),
	          0,
	          output->template mutable_data<char>(),
	          &context_);
	    }
	  }
	  // Skip the call for an empty mask so memset never sees a possibly-null
	  // pointer.
	  if (return_presence_mask_ && presence_mask->numel() > 0) {
	    // NOLINTNEXTLINE(clang-analyzer-core.CallAndMessage)
	    memset(presence_mask_data, static_cast<int>(false), presence_mask->numel());
	  }

	  if (!data.size(0)) {
	    // Nothing to copy: return the (already padded) output.
	    return true;
	  }

	  const auto block_size = data.size_from_dim(1);
	  const auto block_bytesize = data.itemsize() * block_size;
	  const auto* d = static_cast<const char*>(data.raw_data());
	  int64_t start = 0;
	  for (int64_t i = 0; i < num_segments; ++i) {
	    // Copy at most max_length rows of this segment ...
	    auto len = l[i] <= max_length ? l[i] : max_length;
	    context_.CopyItemsSameDevice(
	        data.dtype(),
	        len * block_size,
	        d + block_bytesize * start,
	        out + block_bytesize * max_length * i);
	    if (return_presence_mask_) {
	      // NOLINTNEXTLINE(clang-analyzer-unix.cstring.NullArg)
	      memset(presence_mask_data + max_length * i, static_cast<int>(true), len);
	    }
	    // ... but always advance past the whole segment in DATA.
	    start += l[i];
	  }

	  return true;
	}

	// Dispatches on the runtime element type of DATA over the supported data
	// types listed below; T (the LENGTHS element type) is forwarded unchanged.
	// Presumably this resolves to DoRunWithType2<T, Data_T> -- confirm against
	// DispatchHelper.
	template <>
	template <typename T>
	bool UnpackSegmentsOp<CPUContext>::DoRunWithType() {
	  using SupportedDataTypes =
	      TensorTypes2<char, int32_t, int64_t, float, std::string>;
	  const auto& data = Input(DATA);
	  return DispatchHelper<SupportedDataTypes, T>::call(this, data);
	}

	/**
	 * Concatenates the leading rows of every packed segment into one tensor.
	 *
	 * T is the element type of LENGTHS. Data_T is not referenced in the body:
	 * rows are moved as raw bytes using the runtime dtype of DATA.
	 *
	 * For segment i the first lengths[i] rows of data[i] are copied; when
	 * max_length_ is not -1, lengths above max_length_ are clamped to it. The
	 * output has shape [sum of copied lengths, data.sizes()[2:]...].
	 *
	 * Enforces that DATA is at least 2-D, LENGTHS is 1-D with data.size(0)
	 * entries, max_length_ (if given) equals data.size(1), and every length is
	 * non-negative and, after clamping, no larger than data.size(1) so that no
	 * copy reads past the end of its segment.
	 *
	 * @return true on success; violated preconditions go through CAFFE_ENFORCE.
	 */
	template <>
	template <typename T, typename Data_T>
	bool UnpackSegmentsOp<CPUContext>::DoRunWithType2() {
	  const auto& data = Input(DATA);
	  const auto& lengths = Input(LENGTHS);
	  auto* output = Output(0);

	  CAFFE_ENFORCE_GE(data.dim(), 2, "DATA should be at least 2-D");
	  CAFFE_ENFORCE_EQ(lengths.dim(), 1, "LENGTH should be 1-D");
	  if (max_length_ != -1) {
	    CAFFE_ENFORCE_EQ(
	        max_length_,
	        data.size(1),
	        "max_length should be equal to the second dimension of the packed segments");
	  }

	  auto shape = data.sizes().vec();
	  CAFFE_ENFORCE_EQ(
	      shape[0], lengths.size(0), "LENGTH should match DATA in dimension 0");

	  const T* l = lengths.template data<T>();
	  const int64_t num_segments = lengths.size(0);
	  const int64_t packed_length = data.size(1);

	  // Number of rows actually taken from segment i (clamped to max_length_).
	  const auto segment_len = [&](int64_t i) -> int64_t {
	    const int64_t len = l[i];
	    return (max_length_ != -1 && len > max_length_) ? max_length_ : len;
	  };

	  int64_t total_l = 0;
	  for (int64_t i = 0; i < num_segments; ++i) {
	    const int64_t len = segment_len(i);
	    // Both checks protect the raw byte copies below from negative counts and
	    // from reading beyond the rows that segment i actually holds.
	    CAFFE_ENFORCE_GE(len, 0, "LENGTH entries should be non-negative");
	    CAFFE_ENFORCE_GE(
	        packed_length,
	        len,
	        "LENGTH entries should not exceed the second dimension of the packed segments");
	    total_l += len;
	  }

	  shape.erase(shape.begin());
	  shape[0] = total_l;
	  output->Resize(shape);
	  // create output tensor
	  auto* out = static_cast<char*>(output->raw_mutable_data(data.dtype()));
	  if (!(data.size(0) && data.size(1))) {
	    // No packed rows exist, so there is nothing to copy.
	    return true;
	  }
	  const auto block_size = data.size_from_dim(2);
	  const auto block_bytesize = data.itemsize() * block_size;
	  const auto* d = static_cast<const char*>(data.raw_data());
	  int64_t start = 0;
	  for (int64_t i = 0; i < num_segments; ++i) {
	    const int64_t len = segment_len(i);
	    context_.CopyItemsSameDevice(
	        data.dtype(),
	        len * block_size,
	        d + block_bytesize * data.size(1) * i,
	        out + block_bytesize * start);
	    start += len;
	  }
	  return true;
	}

	// Associate the operator names PackSegments / UnpackSegments with their
	// CPUContext implementations.
	REGISTER_CPU_OPERATOR(PackSegments, PackSegmentsOp<CPUContext>);
	REGISTER_CPU_OPERATOR(UnpackSegments, UnpackSegmentsOp<CPUContext>);

	// Schema for PackSegments: two inputs (lengths, tensor) and one or two
	// outputs (packed_tensor and, optionally, presence_mask).
	// Multi-line descriptions use adjacent string literals rather than a
	// backslash line continuation, which would embed the source indentation in
	// the documentation text.
	OPERATOR_SCHEMA(PackSegments)
	    .NumInputs(2)
	    .NumOutputs(1, 2)
	    .SetDoc(
	        "Map N dim tensor to N+1 dim based on length blob. Sequences that "
	        "are shorter than the longest sequence are padded with zeros.")
	    .Input(
	        0,
	        "lengths",
	        "1-d int/long tensor contains the length in each of the output.")
	    .Input(1, "tensor", "N dim Tensor.")
	    .Output(
	        0,
	        "packed_tensor",
	        "N + 1 dim Tensor "
	        "where dim(1) is the max length"
	        ", dim(0) is the batch size.")
	    .Output(
	        1,
	        "presence_mask",
	        "2 dim boolean tensor"
	        ", false where packed_tensor is padded, true otherwise.")
	    .Arg("max_length", "The pre-defined max_length for the packed segments")
	    .Arg(
	        "pad_minf",
	        "Padding number in the packed segments. Use true to pad "
	        "-infinity, otherwise pad zeros")
	    .Arg(
	        "return_presence_mask",
	        "bool whether to return presence mask, false by default");
	// Schema for UnpackSegments: two inputs (lengths, tensor) and exactly one
	// output.
	// NOTE(review): the output is named "packed_tensor" although it is described
	// as the N dim (unpacked) tensor -- confirm whether the name is intentional.
	OPERATOR_SCHEMA(UnpackSegments)
	.NumInputs(2)
	.NumOutputs(1)
	.SetDoc("Map N+1 dim tensor to N dim based on length blob")
	.Input(
	0,
	"lengths",
	"1-d int/long tensor contains the length in each of the input.")
	.Input(1, "tensor", "N+1 dim Tensor.")
	.Output(0, "packed_tensor", "N dim Tensor")
	.Arg("max_length", "The pre-defined max_length for the packed segments");

	// Gradient maker for PackSegments: emits a single UnpackSegments op that
	// consumes I(0) and GO(0) and produces GI(1).
	class GetPackSegmentsGradient : public GradientMakerBase {
	  using GradientMakerBase::GradientMakerBase;
	  vector<OperatorDef> GetGradientDefs() override {
	    const vector<string> grad_op_inputs{I(0), GO(0)};
	    const vector<string> grad_op_outputs{GI(1)};
	    return SingleGradientDef(
	        "UnpackSegments", "", grad_op_inputs, grad_op_outputs);
	  }
	};
	REGISTER_GRADIENT(PackSegments, GetPackSegmentsGradient);

	// Gradient maker for UnpackSegments: emits a single PackSegments op that
	// consumes I(0) and GO(0) and produces GI(1).
	class GetUnpackSegmentsGradient : public GradientMakerBase {
	  using GradientMakerBase::GradientMakerBase;
	  vector<OperatorDef> GetGradientDefs() override {
	    const vector<string> grad_op_inputs{I(0), GO(0)};
	    const vector<string> grad_op_outputs{GI(1)};
	    return SingleGradientDef(
	        "PackSegments", "", grad_op_inputs, grad_op_outputs);
	  }
	};
	REGISTER_GRADIENT(UnpackSegments, GetUnpackSegmentsGradient);
	} // namespace caffe2

	// Exports the CPU PackSegments operator with the function schema below.
	// The schema string declares the defaults max_length = -1, pad_minf = False
	// and return_presence_mask = False, and two tensor results.
	// NOTE(review): presumably this makes the op callable through the c10
	// dispatcher -- confirm against the macro definition.
	C10_EXPORT_CAFFE2_OP_TO_C10_CPU(
	PackSegments,
	"_caffe2::PackSegments("
	"Tensor lengths, "
	"Tensor tensor, "
	"int max_length = -1, "
	"bool pad_minf = False, "
	"bool return_presence_mask = False"
	") -> (Tensor packed_tensor, Tensor presence_mask)",
	caffe2::PackSegmentsOp<caffe2::CPUContext>);

	// Exports the CPU UnpackSegments operator with the function schema below.
	// The schema string declares the default max_length = -1 and a single tensor
	// result.
	// NOTE(review): presumably this makes the op callable through the c10
	// dispatcher -- confirm against the macro definition.
	C10_EXPORT_CAFFE2_OP_TO_C10_CPU(
	UnpackSegments,
	"_caffe2::UnpackSegments("
	"Tensor lengths, "
	"Tensor tensor, "
	"int max_length = -1"
	") -> (Tensor packed_tensor)",
	caffe2::UnpackSegmentsOp<caffe2::CPUContext>);