/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <algorithm>
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/variant.h"
#include "tensorflow/core/framework/variant_encode_decode.h"
#include "tensorflow/core/lib/gtl/inlined_vector.h"
namespace tensorflow {
namespace {
class DeserializeSparseOp : public OpKernel {
public:
explicit DeserializeSparseOp(OpKernelConstruction* context)
: OpKernel(context) {
OP_REQUIRES_OK(context, context->GetAttr("dtype", &dtype_));
}
void Compute(OpKernelContext* context) override {
const Tensor& input = context->input(0);
OP_REQUIRES(
context, input.dims() > 0,
errors::InvalidArgument("Serialized sparse should have non-zero rank ",
input.shape().DebugString()));
OP_REQUIRES(context, input.shape().dim_size(input.dims() - 1) == 3,
errors::InvalidArgument(
"Serialized sparse should have 3 as the last dimension ",
input.shape().DebugString()));
// `input_dims_to_stack` is the number of dimensions that will be added to
// each of the elements before they are concatenated into the output.
const int64 input_dims_to_stack = input.dims() - 1;
int num_sparse_tensors = 1;
for (int i = 0; i < input_dims_to_stack; ++i) {
num_sparse_tensors *= input.shape().dim_size(i);
}
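// For example, an input of shape (2, 3, 3) yields input_dims_to_stack == 2
// and num_sparse_tensors == 2 * 3 == 6.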
if (num_sparse_tensors == 1 && input_dims_to_stack == 0) {
// Special case with a single sparse tensor, and no dimensions to add
// to the output indices. We can return the boxed tensors directly (after
// validating them).
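// For example, an input of shape (3), holding a single serialized
// (indices, values, shape) triple, takes this path: input_dims_to_stack == 0
// and num_sparse_tensors == 1, so the three unboxed tensors are validated and
// forwarded to the outputs unchanged.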
const Tensor* output_indices;
const Tensor* output_values;
const Tensor* output_shape;
const auto& input_as_vec = input.vec<Variant>();
int64 total_non_zeros;
OP_REQUIRES_OK(context, GetAndValidateSparseTensorShape(
input_as_vec(1), input_as_vec(2), 0,
&output_shape, &total_non_zeros));
OP_REQUIRES_OK(context, GetAndValidateSparseTensorIndicesAndValues(
input_as_vec(0), input_as_vec(1), 0,
output_shape->NumElements(), &output_indices,
&output_values));
context->set_output(0, *output_indices);
context->set_output(1, *output_values);
context->set_output(2, *output_shape);
return;
}
OP_REQUIRES(
context, num_sparse_tensors > 0,
errors::InvalidArgument(
"Serialized sparse should have at least 1 serialized tensor, "
"but has a zero dimension ",
input.shape().DebugString()));
const auto& input_as_matrix = input.flat_inner_dims<Variant, 2>();
// Compute the output "dense shape" of, and the number of non-zero elements
// in, the stacked sparse tensors. Given an input of shape (S_0, ...,
// S_{input_dims_to_stack-1}, 3), and an element of dense shape (E_0, ...,
// E_n), the output dense shape will be (S_0, ...,
// S_{input_dims_to_stack-1}, E_0, ..., E_n).
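// For example, stacking a (2, 3) grid of elements that each have dense shape
// (4, 5) produces an output dense shape of (2, 3, 4, 5), i.e.
// output_shape->NumElements() == 4.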
Tensor* output_shape;
int64 total_non_zeros = 0;
// Allocate and build the initial output shape based on the element shape of
// the 0th sparse tensor in the input.
//
// NOTE(mrry): We define `element_shape` as a `const Tensor*` rather than a
// `Tensor` to avoid the overhead of allocating and deallocating a `Tensor`
// on the stack. While the per-`Tensor` cost is small, this op can unbox a
// large number of tensors (3 per batch element) and these fixed overheads
// dominate when the number of non-zeros per element is small.
const Tensor* element_shape;
OP_REQUIRES_OK(context, GetAndValidateSparseTensorShape(
input_as_matrix(0, 1), input_as_matrix(0, 2), 0,
&element_shape, &total_non_zeros));
OP_REQUIRES_OK(context,
context->allocate_output(
2, {input_dims_to_stack + element_shape->NumElements()},
&output_shape));
const auto element_shape_vec = element_shape->vec<int64>();
auto output_shape_vec = output_shape->vec<int64>();
for (int64 j = 0; j < input_dims_to_stack; ++j) {
output_shape_vec(j) = input.dim_size(j);
}
for (int64 j = 0; j < element_shape->NumElements(); ++j) {
output_shape_vec(j + input_dims_to_stack) = element_shape_vec(j);
}
// Accumulate the number of non-zero elements from the remaining sparse
// tensors, and validate that they have compatible dense shapes.
//
// NOTE(mrry): For compatibility with the implementation of
// DeserializeManySparse, and with the many ops that generate SparseTensors
// to be batched without a fixed dense_shape (e.g.
// `tf.parse_single_example()`), we compute the maximum in each dimension to
// find the smallest dense_shape that bounds all of the input SparseTensors.
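// For example, if SparseTensor[0] has dense shape [3, 5] and SparseTensor[1]
// has dense shape [4, 2], the element part of the output dense shape becomes
// [4, 5], which bounds both inputs.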
for (int i = 1; i < num_sparse_tensors; ++i) {
int64 num_non_zeros;
OP_REQUIRES_OK(context, GetAndValidateSparseTensorShape(
input_as_matrix(i, 1), input_as_matrix(i, 2),
i, &element_shape, &num_non_zeros));
total_non_zeros += num_non_zeros;
OP_REQUIRES(
context,
output_shape->NumElements() - input_dims_to_stack ==
element_shape->NumElements(),
errors::InvalidArgument(
"Inconsistent shape across SparseTensors: rank prior to "
"SparseTensor[",
i, "] was: ", output_shape->NumElements() - input_dims_to_stack,
" but rank of SparseTensor[", i,
"] is: ", element_shape->NumElements()));
const auto element_shape_vec = element_shape->vec<int64>();
for (int j = 0; j < element_shape->NumElements(); ++j) {
output_shape_vec(j + input_dims_to_stack) = std::max(
output_shape_vec(j + input_dims_to_stack), element_shape_vec(j));
}
}
// Compute the output "indices" matrix and "values" vector.
Tensor* output_indices;
Tensor* output_values;
const int output_rank = output_shape->NumElements();
OP_REQUIRES_OK(context,
context->allocate_output(
0, {static_cast<int64>(total_non_zeros), output_rank},
&output_indices));
OP_REQUIRES_OK(
context, context->allocate_output(
1, {static_cast<int64>(total_non_zeros)}, &output_values));
// The bulk of the work in this method involves building the output indices
// in a tight loop. For cache friendliness, we generate the indices in the
// order that they will be laid out in memory, and we use raw pointers into
// the underlying index buffer, instead of Eigen element/slice indexing
// methods, to minimize the amount of work in that tight loop.
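// `output_indices` is a row-major [total_non_zeros, output_rank] matrix, so
// writing rows consecutively through `output_indices_data` visits the
// underlying buffer strictly in order.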
int64* output_indices_data = output_indices->matrix<int64>().data();
size_t current_row = 0;
for (int i = 0; i < num_sparse_tensors; ++i) {
const Tensor* element_indices;
const Tensor* element_values;
OP_REQUIRES_OK(context, this->GetAndValidateSparseTensorIndicesAndValues(
input_as_matrix(i, 0), input_as_matrix(i, 1),
i, output_rank - input_dims_to_stack,
&element_indices, &element_values));
const size_t num_index_rows = element_values->NumElements();
// An empty sparse tensor in the input will generate no data
// in the output. We short-circuit the rest of the iteration to avoid
// triggering assertions in Eigen when manipulating empty tensors (or
// slices of tensors).
if (num_index_rows == 0) continue;
const size_t start_row = current_row;
const size_t next_start_row = current_row + num_index_rows;
// NOTE(mrry): If the element is a scalar SparseTensor,
// `element_indices` will be an empty tensor, and this pointer will not
// be valid. However, we will not dereference the pointer in that case,
// because `input_dims_to_stack == output_rank`.
const int64* element_indices_data =
element_indices->matrix<int64>().data();
// Build the submatrix of `output_indices` for the i^th sparse tensor
// in the input.
//
// Each row of `output_indices` comprises `input_dims_to_stack` indices
// based on the position of the i^th sparse tensor in the input tensor,
// followed by the indices from the corresponding row in
// `element_indices`.
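// For example, with an input of shape (2, 3, 3), the element at stacked
// position (1, 2) corresponds to i == 5; an index row [7] from that element
// becomes the output row [1, 2, 7].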
if (input_dims_to_stack == 1 && output_rank == 2) {
// We specialize this case because the compiler can generate
// more efficient code when the number of indices for each element is
// known statically. Since the most common use of this op is to
// deserialize batches of SparseTensors, and the most common source of
// SparseTensors is the `tf.parse_single_example()` op, which generates
// 1-D SparseTensors, we statically unroll the loop for the rank 2
// output case.
for (; current_row < next_start_row; ++current_row) {
*output_indices_data++ = i;
*output_indices_data++ = *element_indices_data++;
}
} else {
// `sparse_tensor_index` is the tuple of coordinates of the i^th sparse
// tensor within the stacked dimensions of the input tensor, obtained by
// mapping the flat element index (`i`) back onto those dimensions.
//
// We build `sparse_tensor_index` in reverse (innermost/minor dimension
// to outermost/major dimension). The `cumulative_product` represents
// the size of the inner subtensor for which `sparse_tensor_index` has
// already been built.
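// For example, with stacked dimensions (2, 3) and i == 4, the first pass
// sets sparse_tensor_index[1] = 4 % 3 == 1 and the second sets
// sparse_tensor_index[0] = (4 / 3) % 2 == 1, recovering the position (1, 1).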
gtl::InlinedVector<int64, 4> sparse_tensor_index(input_dims_to_stack);
int cumulative_product = 1;
for (size_t j = 0; j < sparse_tensor_index.size(); ++j) {
size_t reverse_index = sparse_tensor_index.size() - j - 1;
sparse_tensor_index[reverse_index] =
(i / cumulative_product) % input.dim_size(reverse_index);
cumulative_product *= input.dim_size(reverse_index);
}
for (; current_row < next_start_row; ++current_row) {
for (int64 sparse_tensor_index_component : sparse_tensor_index) {
*output_indices_data++ = sparse_tensor_index_component;
}
for (size_t k = input_dims_to_stack; k < output_rank; ++k) {
*output_indices_data++ = *element_indices_data++;
}
}
}
// Build the subvector of `output_values` for the i^th sparse tensor
// in the input.
//
// NOTE(mrry): There is a potential optimization here where we use a T*
// to represent the current position in `output_values`, but it would
// require some rejigging of the template parameters.
// NOTE(mrry): Another potential optimization: if we know that this
// operation consumes its input, we could std::move non-primitive elements
// into the output and avoid a copy.
Eigen::DSizes<Eigen::DenseIndex, 1> values_start(start_row);
Eigen::DSizes<Eigen::DenseIndex, 1> values_sizes(num_index_rows);
#define HANDLE_TYPE(T) \
case DataTypeToEnum<T>::value: { \
output_values->vec<T>().slice(values_start, values_sizes) = \
element_values->vec<T>(); \
break; \
}
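// For example, when `dtype_` is DT_FLOAT the macro above expands to a case
// that copies element_values->vec<float>() into the
// [start_row, start_row + num_index_rows) slice of output_values->vec<float>().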
switch (dtype_) {
TF_CALL_ALL_TYPES(HANDLE_TYPE);
TF_CALL_QUANTIZED_TYPES(HANDLE_TYPE);
#undef HANDLE_TYPE
default:
OP_REQUIRES_OK(
context, errors::Unimplemented(
"DeserializeSparse Unhandled data type: ", dtype_));
}
}
}
private:
Status GetAndValidateSparseTensorShape(const Variant& serialized_values,
const Variant& serialized_shape,
int index, const Tensor** output_shape,
int64* output_num_non_zeros) {
// Deserialize and validate the shape.
*output_shape = serialized_shape.get<Tensor>();
if (*output_shape == nullptr) {
return errors::InvalidArgument(
"Could not get a tensor from serialized_sparse[", index, ", 2]");
}
if ((*output_shape)->dtype() != DT_INT64) {
return errors::InvalidArgument(
"Expected serialized_sparse[", index,
", 2] to be a vector of DT_INT64 but received dtype ",
DataTypeString((*output_shape)->dtype()));
}
if (!TensorShapeUtils::IsVector((*output_shape)->shape())) {
return errors::InvalidArgument(
"Expected serialized_sparse[", index,
", 2] to be a shape vector but its shape is ",
(*output_shape)->shape().DebugString());
}
*output_num_non_zeros = serialized_values.get<Tensor>()->NumElements();
return Status::OK();
}
Status GetAndValidateSparseTensorIndicesAndValues(
const Variant& serialized_indices, const Variant& serialized_values,
int index, int expected_rank, const Tensor** output_indices,
const Tensor** output_values) {
// Deserialize and validate the indices.
*output_indices = serialized_indices.get<Tensor>();
if (*output_indices == nullptr) {
return errors::InvalidArgument(
"Could not get a tensor from serialized_sparse[", index, ", 0]");
}
if ((*output_indices)->dtype() != DT_INT64) {
return errors::InvalidArgument(
"Expected serialized_sparse[", index,
", 0] to be a matrix of DT_INT64 but received dtype ",
DataTypeString((*output_indices)->dtype()));
}
if (!TensorShapeUtils::IsMatrix((*output_indices)->shape())) {
return errors::InvalidArgument(
"Expected serialized_sparse[", index,
", 0] to represent an index matrix but received shape ",
(*output_indices)->shape().DebugString());
}
int64 num_entries = (*output_indices)->dim_size(0);
int rank = (*output_indices)->dim_size(1);
if (rank != expected_rank) {
return errors::InvalidArgument(
"Expected column counts of SparseTensor[", index,
"].indices to match size of SparseTensor[", index,
"].shape but they do not: ", rank, " vs. ", expected_rank);
}
// Deserialize and validate the values.
*output_values = serialized_values.get<Tensor>();
if (*output_values == nullptr) {
return errors::InvalidArgument(
"Could not get a tensor from serialized_sparse[", index, ", 1]");
}
if (!TensorShapeUtils::IsVector((*output_values)->shape())) {
return errors::InvalidArgument(
"Expected serialized_sparse[", index,
", 1] to represent a values vector but received shape ",
(*output_values)->shape().DebugString());
}
if (dtype_ != (*output_values)->dtype()) {
return errors::InvalidArgument(
"Requested SparseTensor of type ", DataTypeString(dtype_),
" but SparseTensor[", index,
"].values.dtype() == ", DataTypeString((*output_values)->dtype()));
}
if (num_entries != (*output_values)->dim_size(0)) {
return errors::InvalidArgument(
"Expected row counts of SparseTensor[", index,
"].indices and SparseTensor[", index,
"].values to match but they do not: ", num_entries, " vs. ",
(*output_values)->dim_size(0));
}
return Status::OK();
}
DataType dtype_;
};
REGISTER_KERNEL_BUILDER(Name("DeserializeSparse")
.Device(DEVICE_CPU)
.TypeConstraint<Variant>("Tserialized"),
DeserializeSparseOp);
} // namespace
} // namespace tensorflow