blob: 9d300ede79e900482f7862c1c224c994be353af1 [file] [log] [blame]
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#define EIGEN_USE_GPU
#endif
#include "tensorflow/c/kernels.h"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/c/c_api.h"
#include "tensorflow/core/framework/attr_value.pb.h"
#include "tensorflow/core/framework/kernel_def.pb.h"
#include "tensorflow/core/framework/node_def.pb_text.h"
#include "tensorflow/core/framework/node_def_builder.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/kernels/ops_testutil.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/platform/test.h"
struct MyCustomKernel {
bool created;
bool compute_called;
};
static bool delete_called = false;
static void* MyCreateFunc(TF_OpKernelConstruction* ctx) {
struct MyCustomKernel* s = new struct MyCustomKernel;
s->created = true;
s->compute_called = false;
// Exercise attribute reads.
TF_DataType type;
TF_Status* status = TF_NewStatus();
TF_OpKernelConstruction_GetAttrType(ctx, "SomeDataTypeAttr", &type, status);
EXPECT_EQ(TF_OK, TF_GetCode(status));
EXPECT_EQ(TF_FLOAT, type);
TF_DeleteStatus(status);
return s;
}
static void MyComputeFunc(void* kernel, TF_OpKernelContext* ctx) {
struct MyCustomKernel* s = static_cast<struct MyCustomKernel*>(kernel);
s->compute_called = true;
if (ctx != nullptr) {
EXPECT_EQ(43, TF_StepId(ctx));
}
}
static void MyDeleteFunc(void* kernel) {
struct MyCustomKernel* s = static_cast<struct MyCustomKernel*>(kernel);
EXPECT_TRUE(s->created);
EXPECT_TRUE(s->compute_called);
delete_called = true;
delete s;
}
namespace tensorflow {
static std::unique_ptr<OpKernel> GetFakeKernel(const char* device_name,
const char* op_name,
Status* status) {
NodeDef def;
def.set_op(op_name);
def.set_device(device_name);
def.add_input("input1");
def.add_input("input2");
AttrValue v;
v.set_type(DataType::DT_FLOAT);
(*def.mutable_attr())["SomeDataTypeAttr"] = v;
return CreateOpKernel(DeviceType(device_name), nullptr, nullptr, def, 1,
status);
}
// Tests registration of a single C kernel and checks that calls through the
// C/C++ boundary are being made.
TEST(TestKernel, TestRegisterKernelBuilder) {
const char* kernel_name = "SomeKernelName";
const char* op_name = "FooOp";
const char* device_name = "FakeDeviceName1";
REGISTER_OP(op_name)
.Input("input1: double")
.Input("input2: uint8")
.Output("output1: uint8")
.Attr("SomeDataTypeAttr: type");
TF_KernelBuilder* builder = TF_NewKernelBuilder(
op_name, device_name, &MyCreateFunc, &MyComputeFunc, &MyDeleteFunc);
{
TF_Status* status = TF_NewStatus();
TF_RegisterKernelBuilder(kernel_name, builder, status);
EXPECT_EQ(TF_OK, TF_GetCode(status));
TF_Buffer* buf = TF_GetRegisteredKernelsForOp(op_name, status);
EXPECT_EQ(TF_OK, TF_GetCode(status));
KernelList list;
list.ParseFromArray(buf->data, buf->length);
ASSERT_EQ(1, list.kernel_size());
ASSERT_EQ(device_name, list.kernel(0).device_type());
TF_DeleteBuffer(buf);
TF_DeleteStatus(status);
}
{
Status status;
std::unique_ptr<OpKernel> kernel =
GetFakeKernel(device_name, op_name, &status);
TF_EXPECT_OK(status);
ASSERT_NE(nullptr, kernel.get());
kernel->Compute(nullptr);
}
ASSERT_TRUE(delete_called);
}
class DummyDevice : public DeviceBase {
public:
DummyDevice(Env* env, bool save) : DeviceBase(env), save_(save) {}
bool RequiresRecordingAccessedTensors() const override { return save_; }
Allocator* GetAllocator(AllocatorAttributes /*attr*/) override {
return cpu_allocator();
}
private:
bool save_;
};
TEST(TestKernel, TestInputAndOutputCount) {
const char* kernel_name = "InputOutputCounterKernel";
const char* op_name = "BarOp";
const char* device_name = "FakeDeviceName2";
REGISTER_OP(op_name)
.Input("input1: double")
.Input("input2: uint8")
.Output("output1: uint8")
.Attr("SomeDataTypeAttr: type");
static int num_inputs = 0;
static int num_outputs = 0;
// A kernel whose Compute function has a side-effect of updating num_inputs
// and num_outputs. Various functions on TF_OpKernelContext are also
// exercised.
auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) {
num_inputs = TF_NumInputs(ctx);
num_outputs = TF_NumOutputs(ctx);
TF_Tensor* input = nullptr;
TF_Status* s = TF_NewStatus();
TF_GetInput(ctx, 0, &input, s);
EXPECT_EQ(TF_OK, TF_GetCode(s)) << "Failed to get input: " << TF_Message(s);
EXPECT_EQ(123, *static_cast<tensorflow::uint8*>(TF_TensorData(input)));
TF_GetInput(ctx, -1, &input, s);
EXPECT_EQ(TF_OUT_OF_RANGE, TF_GetCode(s));
TF_GetInput(ctx, 3, &input, s);
EXPECT_EQ(TF_OUT_OF_RANGE, TF_GetCode(s));
// Copy the input tensor to output.
TF_SetOutput(ctx, 0, input, s);
EXPECT_EQ(TF_OK, TF_GetCode(s));
TF_SetOutput(ctx, 24, input, s);
EXPECT_EQ(TF_OUT_OF_RANGE, TF_GetCode(s));
EXPECT_EQ(TF_UINT8, TF_ExpectedOutputDataType(ctx, 0));
TF_DeleteStatus(s);
if (input != nullptr) {
TF_DeleteTensor(input);
}
};
TF_KernelBuilder* builder = TF_NewKernelBuilder(op_name, device_name, nullptr,
my_compute_func, nullptr);
{
TF_Status* status = TF_NewStatus();
TF_RegisterKernelBuilder(kernel_name, builder, status);
EXPECT_EQ(TF_OK, TF_GetCode(status));
TF_DeleteStatus(status);
}
{
OpKernelContext::Params p;
DummyDevice dummy_device(nullptr, false);
p.device = &dummy_device;
p.step_id = 43;
Tensor t(tensorflow::uint8(123));
gtl::InlinedVector<TensorValue, 4> inputs;
// Simulate 2 inputs
inputs.emplace_back(&t);
inputs.emplace_back();
p.inputs = &inputs;
Status status;
std::unique_ptr<OpKernel> kernel =
GetFakeKernel(device_name, op_name, &status);
TF_EXPECT_OK(status);
ASSERT_NE(nullptr, kernel.get());
p.op_kernel = kernel.get();
OpKernelContext ctx(&p);
kernel->Compute(&ctx);
ASSERT_EQ(2, num_inputs);
ASSERT_EQ(1, num_outputs);
ASSERT_EQ(123, ctx.mutable_output(0)->scalar<tensorflow::uint8>()());
}
}
TEST(TestKernel, DeleteKernelBuilderIsOkOnNull) {
TF_DeleteKernelBuilder(nullptr);
}
TEST(TestKernel, TestTypeConstraint) {
const char* kernel_name = "SomeKernelName";
const char* op_name = "TypeOp";
const char* device_name = "FakeDeviceName1";
REGISTER_OP(op_name)
.Input("input1: double")
.Input("input2: uint8")
.Output("output1: uint8")
.Attr("T: type");
TF_KernelBuilder* builder = TF_NewKernelBuilder(
op_name, device_name, &MyCreateFunc, &MyComputeFunc, &MyDeleteFunc);
TF_Status* status = TF_NewStatus();
TF_KernelBuilder_TypeConstraint(builder, "T", TF_DataType::TF_INT32, status);
EXPECT_EQ(TF_OK, TF_GetCode(status));
TF_RegisterKernelBuilder(kernel_name, builder, status);
EXPECT_EQ(TF_OK, TF_GetCode(status));
TF_Buffer* buf = TF_GetRegisteredKernelsForOp(op_name, status);
EXPECT_EQ(TF_OK, TF_GetCode(status));
KernelList list;
list.ParseFromArray(buf->data, buf->length);
const auto expected_str = R"str(kernel {
op: "TypeOp"
device_type: "FakeDeviceName1"
constraint {
name: "T"
allowed_values {
list {
type: DT_INT32
}
}
}
}
)str";
ASSERT_EQ(expected_str, list.DebugString());
TF_DeleteBuffer(buf);
TF_DeleteStatus(status);
TF_DeleteKernelBuilder(builder);
ASSERT_TRUE(delete_called);
}
TEST(TestKernel, TestHostMemory) {
const char* kernel_name = "SomeKernelName";
const char* op_name = "HostMemoryOp";
const char* device_name = "FakeDeviceName1";
REGISTER_OP(op_name)
.Input("input1: double")
.Input("input2: uint8")
.Output("output1: uint8")
.Attr("T: type");
TF_KernelBuilder* builder = TF_NewKernelBuilder(
op_name, device_name, &MyCreateFunc, &MyComputeFunc, &MyDeleteFunc);
TF_KernelBuilder_HostMemory(builder, "input2");
TF_KernelBuilder_HostMemory(builder, "output1");
TF_Status* status = TF_NewStatus();
TF_RegisterKernelBuilder(kernel_name, builder, status);
EXPECT_EQ(TF_OK, TF_GetCode(status));
TF_Buffer* buf = TF_GetRegisteredKernelsForOp(op_name, status);
EXPECT_EQ(TF_OK, TF_GetCode(status));
KernelList list;
list.ParseFromArray(buf->data, buf->length);
const auto expected_str = R"str(kernel {
op: "HostMemoryOp"
device_type: "FakeDeviceName1"
host_memory_arg: "input2"
host_memory_arg: "output1"
}
)str";
ASSERT_EQ(expected_str, list.DebugString());
TF_DeleteBuffer(buf);
TF_DeleteStatus(status);
TF_DeleteKernelBuilder(builder);
ASSERT_TRUE(delete_called);
}
class DeviceKernelOpTest : public OpsTestBase {
protected:
void SetupOp(const char* op_name, const char* kernel_name,
void (*compute_func)(void*, TF_OpKernelContext*)) {
TF_KernelBuilder* builder = TF_NewKernelBuilder(
op_name, device_name_, nullptr, compute_func, nullptr);
TF_Status* status = TF_NewStatus();
TF_RegisterKernelBuilder(kernel_name, builder, status);
EXPECT_EQ(TF_OK, TF_GetCode(status));
TF_DeleteStatus(status);
#if GOOGLE_CUDA
std::unique_ptr<Device> device(
DeviceFactory::NewDevice(device_name_, {}, "/job:a/replica:0/task:0"));
OpsTestBase::SetDevice(DEVICE_GPU, std::move(device));
#endif
TF_ASSERT_OK(NodeDefBuilder(op_name, op_name).Finalize(node_def()));
TF_ASSERT_OK(InitOp());
}
#if GOOGLE_CUDA
const char* device_name_ = tensorflow::DEVICE_GPU;
#else
const char* device_name_ = tensorflow::DEVICE_CPU;
#endif
};
REGISTER_OP("AllocateOutputOp1").Output("output1: float");
TEST_F(DeviceKernelOpTest, TestAllocateOutputSizeOne) {
auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) {
// Allocate output
int64_t dim = 1;
size_t tensor_size_bytes = TF_DataTypeSize(TF_FLOAT);
TF_Tensor* output = TF_AllocateOutput(
/*context=*/ctx, /*index=*/0, /*dtype=*/TF_FLOAT, /*dims=*/&dim,
/*num_dims=*/1, /*len=*/tensor_size_bytes);
EXPECT_EQ(TF_FLOAT, TF_TensorType(output));
EXPECT_EQ(1, TF_NumDims(output));
EXPECT_EQ(1, TF_Dim(output, 0));
// Set output to 3
float* data = reinterpret_cast<float*>(TF_TensorData(output));
float value = 3.0f;
#if GOOGLE_CUDA
OpKernelContext* cc_ctx = reinterpret_cast<OpKernelContext*>(ctx);
cc_ctx->eigen_gpu_device().memcpyHostToDevice(data, &value,
tensor_size_bytes);
#else
*data = value;
#endif
TF_Status* s = TF_NewStatus();
TF_SetOutput(ctx, 0, output, s);
EXPECT_EQ(TF_OK, TF_GetCode(s));
TF_DeleteStatus(s);
TF_DeleteTensor(output);
};
SetupOp("AllocateOutputOp1", "AllocateOutput1", my_compute_func);
TF_ASSERT_OK(RunOpKernel());
Tensor* output = GetOutput(0);
EXPECT_EQ("Tensor<type: float shape: [1] values: 3>",
output->DebugString(100));
}
REGISTER_OP("AllocateOutputOp0").Output("output1: float");
TEST_F(DeviceKernelOpTest, TestAllocateEmptyOutput) {
auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) {
// Allocate empty output
int64_t dim = 0;
TF_Tensor* output = TF_AllocateOutput(
/*context=*/ctx, /*index=*/0, /*dtype=*/TF_FLOAT, /*dims=*/&dim,
/*num_dims=*/1, /*len=*/0);
EXPECT_EQ(TF_FLOAT, TF_TensorType(output));
EXPECT_EQ(1, TF_NumDims(output));
EXPECT_EQ(0, TF_Dim(output, 0));
TF_Status* s = TF_NewStatus();
TF_SetOutput(ctx, 0, output, s);
EXPECT_EQ(TF_OK, TF_GetCode(s));
TF_DeleteStatus(s);
TF_DeleteTensor(output);
};
SetupOp("AllocateOutputOp0", "AllocateOutput0", my_compute_func);
TF_ASSERT_OK(RunOpKernel());
Tensor* output = GetOutput(0);
EXPECT_EQ("Tensor<type: float shape: [0] values: >",
output->DebugString(100));
}
REGISTER_OP("AllocateOutputOp2x3").Output("output1: float");
TEST_F(DeviceKernelOpTest, TestAllocateOutputSize2x3) {
auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) {
// Allocate 2x3 output
int64_t dim[2] = {2, 3};
size_t tensor_size_bytes = 6 * TF_DataTypeSize(TF_FLOAT);
TF_Tensor* output = TF_AllocateOutput(
/*context=*/ctx, /*index=*/0, /*dtype=*/TF_FLOAT, /*dims=*/dim,
/*num_dims=*/2, /*len=*/tensor_size_bytes);
EXPECT_EQ(TF_FLOAT, TF_TensorType(output));
EXPECT_EQ(2, TF_NumDims(output));
EXPECT_EQ(2, TF_Dim(output, 0));
EXPECT_EQ(3, TF_Dim(output, 1));
// Set output to [1 2 3 4 5 6]
void* data = TF_TensorData(output);
float value[6] = {1, 2, 3, 4, 5, 6};
#if GOOGLE_CUDA
OpKernelContext* cc_ctx = reinterpret_cast<OpKernelContext*>(ctx);
cc_ctx->eigen_gpu_device().memcpyHostToDevice(data, value,
tensor_size_bytes);
#else
memcpy(data, value, tensor_size_bytes);
#endif
TF_Status* s = TF_NewStatus();
TF_SetOutput(ctx, 0, output, s);
EXPECT_EQ(TF_OK, TF_GetCode(s));
TF_DeleteStatus(s);
TF_DeleteTensor(output);
};
SetupOp("AllocateOutputOp2x3", "AllocateOutput2x3", my_compute_func);
TF_ASSERT_OK(RunOpKernel());
Tensor* output = GetOutput(0);
EXPECT_EQ("Tensor<type: float shape: [2,3] values: [1 2 3][4 5 6]>",
output->DebugString(100));
}
} // namespace tensorflow