| /* Copyright 2018 The TensorFlow Authors. All Rights Reserved. |
| |
| Licensed under the Apache License, Version 2.0 (the "License"); |
| you may not use this file except in compliance with the License. |
| You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| ==============================================================================*/ |
| #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM |
| #define EIGEN_USE_GPU |
| #endif |
| |
| #include "tensorflow/c/kernels.h" |
| |
| #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" |
| #include "tensorflow/c/c_api.h" |
| #include "tensorflow/core/framework/attr_value.pb.h" |
| #include "tensorflow/core/framework/kernel_def.pb.h" |
| #include "tensorflow/core/framework/node_def.pb_text.h" |
| #include "tensorflow/core/framework/node_def_builder.h" |
| #include "tensorflow/core/framework/op.h" |
| #include "tensorflow/core/framework/op_kernel.h" |
| #include "tensorflow/core/framework/types.h" |
| #include "tensorflow/core/framework/types.pb.h" |
| #include "tensorflow/core/kernels/ops_testutil.h" |
| #include "tensorflow/core/lib/core/status_test_util.h" |
| #include "tensorflow/core/platform/test.h" |
| |
// State carried by the test C kernel. An instance is allocated in
// MyCreateFunc, updated in MyComputeFunc and checked/freed in MyDeleteFunc.
// Both flags default to false so a default-constructed instance never holds
// indeterminate values.
struct MyCustomKernel {
  // Set to true by MyCreateFunc.
  bool created = false;
  // Set to true by MyComputeFunc.
  bool compute_called = false;
};
| |
// Set to true by MyDeleteFunc; the tests below inspect it.
static bool delete_called{false};
| |
| static void* MyCreateFunc(TF_OpKernelConstruction* ctx) { |
| struct MyCustomKernel* s = new struct MyCustomKernel; |
| s->created = true; |
| s->compute_called = false; |
| |
| // Exercise attribute reads. |
| TF_DataType type; |
| TF_Status* status = TF_NewStatus(); |
| TF_OpKernelConstruction_GetAttrType(ctx, "SomeDataTypeAttr", &type, status); |
| EXPECT_EQ(TF_OK, TF_GetCode(status)); |
| EXPECT_EQ(TF_FLOAT, type); |
| TF_DeleteStatus(status); |
| |
| return s; |
| } |
| |
| static void MyComputeFunc(void* kernel, TF_OpKernelContext* ctx) { |
| struct MyCustomKernel* s = static_cast<struct MyCustomKernel*>(kernel); |
| s->compute_called = true; |
| if (ctx != nullptr) { |
| EXPECT_EQ(43, TF_StepId(ctx)); |
| } |
| } |
| |
| static void MyDeleteFunc(void* kernel) { |
| struct MyCustomKernel* s = static_cast<struct MyCustomKernel*>(kernel); |
| EXPECT_TRUE(s->created); |
| EXPECT_TRUE(s->compute_called); |
| delete_called = true; |
| delete s; |
| } |
| |
| namespace tensorflow { |
| |
| static std::unique_ptr<OpKernel> GetFakeKernel(const char* device_name, |
| const char* op_name, |
| Status* status) { |
| NodeDef def; |
| def.set_op(op_name); |
| def.set_device(device_name); |
| def.add_input("input1"); |
| def.add_input("input2"); |
| |
| AttrValue v; |
| v.set_type(DataType::DT_FLOAT); |
| (*def.mutable_attr())["SomeDataTypeAttr"] = v; |
| |
| return CreateOpKernel(DeviceType(device_name), nullptr, nullptr, def, 1, |
| status); |
| } |
| |
| // Tests registration of a single C kernel and checks that calls through the |
| // C/C++ boundary are being made. |
| TEST(TestKernel, TestRegisterKernelBuilder) { |
| const char* kernel_name = "SomeKernelName"; |
| const char* op_name = "FooOp"; |
| const char* device_name = "FakeDeviceName1"; |
| |
| REGISTER_OP(op_name) |
| .Input("input1: double") |
| .Input("input2: uint8") |
| .Output("output1: uint8") |
| .Attr("SomeDataTypeAttr: type"); |
| |
| TF_KernelBuilder* builder = TF_NewKernelBuilder( |
| op_name, device_name, &MyCreateFunc, &MyComputeFunc, &MyDeleteFunc); |
| |
| { |
| TF_Status* status = TF_NewStatus(); |
| TF_RegisterKernelBuilder(kernel_name, builder, status); |
| EXPECT_EQ(TF_OK, TF_GetCode(status)); |
| TF_Buffer* buf = TF_GetRegisteredKernelsForOp(op_name, status); |
| EXPECT_EQ(TF_OK, TF_GetCode(status)); |
| KernelList list; |
| list.ParseFromArray(buf->data, buf->length); |
| ASSERT_EQ(1, list.kernel_size()); |
| ASSERT_EQ(device_name, list.kernel(0).device_type()); |
| TF_DeleteBuffer(buf); |
| TF_DeleteStatus(status); |
| } |
| |
| { |
| Status status; |
| std::unique_ptr<OpKernel> kernel = |
| GetFakeKernel(device_name, op_name, &status); |
| TF_EXPECT_OK(status); |
| ASSERT_NE(nullptr, kernel.get()); |
| kernel->Compute(nullptr); |
| } |
| |
| ASSERT_TRUE(delete_called); |
| } |
| |
| class DummyDevice : public DeviceBase { |
| public: |
| DummyDevice(Env* env, bool save) : DeviceBase(env), save_(save) {} |
| bool RequiresRecordingAccessedTensors() const override { return save_; } |
| Allocator* GetAllocator(AllocatorAttributes /*attr*/) override { |
| return cpu_allocator(); |
| } |
| |
| private: |
| bool save_; |
| }; |
| |
| TEST(TestKernel, TestInputAndOutputCount) { |
| const char* kernel_name = "InputOutputCounterKernel"; |
| const char* op_name = "BarOp"; |
| const char* device_name = "FakeDeviceName2"; |
| |
| REGISTER_OP(op_name) |
| .Input("input1: double") |
| .Input("input2: uint8") |
| .Output("output1: uint8") |
| .Attr("SomeDataTypeAttr: type"); |
| |
| static int num_inputs = 0; |
| static int num_outputs = 0; |
| |
| // A kernel whose Compute function has a side-effect of updating num_inputs |
| // and num_outputs. Various functions on TF_OpKernelContext are also |
| // exercised. |
| auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) { |
| num_inputs = TF_NumInputs(ctx); |
| num_outputs = TF_NumOutputs(ctx); |
| |
| TF_Tensor* input = nullptr; |
| TF_Status* s = TF_NewStatus(); |
| TF_GetInput(ctx, 0, &input, s); |
| EXPECT_EQ(TF_OK, TF_GetCode(s)) << "Failed to get input: " << TF_Message(s); |
| EXPECT_EQ(123, *static_cast<tensorflow::uint8*>(TF_TensorData(input))); |
| TF_GetInput(ctx, -1, &input, s); |
| EXPECT_EQ(TF_OUT_OF_RANGE, TF_GetCode(s)); |
| TF_GetInput(ctx, 3, &input, s); |
| EXPECT_EQ(TF_OUT_OF_RANGE, TF_GetCode(s)); |
| |
| // Copy the input tensor to output. |
| TF_SetOutput(ctx, 0, input, s); |
| EXPECT_EQ(TF_OK, TF_GetCode(s)); |
| |
| TF_SetOutput(ctx, 24, input, s); |
| EXPECT_EQ(TF_OUT_OF_RANGE, TF_GetCode(s)); |
| |
| EXPECT_EQ(TF_UINT8, TF_ExpectedOutputDataType(ctx, 0)); |
| |
| TF_DeleteStatus(s); |
| if (input != nullptr) { |
| TF_DeleteTensor(input); |
| } |
| }; |
| |
| TF_KernelBuilder* builder = TF_NewKernelBuilder(op_name, device_name, nullptr, |
| my_compute_func, nullptr); |
| |
| { |
| TF_Status* status = TF_NewStatus(); |
| TF_RegisterKernelBuilder(kernel_name, builder, status); |
| EXPECT_EQ(TF_OK, TF_GetCode(status)); |
| TF_DeleteStatus(status); |
| } |
| |
| { |
| OpKernelContext::Params p; |
| DummyDevice dummy_device(nullptr, false); |
| p.device = &dummy_device; |
| p.step_id = 43; |
| |
| Tensor t(tensorflow::uint8(123)); |
| |
| gtl::InlinedVector<TensorValue, 4> inputs; |
| // Simulate 2 inputs |
| inputs.emplace_back(&t); |
| inputs.emplace_back(); |
| p.inputs = &inputs; |
| |
| Status status; |
| std::unique_ptr<OpKernel> kernel = |
| GetFakeKernel(device_name, op_name, &status); |
| TF_EXPECT_OK(status); |
| ASSERT_NE(nullptr, kernel.get()); |
| |
| p.op_kernel = kernel.get(); |
| OpKernelContext ctx(&p); |
| kernel->Compute(&ctx); |
| |
| ASSERT_EQ(2, num_inputs); |
| ASSERT_EQ(1, num_outputs); |
| ASSERT_EQ(123, ctx.mutable_output(0)->scalar<tensorflow::uint8>()()); |
| } |
| } |
| |
| TEST(TestKernel, DeleteKernelBuilderIsOkOnNull) { |
| TF_DeleteKernelBuilder(nullptr); |
| } |
| |
| TEST(TestKernel, TestTypeConstraint) { |
| const char* kernel_name = "SomeKernelName"; |
| const char* op_name = "TypeOp"; |
| const char* device_name = "FakeDeviceName1"; |
| |
| REGISTER_OP(op_name) |
| .Input("input1: double") |
| .Input("input2: uint8") |
| .Output("output1: uint8") |
| .Attr("T: type"); |
| |
| TF_KernelBuilder* builder = TF_NewKernelBuilder( |
| op_name, device_name, &MyCreateFunc, &MyComputeFunc, &MyDeleteFunc); |
| TF_Status* status = TF_NewStatus(); |
| TF_KernelBuilder_TypeConstraint(builder, "T", TF_DataType::TF_INT32, status); |
| EXPECT_EQ(TF_OK, TF_GetCode(status)); |
| TF_RegisterKernelBuilder(kernel_name, builder, status); |
| EXPECT_EQ(TF_OK, TF_GetCode(status)); |
| |
| TF_Buffer* buf = TF_GetRegisteredKernelsForOp(op_name, status); |
| EXPECT_EQ(TF_OK, TF_GetCode(status)); |
| KernelList list; |
| list.ParseFromArray(buf->data, buf->length); |
| const auto expected_str = R"str(kernel { |
| op: "TypeOp" |
| device_type: "FakeDeviceName1" |
| constraint { |
| name: "T" |
| allowed_values { |
| list { |
| type: DT_INT32 |
| } |
| } |
| } |
| } |
| )str"; |
| ASSERT_EQ(expected_str, list.DebugString()); |
| |
| TF_DeleteBuffer(buf); |
| TF_DeleteStatus(status); |
| TF_DeleteKernelBuilder(builder); |
| ASSERT_TRUE(delete_called); |
| } |
| |
| TEST(TestKernel, TestHostMemory) { |
| const char* kernel_name = "SomeKernelName"; |
| const char* op_name = "HostMemoryOp"; |
| const char* device_name = "FakeDeviceName1"; |
| |
| REGISTER_OP(op_name) |
| .Input("input1: double") |
| .Input("input2: uint8") |
| .Output("output1: uint8") |
| .Attr("T: type"); |
| |
| TF_KernelBuilder* builder = TF_NewKernelBuilder( |
| op_name, device_name, &MyCreateFunc, &MyComputeFunc, &MyDeleteFunc); |
| TF_KernelBuilder_HostMemory(builder, "input2"); |
| TF_KernelBuilder_HostMemory(builder, "output1"); |
| TF_Status* status = TF_NewStatus(); |
| TF_RegisterKernelBuilder(kernel_name, builder, status); |
| EXPECT_EQ(TF_OK, TF_GetCode(status)); |
| |
| TF_Buffer* buf = TF_GetRegisteredKernelsForOp(op_name, status); |
| EXPECT_EQ(TF_OK, TF_GetCode(status)); |
| KernelList list; |
| list.ParseFromArray(buf->data, buf->length); |
| const auto expected_str = R"str(kernel { |
| op: "HostMemoryOp" |
| device_type: "FakeDeviceName1" |
| host_memory_arg: "input2" |
| host_memory_arg: "output1" |
| } |
| )str"; |
| ASSERT_EQ(expected_str, list.DebugString()); |
| |
| TF_DeleteBuffer(buf); |
| TF_DeleteStatus(status); |
| TF_DeleteKernelBuilder(builder); |
| ASSERT_TRUE(delete_called); |
| } |
| |
| class DeviceKernelOpTest : public OpsTestBase { |
| protected: |
| void SetupOp(const char* op_name, const char* kernel_name, |
| void (*compute_func)(void*, TF_OpKernelContext*)) { |
| TF_KernelBuilder* builder = TF_NewKernelBuilder( |
| op_name, device_name_, nullptr, compute_func, nullptr); |
| TF_Status* status = TF_NewStatus(); |
| TF_RegisterKernelBuilder(kernel_name, builder, status); |
| EXPECT_EQ(TF_OK, TF_GetCode(status)); |
| TF_DeleteStatus(status); |
| |
| #if GOOGLE_CUDA |
| std::unique_ptr<Device> device( |
| DeviceFactory::NewDevice(device_name_, {}, "/job:a/replica:0/task:0")); |
| OpsTestBase::SetDevice(DEVICE_GPU, std::move(device)); |
| #endif |
| TF_ASSERT_OK(NodeDefBuilder(op_name, op_name).Finalize(node_def())); |
| TF_ASSERT_OK(InitOp()); |
| } |
| |
| #if GOOGLE_CUDA |
| const char* device_name_ = tensorflow::DEVICE_GPU; |
| #else |
| const char* device_name_ = tensorflow::DEVICE_CPU; |
| #endif |
| }; |
| |
| REGISTER_OP("AllocateOutputOp1").Output("output1: float"); |
| |
| TEST_F(DeviceKernelOpTest, TestAllocateOutputSizeOne) { |
| auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) { |
| // Allocate output |
| int64_t dim = 1; |
| size_t tensor_size_bytes = TF_DataTypeSize(TF_FLOAT); |
| TF_Tensor* output = TF_AllocateOutput( |
| /*context=*/ctx, /*index=*/0, /*dtype=*/TF_FLOAT, /*dims=*/&dim, |
| /*num_dims=*/1, /*len=*/tensor_size_bytes); |
| EXPECT_EQ(TF_FLOAT, TF_TensorType(output)); |
| EXPECT_EQ(1, TF_NumDims(output)); |
| EXPECT_EQ(1, TF_Dim(output, 0)); |
| |
| // Set output to 3 |
| float* data = reinterpret_cast<float*>(TF_TensorData(output)); |
| float value = 3.0f; |
| #if GOOGLE_CUDA |
| OpKernelContext* cc_ctx = reinterpret_cast<OpKernelContext*>(ctx); |
| cc_ctx->eigen_gpu_device().memcpyHostToDevice(data, &value, |
| tensor_size_bytes); |
| #else |
| *data = value; |
| #endif |
| |
| TF_Status* s = TF_NewStatus(); |
| TF_SetOutput(ctx, 0, output, s); |
| EXPECT_EQ(TF_OK, TF_GetCode(s)); |
| |
| TF_DeleteStatus(s); |
| TF_DeleteTensor(output); |
| }; |
| |
| SetupOp("AllocateOutputOp1", "AllocateOutput1", my_compute_func); |
| |
| TF_ASSERT_OK(RunOpKernel()); |
| Tensor* output = GetOutput(0); |
| EXPECT_EQ("Tensor<type: float shape: [1] values: 3>", |
| output->DebugString(100)); |
| } |
| |
| REGISTER_OP("AllocateOutputOp0").Output("output1: float"); |
| |
| TEST_F(DeviceKernelOpTest, TestAllocateEmptyOutput) { |
| auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) { |
| // Allocate empty output |
| int64_t dim = 0; |
| TF_Tensor* output = TF_AllocateOutput( |
| /*context=*/ctx, /*index=*/0, /*dtype=*/TF_FLOAT, /*dims=*/&dim, |
| /*num_dims=*/1, /*len=*/0); |
| |
| EXPECT_EQ(TF_FLOAT, TF_TensorType(output)); |
| EXPECT_EQ(1, TF_NumDims(output)); |
| EXPECT_EQ(0, TF_Dim(output, 0)); |
| |
| TF_Status* s = TF_NewStatus(); |
| TF_SetOutput(ctx, 0, output, s); |
| EXPECT_EQ(TF_OK, TF_GetCode(s)); |
| |
| TF_DeleteStatus(s); |
| TF_DeleteTensor(output); |
| }; |
| |
| SetupOp("AllocateOutputOp0", "AllocateOutput0", my_compute_func); |
| |
| TF_ASSERT_OK(RunOpKernel()); |
| Tensor* output = GetOutput(0); |
| EXPECT_EQ("Tensor<type: float shape: [0] values: >", |
| output->DebugString(100)); |
| } |
| |
| REGISTER_OP("AllocateOutputOp2x3").Output("output1: float"); |
| |
| TEST_F(DeviceKernelOpTest, TestAllocateOutputSize2x3) { |
| auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) { |
| // Allocate 2x3 output |
| int64_t dim[2] = {2, 3}; |
| size_t tensor_size_bytes = 6 * TF_DataTypeSize(TF_FLOAT); |
| TF_Tensor* output = TF_AllocateOutput( |
| /*context=*/ctx, /*index=*/0, /*dtype=*/TF_FLOAT, /*dims=*/dim, |
| /*num_dims=*/2, /*len=*/tensor_size_bytes); |
| EXPECT_EQ(TF_FLOAT, TF_TensorType(output)); |
| EXPECT_EQ(2, TF_NumDims(output)); |
| EXPECT_EQ(2, TF_Dim(output, 0)); |
| EXPECT_EQ(3, TF_Dim(output, 1)); |
| |
| // Set output to [1 2 3 4 5 6] |
| void* data = TF_TensorData(output); |
| float value[6] = {1, 2, 3, 4, 5, 6}; |
| #if GOOGLE_CUDA |
| OpKernelContext* cc_ctx = reinterpret_cast<OpKernelContext*>(ctx); |
| cc_ctx->eigen_gpu_device().memcpyHostToDevice(data, value, |
| tensor_size_bytes); |
| #else |
| memcpy(data, value, tensor_size_bytes); |
| #endif |
| |
| TF_Status* s = TF_NewStatus(); |
| TF_SetOutput(ctx, 0, output, s); |
| EXPECT_EQ(TF_OK, TF_GetCode(s)); |
| |
| TF_DeleteStatus(s); |
| TF_DeleteTensor(output); |
| }; |
| |
| SetupOp("AllocateOutputOp2x3", "AllocateOutput2x3", my_compute_func); |
| |
| TF_ASSERT_OK(RunOpKernel()); |
| Tensor* output = GetOutput(0); |
| EXPECT_EQ("Tensor<type: float shape: [2,3] values: [1 2 3][4 5 6]>", |
| output->DebugString(100)); |
| } |
| } // namespace tensorflow |