/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
| #ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_KERNELS_C_API_H_ |
| #define TENSORFLOW_CORE_TPU_KERNELS_TPU_KERNELS_C_API_H_ |
| |
| #include <stddef.h> |
| |
| #include <cstdint> |
| |
| #include "absl/types/optional.h" |
| #include "tensorflow/core/tpu/libtftpu.h" |
| #include "tensorflow/stream_executor/tpu/c_api_decl.h" |
| #include "tensorflow/stream_executor/tpu/proto_helper.h" |
| |
// Forward declaration of the serialized-proto struct. It is passed by value
// by several functions in this header, so its full definition presumably comes
// from one of the included headers -- TODO confirm.
typedef struct TpuSerializedProto TpuSerializedProto;

namespace tensorflow {
// Forward declaration only; this header refers to the class solely through a
// pointer member of `WaitForDistributedTpuOp_DoWork_Params`.
class TpuMeshCommonState;
}  // namespace tensorflow
| |
| extern "C" { |
| |
// Opaque handle type for a TPU program; this header only ever uses it through
// pointers.
typedef struct XLA_TpuProgram XLA_TpuProgram;
| |
// Selects which program (main, sharding or unsharding) to pick from an
// `XLA_TpuProgram` object. `kInvalid` is pinned to 0; the remaining values
// follow consecutively.
enum TpuProgramShardingType { kInvalid = 0, kMain, kSharding, kUnsharding };
| |
// Fingerprint of a TPU program, carried as a (pointer, length) byte range.
// Values of this type are returned by `TpuProgram_GetFingerprint` and handed
// back to `TpuProgram_DestroyFingerprint`.
struct TpuProgramFingerprint {
  const char* bytes;  // Start of the fingerprint bytes.
  size_t size;        // Presumably the number of bytes at `bytes` -- confirm.
};
| |
// Serialized TPU executable proto, carried as a (pointer, length) byte range.
// Filled in by `TpuProgram_SerializeTpuExecutable`.
struct TpuExecutableSerializedProto {
  const char* bytes;  // Start of the serialized bytes.
  size_t size;        // Presumably the number of bytes at `bytes` -- confirm.
};
| |
// Serialized compiler metadata proto, carried as a (pointer, length) byte
// range. Filled in by `TpuProgram_SerializeCompilerMetadata`.
struct CompilerMetadataSerializedProto {
  const char* bytes;  // Start of the serialized bytes.
  size_t size;        // Presumably the number of bytes at `bytes` -- confirm.
};
| |
// Serialized host-compute metadata proto, carried as a (pointer, length) byte
// range.
struct HostComputeMetadataSerializedProto {
  const char* bytes;  // Start of the serialized bytes.
  size_t size;        // Presumably the number of bytes at `bytes` -- confirm.
};
| |
// Opaque handle type for TPU mesh state; only used through pointers here.
typedef struct XLA_TpuMeshState XLA_TpuMeshState;

// Opaque handle type for a TPU profiler; only used through pointers here.
typedef struct TpuProfiler TpuProfiler;
| |
// Device assignment carried as a (pointer, length) byte range.
// NOTE(review): the layout matches the serialized-proto structs in this
// header, so `bytes` presumably holds a serialized proto -- confirm.
typedef struct XLA_DeviceAssignment {
  const char* bytes;  // Start of the bytes.
  size_t size;        // Presumably the number of bytes at `bytes` -- confirm.
} XLA_DeviceAssignment;
| |
| // Property for creating compilation cache key. |
| struct CompilationCacheKeyProperty { |
| const char* config_prefix; |
| const char* shapes_prefix; |
| const char* function_name; |
| uint64_t mlir_module_fingerprint; |
| const int32_t* device_ids; |
| size_t device_ids_size; |
| int32_t guaranteed_constants_size; |
| uint64_t function_library_fingerprint; |
| int32_t num_cores_per_replica; |
| int32_t num_replicas; |
| const XLA_TpuMeshState* mesh_state; |
| }; |
| |
// Result of creating a compilation cache key: the key itself plus a more
// verbose debug version of it.
struct CompilationCacheKeyResult {
  const char* key;           // The cache key.
  const char* debug_string;  // More verbose, debug-oriented form of the key.
};
| |
// Opaque handle type for a TPU node context; only used through pointers here.
typedef struct XLA_TpuNodeContext XLA_TpuNodeContext;

// Opaque handle type for the ordinal selector. Note that the struct tag
// (`TfTpu_OrdinalSelector`) and the typedef name (`TfTpuOrdinalSelector`)
// differ by an underscore; the rest of this header uses the typedef name.
typedef struct TfTpu_OrdinalSelector TfTpuOrdinalSelector;
| |
// Parameters for TPU partitioned calls. Populated through the pointer passed
// to `TfTpu_GetTpuPartitionedCallParams`.
struct TpuPartitionedCall_Params {
  bool input_shape_opt;
  bool group_tensors_for_packing;
  int32_t minimum_input_tensors_packing;
  int32_t minimum_output_tensors_packing;

  // Whether to attempt to automatically shard inputs by adding an
  // XlaSharding op after each input.
  bool enable_auto_xla_input_sharding;

  // The dimension of each input to shard if
  // enable_auto_xla_input_sharding is set to true. Negative numbers are
  // allowed and refer to dimensions counted from the end.
  int32_t auto_xla_input_sharding_dim;

  // If true, only create one variable on the TPU for each variable on the CPU.
  bool enable_variable_deduplication;
};
| |
| // Compiles Mlir or TF function computation by lowering into HLO IR and returns |
| // `count` number of TPU programs ready for execution. |
| // The API allocates the `XLA_TpuProgram*[]` array `tpu_programs` and creates |
| // `XLA_TpuProgram` object(s) using the `TpuProgram_New` API. The caller is |
| // responsible to deallocate both the `XLA_TpuProgram*[]` array and the |
| // `XLA_TpuProgram` object(s) using `TpuProgram_FreeArray` and `TpuProgram_Free` |
| // API respectively. |
| TFTPU_CAPI_EXPORT void TpuCompile_CompileAndBuild( |
| TpuSerializedProto compilation_request, const XLA_TpuMeshState* mesh_state, |
| XLA_TpuProgram** tpu_programs[], size_t* count, TF_Status* status); |
| |
| // Compiles a HLO IR and returns `count` number of TPU programs ready for |
| // execution. The API allocates the `XLA_TpuProgram*[]` array `tpu_programs` and |
| // creates `XLA_TpuProgram` object(s) using the `TpuProgram_New` API. The caller |
| // is responsible to deallocate both the `XLA_TpuProgram*[]` array and the |
| // `XLA_TpuProgram` object(s) using `TpuProgram_FreeArray` and `TpuProgram_Free` |
| // API respectively. |
| TFTPU_CAPI_EXPORT void TpuCompile_XrtCompileAndBuild( |
| TpuSerializedProto xrt_computation, const XLA_TpuMeshState* mesh_state, |
| XLA_TpuProgram** tpu_programs[], size_t* count, TF_Status* status); |
| |
| // Creates a TPU profiler that is ready to start profiling. |
| TFTPU_CAPI_EXPORT void TpuProfiler_Create(TpuProfiler** tpu_profiler, |
| TF_Status* status); |
| // Destroys the given TPU profiler. |
| TFTPU_CAPI_EXPORT void TpuProfiler_Destroy(TpuProfiler* tpu_profiler); |
| // Starts profiling if not already started, returns an error otherwise. |
| TFTPU_CAPI_EXPORT void TpuProfiler_Start(TpuProfiler* tpu_profiler, |
| TF_Status* status); |
| // Stops profiling if not already stopped, returns an error otherwise. |
| TFTPU_CAPI_EXPORT void TpuProfiler_Stop(TpuProfiler* tpu_profiler, |
| TF_Status* status); |
| // Serializes profiled data into `buffer` and returns the size of `buffer`. The |
| // profile data held by the TPU driver will be cleared after retrieval. |
| // |
| // Step 1. Query the size of buffer required into `size_in_bytes`. |
| // |
| // size_t size_in_bytes; |
| // TpuProfiler_CollectData(profiler, status, nullptr, &size_in_bytes); |
| // |
| // Step 2. Retrieve the data into a `buffer` of size `size_in_bytes`. |
| // Subsequently,The TPU driver clears its copy of the profile data. |
| // |
| // uint8_t buffer = new uint8_t[size_in_bytes]; |
| // TpuProfiler_CollectData(profiler, status, buffer, size_in_bytes); |
| // |
| // Step 3. Unpack the data into an XSpace. |
| // |
| // tensorflow::profiler::XSpace space; |
| // space.ParseFromArray(buffer, size_in_bytes); |
| // |
| TFTPU_CAPI_EXPORT void TpuProfiler_CollectData(TpuProfiler* tpu_profiler, |
| TF_Status* status, |
| uint8_t* buffer, |
| size_t* size_in_bytes); |
| |
| // Creates a new TPU mesh state object. |
| TFTPU_CAPI_EXPORT XLA_TpuMeshState* TpuMeshState_Create(); |
| |
| // Deletes the given TPU `mesh_state` object. Once deleted the object is |
| // unusable. |
| TFTPU_CAPI_EXPORT void TpuMeshState_Free(XLA_TpuMeshState* mesh_state); |
| |
| // Returns a pointer to an opaque mesh data structure used internally. |
| TFTPU_CAPI_EXPORT void* TpuMeshState_MeshCommonState( |
| XLA_TpuMeshState* mesh_state); |
| |
| TFTPU_CAPI_EXPORT void TfTpuOrdinalSelector_Create( |
| TfTpuOrdinalSelector** ordinal_selector, int num_cores_per_replica); |
| |
| TFTPU_CAPI_EXPORT void TfTpuOrdinalSelector_Destroy( |
| TfTpuOrdinalSelector* ordinal_selector); |
| |
| TFTPU_CAPI_EXPORT void TfTpuOrdinalSelector_GetOrdinal( |
| TfTpuOrdinalSelector* ordinal_selector, absl::optional<uint64_t> key, |
| int64_t* req_id, int64_t* ordinal); |
| |
| TFTPU_CAPI_EXPORT void TfTpuOrdinalSelector_DequeueFromCoreSelector( |
| TfTpuOrdinalSelector* ordinal_selector, int32_t device_ordinal, |
| int64_t req_id); |
| |
| TFTPU_CAPI_EXPORT void TfTpu_GetTpuPartitionedCallParams( |
| TpuPartitionedCall_Params* params); |
| |
| typedef struct TpuExecutable_LoadProgramAndEnqueueToStream_Params { |
| int32_t struct_size; |
| void* priv; |
| |
| const XLA_TpuProgram* program; |
| SE_DeviceMemoryBase* arguments; |
| size_t arguments_len; |
| SE_DeviceMemoryBase* result; |
| bool has_cross_program_prefetch_addr; |
| SE_DeviceMemoryBase* cross_program_prefetch_addr; |
| int32_t rng_seed; |
| XLA_DeviceAssignment* device_assignment; |
| SE_Stream* stream; |
| |
| TF_Status* status; // out |
| } TpuExecutable_LoadProgramAndEnqueueToStream_Params; |
| |
| #define TpuExecutable_LoadProgramAndEnqueueToStream_Params_SIZE \ |
| (sizeof(struct TpuExecutable_LoadProgramAndEnqueueToStream_Params)) |
| |
| TFTPU_CAPI_EXPORT void TpuExecutable_LoadProgramAndEnqueueToStream( |
| TpuExecutable_LoadProgramAndEnqueueToStream_Params* params); |
| |
| TFTPU_CAPI_EXPORT void HardwareLayout_HostShapeToDeviceShape( |
| XLA_Shape* host_shape, XLA_Shape* device_shape); |
| TFTPU_CAPI_EXPORT int64_t HardwareLayout_ShapeSize(XLA_Shape* shape); |
| TFTPU_CAPI_EXPORT int64_t HardwareLayout_ShapeSizeCompact(XLA_Shape* shape); |
| TFTPU_CAPI_EXPORT int64_t HardwareLayout_ShapeSizeCompactRaw(XLA_Shape* shape); |
| TFTPU_CAPI_EXPORT void HardwareLayout_UpdateLayout( |
| const XLA_Shape& device_shape); |
| |
| typedef struct TpuExecute_RuntimeInputToPaddedData_Params { |
| int32_t struct_size; |
| void* priv; |
| |
| uint32_t* runtime_input_ptr; |
| size_t runtime_input_size; |
| int8_t* padded_data_ptr; |
| size_t padded_data_size; |
| XLA_Shape* runtime_shape; |
| XLA_Shape* compile_time_shape; |
| |
| TF_Status* status; // out |
| } TpuExecute_RuntimeInputToPaddedData_Params; |
| |
| #define TpuExecute_RuntimeInputToPaddedData_Params_SIZE \ |
| (sizeof(struct TpuExecute_RuntimeInputToPaddedData_Params)) |
| |
| TFTPU_CAPI_EXPORT void TpuExecute_RuntimeInputToPaddedData( |
| TpuExecute_RuntimeInputToPaddedData_Params* params); |
| |
| typedef struct ConfigureDistributedTpuOp_DoWork_Params { |
| int32_t struct_size; |
| void* priv; |
| |
| size_t num_cores_per_host_size; |
| const int32_t* num_cores_per_host; |
| size_t server_address_size; |
| const char* server_address; |
| |
| size_t* host_config_output_size; // out |
| char** host_config_output; // out |
| TF_Status* status; // out |
| } ConfigureDistributedTpuOp_DoWork_Params; |
| |
| #define ConfigureDistributedTpuOp_DoWork_Params_SIZE \ |
| (sizeof(struct ConfigureDistributedTpuOp_DoWork_Params)) |
| |
| TFTPU_CAPI_EXPORT void ConfigureDistributedTpuOp_DoWork( |
| ConfigureDistributedTpuOp_DoWork_Params* params); |
| |
| typedef struct WaitForDistributedTpuOp_DoWork_Params { |
| int32_t struct_size; |
| void* priv; |
| |
| size_t num_hosts; |
| size_t num_cores_per_host; |
| const int32_t** host_ordinal_to_global_core_id_map; |
| tensorflow::TpuMeshCommonState* tpu_mesh_common_state; |
| |
| size_t* tpu_topology_output_size; // out |
| char** tpu_topology_output; // out |
| TF_Status* status; // out |
| } WaitForDistributedTpuOp_DoWork_Params; |
| |
| #define WaitForDistributedTpuOp_DoWork_Params_SIZE \ |
| (sizeof(struct WaitForDistributedTpuOp_DoWork_Params)) |
| |
| TFTPU_CAPI_EXPORT void WaitForDistributedTpuOp_DoWork( |
| WaitForDistributedTpuOp_DoWork_Params* params); |
| |
| typedef struct InitializeHostForDistributedTpuOp_DoWork_Params { |
| int32_t struct_size; |
| void* priv; |
| |
| size_t tpu_host_config_size; |
| const char* tpu_host_config; |
| bool enable_whole_mesh_compilations; |
| bool is_master_worker; |
| |
| size_t* core_id_output_size; // out |
| int32_t** core_id_output; // out |
| TF_Status* status; // out |
| } InitializeHostForDistributedTpuOp_DoWork_Params; |
| |
| #define InitializeHostForDistributedTpuOp_DoWork_Params_SIZE \ |
| (sizeof(struct InitializeHostForDistributedTpuOp_DoWork_Params)) |
| |
| TFTPU_CAPI_EXPORT void InitializeHostForDistributedTpuOp_DoWork( |
| InitializeHostForDistributedTpuOp_DoWork_Params* params); |
| |
| TFTPU_CAPI_EXPORT void SetGlobalTPUArrayOp_DoWork( |
| const size_t tpu_topology_size, const char* tpu_topology, |
| TF_Status* status); |
| |
| TFTPU_CAPI_EXPORT void DisconnectDistributedTpuChipsOp_DoWork( |
| int32_t* number_of_chips_output, TF_Status* status); |
| |
| TFTPU_CAPI_EXPORT void TpuConfigurationApi_FreeCharArray(char* output); |
| TFTPU_CAPI_EXPORT void TpuConfigurationApi_FreeInt32Array(int32_t* output); |
| |
| TFTPU_CAPI_EXPORT bool TpuConfigurationApi_HasTPUPodState(); |
| |
| TFTPU_CAPI_EXPORT void TpuConfigurationApi_TpusPerHost(int32_t* tpus, |
| TF_Status* status); |
| TFTPU_CAPI_EXPORT void TpuConfigurationApi_TpuMemoryLimit(int64_t* memory_limit, |
| TF_Status* status); |
| TFTPU_CAPI_EXPORT void TpuConfigurationApi_RemoteCompilationCacheSizeInBytes( |
| int64_t* cache_size_in_bytes); |
| |
| typedef struct TpuConfigurationApi_CompilationCacheServerAddrFromConfig_Params { |
| int32_t struct_size; |
| void* priv; |
| |
| size_t tpu_host_config_size; |
| const char* tpu_host_config; |
| |
| size_t* server_address_output_size; // out |
| char** server_address_output; // out |
| TF_Status* status; // out |
| } TpuConfigurationApi_CompilationCacheServerAddressFromConfig_Params; |
| |
| #define TpuConfigurationApi_CompilationCacheServerAddrFromConfig_Params_SIZE \ |
| (sizeof( \ |
| struct TpuConfigurationApi_CompilationCacheServerAddrFromConfig_Params)) |
| |
| TFTPU_CAPI_EXPORT |
| void TpuConfigurationApi_CompilationCacheServerAddressFromConfig( |
| TpuConfigurationApi_CompilationCacheServerAddrFromConfig_Params* params); |
| |
| typedef struct TpuConfigurationApi_GetServerAddressAndPort_Params { |
| int32_t struct_size; |
| void* priv; |
| |
| size_t* server_address_output_size; // out |
| char** server_address_output; // out |
| int* port_output; // out |
| TF_Status* status; // out |
| } TpuConfigurationApi_GetServerAddressAndPort_Params; |
| |
| #define TpuConfigurationApi_GetServerAddressAndPort_Params_SIZE \ |
| (sizeof(struct TpuConfigurationApi_GetServerAddressAndPort_Params)) |
| |
| TFTPU_CAPI_EXPORT void TpuConfigurationApi_GetServerAddressAndPort( |
| TpuConfigurationApi_GetServerAddressAndPort_Params* params); |
| |
| // Creates a new TPU program. |
| TFTPU_CAPI_EXPORT XLA_TpuProgram* TpuProgram_New(); |
| |
| // Destroys the `tpu_program`. |
| TFTPU_CAPI_EXPORT void TpuProgram_Free(XLA_TpuProgram* tpu_program); |
| |
| // Creates an array of `XLA_TpuProgram*`. |
| TFTPU_CAPI_EXPORT XLA_TpuProgram** TpuProgram_NewArray(size_t count); |
| |
| // Destroys an array of `XLA_TpuProgram*`. |
| TFTPU_CAPI_EXPORT void TpuProgram_FreeArray(XLA_TpuProgram* tpu_program[]); |
| |
| // Unloads and destroys the `tpu_program`. Once the TPU program is unloaded and |
| // destroyed, it is in an unusable state. |
| TFTPU_CAPI_EXPORT void TpuProgram_UnloadAndDestroy(XLA_TpuProgram* tpu_program, |
| TF_Status* status); |
| |
| // Gets TPU program size in bytes from the `tpu_program`. |
| TFTPU_CAPI_EXPORT int64_t |
| TpuProgram_GetProgramSize(const XLA_TpuProgram* tpu_program); |
| |
| // Logs the summary of current memory state snapshot of the `tpu_program`. |
| TFTPU_CAPI_EXPORT bool TpuProgram_LogProgramMemorySummary( |
| const XLA_TpuProgram* tpu_program); |
| |
| // Gets TPU program executable info from the `tpu_program`. |
| TFTPU_CAPI_EXPORT void TpuProgram_GetExecutableInfo( |
| const XLA_TpuProgram* tpu_program, TpuSerializedProto* executable_info, |
| TF_Status* status); |
| |
| // Gets host transfer info proto. |
| TFTPU_CAPI_EXPORT void TpuProgram_GetHostTransferInfo( |
| const XLA_TpuProgram* tpu_program, TpuSerializedProto* host_transfer_info, |
| TF_Status* status); |
| |
| // Gets HLO metadata proto. |
| TFTPU_CAPI_EXPORT void TpuProgram_GetHloMetadata( |
| const XLA_TpuProgram* tpu_program, TpuSerializedProto* hlo_metadata, |
| TF_Status* status); |
| |
| // Gets may modify variables boolean value. |
| TFTPU_CAPI_EXPORT void TpuProgram_GetMayModifyVariables( |
| const XLA_TpuProgram* tpu_program, bool* may_modify_variables); |
| |
| // Checks if TPU program has sharding. |
| TFTPU_CAPI_EXPORT bool TpuProgram_HasSharding( |
| const XLA_TpuProgram* tpu_program); |
| |
| // Gets TPU program by sharding type. Return value is valid only when the |
| // `status.status()` returns `OK`. |
| TFTPU_CAPI_EXPORT XLA_TpuProgram* TpuProgram_GetTpuProgram( |
| XLA_TpuProgram* tpu_program, TpuProgramShardingType type); |
| |
| // Gets TPU executable proto from a `tpu_program`. |
| TFTPU_CAPI_EXPORT void TpuProgram_SerializeTpuExecutable( |
| const XLA_TpuProgram* tpu_program, TpuExecutableSerializedProto* executable, |
| TF_Status* status); |
| |
| // Gets compilation metadata proto from a `tpu_program`. |
| TFTPU_CAPI_EXPORT void TpuProgram_SerializeCompilerMetadata( |
| const XLA_TpuProgram* tpu_program, |
| CompilerMetadataSerializedProto* compiler_metadata, TF_Status* status); |
| |
| // Deserializes the `GetTpuProgramResponse` proto into an `XLA_TpuProgram`. |
| TFTPU_CAPI_EXPORT void TpuProgram_DeserializeFromGetTpuProgramResponseProto( |
| TpuSerializedProto get_tpu_program_response, XLA_TpuProgram* tpu_program, |
| TF_Status* status); |
| |
| TFTPU_CAPI_EXPORT TpuProgramFingerprint |
| TpuProgram_GetFingerprint(const XLA_TpuProgram* tpu_program); |
| |
| TFTPU_CAPI_EXPORT void TpuProgram_DestroyFingerprint( |
| TpuProgramFingerprint fingerprint); |
| |
| // Checks if whether a TPU compilation is enabled. |
| TFTPU_CAPI_EXPORT bool TpuCompile_IsTpuCompilationEnabled(); |
| |
| // XLA compilation cannot be cancelled. To avoid hanging the TF worker will exit |
| // when cancellation is requested for an XLA compile op. Some tests require this |
| // behavior to be disabled, and we test for this condition with the following |
| // flag function. |
| TFTPU_CAPI_EXPORT bool TpuCompile_ShouldTpuCompileOpIgnoreCancellation(); |
| |
| // Returns the number of available TPU core count. |
| TFTPU_CAPI_EXPORT int TpuTopology_AvailableCoreCount( |
| const XLA_TpuMeshState* mesh_state, TpuCoreTypeEnum tpu_core_type); |
| |
| // Recycle unused service port. |
| TFTPU_CAPI_EXPORT void TpuNetUtil_RecycleUnusedPort(int port); |
| |
| // Creates a unique compilation cache `key` used for `put` and `get` operations. |
| // Returned buffers are heap-allocated and must be owned. |
| TFTPU_CAPI_EXPORT CompilationCacheKeyResult |
| TpuCompile_CreateCompilationCacheKey(CompilationCacheKeyProperty property); |
| |
| // Destroys the CompilationCacheKeyResult returned by calling the |
| // `TpuCompile_CreateCompilationCacheKey` API. |
| TFTPU_CAPI_EXPORT void TpuCompile_DestroyCompilationCacheKey( |
| CompilationCacheKeyResult result); |
| |
| // Creates a guaranteed const fingerprint. Guarantee const is normally used in |
| // TPU inference to avoid re-copying unchanged variables onto the TPU device. |
| // It promises the value is identical for every execution in the same session |
| // even if the actual value changes in later executions. |
| TFTPU_CAPI_EXPORT uint64_t TpuCompile_CreateGuaranteedConstFingerprint( |
| uint64_t fingerprint, const char* data, size_t size); |
| |
| XLA_TpuNodeContext* TpuNodeContext_Create(int device_ordinal, |
| TF_Status* status); |
| void TpuNodeContext_Free(XLA_TpuNodeContext* node_context); |
| |
| void TpuNodeContext_StopChipHeartbeats(TF_Status* status); |
| |
| void TpuNodeContext_CloseTpuHost(TF_Status* status); |
| |
| void TpuNodeContext_Initialize(int device_ordinal, TF_Status* status); |
| |
| bool TpuNodeContext_CompactionSupported(int device_ordinal); |
| |
| // Globally initialize the TPU system for inference. |
| TFTPU_CAPI_EXPORT void TfTpu_InitializeTpuModelServer(); |
| |
| struct TfTpu_OpsApiFn { |
| TFTPU_ADD_FN_IN_STRUCT(TpuCompile_CompileAndBuild); |
| TFTPU_ADD_FN_IN_STRUCT(TpuCompile_XrtCompileAndBuild); |
| |
| TFTPU_ADD_FN_IN_STRUCT(TpuMeshState_Create); |
| TFTPU_ADD_FN_IN_STRUCT(TpuMeshState_Free); |
| TFTPU_ADD_FN_IN_STRUCT(TpuMeshState_MeshCommonState); |
| |
| TFTPU_ADD_FN_IN_STRUCT(TpuProfiler_Create); |
| TFTPU_ADD_FN_IN_STRUCT(TpuProfiler_Destroy); |
| TFTPU_ADD_FN_IN_STRUCT(TpuProfiler_Start); |
| TFTPU_ADD_FN_IN_STRUCT(TpuProfiler_Stop); |
| TFTPU_ADD_FN_IN_STRUCT(TpuProfiler_CollectData); |
| |
| TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_LoadProgramAndEnqueueToStream); |
| TFTPU_ADD_FN_IN_STRUCT(HardwareLayout_HostShapeToDeviceShape); |
| TFTPU_ADD_FN_IN_STRUCT(HardwareLayout_ShapeSize); |
| TFTPU_ADD_FN_IN_STRUCT(HardwareLayout_ShapeSizeCompact); |
| TFTPU_ADD_FN_IN_STRUCT(HardwareLayout_ShapeSizeCompactRaw); |
| TFTPU_ADD_FN_IN_STRUCT(HardwareLayout_UpdateLayout); |
| TFTPU_ADD_FN_IN_STRUCT(TpuExecute_RuntimeInputToPaddedData); |
| |
| TFTPU_ADD_FN_IN_STRUCT(ConfigureDistributedTpuOp_DoWork); |
| TFTPU_ADD_FN_IN_STRUCT(WaitForDistributedTpuOp_DoWork); |
| TFTPU_ADD_FN_IN_STRUCT(InitializeHostForDistributedTpuOp_DoWork); |
| TFTPU_ADD_FN_IN_STRUCT(SetGlobalTPUArrayOp_DoWork); |
| TFTPU_ADD_FN_IN_STRUCT(DisconnectDistributedTpuChipsOp_DoWork); |
| TFTPU_ADD_FN_IN_STRUCT(TpuConfigurationApi_FreeCharArray); |
| TFTPU_ADD_FN_IN_STRUCT(TpuConfigurationApi_FreeInt32Array); |
| TFTPU_ADD_FN_IN_STRUCT(TpuConfigurationApi_HasTPUPodState); |
| TFTPU_ADD_FN_IN_STRUCT(TpuConfigurationApi_TpusPerHost); |
| TFTPU_ADD_FN_IN_STRUCT(TpuConfigurationApi_TpuMemoryLimit); |
| TFTPU_ADD_FN_IN_STRUCT(TpuConfigurationApi_RemoteCompilationCacheSizeInBytes); |
| TFTPU_ADD_FN_IN_STRUCT( |
| TpuConfigurationApi_CompilationCacheServerAddressFromConfig); |
| TFTPU_ADD_FN_IN_STRUCT(TpuConfigurationApi_GetServerAddressAndPort); |
| |
| TFTPU_ADD_FN_IN_STRUCT(TpuProgram_New); |
| TFTPU_ADD_FN_IN_STRUCT(TpuProgram_Free); |
| TFTPU_ADD_FN_IN_STRUCT(TpuProgram_NewArray); |
| TFTPU_ADD_FN_IN_STRUCT(TpuProgram_FreeArray); |
| TFTPU_ADD_FN_IN_STRUCT(TpuProgram_UnloadAndDestroy); |
| TFTPU_ADD_FN_IN_STRUCT(TpuProgram_GetProgramSize); |
| TFTPU_ADD_FN_IN_STRUCT(TpuProgram_LogProgramMemorySummary); |
| TFTPU_ADD_FN_IN_STRUCT(TpuProgram_GetExecutableInfo); |
| TFTPU_ADD_FN_IN_STRUCT(TpuProgram_GetHostTransferInfo); |
| TFTPU_ADD_FN_IN_STRUCT(TpuProgram_GetHloMetadata); |
| TFTPU_ADD_FN_IN_STRUCT(TpuProgram_GetMayModifyVariables); |
| TFTPU_ADD_FN_IN_STRUCT(TpuProgram_HasSharding); |
| TFTPU_ADD_FN_IN_STRUCT(TpuProgram_GetTpuProgram); |
| TFTPU_ADD_FN_IN_STRUCT(TpuProgram_SerializeTpuExecutable); |
| TFTPU_ADD_FN_IN_STRUCT(TpuProgram_SerializeCompilerMetadata); |
| TFTPU_ADD_FN_IN_STRUCT(TpuProgram_DeserializeFromGetTpuProgramResponseProto); |
| TFTPU_ADD_FN_IN_STRUCT(TpuProgram_GetFingerprint); |
| TFTPU_ADD_FN_IN_STRUCT(TpuProgram_DestroyFingerprint); |
| |
| TFTPU_ADD_FN_IN_STRUCT(TpuCompile_IsTpuCompilationEnabled); |
| TFTPU_ADD_FN_IN_STRUCT(TpuCompile_ShouldTpuCompileOpIgnoreCancellation); |
| TFTPU_ADD_FN_IN_STRUCT(TpuTopology_AvailableCoreCount); |
| TFTPU_ADD_FN_IN_STRUCT(TpuNetUtil_RecycleUnusedPort); |
| TFTPU_ADD_FN_IN_STRUCT(TpuCompile_CreateCompilationCacheKey); |
| TFTPU_ADD_FN_IN_STRUCT(TpuCompile_DestroyCompilationCacheKey); |
| TFTPU_ADD_FN_IN_STRUCT(TpuCompile_CreateGuaranteedConstFingerprint); |
| |
| TFTPU_ADD_FN_IN_STRUCT(TpuNodeContext_Create); |
| TFTPU_ADD_FN_IN_STRUCT(TpuNodeContext_Free); |
| TFTPU_ADD_FN_IN_STRUCT(TpuNodeContext_StopChipHeartbeats); |
| TFTPU_ADD_FN_IN_STRUCT(TpuNodeContext_CloseTpuHost); |
| TFTPU_ADD_FN_IN_STRUCT(TpuNodeContext_Initialize); |
| TFTPU_ADD_FN_IN_STRUCT(TpuNodeContext_CompactionSupported); |
| |
| TFTPU_ADD_FN_IN_STRUCT(TfTpu_InitializeTpuModelServer); |
| |
| TFTPU_ADD_FN_IN_STRUCT(TfTpuOrdinalSelector_Create); |
| TFTPU_ADD_FN_IN_STRUCT(TfTpuOrdinalSelector_Destroy); |
| TFTPU_ADD_FN_IN_STRUCT(TfTpuOrdinalSelector_GetOrdinal); |
| TFTPU_ADD_FN_IN_STRUCT(TfTpuOrdinalSelector_DequeueFromCoreSelector); |
| TFTPU_ADD_FN_IN_STRUCT(TfTpu_GetTpuPartitionedCallParams); |
| }; |
| |
| } // extern "C" |
| |
| #endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_KERNELS_C_API_H_ |