blob: 5b1dbd130e0df0e991ac3e2dcce2840e66b1f9b9 [file] [log] [blame]
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_H_
#define TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_H_
// This header defines the macro TF_PLATFORM_STRINGS() which should be used
// once in each dynamically loadable TensorFlow module. It embeds static
// strings into the compilation unit that allow TensorFlow to determine what
// compilation options were in effect when the compilation unit was built. All
// compilation units within the same dynamically loadable library should be
// built with the same options (or at least, the strings should be embedded in
// the compilation unit built with the most restrictive options).
// The platform strings embedded into a binary may be retrieved with the
// GetPlatformStrings function.
// Rationale:
// We wish to load only those libraries that this CPU can execute. For
// example, we should not load a library compiled with avx256 instructions on a
// CPU that cannot execute them.
//
// One might think that one could dlopen() the library, and call a routine that
// would return which cpu type it was compiled for. Alas, this does not work,
// because at dlopen() time, a library containing C++ will execute constructors
// of class variables with static storage class. Even code that looks
// innocuous may use optional platform-specific instructions. For example,
// the fastest way to zero a region of memory might use optional instructions.
//
// One might think one could run a tool such as "objdump" to read flags from
// the libraries' headers, or perhaps disassemble each library to look for
// particular instructions. Unfortunately, the desired flags are not present
// in the headers, and disassembly can be prohibitively slow ("objdump -d" is
// very slow, for example). Moreover, a tool to examine the library may not
// be present on the system unless the user has installed special packages (for
// example, on Windows).
//
// Instead, we adopt a crude but straightforward solution: We require
// developers to use the macro TF_PLATFORM_STRINGS() in their library, to
// embed the compilation options as constant strings. The compiler's
// predefined macros pick which strings are included. We then search for the
// strings in the files, and then dlopen() only those libraries that have or
// lack strings as needed.
//
// We adopt the approach of placing in the binary a fairly raw copy of the
// predefined macros, rather than trying to interpret them in complex ways at
// compile time. This allows the loading binary to alter its interpretation of
// the strings without library developers having to recompile.
#include <stdio.h>
#include <string>
#include <vector>
// Naming convention: apart from the header guard, every internal macro defined
// in this header is spelled TF_PLAT_STR_*.
//
// Version of the list of tested macros:
//  - a macro is removed from the list => increment the major version and reset
//    the minor version to zero;
//  - a macro is added to the list     => increment the minor version.
#define TF_PLAT_STR_VERSION_ "1.0"

// Magic prefix that introduces every option string placed in the binary.
// What follows the prefix has the form
//     [A-Za-z_0-9]+=<value>
// and ends with a terminating nul.  To keep searching simple, the prefix is
// pure ASCII, begins with a nul, and contains no character twice.
#define TF_PLAT_STR_MAGIC_PREFIX_ "\0S\\s\":^p*L}"

// Stringizes its argument exactly as written (no macro expansion).
// Exists only as a helper for TF_PLAT_STR_AS_STR_().
#define TF_PLAT_STR_STR_1_(token) #token

// Produces a constant string holding `value` after macro expansion; the extra
// level of indirection is what lets the argument expand before stringizing.
#define TF_PLAT_STR_AS_STR_(value) TF_PLAT_STR_STR_1_(value)

// Deliberately empty.  It closes every macro list below so that all real
// entries look alike.
#define TF_PLAT_STR_TERMINATOR_

// TF_PLAT_STR_(option) yields the constant string
//     <magic prefix> "option" "=" "<expansion of option>"
// which records that a particular compilation option was turned on, together
// with its value.
//
// With gcc and clang one could imagine selecting the string inside a single
// expression, along the lines of
//     (sizeof(#x) != sizeof(TF_PLAT_STR_AS_STR_(x))
//          ? TF_PLAT_STR_MAGIC_PREFIX_ #x "=" TF_PLAT_STR_AS_STR_(x)
//          : TF_PLAT_STR_MAGIC_PREFIX_ #x "=0"),
// but some compilers (notably MSVC) place both "foo" and "bar" in the binary
// when presented with
//     (true ? "foo" : "bar")
// so the strings have to be selected with #if instead, which is rather
// verbose.
#define TF_PLAT_STR_(option) \
  TF_PLAT_STR_MAGIC_PREFIX_ #option "=" TF_PLAT_STR_AS_STR_(option)
// Include the #if machinery that sets the macros used below.
// platform_strings_computed.h can be generated by filtering this header file
// through:
// awk '
// header == "" { print; }
// /\*\// && header == "" {
// print "// Generated from platform_strings.h.";
// print "";
// print "#ifndef TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_COMPUTED_H_";
// print "#define TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_COMPUTED_H_";
// print "";
// header = 1;
// }
// /^#define TF_PLAT_STR_LIST_[a-zA-Z0-9_]*\(\) *\\$/ { active = 1; }
// /TF_PLAT_STR_TERMINATOR_/ { active = 0; }
// /^ *TF_PLAT_STR_[A-Za-z0-9_]* *\\$/ && active {
// x = $0;
// sub(/^ *TF_PLAT_STR_/, "", x);
// sub(/ *\\$/, "", x);
// printf ("#if defined(%s)\n", x);
// printf ("#define TF_PLAT_STR_%s TF_PLAT_STR_(%s)\n", x, x);
// printf ("#else\n");
// printf ("#define TF_PLAT_STR_%s\n", x);
// printf ("#endif\n");
// }
// END {
// print "";
// print "#endif // TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_COMPUTED_H_";
// }'
#include "tensorflow/core/platform/platform_strings_computed.h"
// clang-format butchers the following lines.
// clang-format off
// x86_64 and x86_32 optional features.
//
// Each TF_PLAT_STR_<macro> entry names one predefined macro to record.  Per
// the awk script above, platform_strings_computed.h defines every entry as
// TF_PLAT_STR_(<macro>) when <macro> is defined and as nothing otherwise, so
// this list expands to the adjacent option strings of exactly those macros
// that were defined when the compilation unit was built.
//
// Further down, this list is replaced by an empty definition when none of
// __x86_64__, _M_X64, __i386__ or _M_IX86 is defined.
//
// Keep one entry per line, each ending in a backslash, and keep the closing
// TF_PLAT_STR_TERMINATOR_: the awk script relies on that line format to find
// the entries and the end of the list.
#define TF_PLAT_STR_LIST___x86_64__() \
TF_PLAT_STR__M_IX86_FP \
TF_PLAT_STR__NO_PREFETCHW \
TF_PLAT_STR___3dNOW_A__ \
TF_PLAT_STR___3dNOW__ \
TF_PLAT_STR___ABM__ \
TF_PLAT_STR___ADX__ \
TF_PLAT_STR___AES__ \
TF_PLAT_STR___AVX2__ \
TF_PLAT_STR___AVX512BW__ \
TF_PLAT_STR___AVX512CD__ \
TF_PLAT_STR___AVX512DQ__ \
TF_PLAT_STR___AVX512ER__ \
TF_PLAT_STR___AVX512F__ \
TF_PLAT_STR___AVX512IFMA__ \
TF_PLAT_STR___AVX512PF__ \
TF_PLAT_STR___AVX512VBMI__ \
TF_PLAT_STR___AVX512VL__ \
TF_PLAT_STR___AVX__ \
TF_PLAT_STR___BMI2__ \
TF_PLAT_STR___BMI__ \
TF_PLAT_STR___CLFLUSHOPT__ \
TF_PLAT_STR___CLZERO__ \
TF_PLAT_STR___F16C__ \
TF_PLAT_STR___FMA4__ \
TF_PLAT_STR___FMA__ \
TF_PLAT_STR___FP_FAST_FMA \
TF_PLAT_STR___FP_FAST_FMAF \
TF_PLAT_STR___FSGSBASE__ \
TF_PLAT_STR___FXSR__ \
TF_PLAT_STR___LWP__ \
TF_PLAT_STR___LZCNT__ \
TF_PLAT_STR___MMX__ \
TF_PLAT_STR___MWAITX__ \
TF_PLAT_STR___PCLMUL__ \
TF_PLAT_STR___PKU__ \
TF_PLAT_STR___POPCNT__ \
TF_PLAT_STR___PRFCHW__ \
TF_PLAT_STR___RDRND__ \
TF_PLAT_STR___RDSEED__ \
TF_PLAT_STR___RTM__ \
TF_PLAT_STR___SHA__ \
TF_PLAT_STR___SSE2_MATH__ \
TF_PLAT_STR___SSE2__ \
TF_PLAT_STR___SSE_MATH__ \
TF_PLAT_STR___SSE__ \
TF_PLAT_STR___SSE3__ \
TF_PLAT_STR___SSE4A__ \
TF_PLAT_STR___SSE4_1__ \
TF_PLAT_STR___SSE4_2__ \
TF_PLAT_STR___SSSE3__ \
TF_PLAT_STR___TBM__ \
TF_PLAT_STR___XOP__ \
TF_PLAT_STR___XSAVEC__ \
TF_PLAT_STR___XSAVEOPT__ \
TF_PLAT_STR___XSAVES__ \
TF_PLAT_STR___XSAVE__ \
TF_PLAT_STR_TERMINATOR_
// PowerPC (64- and 32-bit) optional features.
//
// Same layout as the x86 list: one TF_PLAT_STR_<macro> entry per predefined
// macro to record, each of which platform_strings_computed.h defines as
// either TF_PLAT_STR_(<macro>) or nothing (see the awk script above), closed
// by TF_PLAT_STR_TERMINATOR_.
//
// Further down, this list is replaced by an empty definition when neither
// __powerpc64__ nor __powerpc__ is defined.
#define TF_PLAT_STR_LIST___powerpc64__() \
TF_PLAT_STR__SOFT_DOUBLE \
TF_PLAT_STR__SOFT_FLOAT \
TF_PLAT_STR___ALTIVEC__ \
TF_PLAT_STR___APPLE_ALTIVEC__ \
TF_PLAT_STR___CRYPTO__ \
TF_PLAT_STR___FLOAT128_HARDWARE__ \
TF_PLAT_STR___FLOAT128_TYPE__ \
TF_PLAT_STR___FP_FAST_FMA \
TF_PLAT_STR___FP_FAST_FMAF \
TF_PLAT_STR___HTM__ \
TF_PLAT_STR___NO_FPRS__ \
TF_PLAT_STR___NO_LWSYNC__ \
TF_PLAT_STR___POWER8_VECTOR__ \
TF_PLAT_STR___POWER9_VECTOR__ \
TF_PLAT_STR___PPC405__ \
TF_PLAT_STR___QUAD_MEMORY_ATOMIC__ \
TF_PLAT_STR___RECIPF__ \
TF_PLAT_STR___RECIP_PRECISION__ \
TF_PLAT_STR___RECIP__ \
TF_PLAT_STR___RSQRTEF__ \
TF_PLAT_STR___RSQRTE__ \
TF_PLAT_STR___TM_FENCE__ \
TF_PLAT_STR___UPPER_REGS_DF__ \
TF_PLAT_STR___UPPER_REGS_SF__ \
TF_PLAT_STR___VEC__ \
TF_PLAT_STR___VSX__ \
TF_PLAT_STR_TERMINATOR_
// aarch64 and 32-bit arm optional features.
//
// One TF_PLAT_STR_<macro> entry per predefined macro to record; each entry is
// defined by platform_strings_computed.h as either TF_PLAT_STR_(<macro>) or
// nothing (see the awk script above), and the list is closed by
// TF_PLAT_STR_TERMINATOR_.
//
// Every macro must appear at most once: a repeated entry embeds the same
// option string in the binary twice.
//
// Further down, this list is replaced by an empty definition when none of
// __aarch64__, _M_ARM64, __arm__ or _M_ARM is defined.
#define TF_PLAT_STR_LIST___aarch64__() \
TF_PLAT_STR___ARM_ARCH \
TF_PLAT_STR___ARM_FEATURE_CLZ \
TF_PLAT_STR___ARM_FEATURE_CRC32 \
TF_PLAT_STR___ARM_FEATURE_CRYPTO \
TF_PLAT_STR___ARM_FEATURE_DIRECTED_ROUNDING \
TF_PLAT_STR___ARM_FEATURE_DSP \
TF_PLAT_STR___ARM_FEATURE_FMA \
TF_PLAT_STR___ARM_FEATURE_IDIV \
TF_PLAT_STR___ARM_FEATURE_LDREX \
TF_PLAT_STR___ARM_FEATURE_NUMERIC_MAXMIN \
TF_PLAT_STR___ARM_FEATURE_QBIT \
TF_PLAT_STR___ARM_FEATURE_QRDMX \
TF_PLAT_STR___ARM_FEATURE_SAT \
TF_PLAT_STR___ARM_FEATURE_SIMD32 \
TF_PLAT_STR___ARM_FEATURE_UNALIGNED \
TF_PLAT_STR___ARM_FP \
TF_PLAT_STR___ARM_NEON_FP \
TF_PLAT_STR___ARM_NEON__ \
TF_PLAT_STR___ARM_WMMX \
TF_PLAT_STR___IWMMXT2__ \
TF_PLAT_STR___IWMMXT__ \
TF_PLAT_STR___VFP_FP__ \
TF_PLAT_STR_TERMINATOR_
// Generic features, including indication of architecture and OS.
// The _M_* macros are defined by Visual Studio.
// It doesn't define __LITTLE_ENDIAN__ or __BYTE_ORDER__;
// Windows is assumed to be little endian.
//
// Unlike the per-architecture lists, no empty replacement for this list is
// defined further down; it is expanded by TF_PLATFORM_STRINGS() as written.
//
// NOTE(review): per the awk script above, the macro tested for an entry is
// the entry name with the leading "TF_PLAT_STR_" removed.  That makes
// TF_PLAT_STR_____MSYS__ test "____MSYS__" and TF_PLAT_STR___riscv___ test
// "__riscv___".  These look like misspellings of __MSYS__ and __riscv, in
// which case the two entries never produce a string -- TODO confirm against
// the compilers' predefined macros.  Any rename has to be made together with
// regenerating platform_strings_computed.h, which defines the entry names.
#define TF_PLAT_STR_LIST___generic__() \
TF_PLAT_STR_TARGET_IPHONE_SIMULATOR \
TF_PLAT_STR_TARGET_OS_IOS \
TF_PLAT_STR_TARGET_OS_IPHONE \
TF_PLAT_STR__MSC_VER \
TF_PLAT_STR__M_ARM \
TF_PLAT_STR__M_ARM64 \
TF_PLAT_STR__M_ARM_ARMV7VE \
TF_PLAT_STR__M_ARM_FP \
TF_PLAT_STR__M_IX86 \
TF_PLAT_STR__M_X64 \
TF_PLAT_STR__WIN32 \
TF_PLAT_STR__WIN64 \
TF_PLAT_STR___ANDROID__ \
TF_PLAT_STR___APPLE__ \
TF_PLAT_STR___BYTE_ORDER__ \
TF_PLAT_STR___CYGWIN__ \
TF_PLAT_STR___FreeBSD__ \
TF_PLAT_STR___LITTLE_ENDIAN__ \
TF_PLAT_STR___NetBSD__ \
TF_PLAT_STR___OpenBSD__ \
TF_PLAT_STR_____MSYS__ \
TF_PLAT_STR___aarch64__ \
TF_PLAT_STR___alpha__ \
TF_PLAT_STR___arm__ \
TF_PLAT_STR___i386__ \
TF_PLAT_STR___i686__ \
TF_PLAT_STR___ia64__ \
TF_PLAT_STR___linux__ \
TF_PLAT_STR___mips32__ \
TF_PLAT_STR___mips64__ \
TF_PLAT_STR___powerpc64__ \
TF_PLAT_STR___powerpc__ \
TF_PLAT_STR___riscv___ \
TF_PLAT_STR___s390x__ \
TF_PLAT_STR___sparc64__ \
TF_PLAT_STR___sparc__ \
TF_PLAT_STR___x86_64__ \
TF_PLAT_STR_TERMINATOR_
// Replace the feature list of every architecture family that is not the
// compilation target with an empty definition, so that only the list of the
// target family contributes strings where the lists are expanded.

// x86 family: gcc/clang spellings (__x86_64__, __i386__) and MSVC spellings
// (_M_X64, _M_IX86).
#if !(defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
      defined(_M_IX86))
#undef TF_PLAT_STR_LIST___x86_64__
#define TF_PLAT_STR_LIST___x86_64__()
#endif

// PowerPC family, 64- and 32-bit.
#if !(defined(__powerpc64__) || defined(__powerpc__))
#undef TF_PLAT_STR_LIST___powerpc64__
#define TF_PLAT_STR_LIST___powerpc64__()
#endif

// Arm family: gcc/clang spellings (__aarch64__, __arm__) and MSVC spellings
// (_M_ARM64, _M_ARM).
#if !(defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || \
      defined(_M_ARM))
#undef TF_PLAT_STR_LIST___aarch64__
#define TF_PLAT_STR_LIST___aarch64__()
#endif
// Macro to be used in each dynamically loadable library.
//
// It expands to three things:
//  - tf_cpu_option[]: a static const char array holding the version string
//    followed by the option strings of the x86, PowerPC, Arm and generic
//    lists, each string introduced by TF_PLAT_STR_MAGIC_PREFIX_;
//  - tf_cpu_option_global: a global pointer that the helper's constructor
//    points at tf_cpu_option[];
//  - tf_cpu_option_avoid_omit_class: an instance, in an anonymous namespace,
//    of a helper class whose constructor references tf_cpu_option[].
// Because the expansion opens an anonymous namespace, the macro must be used
// at namespace scope.
//
// The BSS global variable tf_cpu_option_global and the class
// instance tf_cpu_option_avoid_omit_class are needed to prevent
// compilers/linkers such as clang from omitting the static variable
// tf_cpu_option[], which would otherwise appear to be unused. We cannot make
// tf_cpu_option[] global, because we then might get multiply-defined symbols
// if TF_PLATFORM_STRINGS() is used twice in the same library.
// (tf_cpu_option_global doesn't see such errors because it is
// defined in BSS, so multiple definitions are combined by the linker.) gcc's
// __attribute__((used)) is insufficient because it seems to be ignored by
// linkers.
//
// NOTE(review): in C++, "const char *tf_cpu_option_global;" at namespace scope
// is a definition with external linkage; whether two such definitions in one
// link are really merged rather than reported as duplicates presumably
// depends on the toolchain -- TODO confirm before relying on the claim above.
#define TF_PLATFORM_STRINGS() \
static const char tf_cpu_option[] = \
TF_PLAT_STR_MAGIC_PREFIX_ "TF_PLAT_STR_VERSION=" TF_PLAT_STR_VERSION_ \
TF_PLAT_STR_LIST___x86_64__() \
TF_PLAT_STR_LIST___powerpc64__() \
TF_PLAT_STR_LIST___aarch64__() \
TF_PLAT_STR_LIST___generic__() \
; \
const char *tf_cpu_option_global; \
namespace { \
class TFCPUOptionHelper { \
public: \
TFCPUOptionHelper() { \
/* Compilers/linkers remove unused variables aggressively. The */ \
/* following gyrations subvert most such optimizations. */ \
tf_cpu_option_global = tf_cpu_option; \
/* Nothing is printed because the string starts with a nul. */ \
printf("%s", tf_cpu_option); \
} \
} tf_cpu_option_avoid_omit_class; \
} /* anonymous namespace */
// clang-format on
namespace tensorflow {
// Forward declaration only; it is not referenced by the declarations visible
// in this header.
class Status;
// Retrieves the platform strings from the file at the given path and appends
// them to the given vector.
//
//   path:  file to read the platform strings from.
//   found: vector that receives the strings; they are appended, so existing
//          elements are kept.
//
// Returns zero on success.  If the returned int is non-zero, an error occurred
// reading the file and *found may or may not have been modified.  The returned
// error code is suitable for use with strerror().
int GetPlatformStrings(const std::string& path,
std::vector<std::string>* found);
} // namespace tensorflow
#endif // TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_H_