clang-r407598b/include/llvm/Frontend/OpenMP/OMPGridValues.h - platform/prebuilts/clang/host/darwin-x86 - Git at Google

 //====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 ///
 /// \file
 /// \brief Provides definitions for Target specific Grid Values
 ///
 //===----------------------------------------------------------------------===//

 #ifndef LLVM_OPENMP_GRIDVALUES_H
 #define LLVM_OPENMP_GRIDVALUES_H

 namespace llvm {

 namespace omp {

 /// \brief Defines various target-specific GPU grid values that must be
 ///        consistent between host RTL (plugin), device RTL, and clang.
 ///        We can change grid values for a "fat" binary so that different
 ///        passes get the correct values when generating code for a
 ///        multi-target binary. Both amdgcn and nvptx values are stored in
 ///        this file. In the future, should there be differences between GPUs
 ///        of the same architecture, then simply make a different array and
 ///        use the new array name.
 ///
 /// Example usage in clang:
 ///   const unsigned slot_size = ctx.GetTargetInfo().getGridValue(GV_Warp_Size);
 ///
 /// Example usage in libomptarget/deviceRTLs:
 ///   #include "OMPGridValues.h"
 ///   #ifdef __AMDGPU__
 ///     #define GRIDVAL AMDGPUGpuGridValues
 ///   #else
 ///     #define GRIDVAL NVPTXGpuGridValues
 ///   #endif
 ///   ... Then use this reference for GV_Warp_Size in the deviceRTL source.
 ///   GRIDVAL[GV_Warp_Size]
 ///
 /// Example usage in libomptarget hsa plugin:
 ///   #include "OMPGridValues.h"
 ///   #define GRIDVAL AMDGPUGpuGridValues
 ///   ... Then use this reference to access GV_Warp_Size in the hsa plugin.
 ///   GRIDVAL[GV_Warp_Size]
 ///
 /// Example usage in libomptarget cuda plugin:
 ///    #include "OMPGridValues.h"
 ///    #define GRIDVAL NVPTXGpuGridValues
 ///   ... Then use this reference to access GV_Warp_Size in the cuda plugin.
 ///    GRIDVAL[GV_Warp_Size]
 ///
 enum GVIDX {
   /// The maximum number of workers in a kernel.
   /// (THREAD_ABSOLUTE_LIMIT) - (GV_Warp_Size), might be issue for blockDim.z
   GV_Threads,
   /// The size reserved for data in a shared memory slot.
   GV_Slot_Size,
   /// The default value of maximum number of threads in a worker warp.
   GV_Warp_Size,
   /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
   /// for NVPTX.
   GV_Warp_Size_32,
   /// The number of bits required to represent the max number of threads in warp
   GV_Warp_Size_Log2,
   /// GV_Warp_Size * GV_Slot_Size,
   GV_Warp_Slot_Size,
   /// the maximum number of teams.
   GV_Max_Teams,
   /// Global Memory Alignment
   GV_Mem_Align,
   /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
   GV_Warp_Size_Log2_Mask,
   // An alternative to the heavy data sharing infrastructure that uses global
   // memory is one that uses device __shared__ memory.  The amount of such space
   // (in bytes) reserved by the OpenMP runtime is noted here.
   GV_SimpleBufferSize,
   // The absolute maximum team size for a working group
   GV_Max_WG_Size,
   // The default maximum team size for a working group
   GV_Default_WG_Size,
   // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
   GV_Max_Warp_Number,
   /// The slot size that should be reserved for a working warp.
   /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
   GV_Warp_Size_Log2_MaskL
 };

 /// For AMDGPU GPUs
 static constexpr unsigned AMDGPUGpuGridValues[] = {
     448,       // GV_Threads
     256,       // GV_Slot_Size
     64,        // GV_Warp_Size
     32,        // GV_Warp_Size_32
     6,         // GV_Warp_Size_Log2
     64 * 256,  // GV_Warp_Slot_Size
     128,       // GV_Max_Teams
     256,       // GV_Mem_Align
     63,        // GV_Warp_Size_Log2_Mask
     896,       // GV_SimpleBufferSize
     1024,      // GV_Max_WG_Size,
     256,       // GV_Defaut_WG_Size
     1024 / 64, // GV_Max_WG_Size / GV_WarpSize
     63         // GV_Warp_Size_Log2_MaskL
 };

 /// For Nvidia GPUs
 static constexpr unsigned NVPTXGpuGridValues[] = {
     992,               // GV_Threads
     256,               // GV_Slot_Size
     32,                // GV_Warp_Size
     32,                // GV_Warp_Size_32
     5,                 // GV_Warp_Size_Log2
     32 * 256,          // GV_Warp_Slot_Size
     1024,              // GV_Max_Teams
     256,               // GV_Mem_Align
     (~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
     896,               // GV_SimpleBufferSize
     1024,              // GV_Max_WG_Size
     128,               // GV_Defaut_WG_Size
     1024 / 32,         // GV_Max_WG_Size / GV_WarpSize
     31                 // GV_Warp_Size_Log2_MaskL
 };

 } // namespace omp
 } // namespace llvm

 #endif // LLVM_OPENMP_GRIDVALUES_H
	//====--- OMPGridValues.h - Language-specific address spaces --- C++ --====//
	//
	// The LLVM Compiler Infrastructure
	//
	// This file is distributed under the University of Illinois Open Source
	// License. See LICENSE.TXT for details.
	//
	//===----------------------------------------------------------------------===//
	///
	/// \file
	/// \brief Provides definitions for Target specific Grid Values
	///
	//===----------------------------------------------------------------------===//

	#ifndef LLVM_OPENMP_GRIDVALUES_H
	#define LLVM_OPENMP_GRIDVALUES_H

	namespace llvm {

	namespace omp {

	/// \brief Defines various target-specific GPU grid values that must be
	/// consistent between host RTL (plugin), device RTL, and clang.
	/// We can change grid values for a "fat" binary so that different
	/// passes get the correct values when generating code for a
	/// multi-target binary. Both amdgcn and nvptx values are stored in
	/// this file. In the future, should there be differences between GPUs
	/// of the same architecture, then simply make a different array and
	/// use the new array name.
	///
	/// Example usage in clang:
	/// const unsigned slot_size = ctx.GetTargetInfo().getGridValue(GV_Warp_Size);
	///
	/// Example usage in libomptarget/deviceRTLs:
	/// #include "OMPGridValues.h"
	/// #ifdef __AMDGPU__
	/// #define GRIDVAL AMDGPUGpuGridValues
	/// #else
	/// #define GRIDVAL NVPTXGpuGridValues
	/// #endif
	/// ... Then use this reference for GV_Warp_Size in the deviceRTL source.
	/// GRIDVAL[GV_Warp_Size]
	///
	/// Example usage in libomptarget hsa plugin:
	/// #include "OMPGridValues.h"
	/// #define GRIDVAL AMDGPUGpuGridValues
	/// ... Then use this reference to access GV_Warp_Size in the hsa plugin.
	/// GRIDVAL[GV_Warp_Size]
	///
	/// Example usage in libomptarget cuda plugin:
	/// #include "OMPGridValues.h"
	/// #define GRIDVAL NVPTXGpuGridValues
	/// ... Then use this reference to access GV_Warp_Size in the cuda plugin.
	/// GRIDVAL[GV_Warp_Size]
	///
	enum GVIDX {
	/// The maximum number of workers in a kernel.
	/// (THREAD_ABSOLUTE_LIMIT) - (GV_Warp_Size), might be issue for blockDim.z
	GV_Threads,
	/// The size reserved for data in a shared memory slot.
	GV_Slot_Size,
	/// The default value of maximum number of threads in a worker warp.
	GV_Warp_Size,
	/// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
	/// for NVPTX.
	GV_Warp_Size_32,
	/// The number of bits required to represent the max number of threads in warp
	GV_Warp_Size_Log2,
	/// GV_Warp_Size * GV_Slot_Size,
	GV_Warp_Slot_Size,
	/// the maximum number of teams.
	GV_Max_Teams,
	/// Global Memory Alignment
	GV_Mem_Align,
	/// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
	GV_Warp_Size_Log2_Mask,
	// An alternative to the heavy data sharing infrastructure that uses global
	// memory is one that uses device __shared__ memory. The amount of such space
	// (in bytes) reserved by the OpenMP runtime is noted here.
	GV_SimpleBufferSize,
	// The absolute maximum team size for a working group
	GV_Max_WG_Size,
	// The default maximum team size for a working group
	GV_Default_WG_Size,
	// This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
	GV_Max_Warp_Number,
	/// The slot size that should be reserved for a working warp.
	/// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
	GV_Warp_Size_Log2_MaskL
	};

	/// For AMDGPU GPUs
	static constexpr unsigned AMDGPUGpuGridValues[] = {
	448, // GV_Threads
	256, // GV_Slot_Size
	64, // GV_Warp_Size
	32, // GV_Warp_Size_32
	6, // GV_Warp_Size_Log2
	64 * 256, // GV_Warp_Slot_Size
	128, // GV_Max_Teams
	256, // GV_Mem_Align
	63, // GV_Warp_Size_Log2_Mask
	896, // GV_SimpleBufferSize
	1024, // GV_Max_WG_Size,
	256, // GV_Defaut_WG_Size
	1024 / 64, // GV_Max_WG_Size / GV_WarpSize
	63 // GV_Warp_Size_Log2_MaskL
	};

	/// For Nvidia GPUs
	static constexpr unsigned NVPTXGpuGridValues[] = {
	992, // GV_Threads
	256, // GV_Slot_Size
	32, // GV_Warp_Size
	32, // GV_Warp_Size_32
	5, // GV_Warp_Size_Log2
	32 * 256, // GV_Warp_Slot_Size
	1024, // GV_Max_Teams
	256, // GV_Mem_Align
	(~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
	896, // GV_SimpleBufferSize
	1024, // GV_Max_WG_Size
	128, // GV_Defaut_WG_Size
	1024 / 32, // GV_Max_WG_Size / GV_WarpSize
	31 // GV_Warp_Size_Log2_MaskL
	};

	} // namespace omp
	} // namespace llvm

	#endif // LLVM_OPENMP_GRIDVALUES_H