// backends/vulkan/runtime/utils/VecUtils.h - platform/external/executorch - Git at Google

 /*
  * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
  *
  * This source code is licensed under the BSD-style license found in the
  * LICENSE file in the root directory of this source tree.
  */

 #pragma once

 #include <executorch/backends/vulkan/runtime/vk_api/vk_api.h>

 #include <executorch/backends/vulkan/runtime/vk_api/Exception.h>

 #include <cmath>
 #include <limits>
 #include <numeric>
 #include <type_traits>

 namespace vkcompute {
 namespace utils {

 //
 // Hashing
 //

 /**
  * Folds `value` into the running hash `seed` and returns the combined hash.
  *
  * The mixing formula is taken from c10/util/hash.h, which in turn is based on
  * the implementation from Boost.
  */
 inline size_t hash_combine(size_t seed, size_t value) {
   // Shifted copies of the seed make the result depend on argument order.
   const size_t seed_mix = (seed << 6u) + (seed >> 2u);
   const size_t salted_value = value + 0x9e3779b9 + seed_mix;
   return seed ^ salted_value;
 }

 //
 // Alignment
 //

 /*
  * Rounds `number` down to a multiple of `multiple` by dividing and multiplying
  * back. `multiple` must be non-zero (not checked).
  */
 template <typename Type>
 inline constexpr Type align_down(const Type& number, const Type& multiple) {
   const auto whole_multiples = number / multiple;
   return whole_multiples * multiple;
 }

 /*
  * Rounds `number` up to a multiple of `multiple`.
  *
  * The rounding is computed inline rather than by forwarding
  * `number + multiple - 1` to align_down: for types narrower than int the sum
  * is promoted to int, which made template argument deduction fail in the
  * forwarded call.
  *
  * `multiple` must be non-zero and `number + multiple - 1` must not overflow;
  * neither is checked.
  */
 template <typename Type>
 inline constexpr Type align_up(const Type& number, const Type& multiple) {
   return static_cast<Type>(((number + multiple - 1) / multiple) * multiple);
 }

 /*
  * Rounds `numerator` up to a multiple of 4 by adding 3 and clearing the two
  * lowest bits.
  */
 template <typename Type>
 inline constexpr Type align_up_4(const Type& numerator) {
   // ~3 has every bit set except the two lowest ones, i.e. it equals -4.
   return (numerator + 3) & ~static_cast<Type>(3);
 }

 /*
  * Divides `numerator` by `denominator`, biasing the numerator by
  * `denominator - 1` first so that any remainder rounds the quotient up.
  */
 template <typename Type>
 inline constexpr Type div_up(const Type& numerator, const Type& denominator) {
   const auto biased = numerator + denominator - 1;
   return biased / denominator;
 }

 /*
  * Divides `numerator` by 4, biasing it by 3 first so that any remainder
  * rounds the quotient up.
  */
 template <typename Type>
 inline constexpr Type div_up_4(const Type& numerator) {
   constexpr int kGroupSize = 4;
   return (numerator + (kGroupSize - 1)) / kGroupSize;
 }

 //
 // Casting Utilities
 //

 namespace detail {

 /*
  * Overload chosen when T is unsigned: such a value can never be below zero,
  * so the argument is not inspected.
  */
 template <typename T>
 static inline constexpr bool is_negative(
     const T& /*value*/,
     std::true_type /*is_unsigned*/) {
   return false;
 }

 /*
  * Overload chosen when T is not unsigned: compares the value against zero.
  */
 template <typename T>
 static inline constexpr bool is_negative(
     const T& value,
     std::false_type /*is_unsigned*/) {
   return value < static_cast<T>(0);
 }

 /*
  * Returns true if x < 0. Dispatches on std::is_unsigned<T> so that no
  * comparison against zero is emitted for unsigned types.
  */
 template <typename T>
 inline constexpr bool is_negative(const T& x) {
   using unsigned_tag = typename std::is_unsigned<T>::type;
   return is_negative(x, unsigned_tag{});
 }

 /*
  * Neither Limit nor T is unsigned: plain comparison against the lowest value
  * representable by Limit.
  */
 template <typename Limit, typename T>
 static inline constexpr bool less_than_lowest(
     const T& value,
     std::false_type /*limit_is_unsigned*/,
     std::false_type /*x_is_unsigned*/) {
   constexpr auto lowest_allowed = std::numeric_limits<Limit>::lowest();
   return value < lowest_allowed;
 }

 /*
  * Limit can hold negative values but T is unsigned, so the value can never
  * be below Limit's lowest value.
  */
 template <typename Limit, typename T>
 static inline constexpr bool less_than_lowest(
     const T& /*value*/,
     std::false_type /*limit_is_unsigned*/,
     std::true_type /*x_is_unsigned*/) {
   return false;
 }

 /*
  * Limit is unsigned but T is not: the value is too low exactly when it is
  * negative.
  */
 template <typename Limit, typename T>
 static inline constexpr bool less_than_lowest(
     const T& value,
     std::true_type /*limit_is_unsigned*/,
     std::false_type /*x_is_unsigned*/) {
   return value < static_cast<T>(0);
 }

 /*
  * Both Limit and T are unsigned: neither can be negative, so the value is
  * never too low.
  */
 template <typename Limit, typename T>
 static inline constexpr bool less_than_lowest(
     const T& /*value*/,
     std::true_type /*limit_is_unsigned*/,
     std::true_type /*x_is_unsigned*/) {
   return false;
 }

 /*
  * Returns true if x is less than the lowest value of type Limit. Picks one of
  * the overloads above based on the signedness of Limit and T.
  */
 template <typename Limit, typename T>
 inline constexpr bool less_than_lowest(const T& x) {
   using limit_unsigned_tag = typename std::is_unsigned<Limit>::type;
   using value_unsigned_tag = typename std::is_unsigned<T>::type;
   return less_than_lowest<Limit>(
       x, limit_unsigned_tag{}, value_unsigned_tag{});
 }

 // Suppress the sign-compare warning when compiling with GCC, as the latter
 // does not account for the short-circuit rule before raising the warning; see
 // https://godbolt.org/z/Tr3Msnz99
 #ifdef __GNUC__
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wsign-compare"
 #endif

 /*
  * Returns true if x is greater than the greatest value of the type Limit.
  * The comparison is only relevant when T has more value digits than Limit;
  * otherwise the result is always false.
  */
 template <typename Limit, typename T>
 inline constexpr bool greater_than_max(const T& x) {
   using source_limits = std::numeric_limits<T>;
   using target_limits = std::numeric_limits<Limit>;
   constexpr bool can_overflow = source_limits::digits > target_limits::digits;
   return can_overflow ? (x > target_limits::max()) : false;
 }

 #ifdef __GNUC__
 #pragma GCC diagnostic pop
 #endif

 /*
  * Integral overload (bool excluded): returns true if `f` cannot be converted
  * to `To` without leaving its range.
  *
  * For signed -> unsigned conversions a negative value is allowed to wrap
  * using two's complement arithmetic, provided its magnitude does not exceed
  * the maximum of `To`.
  */
 template <typename To, typename From>
 std::enable_if_t<
     std::is_integral<From>::value && !std::is_same<From, bool>::value,
     bool>
 overflows(From f) {
   using limit = std::numeric_limits<To>;
   const bool signed_to_unsigned =
       std::numeric_limits<From>::is_signed && !limit::is_signed;

   // Standard case: check if f is outside the range of type To.
   if (!signed_to_unsigned) {
     return less_than_lowest<To>(f) || greater_than_max<To>(f);
   }

   // Signed to unsigned: too large, or negative with too large a magnitude.
   if (greater_than_max<To>(f)) {
     return true;
   }
   return is_negative(f) && -static_cast<uint64_t>(f) > limit::max();
 }

 /*
  * Floating point overload: returns true if `f` cannot be represented in `To`.
  *
  * - Infinities are accepted when `To` has an infinity of its own.
  * - NaN is accepted only when `To` has a quiet NaN. NaN compares false
  *   against both bounds, so without this check it would be reported as
  *   "in range" for integer targets, where the conversion is undefined.
  * - Every other value is compared against the lowest and largest finite
  *   values of `To`.
  */
 template <typename To, typename From>
 std::enable_if_t<std::is_floating_point<From>::value, bool> overflows(From f) {
   using limit = std::numeric_limits<To>;
   if (limit::has_infinity && std::isinf(static_cast<double>(f))) {
     return false;
   }
   if (std::isnan(static_cast<double>(f))) {
     return !limit::has_quiet_NaN;
   }
   return f < limit::lowest() || f > limit::max();
 }

 /*
  * Converts `v` to `To` after checking, through overflows<To>(), that the
  * value fits. Note that for signed -> unsigned conversions overflows<To>()
  * lets negative values wrap; the public safe_downcast overloads outside this
  * namespace add the sign check.
  *
  * VK_CHECK_COND is defined outside this file; presumably it reports the
  * "out of range" failure when the condition does not hold.
  */
 template <typename To, typename From>
 inline constexpr To safe_downcast(const From& v) {
   VK_CHECK_COND(!overflows<To>(v), "Cast failed: out of range!");
   return static_cast<To>(v);
 }

 /*
  * Compile-time predicate: true when `From` is a signed type and `To` is an
  * unsigned type.
  */
 template <typename To, typename From>
 inline constexpr bool is_signed_to_unsigned() {
   constexpr bool source_is_signed = std::is_signed<From>::value;
   constexpr bool target_is_unsigned = std::is_unsigned<To>::value;
   return source_is_signed && target_is_unsigned;
 }

 } // namespace detail

 /*
  * Checked numeric cast for signed -> unsigned conversions; enabled only when
  * `From` is signed and `To` is unsigned. Negative inputs are rejected up
  * front, then the range check and the cast itself are delegated to
  * detail::safe_downcast.
  */
 template <
     typename To,
     typename From,
     std::enable_if_t<detail::is_signed_to_unsigned<To, From>(), bool> = true>
 inline constexpr To safe_downcast(const From& v) {
   // From{} is the value-initialized (zero) value of the source type.
   VK_CHECK_COND(v >= From{}, "Cast failed: negative signed to unsigned!");
   return detail::safe_downcast<To, From>(v);
 }

 /*
  * Checked numeric cast for every combination that is not signed -> unsigned;
  * forwards directly to detail::safe_downcast, which performs the range check
  * and the cast.
  */
 template <
     typename To,
     typename From,
     std::enable_if_t<!detail::is_signed_to_unsigned<To, From>(), bool> = true>
 inline constexpr To safe_downcast(const From& v) {
   return detail::safe_downcast<To, From>(v);
 }

 //
 // Vector Types
 //

 namespace detail {

 /*
  * Fixed-size vector of N elements of `Type` with bounds-checked element
  * access.
  */
 template <typename Type, uint32_t N>
 struct vec final {
   // Element storage.
   // NOLINTNEXTLINE
   Type data[N];

   // Leaves the elements default-initialized.
   vec() = default;

   // Standard constructor with initializer list; the list must contain
   // exactly N values.
   vec(std::initializer_list<Type> values) {
     VK_CHECK_COND(values.size() == N);
     std::copy(values.begin(), values.end(), data);
   }

   // Conversion constructor from an _integral_ vec type. Note that this is only
   // defined if `OtherType` is an integral type to disallow implicit narrowing.
   // Each element goes through safe_downcast, so it is range checked.
   template <
       typename OtherType,
       typename std::enable_if<
           !std::is_same<Type, OtherType>::value &&
               std::is_integral<OtherType>::value,
           int>::type = 0>
   /* implicit */ vec(const vec<OtherType, N>& other) {
     // The index is uint32_t to match N and operator[], avoiding a
     // signed/unsigned comparison.
     for (uint32_t i = 0; i < N; ++i) {
       data[i] = safe_downcast<Type>(other[i]);
     }
   }

   // Read-only element access; the index must be less than N. An unsigned
   // index cannot be negative, so only the upper bound is checked.
   const Type& operator[](const uint32_t& i) const {
     VK_CHECK_COND(i < N, "Index out of bounds!");
     return data[i];
   }

   // Mutable element access; the index must be less than N.
   Type& operator[](const uint32_t& i) {
     VK_CHECK_COND(i < N, "Index out of bounds!");
     return data[i];
   }
 };

 } // namespace detail

 // Vectors of N int32_t elements, with aliases for 2, 3 and 4 elements.
 template <uint32_t N>
 using ivec = detail::vec<int32_t, N>;
 using ivec2 = ivec<2u>;
 using ivec3 = ivec<3u>;
 using ivec4 = ivec<4u>;

 // Vectors of N uint32_t elements, with aliases for 2, 3 and 4 elements.
 template <uint32_t N>
 using uvec = detail::vec<uint32_t, N>;
 using uvec2 = uvec<2u>;
 using uvec3 = uvec<3u>;
 using uvec4 = uvec<4u>;

 // Vectors of N float elements, with aliases for 2, 3 and 4 elements.
 template <uint32_t N>
 using vec = detail::vec<float, N>;
 using vec2 = vec<2u>;
 using vec3 = vec<3u>;
 using vec4 = vec<4u>;

 // Stream formatters that print a vector as a parenthesized, comma-separated
 // list of its elements. Useful for debugging; uvec3 is the type representing
 // tensor extents.
 inline std::ostream& operator<<(std::ostream& os, const uvec3& v) {
   return os << "(" << v[0u] << ", " << v[1u] << ", " << v[2u] << ")";
 }

 inline std::ostream& operator<<(std::ostream& os, const ivec3& v) {
   return os << "(" << v[0u] << ", " << v[1u] << ", " << v[2u] << ")";
 }

 inline std::ostream& operator<<(std::ostream& os, const uvec4& v) {
   return os << "(" << v[0u] << ", " << v[1u] << ", " << v[2u] << ", " << v[3u]
             << ")";
 }

 inline std::ostream& operator<<(std::ostream& os, const ivec4& v) {
   return os << "(" << v[0u] << ", " << v[1u] << ", " << v[2u] << ", " << v[3u]
             << ")";
 }

 /*
  * Element-wise rounded-up division: element i of the result is
  * div_up(a[i], b[i]).
  */
 template <typename T, uint32_t N>
 inline detail::vec<T, N> divup_vec(
     const detail::vec<T, N>& a,
     const detail::vec<T, N>& b) {
   detail::vec<T, N> quotients;
   uint32_t lane = 0;
   while (lane < N) {
     quotients[lane] = utils::div_up(a[lane], b[lane]);
     ++lane;
   }
   return quotients;
 }

 //
 // std::vector<T> Handling
 //

 /*
  * Bounds-tolerant indexing into an std::vector<T>. Negative indices count
  * from the back, so -1 refers to the last element. Any index that falls
  * outside the vector yields 1 instead of an error.
  */
 template <typename T>
 inline T val_at(const int64_t index, const std::vector<T>& sizes) {
   const int64_t ndim = static_cast<int64_t>(sizes.size());
   // Translate a negative index into its position from the front.
   const int64_t position = index < 0 ? ndim + index : index;
   if (position < 0 || position >= ndim) {
     return T(1);
   }
   return sizes[position];
 }

 /*
  * Builds an ivec2 from a vector of exactly two int64_t values, range
  * checking each one with safe_downcast. With `reverse` set, the two elements
  * are swapped.
  */
 inline ivec2 make_ivec2(
     const std::vector<int64_t>& ints,
     bool reverse = false) {
   VK_CHECK_COND(ints.size() == 2);
   const size_t first = reverse ? 1 : 0;
   const size_t second = reverse ? 0 : 1;
   return {
       safe_downcast<int32_t>(ints[first]),
       safe_downcast<int32_t>(ints[second])};
 }

 /*
  * Builds an ivec3 from a vector of exactly three int64_t values, range
  * checking each one with safe_downcast. With `reverse` set, the elements are
  * taken in reverse order.
  */
 inline ivec3 make_ivec3(
     const std::vector<int64_t>& ints,
     bool reverse = false) {
   VK_CHECK_COND(ints.size() == 3);
   // Maps an output position to the matching (possibly mirrored) input.
   const auto element = [&ints, reverse](size_t pos) {
     return safe_downcast<int32_t>(ints[reverse ? 2 - pos : pos]);
   };
   return {element(0), element(1), element(2)};
 }

 /*
  * Builds an ivec4 from a vector of exactly four int64_t values, range
  * checking each one with safe_downcast. With `reverse` set, the elements are
  * taken in reverse order.
  */
 inline ivec4 make_ivec4(
     const std::vector<int64_t>& ints,
     bool reverse = false) {
   VK_CHECK_COND(ints.size() == 4);
   ivec4 result;
   for (uint32_t slot = 0; slot < 4u; ++slot) {
     const uint32_t source = reverse ? 3u - slot : slot;
     result[slot] = safe_downcast<int32_t>(ints[source]);
   }
   return result;
 }

 /*
  * Builds an ivec4 from at most four int64_t values. The values are
  * right-aligned in the result and the unused leading elements are set to 1.
  */
 inline ivec4 make_ivec4_prepadded1(const std::vector<int64_t>& ints) {
   VK_CHECK_COND(ints.size() <= 4);

   ivec4 padded = {1, 1, 1, 1};
   const size_t count = ints.size();
   // Number of leading elements that keep their padding value.
   const size_t offset = 4 - count;
   for (size_t src = 0; src != count; ++src) {
     padded[src + offset] = safe_downcast<int32_t>(ints[src]);
   }

   return padded;
 }

 /*
  * Converts a uvec3 to an ivec3, range checking each element with
  * safe_downcast.
  */
 inline ivec3 make_ivec3(uvec3 ints) {
   const int32_t first = safe_downcast<int32_t>(ints[0u]);
   const int32_t second = safe_downcast<int32_t>(ints[1u]);
   const int32_t third = safe_downcast<int32_t>(ints[2u]);
   return {first, second, third};
 }

 /*
  * Converts an ivec3 to a uvec3, checking each element with safe_downcast.
  */
 inline uvec3 make_uvec3(ivec3 ints) {
   const uint32_t first = safe_downcast<uint32_t>(ints[0u]);
   const uint32_t second = safe_downcast<uint32_t>(ints[1u]);
   const uint32_t third = safe_downcast<uint32_t>(ints[2u]);
   return {first, second, third};
 }

 /*
  * Given a vector of int64_t representing the sizes of a tensor, constructs a
  * uvec4 holding the last four sizes in reverse order (last size first).
  * Positions the vector does not provide are filled with 1 by val_at, and
  * every value is checked with safe_downcast.
  */
 inline uvec4 make_whcn_uvec4(const std::vector<int64_t>& arr) {
   // Reads the size `from_back` positions from the end of `arr`.
   const auto dim = [&arr](int64_t from_back) {
     return safe_downcast<uint32_t>(val_at(from_back, arr));
   };
   return {dim(-1), dim(-2), dim(-3), dim(-4)};
 }

 /*
  * Given a vector of int64_t representing the sizes of a tensor, constructs
  * an ivec4 holding the last four sizes in reverse order (last size first).
  * Positions the vector does not provide are filled with 1 by val_at.
  *
  * Every value goes through safe_downcast, like the other make_* helpers, so
  * a size that does not fit in int32_t is reported instead of being silently
  * truncated.
  */
 inline ivec4 make_whcn_ivec4(const std::vector<int64_t>& arr) {
   int32_t w = safe_downcast<int32_t>(val_at(-1, arr));
   int32_t h = safe_downcast<int32_t>(val_at(-2, arr));
   int32_t c = safe_downcast<int32_t>(val_at(-3, arr));
   int32_t n = safe_downcast<int32_t>(val_at(-4, arr));

   return {w, h, c, n};
 }

 /*
  * Returns the product of all elements of a container of integral values,
  * accumulated in int64_t so the running product is not limited by the
  * element type. An empty container yields 1. Modeled on
  * `multiply_integers` in <c10/util/accumulate.h>.
  */
 template <
     typename C,
     std::enable_if_t<std::is_integral<typename C::value_type>::value, int> = 0>
 inline int64_t multiply_integers(const C& container) {
   int64_t product = 1;
   for (auto it = container.begin(); it != container.end(); ++it) {
     product = product * (*it);
   }
   return product;
 }

 /*
  * Returns the product of the integral elements in [begin, end). An empty
  * range yields 1. Modeled on `multiply_integers` in
  * <c10/util/accumulate.h>.
  */
 template <
     typename Iter,
     std::enable_if_t<
         std::is_integral<
             typename std::iterator_traits<Iter>::value_type>::value,
         int> = 0>
 inline int64_t multiply_integers(Iter begin, Iter end) {
   // The running product is kept in int64_t on purpose: accumulating in the
   // element type could overflow when that type is too small for the result.
   int64_t product = 1;
   for (Iter it = begin; it != end; ++it) {
     product = product * (*it);
   }
   return product;
 }

 } // namespace utils
 } // namespace vkcompute
	/*
	* Copyright (c) Meta Platforms, Inc. and affiliates.
	* All rights reserved.
	*
	* This source code is licensed under the BSD-style license found in the
	* LICENSE file in the root directory of this source tree.
	*/

	#pragma once

	#include <executorch/backends/vulkan/runtime/vk_api/vk_api.h>

	#include <executorch/backends/vulkan/runtime/vk_api/Exception.h>

	#include <cmath>
	#include <limits>
	#include <numeric>
	#include <type_traits>

	namespace vkcompute {
	namespace utils {

	//
	// Hashing
	//

	/**
	 * Folds `value` into the running hash `seed` and returns the combined hash.
	 *
	 * The mixing formula is taken from c10/util/hash.h, which in turn is based on
	 * the implementation from Boost.
	 */
	inline size_t hash_combine(size_t seed, size_t value) {
	  // Shifted copies of the seed make the result depend on argument order.
	  const size_t seed_mix = (seed << 6u) + (seed >> 2u);
	  const size_t salted_value = value + 0x9e3779b9 + seed_mix;
	  return seed ^ salted_value;
	}

	//
	// Alignment
	//

	/*
	 * Rounds `number` down to a multiple of `multiple` by dividing and
	 * multiplying back. `multiple` must be non-zero (not checked).
	 */
	template <typename Type>
	inline constexpr Type align_down(const Type& number, const Type& multiple) {
	  const auto whole_multiples = number / multiple;
	  return whole_multiples * multiple;
	}

	/*
	 * Rounds `number` up to a multiple of `multiple`.
	 *
	 * The rounding is computed inline rather than by forwarding
	 * `number + multiple - 1` to align_down: for types narrower than int the sum
	 * is promoted to int, which made template argument deduction fail in the
	 * forwarded call.
	 *
	 * `multiple` must be non-zero and `number + multiple - 1` must not overflow;
	 * neither is checked.
	 */
	template <typename Type>
	inline constexpr Type align_up(const Type& number, const Type& multiple) {
	  return static_cast<Type>(((number + multiple - 1) / multiple) * multiple);
	}

	/*
	 * Rounds `numerator` up to a multiple of 4 by adding 3 and clearing the two
	 * lowest bits.
	 */
	template <typename Type>
	inline constexpr Type align_up_4(const Type& numerator) {
	  // ~3 has every bit set except the two lowest ones, i.e. it equals -4.
	  return (numerator + 3) & ~static_cast<Type>(3);
	}

	/*
	 * Divides `numerator` by `denominator`, biasing the numerator by
	 * `denominator - 1` first so that any remainder rounds the quotient up.
	 */
	template <typename Type>
	inline constexpr Type div_up(const Type& numerator, const Type& denominator) {
	  const auto biased = numerator + denominator - 1;
	  return biased / denominator;
	}

	/*
	 * Divides `numerator` by 4, biasing it by 3 first so that any remainder
	 * rounds the quotient up.
	 */
	template <typename Type>
	inline constexpr Type div_up_4(const Type& numerator) {
	  constexpr int kGroupSize = 4;
	  return (numerator + (kGroupSize - 1)) / kGroupSize;
	}

	//
	// Casting Utilities
	//

	namespace detail {

	/*
	 * Overload chosen when T is unsigned: such a value can never be below zero,
	 * so the argument is not inspected.
	 */
	template <typename T>
	static inline constexpr bool is_negative(
	    const T& /*x*/,
	    std::true_type /*is_unsigned*/) {
	  return false;
	}

	/*
	 * Overload chosen when T is not unsigned: compares the value against zero.
	 */
	template <typename T>
	static inline constexpr bool is_negative(
	    const T& x,
	    std::false_type /*is_unsigned*/) {
	  return x < T(0);
	}

	/*
	 * Returns true if x < 0. Dispatches on std::is_unsigned<T> so that no
	 * comparison against zero is emitted for unsigned types.
	 */
	template <typename T>
	inline constexpr bool is_negative(const T& x) {
	  return is_negative(x, std::is_unsigned<T>());
	}

	/*
	 * Neither Limit nor T is unsigned: plain comparison against the lowest value
	 * representable by Limit.
	 */
	template <typename Limit, typename T>
	static inline constexpr bool less_than_lowest(
	    const T& x,
	    std::false_type /*limit_is_unsigned*/,
	    std::false_type /*x_is_unsigned*/) {
	  return x < std::numeric_limits<Limit>::lowest();
	}

	/*
	 * Limit can hold negative values but T is unsigned, so the value can never
	 * be below Limit's lowest value.
	 */
	template <typename Limit, typename T>
	static inline constexpr bool less_than_lowest(
	    const T& /*x*/,
	    std::false_type /*limit_is_unsigned*/,
	    std::true_type /*x_is_unsigned*/) {
	  return false;
	}

	/*
	 * Limit is unsigned but T is not: the value is too low exactly when it is
	 * negative.
	 */
	template <typename Limit, typename T>
	static inline constexpr bool less_than_lowest(
	    const T& x,
	    std::true_type /*limit_is_unsigned*/,
	    std::false_type /*x_is_unsigned*/) {
	  return x < T(0);
	}

	/*
	 * Both Limit and T are unsigned: neither can be negative, so the value is
	 * never too low.
	 */
	template <typename Limit, typename T>
	static inline constexpr bool less_than_lowest(
	    const T& /*x*/,
	    std::true_type /*limit_is_unsigned*/,
	    std::true_type /*x_is_unsigned*/) {
	  return false;
	}

	/*
	 * Returns true if x is less than the lowest value of type Limit. Picks one of
	 * the overloads above based on the signedness of Limit and T.
	 */
	template <typename Limit, typename T>
	inline constexpr bool less_than_lowest(const T& x) {
	  return less_than_lowest<Limit>(
	      x, std::is_unsigned<Limit>(), std::is_unsigned<T>());
	}

	// Suppress the sign-compare warning when compiling with GCC, as the latter
	// does not account for the short-circuit rule before raising the warning; see
	// https://godbolt.org/z/Tr3Msnz99
	#ifdef __GNUC__
	#pragma GCC diagnostic push
	#pragma GCC diagnostic ignored "-Wsign-compare"
	#endif

	/*
	 * Returns true if x is greater than the greatest value of the type Limit.
	 * The comparison is only relevant when T has more value digits than Limit;
	 * otherwise the result is always false.
	 */
	template <typename Limit, typename T>
	inline constexpr bool greater_than_max(const T& x) {
	  using source_limits = std::numeric_limits<T>;
	  using target_limits = std::numeric_limits<Limit>;
	  constexpr bool can_overflow = source_limits::digits > target_limits::digits;
	  return can_overflow ? (x > target_limits::max()) : false;
	}

	#ifdef __GNUC__
	#pragma GCC diagnostic pop
	#endif

	/*
	 * Integral overload (bool excluded): returns true if `f` cannot be converted
	 * to `To` without leaving its range.
	 *
	 * For signed -> unsigned conversions a negative value is allowed to wrap
	 * using two's complement arithmetic, provided its magnitude does not exceed
	 * the maximum of `To`.
	 */
	template <typename To, typename From>
	std::enable_if_t<
	    std::is_integral<From>::value && !std::is_same<From, bool>::value,
	    bool>
	overflows(From f) {
	  using limit = std::numeric_limits<To>;
	  // Casting from signed to unsigned; allow for negative numbers to wrap using
	  // two's complement arithmetic.
	  if (!limit::is_signed && std::numeric_limits<From>::is_signed) {
	    return greater_than_max<To>(f) ||
	        (is_negative(f) && -static_cast<uint64_t>(f) > limit::max());
	  }
	  // Standard case: check if f is outside the range of type To.
	  else {
	    return less_than_lowest<To>(f) || greater_than_max<To>(f);
	  }
	}

	/*
	 * Floating point overload: returns true if `f` cannot be represented in `To`.
	 *
	 * - Infinities are accepted when `To` has an infinity of its own.
	 * - NaN is accepted only when `To` has a quiet NaN. NaN compares false
	 *   against both bounds, so without this check it would be reported as
	 *   "in range" for integer targets, where the conversion is undefined.
	 * - Every other value is compared against the lowest and largest finite
	 *   values of `To`.
	 */
	template <typename To, typename From>
	std::enable_if_t<std::is_floating_point<From>::value, bool> overflows(From f) {
	  using limit = std::numeric_limits<To>;
	  if (limit::has_infinity && std::isinf(static_cast<double>(f))) {
	    return false;
	  }
	  if (std::isnan(static_cast<double>(f))) {
	    return !limit::has_quiet_NaN;
	  }
	  return f < limit::lowest() || f > limit::max();
	}

	/*
	 * Converts `v` to `To` after checking, through overflows<To>(), that the
	 * value fits. VK_CHECK_COND is defined outside this file; presumably it
	 * reports the "out of range" failure when the condition does not hold.
	 */
	template <typename To, typename From>
	inline constexpr To safe_downcast(const From& v) {
	VK_CHECK_COND(!overflows<To>(v), "Cast failed: out of range!");
	return static_cast<To>(v);
	}

	/*
	 * Compile-time predicate: true when `From` is a signed type and `To` is an
	 * unsigned type.
	 */
	template <typename To, typename From>
	inline constexpr bool is_signed_to_unsigned() {
	  constexpr bool source_is_signed = std::is_signed<From>::value;
	  constexpr bool target_is_unsigned = std::is_unsigned<To>::value;
	  return source_is_signed && target_is_unsigned;
	}

	} // namespace detail

	/*
	 * Checked numeric cast for signed -> unsigned conversions; enabled only when
	 * `From` is signed and `To` is unsigned. Negative inputs are rejected up
	 * front, then the range check and the cast itself are delegated to
	 * detail::safe_downcast.
	 */
	template <
	typename To,
	typename From,
	std::enable_if_t<detail::is_signed_to_unsigned<To, From>(), bool> = true>
	inline constexpr To safe_downcast(const From& v) {
	// From{} is the value-initialized (zero) value of the source type.
	VK_CHECK_COND(v >= From{}, "Cast failed: negative signed to unsigned!");
	return detail::safe_downcast<To, From>(v);
	}

	/*
	 * Checked numeric cast for every combination that is not signed -> unsigned;
	 * forwards directly to detail::safe_downcast, which performs the range check
	 * and the cast.
	 */
	template <
	typename To,
	typename From,
	std::enable_if_t<!detail::is_signed_to_unsigned<To, From>(), bool> = true>
	inline constexpr To safe_downcast(const From& v) {
	return detail::safe_downcast<To, From>(v);
	}

	//
	// Vector Types
	//

	namespace detail {

	/*
	 * Fixed-size vector of N elements of `Type` with bounds-checked element
	 * access.
	 */
	template <typename Type, uint32_t N>
	struct vec final {
	  // Element storage.
	  // NOLINTNEXTLINE
	  Type data[N];

	  // Leaves the elements default-initialized.
	  vec() = default;

	  // Standard constructor with initializer list; the list must contain
	  // exactly N values.
	  vec(std::initializer_list<Type> values) {
	    VK_CHECK_COND(values.size() == N);
	    std::copy(values.begin(), values.end(), data);
	  }

	  // Conversion constructor from an _integral_ vec type. Note that this is only
	  // defined if `OtherType` is an integral type to disallow implicit narrowing.
	  // Each element goes through safe_downcast, so it is range checked.
	  template <
	      typename OtherType,
	      typename std::enable_if<
	          !std::is_same<Type, OtherType>::value &&
	              std::is_integral<OtherType>::value,
	          int>::type = 0>
	  /* implicit */ vec(const vec<OtherType, N>& other) {
	    // The index is uint32_t to match N and operator[], avoiding a
	    // signed/unsigned comparison.
	    for (uint32_t i = 0; i < N; ++i) {
	      data[i] = safe_downcast<Type>(other[i]);
	    }
	  }

	  // Read-only element access; the index must be less than N. An unsigned
	  // index cannot be negative, so only the upper bound is checked.
	  const Type& operator[](const uint32_t& i) const {
	    VK_CHECK_COND(i < N, "Index out of bounds!");
	    return data[i];
	  }

	  // Mutable element access; the index must be less than N.
	  Type& operator[](const uint32_t& i) {
	    VK_CHECK_COND(i < N, "Index out of bounds!");
	    return data[i];
	  }
	};

	} // namespace detail

	// Vectors of N int32_t elements, with aliases for 2, 3 and 4 elements.
	template <uint32_t N>
	using ivec = detail::vec<int32_t, N>;
	using ivec2 = ivec<2u>;
	using ivec3 = ivec<3u>;
	using ivec4 = ivec<4u>;

	// Vectors of N uint32_t elements, with aliases for 2, 3 and 4 elements.
	template <uint32_t N>
	using uvec = detail::vec<uint32_t, N>;
	using uvec2 = uvec<2u>;
	using uvec3 = uvec<3u>;
	using uvec4 = uvec<4u>;

	// Vectors of N float elements, with aliases for 2, 3 and 4 elements.
	template <uint32_t N>
	using vec = detail::vec<float, N>;
	using vec2 = vec<2u>;
	using vec3 = vec<3u>;
	using vec4 = vec<4u>;

	// Stream formatters that print a vector as a parenthesized, comma-separated
	// list of its elements. Useful for debugging; uvec3 is the type representing
	// tensor extents.
	inline std::ostream& operator<<(std::ostream& os, const uvec3& v) {
	  return os << "(" << v[0u] << ", " << v[1u] << ", " << v[2u] << ")";
	}

	inline std::ostream& operator<<(std::ostream& os, const ivec3& v) {
	  return os << "(" << v[0u] << ", " << v[1u] << ", " << v[2u] << ")";
	}

	inline std::ostream& operator<<(std::ostream& os, const uvec4& v) {
	  return os << "(" << v[0u] << ", " << v[1u] << ", " << v[2u] << ", " << v[3u]
	            << ")";
	}

	inline std::ostream& operator<<(std::ostream& os, const ivec4& v) {
	  return os << "(" << v[0u] << ", " << v[1u] << ", " << v[2u] << ", " << v[3u]
	            << ")";
	}

	/*
	 * Element-wise rounded-up division: element i of the result is
	 * div_up(a[i], b[i]).
	 */
	template <typename T, uint32_t N>
	inline detail::vec<T, N> divup_vec(
	    const detail::vec<T, N>& a,
	    const detail::vec<T, N>& b) {
	  detail::vec<T, N> quotients;
	  uint32_t lane = 0;
	  while (lane < N) {
	    quotients[lane] = utils::div_up(a[lane], b[lane]);
	    ++lane;
	  }
	  return quotients;
	}

	//
	// std::vector<T> Handling
	//

	/*
	 * Bounds-tolerant indexing into an std::vector<T>. Negative indices count
	 * from the back, so -1 refers to the last element. Any index that falls
	 * outside the vector yields 1 instead of an error.
	 */
	template <typename T>
	inline T val_at(const int64_t index, const std::vector<T>& sizes) {
	  const int64_t ndim = static_cast<int64_t>(sizes.size());
	  // Translate a negative index into its position from the front.
	  const int64_t position = index < 0 ? ndim + index : index;
	  if (position < 0 || position >= ndim) {
	    return T(1);
	  }
	  return sizes[position];
	}

	/*
	 * Builds an ivec2 from a vector of exactly two int64_t values, range
	 * checking each one with safe_downcast. With `reverse` set, the two elements
	 * are swapped.
	 */
	inline ivec2 make_ivec2(
	    const std::vector<int64_t>& ints,
	    bool reverse = false) {
	  VK_CHECK_COND(ints.size() == 2);
	  const size_t first = reverse ? 1 : 0;
	  const size_t second = reverse ? 0 : 1;
	  return {
	      safe_downcast<int32_t>(ints[first]),
	      safe_downcast<int32_t>(ints[second])};
	}

	/*
	 * Builds an ivec3 from a vector of exactly three int64_t values, range
	 * checking each one with safe_downcast. With `reverse` set, the elements are
	 * taken in reverse order.
	 */
	inline ivec3 make_ivec3(
	    const std::vector<int64_t>& ints,
	    bool reverse = false) {
	  VK_CHECK_COND(ints.size() == 3);
	  // Maps an output position to the matching (possibly mirrored) input.
	  const auto element = [&ints, reverse](size_t pos) {
	    return safe_downcast<int32_t>(ints[reverse ? 2 - pos : pos]);
	  };
	  return {element(0), element(1), element(2)};
	}

	/*
	 * Builds an ivec4 from a vector of exactly four int64_t values, range
	 * checking each one with safe_downcast. With `reverse` set, the elements are
	 * taken in reverse order.
	 */
	inline ivec4 make_ivec4(
	    const std::vector<int64_t>& ints,
	    bool reverse = false) {
	  VK_CHECK_COND(ints.size() == 4);
	  ivec4 result;
	  for (uint32_t slot = 0; slot < 4u; ++slot) {
	    const uint32_t source = reverse ? 3u - slot : slot;
	    result[slot] = safe_downcast<int32_t>(ints[source]);
	  }
	  return result;
	}

	/*
	 * Builds an ivec4 from at most four int64_t values. The values are
	 * right-aligned in the result and the unused leading elements are set to 1.
	 */
	inline ivec4 make_ivec4_prepadded1(const std::vector<int64_t>& ints) {
	  VK_CHECK_COND(ints.size() <= 4);

	  ivec4 padded = {1, 1, 1, 1};
	  const size_t count = ints.size();
	  // Number of leading elements that keep their padding value.
	  const size_t offset = 4 - count;
	  for (size_t src = 0; src != count; ++src) {
	    padded[src + offset] = safe_downcast<int32_t>(ints[src]);
	  }

	  return padded;
	}

	/*
	 * Converts a uvec3 to an ivec3, range checking each element with
	 * safe_downcast.
	 */
	inline ivec3 make_ivec3(uvec3 ints) {
	  const int32_t first = safe_downcast<int32_t>(ints[0u]);
	  const int32_t second = safe_downcast<int32_t>(ints[1u]);
	  const int32_t third = safe_downcast<int32_t>(ints[2u]);
	  return {first, second, third};
	}

	/*
	 * Converts an ivec3 to a uvec3, checking each element with safe_downcast.
	 */
	inline uvec3 make_uvec3(ivec3 ints) {
	  const uint32_t first = safe_downcast<uint32_t>(ints[0u]);
	  const uint32_t second = safe_downcast<uint32_t>(ints[1u]);
	  const uint32_t third = safe_downcast<uint32_t>(ints[2u]);
	  return {first, second, third};
	}

	/*
	 * Given a vector of int64_t representing the sizes of a tensor, constructs a
	 * uvec4 holding the last four sizes in reverse order (last size first).
	 * Positions the vector does not provide are filled with 1 by val_at, and
	 * every value is checked with safe_downcast.
	 */
	inline uvec4 make_whcn_uvec4(const std::vector<int64_t>& arr) {
	  // Reads the size `from_back` positions from the end of `arr`.
	  const auto dim = [&arr](int64_t from_back) {
	    return safe_downcast<uint32_t>(val_at(from_back, arr));
	  };
	  return {dim(-1), dim(-2), dim(-3), dim(-4)};
	}

	/*
	 * Given a vector of int64_t representing the sizes of a tensor, constructs
	 * an ivec4 holding the last four sizes in reverse order (last size first).
	 * Positions the vector does not provide are filled with 1 by val_at.
	 *
	 * Every value goes through safe_downcast, like the other make_* helpers, so
	 * a size that does not fit in int32_t is reported instead of being silently
	 * truncated.
	 */
	inline ivec4 make_whcn_ivec4(const std::vector<int64_t>& arr) {
	  int32_t w = safe_downcast<int32_t>(val_at(-1, arr));
	  int32_t h = safe_downcast<int32_t>(val_at(-2, arr));
	  int32_t c = safe_downcast<int32_t>(val_at(-3, arr));
	  int32_t n = safe_downcast<int32_t>(val_at(-4, arr));

	  return {w, h, c, n};
	}

	/*
	 * Returns the product of all elements of a container of integral values,
	 * accumulated in int64_t so the running product is not limited by the
	 * element type. An empty container yields 1. Modeled on
	 * `multiply_integers` in <c10/util/accumulate.h>.
	 */
	template <
	    typename C,
	    std::enable_if_t<std::is_integral<typename C::value_type>::value, int> = 0>
	inline int64_t multiply_integers(const C& container) {
	  int64_t product = 1;
	  for (auto it = container.begin(); it != container.end(); ++it) {
	    product = product * (*it);
	  }
	  return product;
	}

	/*
	 * Returns the product of the integral elements in [begin, end). An empty
	 * range yields 1. Modeled on `multiply_integers` in
	 * <c10/util/accumulate.h>.
	 */
	template <
	    typename Iter,
	    std::enable_if_t<
	        std::is_integral<
	            typename std::iterator_traits<Iter>::value_type>::value,
	        int> = 0>
	inline int64_t multiply_integers(Iter begin, Iter end) {
	  // The running product is kept in int64_t on purpose: accumulating in the
	  // element type could overflow when that type is too small for the result.
	  int64_t product = 1;
	  for (Iter it = begin; it != end; ++it) {
	    product = product * (*it);
	  }
	  return product;
	}

	} // namespace utils
	} // namespace vkcompute