// TODO: when `unsafe_block_in_unsafe_fn` is stabilized, remove this
#![allow(unused_unsafe)]
// The functions are complex with many branches, and explicit
// `return`s make it clear where function exit points are
#![allow(clippy::needless_return)]
#![allow(clippy::comparison_chain)]
// Clippy is confused by the complex configuration
#![allow(clippy::if_same_then_else)]
#![allow(clippy::needless_bool)]
//! This `specialized_div_rem` module is originally from version 1.0.0 of the
//! `specialized-div-rem` crate. Note that `for` loops with ranges are not used in this
//! module, since unoptimized compilation may generate references to `memcpy`.
//!
//! The purpose of these macros is to easily change both the division algorithm used
//! for a given integer size and the half division used by that algorithm. The way
//! functions call each other is also constructed such that linkers will find the chain of
//! software and hardware divisions needed for every size of signed and unsigned division.
//! For example, most target compilations do the following:
//!
//! - Many 128 bit division functions like `u128::wrapping_div` use
//!   `std::intrinsics::unchecked_div`, which gets replaced by `__udivti3` because there
//!   is not a 128 bit by 128 bit hardware division function in most architectures.
//!   `__udivti3` uses `u128_div_rem` (this extra level of function calls exists because
//!   `__umodti3` and `__udivmodti4` also exist, and `specialized_div_rem` supplies just
//!   one function to calculate both the quotient and remainder; see the sketch at the
//!   end of this comment). If configuration flags enable it, `impl_trifecta!` defines
//!   `u128_div_rem` to use the trifecta algorithm, which requires the half sized
//!   division `u64_by_u64_div_rem`. If the architecture supplies a 64 bit hardware
//!   division instruction, `u64_by_u64_div_rem` will be reduced to those instructions.
//!   Note that we do not specify the half sized division directly as `__udivdi3`,
//!   because then hardware division would never be introduced.
//! - If the architecture does not supply a 64 bit hardware division instruction, u64
//!   divisions will use functions such as `__udivdi3`. This will call `u64_div_rem`
//!   which is defined by `impl_delegate!`. The half division for this algorithm is
//!   `u32_by_u32_div_rem` which in turn becomes hardware division instructions or more
//!   software division algorithms.
//! - If the architecture does not supply a 32 bit hardware division instruction, linkers
//!   will look for `__udivsi3`. `impl_binary_long!` is used, but this algorithm uses no
//!   half division, so the chain of calls ends here.
//!
//! On some architectures like x86_64, an asymmetrically sized division is supplied, in
//! which 128 bit numbers can be divided by 64 bit numbers. `impl_asymmetric!` is used to
//! extend the 128 by 64 bit division to a full 128 by 128 bit division.
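//!
//! As a minimal sketch (an assumption about the shape of the surrounding
//! `compiler-builtins` wrappers, not code from this module), the single
//! quotient-and-remainder function can serve the separate 128 bit entry points
//! roughly like this:
//!
//! ```ignore
//! fn __udivti3(a: u128, b: u128) -> u128 {
//!     u128_div_rem(a, b).0
//! }
//! fn __umodti3(a: u128, b: u128) -> u128 {
//!     u128_div_rem(a, b).1
//! }
//! ```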
// `allow(dead_code)` is used in various places, because the configuration code would otherwise be
// ridiculously complex
#[macro_use]
mod norm_shift;
#[macro_use]
mod binary_long;
#[macro_use]
mod delegate;
// used on SPARC
#[allow(unused_imports)]
#[cfg(not(feature = "public-test-deps"))]
pub(crate) use self::delegate::u128_divide_sparc;
#[cfg(feature = "public-test-deps")]
pub use self::delegate::u128_divide_sparc;
#[macro_use]
mod trifecta;
#[macro_use]
mod asymmetric;
/// The behavior of all divisions by zero is controlled by this function. This function should be
/// impossible for Rust users to reach, unless the public `compiler-builtins` division functions or
/// `core`/`std`'s `unchecked_div`/`unchecked_rem` are used directly without a zero check in front.
fn zero_div_fn() -> ! {
    // Calling the intrinsic directly, to avoid the `assert_unsafe_precondition` that cannot be
    // used here because it involves non-`inline` functions
    // (https://github.com/rust-lang/compiler-builtins/issues/491).
    unsafe { core::intrinsics::unreachable() }
}
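// A minimal sketch (illustration only, not code from `core`/`std` or from this crate) of the
// kind of zero check that normally sits in front of these division functions, which is why
// `zero_div_fn` above should be unreachable in practice:
//
//     fn user_facing_div_rem(duo: u128, div: u128) -> (u128, u128) {
//         if div == 0 {
//             // user facing code panics here instead of ever reaching `zero_div_fn`
//             panic!("attempt to divide by zero");
//         }
//         u128_div_rem(duo, div)
//     }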
const USE_LZ: bool = {
    if cfg!(target_arch = "arm") {
        if cfg!(target_feature = "thumb-mode") {
            // ARM thumb targets have CLZ instructions if the instruction set of ARMv6T2 is
            // supported. This is needed to successfully differentiate between targets like
            // `thumbv8m.base` and `thumbv8m.main`.
            cfg!(target_feature = "v6t2")
        } else {
            // Regular ARM targets have CLZ instructions if the ARMv5TE instruction set is
            // supported. Technically, ARMv5T was the first to have CLZ, but the "v5t" target
            // feature does not seem to work.
            cfg!(target_feature = "v5te")
        }
    } else if cfg!(any(target_arch = "sparc", target_arch = "sparc64")) {
        // LZD or LZCNT on SPARC only exists for the VIS 3 extension and later.
        cfg!(target_feature = "vis3")
    } else if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) {
        // The 'Zbb' Basic Bit-Manipulation extension on RISC-V
        // determines if a CLZ assembly instruction exists.
        cfg!(target_feature = "zbb")
    } else {
        // All other common targets Rust supports should have CLZ instructions.
        true
    }
};
impl_normalization_shift!(
    u32_normalization_shift,
    USE_LZ,
    32,
    u32,
    i32,
    allow(dead_code)
);
impl_normalization_shift!(
    u64_normalization_shift,
    USE_LZ,
    64,
    u64,
    i64,
    allow(dead_code)
);
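// A minimal sketch (an assumption for illustration, not the `impl_normalization_shift!`
// expansion) of what a leading-zeros based normalization shift computes when `USE_LZ` is true:
// the largest left shift of `div` that keeps it no greater than `duo`, assuming the caller has
// already handled `div == 0` and `duo < div`:
//
//     fn normalization_shift_sketch(duo: u64, div: u64) -> u32 {
//         // align the most significant set bit of `div` with that of `duo`
//         let mut shl = div.leading_zeros() - duo.leading_zeros();
//         if duo < (div << shl) {
//             // aligning the bits overshot `duo`, so back off by one
//             shl -= 1;
//         }
//         shl
//     }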
/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
/// `checked_div` and `checked_rem` are used to avoid bringing in panic function
/// dependencies.
#[inline]
fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) {
    if let Some(quo) = duo.checked_div(div) {
        if let Some(rem) = duo.checked_rem(div) {
            return (quo, rem);
        }
    }
    zero_div_fn()
}
// Whether `trifecta` or `delegate` is faster for 128 bit division depends on the speed at which a
// microarchitecture can multiply and divide. We decide to be optimistic and assume `trifecta` is
// faster if the target pointer width is at least 64.
#[cfg(all(
    not(any(target_pointer_width = "16", target_pointer_width = "32")),
    not(all(not(feature = "no-asm"), target_arch = "x86_64")),
    not(any(target_arch = "sparc", target_arch = "sparc64"))
))]
impl_trifecta!(
    u128_div_rem,
    zero_div_fn,
    u64_by_u64_div_rem,
    32,
    u32,
    u64,
    u128
);
// If the pointer width is less than 64, then the target architecture almost certainly does not
// have the fast 64 to 128 bit widening multiplication needed for `trifecta` to be faster (see the
// sketch after the following `impl_delegate!` invocation).
#[cfg(all(
    any(target_pointer_width = "16", target_pointer_width = "32"),
    not(all(not(feature = "no-asm"), target_arch = "x86_64")),
    not(any(target_arch = "sparc", target_arch = "sparc64"))
))]
impl_delegate!(
    u128_div_rem,
    zero_div_fn,
    u64_normalization_shift,
    u64_by_u64_div_rem,
    32,
    u32,
    u64,
    u128,
    i128
);
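// The 64 to 128 bit widening multiplication mentioned above is, in Rust terms, just a multiply
// after widening casts; whether the target can do this quickly is what decides between
// `trifecta` and `delegate`. A minimal sketch (illustration only, with a hypothetical helper
// name):
//
//     fn widening_mul_sketch(a: u64, b: u64) -> (u64, u64) {
//         let wide = (a as u128) * (b as u128);
//         // (low half, high half) of the full 128 bit product
//         (wide as u64, (wide >> 64) as u64)
//     }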
/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
///
/// # Safety
///
/// If the quotient does not fit in a `u64`, a floating point exception occurs.
/// If `div == 0`, then a division by zero exception occurs.
#[cfg(all(not(feature = "no-asm"), target_arch = "x86_64"))]
#[inline]
unsafe fn u128_by_u64_div_rem(duo: u128, div: u64) -> (u64, u64) {
    let duo_lo = duo as u64;
    let duo_hi = (duo >> 64) as u64;
    let quo: u64;
    let rem: u64;
    unsafe {
        // divides the combined registers rdx:rax (`duo` is split into two 64 bit parts to do this)
        // by `div`. The quotient is stored in rax and the remainder in rdx.
        // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust.
        core::arch::asm!(
            "div {0}",
            in(reg) div,
            inlateout("rax") duo_lo => quo,
            inlateout("rdx") duo_hi => rem,
            options(att_syntax, pure, nomem, nostack)
        );
    }
    (quo, rem)
}
// use `asymmetric` instead of `trifecta` on x86_64
#[cfg(all(not(feature = "no-asm"), target_arch = "x86_64"))]
impl_asymmetric!(
    u128_div_rem,
    zero_div_fn,
    u64_by_u64_div_rem,
    u128_by_u64_div_rem,
    32,
    u32,
    u64,
    u128
);
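// A minimal sketch (an assumption for illustration, not the `impl_asymmetric!` expansion) of
// how a 128 by 64 bit hardware division can be extended to the case where `div` fits in 64 bits
// but `duo` does not: divide the high half first, so that the remainder of that step keeps the
// second quotient within 64 bits, which is exactly the safety condition of
// `u128_by_u64_div_rem`:
//
//     fn asymmetric_sketch(duo: u128, div: u64) -> (u128, u64) {
//         assert!(div != 0);
//         let duo_hi = (duo >> 64) as u64;
//         let duo_lo = duo as u64;
//         // high limb: an ordinary 64 by 64 division
//         let (quo_hi, rem_hi) = (duo_hi / div, duo_hi % div);
//         // `rem_hi < div` guarantees the next quotient fits in a u64
//         let (quo_lo, rem_lo) =
//             unsafe { u128_by_u64_div_rem(((rem_hi as u128) << 64) | (duo_lo as u128), div) };
//         (((quo_hi as u128) << 64) | (quo_lo as u128), rem_lo)
//     }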
/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
/// `checked_div` and `checked_rem` are used to avoid bringing in panic function
/// dependencies.
#[inline]
#[allow(dead_code)]
fn u32_by_u32_div_rem(duo: u32, div: u32) -> (u32, u32) {
    if let Some(quo) = duo.checked_div(div) {
        if let Some(rem) = duo.checked_rem(div) {
            return (quo, rem);
        }
    }
    zero_div_fn()
}
// When not on x86 and the pointer width is not 64, use `delegate` since the division size is larger
// than register size.
#[cfg(all(
    not(all(not(feature = "no-asm"), target_arch = "x86")),
    not(target_pointer_width = "64")
))]
impl_delegate!(
    u64_div_rem,
    zero_div_fn,
    u32_normalization_shift,
    u32_by_u32_div_rem,
    16,
    u16,
    u32,
    u64,
    i64
);
// When not on x86 and the pointer width is 64, use `binary_long`.
#[cfg(all(
    not(all(not(feature = "no-asm"), target_arch = "x86")),
    target_pointer_width = "64"
))]
impl_binary_long!(
    u64_div_rem,
    zero_div_fn,
    u64_normalization_shift,
    64,
    u64,
    i64
);
/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
///
/// # Safety
///
/// If the quotient does not fit in a `u32`, a floating point exception occurs.
/// If `div == 0`, then a division by zero exception occurs.
#[cfg(all(not(feature = "no-asm"), target_arch = "x86"))]
#[inline]
unsafe fn u64_by_u32_div_rem(duo: u64, div: u32) -> (u32, u32) {
    let duo_lo = duo as u32;
    let duo_hi = (duo >> 32) as u32;
    let quo: u32;
    let rem: u32;
    unsafe {
        // divides the combined registers rdx:rax (`duo` is split into two 32 bit parts to do this)
        // by `div`. The quotient is stored in rax and the remainder in rdx.
        // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust.
        core::arch::asm!(
            "div {0}",
            in(reg) div,
            inlateout("rax") duo_lo => quo,
            inlateout("rdx") duo_hi => rem,
            options(att_syntax, pure, nomem, nostack)
        );
    }
    (quo, rem)
}
// use `asymmetric` instead of `delegate` on x86
#[cfg(all(not(feature = "no-asm"), target_arch = "x86"))]
impl_asymmetric!(
    u64_div_rem,
    zero_div_fn,
    u32_by_u32_div_rem,
    u64_by_u32_div_rem,
    16,
    u16,
    u32,
    u64
);
// 32 bits is the smallest division used by `compiler-builtins`, so we end with binary long division
impl_binary_long!(
    u32_div_rem,
    zero_div_fn,
    u32_normalization_shift,
    32,
    u32,
    i32,
    allow(dead_code)
);
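// A minimal sketch (an assumption for illustration, not the `impl_binary_long!` expansion) of
// the shift-and-subtract idea behind binary long division, which needs only the normalization
// shift and no half sized division:
//
//     fn binary_long_sketch(mut duo: u32, div: u32) -> (u32, u32) {
//         assert!(div != 0);
//         if duo < div {
//             return (0, duo);
//         }
//         // align the most significant set bit of `div` with that of `duo`
//         let mut shl = div.leading_zeros() - duo.leading_zeros();
//         let mut quo = 0;
//         loop {
//             let sub = div << shl;
//             if duo >= sub {
//                 duo -= sub;
//                 quo |= 1 << shl;
//             }
//             if shl == 0 {
//                 return (quo, duo);
//             }
//             shl -= 1;
//         }
//     }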