#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
use super::float::*;
use crate::core::{
storage::XYZ,
traits::{quaternion::Quaternion, scalar::*, vector::*},
};
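// Note: the quaternion is stored as [x, y, z, w] across the __m128 lanes (x in the
// lowest lane, w in the highest); the sign masks and shuffles below rely on that order.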
impl Quaternion<f32> for __m128 {
type SIMDVector3 = __m128;
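// The conjugate negates the vector part (x, y, z) and keeps w, implemented by
// XORing the sign bits of the first three lanes.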
#[inline(always)]
fn conjugate(self) -> Self {
const SIGN: __m128 = const_f32x4!([-0.0, -0.0, -0.0, 0.0]);
unsafe { _mm_xor_ps(self, SIGN) }
}
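// Normalized linear interpolation (nlerp): lerp the raw components, flipping the
// sign of `end` when the dot product is negative so the shorter arc is taken,
// then re-normalize the result.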
#[inline]
fn lerp(self, end: Self, s: f32) -> Self {
glam_assert!(FloatVector4::is_normalized(self));
glam_assert!(FloatVector4::is_normalized(end));
unsafe {
const NEG_ZERO: __m128 = const_f32x4!([-0.0; 4]);
let start = self;
let end = end;
let dot = Vector4::dot_into_vec(start, end);
// Calculate the bias: if the dot product is positive or zero there is no bias,
// but if it is negative we flip the sign of the 'end' rotation's XYZW components
let bias = _mm_and_ps(dot, NEG_ZERO);
let interpolated = _mm_add_ps(
_mm_mul_ps(_mm_sub_ps(_mm_xor_ps(end, bias), start), _mm_set_ps1(s)),
start,
);
FloatVector4::normalize(interpolated)
}
}
#[inline]
fn slerp(self, end: Self, s: f32) -> Self {
// http://number-none.com/product/Understanding%20Slerp,%20Then%20Not%20Using%20It/
glam_assert!(FloatVector4::is_normalized(self));
glam_assert!(FloatVector4::is_normalized(end));
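// When the inputs are nearly parallel sin(theta) approaches zero, so fall back to
// (normalized) lerp instead of dividing by a very small value.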
const DOT_THRESHOLD: f32 = 0.9995;
let dot = Vector4::dot(self, end);
if dot > DOT_THRESHOLD {
// assumes lerp returns a normalized quaternion
self.lerp(end, s)
} else {
// assumes acos_approx clamps the input to [-1.0, 1.0]
let theta = dot.acos_approx();
let x = 1.0 - s;
let y = s;
let z = 1.0;
unsafe {
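// Evaluate sin((1 - s) * theta), sin(s * theta) and sin(theta) with one vector sin,
// then combine as (self * sin((1 - s) * theta) + end * sin(s * theta)) / sin(theta).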
let tmp = _mm_mul_ps(_mm_set_ps1(theta), _mm_set_ps(0.0, z, y, x));
let tmp = m128_sin(tmp);
let scale1 = _mm_shuffle_ps(tmp, tmp, 0b00_00_00_00);
let scale2 = _mm_shuffle_ps(tmp, tmp, 0b01_01_01_01);
let theta_sin = _mm_shuffle_ps(tmp, tmp, 0b10_10_10_10);
self.mul(scale1).add(end.mul(scale2)).div(theta_sin)
}
}
}
#[inline]
fn mul_quaternion(self, other: Self) -> Self {
glam_assert!(FloatVector4::is_normalized(self));
glam_assert!(FloatVector4::is_normalized(other));
unsafe {
// Based on https://github.com/nfrechette/rtm `rtm::quat_mul`
let lhs = self;
let rhs = other;
const CONTROL_WZYX: __m128 = const_f32x4!([1.0, -1.0, 1.0, -1.0]);
const CONTROL_ZWXY: __m128 = const_f32x4!([1.0, 1.0, -1.0, -1.0]);
const CONTROL_YXWZ: __m128 = const_f32x4!([-1.0, 1.0, 1.0, -1.0]);
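// Broadcast each component of `self` and multiply it against a permutation of
// `other`; the CONTROL_* masks supply the +/- signs so the four partial products
// sum to the Hamilton product self * other.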
let r_xxxx = _mm_shuffle_ps(lhs, lhs, 0b00_00_00_00);
let r_yyyy = _mm_shuffle_ps(lhs, lhs, 0b01_01_01_01);
let r_zzzz = _mm_shuffle_ps(lhs, lhs, 0b10_10_10_10);
let r_wwww = _mm_shuffle_ps(lhs, lhs, 0b11_11_11_11);
let lxrw_lyrw_lzrw_lwrw = _mm_mul_ps(r_wwww, rhs);
let l_wzyx = _mm_shuffle_ps(rhs, rhs, 0b00_01_10_11);
let lwrx_lzrx_lyrx_lxrx = _mm_mul_ps(r_xxxx, l_wzyx);
let l_zwxy = _mm_shuffle_ps(l_wzyx, l_wzyx, 0b10_11_00_01);
let lwrx_nlzrx_lyrx_nlxrx = _mm_mul_ps(lwrx_lzrx_lyrx_lxrx, CONTROL_WZYX);
let lzry_lwry_lxry_lyry = _mm_mul_ps(r_yyyy, l_zwxy);
let l_yxwz = _mm_shuffle_ps(l_zwxy, l_zwxy, 0b00_01_10_11);
let lzry_lwry_nlxry_nlyry = _mm_mul_ps(lzry_lwry_lxry_lyry, CONTROL_ZWXY);
let lyrz_lxrz_lwrz_lzrz = _mm_mul_ps(r_zzzz, l_yxwz);
let result0 = _mm_add_ps(lxrw_lyrw_lzrw_lwrw, lwrx_nlzrx_lyrx_nlxrx);
let nlyrz_lxrz_lwrz_nlzrz = _mm_mul_ps(lyrz_lxrz_lwrz_lzrz, CONTROL_YXWZ);
let result1 = _mm_add_ps(lzry_lwry_nlxry_nlyry, nlyrz_lxrz_lwrz_nlzrz);
_mm_add_ps(result0, result1)
}
}
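// Rotate a storage XYZ vector by converting it to an __m128, rotating, and
// converting back.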
#[inline]
fn mul_vector3(self, other: XYZ<f32>) -> XYZ<f32> {
self.mul_float4_as_vector3(other.into()).into()
}
#[inline]
fn mul_float4_as_vector3(self, other: __m128) -> __m128 {
glam_assert!(FloatVector4::is_normalized(self));
unsafe {
const TWO: __m128 = const_f32x4!([2.0; 4]);
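// Rotate the vector using v' = v * (w*w - dot(b, b)) + b * (2.0 * dot(v, b))
// + cross(b, v) * (2.0 * w), where b is the quaternion's vector part; this is the
// expanded form of q * v * conjugate(q) for a unit quaternion.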
let w = _mm_shuffle_ps(self, self, 0b11_11_11_11);
let b = self;
let b2 = Vector3::dot_into_vec(b, b);
other
.mul(w.mul(w).sub(b2))
.add(b.mul(Vector3::dot_into_vec(other, b).mul(TWO)))
.add(b.cross(other).mul(w.mul(TWO)))
}
}
}