blob: 0f1249ec88f0b85e2c7342153c71667868e4a59f [file] [log] [blame]
//! Vertical (lane-wise) floating-point `sin_cos_pi`
#![allow(unused)]
// FIXME: 64-bit single-element vectors are not covered by `sin_cos_pi` yet.
use crate::*;
/// Crate-internal hook for computing sine and cosine of `self * π` in one call.
///
/// Every implementation in this file sets `Output = (Self, Self)`; the
/// portable fallback returns the pair as `(sin, cos)`.
///
/// Declared `pub(crate)` rather than with the bare `crate` visibility
/// shorthand: both mean the same thing, but the shorthand was an unstable
/// feature that no longer exists in the compiler.
pub(crate) trait SinCosPi: Sized {
    /// Type returned by [`SinCosPi::sin_cos_pi`].
    type Output;

    /// Consumes `self` and returns the implementation's `Output`.
    fn sin_cos_pi(self) -> Self::Output;
}
// Portable fallback implementation of `SinCosPi`.
//
// `$vec` is the vector type and `$pi` is a path to the π constant of the
// matching lane type. The generated method multiplies every lane by π and
// then calls the type's own `sin` and `cos`, returning `(sin, cos)`.
macro_rules! impl_def {
    ($vec:ident, $pi:path) => {
        impl SinCosPi for $vec {
            type Output = (Self, Self);

            #[inline]
            fn sin_cos_pi(self) -> Self::Output {
                // Scale once so that both calls below see the same argument.
                let pi = Self::splat($pi);
                let angle = self * pi;
                let sine = angle.sin();
                let cosine = angle.cos();
                (sine, cosine)
            }
        }
    };
}
// Shorthand for the portable fallback on a vector type `$vec`: forwards to
// `impl_def!` with the crate's `f32::consts::PI` path as the π constant.
macro_rules! impl_def32 {
    ($vec:ident) => {
        impl_def! { $vec, crate::f32::consts::PI }
    };
}
// Shorthand for the portable fallback on a vector type `$vec`: forwards to
// `impl_def!` with the crate's `f64::consts::PI` path as the π constant.
macro_rules! impl_def64 {
    ($vec:ident) => {
        impl_def! { $vec, crate::f64::consts::PI }
    };
}
// Implements `SinCosPi` for `$vid` by forwarding to the external routine
// `$fun`, which takes one vector and returns a pair of vectors.
//
// Four arms cover the possible size relations between `$vid` and the vector
// width `$fun` operates on:
//
// * `$vid: $fun`             - same width; call directly.
// * `$vid[t => $vid_t]`      - `$vid` is half of `$vid_t` ("twice"); widen, call, narrow.
// * `$vid[h => $vid_h]`      - `$vid` is two `$vid_h` ("halves"); call once per half.
// * `$vid[q => $vid_q]`      - `$vid` is four `$vid_q` ("quarters"); call once per quarter.
//
// NOTE(review): every arm reinterprets `$fun`'s return value as a Rust tuple
// of two vectors and puns through unions without `#[repr(C)]`. That assumes
// the tuple/union layouts match the routine's return struct field-for-field;
// the language does not formally guarantee this - confirm it holds for the
// targets in use.
// NOTE(review): the first returned vector is taken to be the sine and the
// second the cosine, matching the `(sin, cos)` order of the portable
// fallback - confirm against the routine's documentation.
macro_rules! impl_unary_t {
    ($vid:ident: $fun:ident) => {
        impl SinCosPi for $vid {
            type Output = (Self, Self);
            fn sin_cos_pi(self) -> Self::Output {
                // SAFETY: `transmute` verifies at compile time that the source
                // and destination sizes agree; see the layout note on the macro.
                unsafe {
                    use crate::mem::transmute;
                    transmute($fun(transmute(self)))
                }
            }
        }
    };
    ($vid:ident[t => $vid_t:ident]: $fun:ident) => {
        impl SinCosPi for $vid {
            type Output = (Self, Self);
            fn sin_cos_pi(self) -> Self::Output {
                // SAFETY: both unions overlay plain vector data of identical
                // total size, and every byte read has been initialized.
                unsafe {
                    use crate::mem::transmute;
                    union U {
                        vec: [$vid; 2],
                        twice: $vid_t,
                    }
                    // Fill the unused upper half with a copy of `self` instead
                    // of uninitialized memory: reading uninitialized lanes is
                    // undefined behaviour, and the upper half of the result is
                    // discarded anyway.
                    let twice = U { vec: [self, self] }.twice;
                    let twice = transmute($fun(transmute(twice)));
                    union R {
                        twice: ($vid_t, $vid_t),
                        vecs: ([$vid; 2], [$vid; 2]),
                    }
                    let r = R { twice }.vecs;
                    // `r.0` is the first returned vector and `r.1` the second,
                    // each split into two `$vid`s. Index 0 of each is the part
                    // computed from `self`, so the pair must come from
                    // `r.0[0]` and `r.1[0]` (not from both halves of `r.0`).
                    (*r.0.get_unchecked(0), *r.1.get_unchecked(0))
                }
            }
        }
    };
    ($vid:ident[h => $vid_h:ident]: $fun:ident) => {
        impl SinCosPi for $vid {
            type Output = (Self, Self);
            fn sin_cos_pi(self) -> Self::Output {
                // SAFETY: the unions overlay fully initialized vector data and
                // the indices 0 and 1 are in bounds for the 2-element arrays.
                unsafe {
                    use crate::mem::transmute;
                    union U {
                        vec: $vid,
                        halves: [$vid_h; 2],
                    }
                    let halves = U { vec: self }.halves;
                    let res_0: ($vid_h, $vid_h) =
                        transmute($fun(transmute(*halves.get_unchecked(0))));
                    let res_1: ($vid_h, $vid_h) =
                        transmute($fun(transmute(*halves.get_unchecked(1))));
                    union R {
                        result: ($vid, $vid),
                        halves: ([$vid_h; 2], [$vid_h; 2]),
                    }
                    // Regroup: all first outputs form the first result vector,
                    // all second outputs form the second.
                    R { halves: ([res_0.0, res_1.0], [res_0.1, res_1.1]) }
                        .result
                }
            }
        }
    };
    ($vid:ident[q => $vid_q:ident]: $fun:ident) => {
        impl SinCosPi for $vid {
            type Output = (Self, Self);
            fn sin_cos_pi(self) -> Self::Output {
                // SAFETY: the unions overlay fully initialized vector data and
                // the indices 0..=3 are in bounds for the 4-element arrays.
                unsafe {
                    use crate::mem::transmute;
                    union U {
                        vec: $vid,
                        quarters: [$vid_q; 4],
                    }
                    let quarters = U { vec: self }.quarters;
                    let res_0: ($vid_q, $vid_q) =
                        transmute($fun(transmute(*quarters.get_unchecked(0))));
                    let res_1: ($vid_q, $vid_q) =
                        transmute($fun(transmute(*quarters.get_unchecked(1))));
                    let res_2: ($vid_q, $vid_q) =
                        transmute($fun(transmute(*quarters.get_unchecked(2))));
                    let res_3: ($vid_q, $vid_q) =
                        transmute($fun(transmute(*quarters.get_unchecked(3))));
                    union R {
                        result: ($vid, $vid),
                        quarters: ([$vid_q; 4], [$vid_q; 4]),
                    }
                    // Regroup: all first outputs form the first result vector,
                    // all second outputs form the second.
                    R {
                        quarters: (
                            [res_0.0, res_1.0, res_2.0, res_3.0],
                            [res_0.1, res_1.1, res_2.1, res_3.1],
                        ),
                    }
                    .result
                }
            }
        }
    };
}
// Selects one `SinCosPi` implementation per vector type.
//
// With the `sleef-sys` feature on x86_64, the widest enabled instruction set
// decides which `Sleef_sincospi*` routine backs each type; vectors wider than
// the routine are split (`h`/`q`) and `f32x2` is widened (`t`). Every other
// configuration uses the portable fallback.
cfg_if! {
    if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
        use sleef_sys::*;
        cfg_if! {
            if #[cfg(target_feature = "avx2")] {
                // f32 lanes
                impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05avx2128);
                impl_unary_t!(f32x4: Sleef_sincospif4_u05avx2128);
                impl_unary_t!(f32x8: Sleef_sincospif8_u05avx2);
                impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx2);
                // f64 lanes
                impl_unary_t!(f64x2: Sleef_sincospid2_u05avx2128);
                impl_unary_t!(f64x4: Sleef_sincospid4_u05avx2);
                impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx2);
            } else if #[cfg(target_feature = "avx")] {
                // f32 lanes
                impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4);
                impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4);
                impl_unary_t!(f32x8: Sleef_sincospif8_u05avx);
                impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx);
                // f64 lanes
                impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4);
                impl_unary_t!(f64x4: Sleef_sincospid4_u05avx);
                impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx);
            } else if #[cfg(target_feature = "sse4.2")] {
                // f32 lanes
                impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4);
                impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4);
                impl_unary_t!(f32x8[h => f32x4]: Sleef_sincospif4_u05sse4);
                impl_unary_t!(f32x16[q => f32x4]: Sleef_sincospif4_u05sse4);
                // f64 lanes
                impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4);
                impl_unary_t!(f64x4[h => f64x2]: Sleef_sincospid2_u05sse4);
                impl_unary_t!(f64x8[q => f64x2]: Sleef_sincospid2_u05sse4);
            } else {
                // None of the instruction sets above is enabled: portable fallback.
                impl_def32!(f32x2);
                impl_def32!(f32x4);
                impl_def32!(f32x8);
                impl_def32!(f32x16);
                impl_def64!(f64x2);
                impl_def64!(f64x4);
                impl_def64!(f64x8);
            }
        }
    } else {
        // Not x86_64 or `sleef-sys` disabled: portable fallback.
        impl_def32!(f32x2);
        impl_def32!(f32x4);
        impl_def32!(f32x8);
        impl_def32!(f32x16);
        impl_def64!(f64x2);
        impl_def64!(f64x4);
        impl_def64!(f64x8);
    }
}