| /**************************************************************************** |
| * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| ****************************************************************************/ |
| |
| #ifndef __SWR_INTRIN_H__ |
| #define __SWR_INTRIN_H__ |
| |
| #include "os.h" |
| |
| #define SIMD_ARCH KNOB_ARCH |
| #include "simdlib_types.hpp" |
| |
| typedef SIMDImpl::SIMD128Impl::Float simd4scalar; |
| typedef SIMDImpl::SIMD128Impl::Double simd4scalard; |
| typedef SIMDImpl::SIMD128Impl::Integer simd4scalari; |
| typedef SIMDImpl::SIMD128Impl::Vec4 simd4vector; |
| typedef SIMDImpl::SIMD128Impl::Mask simd4mask; |
| |
| typedef SIMDImpl::SIMD256Impl::Float simd8scalar; |
| typedef SIMDImpl::SIMD256Impl::Double simd8scalard; |
| typedef SIMDImpl::SIMD256Impl::Integer simd8scalari; |
| typedef SIMDImpl::SIMD256Impl::Vec4 simd8vector; |
| typedef SIMDImpl::SIMD256Impl::Mask simd8mask; |
| |
| typedef SIMDImpl::SIMD512Impl::Float simd16scalar; |
| typedef SIMDImpl::SIMD512Impl::Double simd16scalard; |
| typedef SIMDImpl::SIMD512Impl::Integer simd16scalari; |
| typedef SIMDImpl::SIMD512Impl::Vec4 simd16vector; |
| typedef SIMDImpl::SIMD512Impl::Mask simd16mask; |
| |
| #if KNOB_SIMD_WIDTH == 8 |
| typedef simd8scalar simdscalar; |
| typedef simd8scalard simdscalard; |
| typedef simd8scalari simdscalari; |
| typedef simd8vector simdvector; |
| typedef simd8mask simdmask; |
| #else |
| #error Unsupported vector width |
| #endif |
| |
| INLINE |
| UINT pdep_u32(UINT a, UINT mask) |
| { |
| #if KNOB_ARCH >= KNOB_ARCH_AVX2 |
| return _pdep_u32(a, mask); |
| #else |
| UINT result = 0; |
| |
| // copied from http://wm.ite.pl/articles/pdep-soft-emu.html |
| // using bsf instead of funky loop |
| DWORD maskIndex; |
| while (_BitScanForward(&maskIndex, mask)) |
| { |
| // 1. isolate lowest set bit of mask |
| const UINT lowest = 1 << maskIndex; |
| |
| // 2. populate LSB from src |
| const UINT LSB = (UINT)((int)(a << 31) >> 31); |
| |
| // 3. copy bit from mask |
| result |= LSB & lowest; |
| |
| // 4. clear lowest bit |
| mask &= ~lowest; |
| |
| // 5. prepare for next iteration |
| a >>= 1; |
| } |
| |
| return result; |
| #endif |
| } |
| |
| INLINE |
| UINT pext_u32(UINT a, UINT mask) |
| { |
| #if KNOB_ARCH >= KNOB_ARCH_AVX2 |
| return _pext_u32(a, mask); |
| #else |
| UINT result = 0; |
| DWORD maskIndex; |
| uint32_t currentBit = 0; |
| while (_BitScanForward(&maskIndex, mask)) |
| { |
| // 1. isolate lowest set bit of mask |
| const UINT lowest = 1 << maskIndex; |
| |
| // 2. copy bit from mask |
| result |= ((a & lowest) > 0) << currentBit++; |
| |
| // 3. clear lowest bit |
| mask &= ~lowest; |
| } |
| return result; |
| #endif |
| } |
| |
| #endif//__SWR_INTRIN_H__ |