| diff -ru a/Eigen/src/Geometry/arch/Geometry_SSE.h b/Eigen/src/Geometry/arch/Geometry_SSE.h |
| --- a/Eigen/src/Geometry/arch/Geometry_SSE.h |
| +++ b/Eigen/src/Geometry/arch/Geometry_SSE.h |
| @@ -33,13 +33,14 @@ |
| Packet4f b = be.template packet<BAlignment,Packet4f>(0); |
| Packet4f s1 = pmul(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2)); |
| Packet4f s2 = pmul(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1)); |
| - pstoret<float,Packet4f,ResAlignment>( |
| - &res.x(), |
| - padd(psub(pmul(a,vec4f_swizzle1(b,3,3,3,3)), |
| - pmul(vec4f_swizzle1(a,2,0,1,0), |
| - vec4f_swizzle1(b,1,2,0,0))), |
| - pxor(mask,padd(s1,s2)))); |
| - |
| + pstoret<float, Packet4f, ResAlignment>( |
| + &res.x(), |
| + padd<Packet4f>( |
| + psub<Packet4f>(pmul<Packet4f>(a, vec4f_swizzle1(b, 3, 3, 3, 3)), |
| + pmul<Packet4f>(vec4f_swizzle1(a, 2, 0, 1, 0), |
| + vec4f_swizzle1(b, 1, 2, 0, 0))), |
| + pxor<Packet4f>(mask, padd(s1, s2)))); |
| + |
| return res; |
| } |
| }; |
| diff -ru a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h |
| --- a/Eigen/src/Core/GenericPacketMath.h |
| +++ b/Eigen/src/Core/GenericPacketMath.h |
| @@ -255,49 +255,43 @@ |
| return std::complex<RealScalar>(b, b); |
| } |
| |
| -template <typename Packet, typename Op> |
| -EIGEN_DEVICE_FUNC inline Packet bitwise_helper(const Packet& a, const Packet& b, Op op) { |
| +/** \internal \returns the bitwise and of \a a and \a b */ |
| +template<typename Packet> EIGEN_DEVICE_FUNC inline Packet |
| +pand(const Packet& a, const Packet& b) { |
| const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a); |
| const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b); |
| Packet c; |
| unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c); |
| for (size_t i = 0; i < sizeof(Packet); ++i) { |
| - *c_ptr++ = op(*a_ptr++, *b_ptr++); |
| + *c_ptr++ = *a_ptr++ & *b_ptr++; |
| } |
| return c; |
| } |
| |
| -/** \internal \returns the bitwise and of \a a and \a b */ |
| -template<typename Packet> EIGEN_DEVICE_FUNC inline Packet |
| -pand(const Packet& a, const Packet& b) { |
| -#if defined(EIGEN_HIP_DEVICE_COMPILE) |
| - return bitwise_helper(a ,b, std::bit_and<unsigned char>()); |
| -#else |
| - EIGEN_USING_STD(bit_and); |
| - return bitwise_helper(a ,b, bit_and<unsigned char>()); |
| -#endif |
| -} |
| - |
| /** \internal \returns the bitwise or of \a a and \a b */ |
| template<typename Packet> EIGEN_DEVICE_FUNC inline Packet |
| por(const Packet& a, const Packet& b) { |
| -#if defined(EIGEN_HIP_DEVICE_COMPILE) |
| - return bitwise_helper(a ,b, std::bit_or<unsigned char>()); |
| -#else |
| - EIGEN_USING_STD(bit_or); |
| - return bitwise_helper(a ,b, bit_or<unsigned char>()); |
| -#endif |
| + const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a); |
| + const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b); |
| + Packet c; |
| + unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c); |
| + for (size_t i = 0; i < sizeof(Packet); ++i) { |
| + *c_ptr++ = *a_ptr++ | *b_ptr++; |
| + } |
| + return c; |
| } |
| |
| /** \internal \returns the bitwise xor of \a a and \a b */ |
| template<typename Packet> EIGEN_DEVICE_FUNC inline Packet |
| pxor(const Packet& a, const Packet& b) { |
| -#if defined(EIGEN_HIP_DEVICE_COMPILE) |
| - return bitwise_helper(a ,b, std::bit_xor<unsigned char>()); |
| -#else |
| - EIGEN_USING_STD(bit_xor); |
| - return bitwise_helper(a ,b, bit_xor<unsigned char>()); |
| -#endif |
| + const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a); |
| + const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b); |
| + Packet c; |
| + unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c); |
| + for (size_t i = 0; i < sizeof(Packet); ++i) { |
| + *c_ptr++ = *a_ptr++ ^ *b_ptr++; |
| + } |
| + return c; |
| } |
| |
| /** \internal \returns the bitwise and of \a a and not \a b */ |