Skip to content
Commits on Source (4)
simde (0.0.0.git.20191205.c2e740c-1) UNRELEASED; urgency=medium
simde (0.0.0.git.20191209-1) UNRELEASED; urgency=medium
* Initial release. (Closes: #946299)
* TODO: wait for a versioned release from upstream
* Fixes for the failing tests (from upstream)
-- Michael R. Crusoe <michael.crusoe@gmail.com> Fri, 06 Dec 2019 18:57:14 +0100
......@@ -19,8 +19,3 @@ include /usr/share/dpkg/default.mk
%:
dh $@ --sourcedirectory=test
override_dh_auto_test:
ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS)))
dh_auto_test || true
endif
......@@ -27,8 +27,8 @@ version=4
# https://qa.debian.org/cgi-bin/fakeupstream.cgi?upstream=github_commits_package_json/#user#/#project# \
# .*/archive/(.*\.tar\.gz?.*)
opts="uversionmangle=s/.*date=(\d{4})-(\d\d)-(\d\d)T.*/0.0.0+git.$1$2$3/, \
filenamemangle=s/.*date=(\d{4})-(\d\d)-(\d\d)T.*/0.0.0+git.$1$2$3.tar.gz/, \
opts="uversionmangle=s/.*date=(\d{4})-(\d\d)-(\d\d)T.*/0.0.0.git.$1$2$3/, \
filenamemangle=s/.*date=(\d{4})-(\d\d)-(\d\d)T.*/0.0.0.git.$1$2$3.tar.gz/, \
repack,compression=xz" \
https://qa.debian.org/cgi-bin/fakeupstream.cgi?upstream=github_commits_package_json/nemequ/simde \
.*/archive/(.*\.tar\.gz?.*)
......
......@@ -215,7 +215,7 @@ HEDLEY_STATIC_ASSERT(sizeof(simde_float64) == 8, "Unable to find 64-bit floating
#endif
#if HEDLEY_GCC_HAS_BUILTIN(__builtin_convertvector,9,0,0)
# define SIMDE__CONVERT_VECTOR(to, from) ((to) = __builtin_convertvector((from), typeof(to)))
# define SIMDE__CONVERT_VECTOR(to, from) ((to) = __builtin_convertvector((from), __typeof__(to)))
#endif
#if HEDLEY_HAS_WARNING("-Wbad-function-cast")
......@@ -305,4 +305,21 @@ HEDLEY_STATIC_ASSERT(sizeof(simde_float64) == 8, "Unable to find 64-bit floating
# endif
#endif
/* Bit-pattern constants: a float/double whose underlying bits are all ones
 * (ALL_SET) or all zeros (ALL_UNSET), obtained by type-punning through a
 * union.  C uses compound literals; C++ (which lacks C compound-literal
 * semantics here) uses static const union objects instead. */
#if !defined(__cplusplus)
#define SIMDE_F64_ALL_SET (((union { uint64_t u64; simde_float64 f64; }) { .u64 = ~UINT64_C(0x0) }).f64)
#define SIMDE_F64_ALL_UNSET (((union { uint64_t u64; simde_float64 f64; }) { .u64 = UINT64_C(0x0) }).f64)
#define SIMDE_F32_ALL_SET (((union { uint32_t u32; simde_float32 f32; }) { .u32 = ~UINT32_C(0x0) }).f32)
#define SIMDE_F32_ALL_UNSET (((union { uint32_t u32; simde_float32 f32; }) { .u32 = UINT32_C(0x0) }).f32)
#else
static const union { uint64_t u64; simde_float64 f64; } simde_f64_all_set = { .u64 = ~UINT64_C(0) };
static const union { uint64_t u64; simde_float64 f64; } simde_f64_all_unset = { .u64 = UINT64_C(0) };
/* Fixed: the f32 variants previously declared the union members as
 * uint64_t/simde_float64, so the 32-bit constants punned through the wrong
 * (64-bit) types and produced an incorrect bit pattern.  They must mirror
 * the C-branch union exactly: uint32_t / simde_float32. */
static const union { uint32_t u32; simde_float32 f32; } simde_f32_all_set = { .u32 = ~UINT32_C(0) };
static const union { uint32_t u32; simde_float32 f32; } simde_f32_all_unset = { .u32 = UINT32_C(0) };
# define SIMDE_F64_ALL_SET (simde_f64_all_set.f64)
# define SIMDE_F64_ALL_UNSET (simde_f64_all_unset.f64)
# define SIMDE_F32_ALL_SET (simde_f32_all_set.f32)
# define SIMDE_F32_ALL_UNSET (simde_f32_all_unset.f32)
#endif
#endif /* !defined(SIMDE_COMMON_H) */
This diff is collapsed.
......@@ -184,7 +184,7 @@ simde_mm256_broadcastsi128_si256 (simde__m128i a) {
simde__m256i r;
#if defined(SIMDE_AVX2_NATIVE)
return SIMDE__M256I_C(_mm256_broadcastsi128_si256(a.n));
return SIMDE__M256I_FROM_NATIVE(_mm256_broadcastsi128_si256(a.n));
#elif defined(SIMDE_SSE2_NATIVE)
r.m128i[0].n = a.n;
r.m128i[1].n = a.n;
......@@ -206,7 +206,7 @@ simde__m256i
simde_mm256_cvtepi8_epi16 (simde__m128i a){
simde__m256i r;
#if defined(SIMDE_AVX2_NATIVE)
r = SIMDE__M256I_C(_mm256_cvtepi8_epi16(a.n));
r = SIMDE__M256I_FROM_NATIVE(_mm256_cvtepi8_epi16(a.n));
#else
SIMDE__VECTORIZE
for (size_t i = 0 ; i < (sizeof(r.i16) / sizeof(r.i16[0])) ; i++) {
......@@ -224,7 +224,7 @@ simde__m256i
simde_mm256_cvtepi8_epi32 (simde__m128i a){
simde__m256i r;
#if defined(SIMDE_AVX2_NATIVE)
r = SIMDE__M256I_C(_mm256_cvtepi8_epi32(a.n));
r = SIMDE__M256I_FROM_NATIVE(_mm256_cvtepi8_epi32(a.n));
#else
SIMDE__VECTORIZE
for (size_t i = 0 ; i < (sizeof(r.i32) / sizeof(r.i32[0])) ; i++) {
......@@ -242,7 +242,7 @@ simde__m256i
simde_mm256_cvtepi8_epi64 (simde__m128i a){
simde__m256i r;
#if defined(SIMDE_AVX2_NATIVE)
r = SIMDE__M256I_C(_mm256_cvtepi8_epi64(a.n));
r = SIMDE__M256I_FROM_NATIVE(_mm256_cvtepi8_epi64(a.n));
#else
SIMDE__VECTORIZE
for (size_t i = 0 ; i < (sizeof(r.i64) / sizeof(r.i64[0])) ; i++) {
......@@ -260,7 +260,7 @@ simde__m256i
simde_mm256_cvtepi16_epi32 (simde__m128i a){
simde__m256i r;
#if defined(SIMDE_AVX2_NATIVE)
r = SIMDE__M256I_C(_mm256_cvtepi16_epi32(a.n));
r = SIMDE__M256I_FROM_NATIVE(_mm256_cvtepi16_epi32(a.n));
#else
SIMDE__VECTORIZE
for (size_t i = 0 ; i < (sizeof(r.i32) / sizeof(r.i32[0])) ; i++) {
......@@ -278,7 +278,7 @@ simde__m256i
simde_mm256_cvtepi16_epi64 (simde__m128i a){
simde__m256i r;
#if defined(SIMDE_AVX2_NATIVE)
r = SIMDE__M256I_C(_mm256_cvtepi16_epi64(a.n));
r = SIMDE__M256I_FROM_NATIVE(_mm256_cvtepi16_epi64(a.n));
#else
SIMDE__VECTORIZE
for (size_t i = 0 ; i < (sizeof(r.i64) / sizeof(r.i64[0])) ; i++) {
......@@ -296,7 +296,7 @@ simde__m256i
simde_mm256_cvtepi32_epi64 (simde__m128i a){
simde__m256i r;
#if defined(SIMDE_AVX2_NATIVE)
r = SIMDE__M256I_C(_mm256_cvtepi32_epi64(a.n));
r = SIMDE__M256I_FROM_NATIVE(_mm256_cvtepi32_epi64(a.n));
#else
SIMDE__VECTORIZE
for (size_t i = 0 ; i < (sizeof(r.i64) / sizeof(r.i64[0])) ; i++) {
......@@ -314,7 +314,7 @@ simde__m256i
simde_mm256_cvtepu8_epi16 (simde__m128i a){
simde__m256i r;
#if defined(SIMDE_AVX2_NATIVE)
r = SIMDE__M256I_C(_mm256_cvtepu8_epi16(a.n));
r = SIMDE__M256I_FROM_NATIVE(_mm256_cvtepu8_epi16(a.n));
#else
SIMDE__VECTORIZE
for (size_t i = 0 ; i < (sizeof(r.i16) / sizeof(r.i16[0])) ; i++) {
......@@ -332,7 +332,7 @@ simde__m256i
simde_mm256_cvtepu8_epi32 (simde__m128i a){
simde__m256i r;
#if defined(SIMDE_AVX2_NATIVE)
r = SIMDE__M256I_C(_mm256_cvtepu8_epi32(a.n));
r = SIMDE__M256I_FROM_NATIVE(_mm256_cvtepu8_epi32(a.n));
#else
SIMDE__VECTORIZE
for (size_t i = 0 ; i < (sizeof(r.i32) / sizeof(r.i32[0])) ; i++) {
......@@ -350,7 +350,7 @@ simde__m256i
simde_mm256_cvtepu8_epi64 (simde__m128i a){
simde__m256i r;
#if defined(SIMDE_AVX2_NATIVE)
r = SIMDE__M256I_C(_mm256_cvtepu8_epi64(a.n));
r = SIMDE__M256I_FROM_NATIVE(_mm256_cvtepu8_epi64(a.n));
#else
SIMDE__VECTORIZE
for (size_t i = 0 ; i < (sizeof(r.i64) / sizeof(r.i64[0])) ; i++) {
......@@ -368,7 +368,7 @@ simde__m256i
simde_mm256_cvtepu16_epi32 (simde__m128i a){
simde__m256i r;
#if defined(SIMDE_AVX2_NATIVE)
r = SIMDE__M256I_C(_mm256_cvtepu16_epi32(a.n));
r = SIMDE__M256I_FROM_NATIVE(_mm256_cvtepu16_epi32(a.n));
#else
SIMDE__VECTORIZE
for (size_t i = 0 ; i < (sizeof(r.i32) / sizeof(r.i32[0])) ; i++) {
......@@ -386,7 +386,7 @@ simde__m256i
simde_mm256_cvtepu16_epi64 (simde__m128i a){
simde__m256i r;
#if defined(SIMDE_AVX2_NATIVE)
r = SIMDE__M256I_C(_mm256_cvtepu16_epi64(a.n));
r = SIMDE__M256I_FROM_NATIVE(_mm256_cvtepu16_epi64(a.n));
#else
SIMDE__VECTORIZE
for (size_t i = 0 ; i < (sizeof(r.i64) / sizeof(r.i64[0])) ; i++) {
......@@ -404,7 +404,7 @@ simde__m256i
simde_mm256_cvtepu32_epi64 (simde__m128i a){
simde__m256i r;
#if defined(SIMDE_AVX2_NATIVE)
r = SIMDE__M256I_C(_mm256_cvtepu32_epi64(a.n));
r = SIMDE__M256I_FROM_NATIVE(_mm256_cvtepu32_epi64(a.n));
#else
SIMDE__VECTORIZE
for (size_t i = 0 ; i < (sizeof(r.i64) / sizeof(r.i64[0])) ; i++) {
......@@ -467,7 +467,7 @@ simde_mm256_srli_epi64 (simde__m256i a, const int imm8) {
return r;
}
#if defined(SIMDE_AVX2_NATIVE)
# define simde_mm256_srli_epi64(a, imm8) SIMDE__M256I_C(_mm256_srli_epi64(a.n, imm8))
# define simde_mm256_srli_epi64(a, imm8) SIMDE__M256I_FROM_NATIVE(_mm256_srli_epi64(a.n, imm8))
#endif
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
# define _mm256_srli_epi64(a, imm8) simde_mm256_srli_epi64(SIMDE__M256I_FROM_NATIVE(a), imm8)
......
......@@ -1340,8 +1340,10 @@ simde_mm_cvtt_ps2pi (simde__m128 a) {
return r;
}
#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a)
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cvtt_ps2pi(a) SIMDE__M64_TO_NATIVE(simde_mm_cvtt_ps2pi(SIMDE__M128_FROM_NATIVE(a)))
# define _mm_cvttps_pi32(a) SIMDE__M64_TO_NATIVE(simde_mm_cvttps_pi32(SIMDE__M128_FROM_NATIVE(a)))
#endif
SIMDE__FUNCTION_ATTRIBUTES
......@@ -1353,37 +1355,9 @@ simde_mm_cvtt_ss2si (simde__m128 a) {
return SIMDE_CONVERT_FTOI(int32_t, truncf(a.f32[0]));
#endif
}
#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si(a)
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si(SIMDE__M128_FROM_NATIVE(a))
#endif
/* Convert packed single-precision floats to packed 32-bit integers with
 * truncation.  On native SSE builds this wraps _mm_cvttps_pi32; otherwise it
 * defers to simde_mm_cvtt_ps2pi (same operation under its classic name,
 * defined earlier in this file). */
SIMDE__FUNCTION_ATTRIBUTES
simde__m64
simde_mm_cvttps_pi32 (simde__m128 a) {
simde__m64 r;
#if defined(SIMDE_SSE_NATIVE)
r.n = _mm_cvttps_pi32(a.n);
#else
r = simde_mm_cvtt_ps2pi(a);
#endif
return r;
}
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cvtt_pi32(a) SIMDE__M64_TO_NATIVE(simde_mm_cvtt_pi32(SIMDE__M128_FROM_NATIVE(a)))
#endif
/* Convert the lowest single-precision lane of `a` to a 32-bit integer,
 * truncating toward zero (truncf on the portable path). */
SIMDE__FUNCTION_ATTRIBUTES
int32_t
simde_mm_cvttss_si32 (simde__m128 a) {
#if defined(SIMDE_SSE_NATIVE)
return _mm_cvttss_si32(a.n);
#else
return SIMDE_CONVERT_FTOI(int32_t, truncf(a.f32[0]));
#endif
}
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cvttss_si32(a) simde_mm_cvttss_si32(SIMDE__M128_FROM_NATIVE(a))
#endif
......@@ -1565,6 +1539,8 @@ simde_mm_load_ps1 (simde_float32 const* mem_addr) {
#if defined(SIMDE_SSE_NATIVE)
r.n = _mm_load_ps1(mem_addr);
#elif defined(SIMDE_SSE_NEON)
r.neon_f32 = vld1q_dup_f32(mem_addr);
#else
const simde_float32 v = *mem_addr;
SIMDE__VECTORIZE
......@@ -1575,6 +1551,7 @@ simde_mm_load_ps1 (simde_float32 const* mem_addr) {
return r;
}
#define simde_mm_load1_ps(mem_addr) simde_mm_load_ps1(mem_addr)
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_load_ps1(mem_addr) SIMDE__M128_TO_NATIVE(simde_mm_load_ps1(mem_addr))
#endif
......@@ -1601,25 +1578,6 @@ simde_mm_load_ss (simde_float32 const* mem_addr) {
# define _mm_load_ss(mem_addr) SIMDE__M128_TO_NATIVE(simde_mm_load_ss(mem_addr))
#endif
/* Load the single float at *mem_addr and broadcast it to all four lanes.
 * Native SSE uses _mm_load1_ps, NEON uses a duplicating load
 * (vld1q_dup_f32), and the portable path reuses simde_mm_load_ps1. */
SIMDE__FUNCTION_ATTRIBUTES
simde__m128
simde_mm_load1_ps (simde_float32 const* mem_addr) {
simde__m128 r;
#if defined(SIMDE_SSE_NATIVE)
r.n = _mm_load1_ps(mem_addr);
#elif defined(SIMDE_SSE_NEON)
r.neon_f32 = vld1q_dup_f32(mem_addr);
#else
r = simde_mm_load_ps1(mem_addr);
#endif
return r;
}
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_load1_ps(mem_addr) SIMDE__M128_TO_NATIVE(simde_mm_load1_ps(mem_addr))
#endif
SIMDE__FUNCTION_ATTRIBUTES
simde__m128
simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) {
......@@ -2393,10 +2351,25 @@ simde_mm_sfence (void) {
# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w)
#endif
#if defined(SIMDE_SSE_NATIVE) && !defined(__PGI)
# define simde_mm_shuffle_pi16(a, imm8) SIMDE__M64_FROM_NATIVE(_mm_shuffle_pi16(a.n, imm8))
#elif defined(SIMDE__SHUFFLE_VECTOR)
# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \
const simde__m64 simde__tmp_a_ = a; \
(simde__m64) { .i16 = \
SIMDE__SHUFFLE_VECTOR(16, 8, \
(simde__tmp_a_).i16, \
(simde__tmp_a_).i16, \
(((imm8) ) & 3), \
(((imm8) >> 2) & 3), \
(((imm8) >> 4) & 3), \
(((imm8) >> 6) & 3)) }; }))
#else
SIMDE__FUNCTION_ATTRIBUTES
simde__m64
simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) {
simde__m64 r;
for (size_t i = 0 ; i < sizeof(r.i16) / sizeof(r.i16[0]) ; i++) {
r.i16[i] = a.i16[(imm8 >> (i * 2)) & 3];
}
......@@ -2408,19 +2381,6 @@ HEDLEY_DIAGNOSTIC_PUSH
return r;
HEDLEY_DIAGNOSTIC_POP
}
#if defined(SIMDE_SSE_NATIVE) && !defined(__PGI)
# define simde_mm_shuffle_pi16(a, imm8) SIMDE__M64_FROM_NATIVE(_mm_shuffle_pi16(a.n, imm8))
#elif defined(SIMDE__SHUFFLE_VECTOR)
# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \
const simde__m64 simde__tmp_a_ = a; \
(simde__m64) { .i16 = \
SIMDE__SHUFFLE_VECTOR(16, 8, \
(simde__tmp_a_).i16, \
(simde__tmp_a_).i16, \
(((imm8) ) & 3), \
(((imm8) >> 2) & 3), \
(((imm8) >> 4) & 3), \
(((imm8) >> 6) & 3)) }; }))
#endif
#if defined(SIMDE_SSE_NATIVE) && !defined(__PGI)
# define simde_m_pshufw(a, imm8) SIMDE__M64_FROM_NATIVE(_m_pshufw(a.n, imm8))
......@@ -2432,16 +2392,6 @@ HEDLEY_DIAGNOSTIC_POP
# define _m_pshufw(a, imm8) SIMDE__M64_TO_NATIVE(simde_mm_shuffle_pi16(SIMDE__M64_FROM_NATIVE(a), imm8))
#endif
/* Portable _mm_shuffle_ps: each 2-bit field of imm8 selects one lane; the
 * two low result lanes come from `a`, the two high result lanes from `b`. */
SIMDE__FUNCTION_ATTRIBUTES
simde__m128
simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) {
simde__m128 r;
r.f32[0] = a.f32[(imm8 >> 0) & 3];
r.f32[1] = a.f32[(imm8 >> 2) & 3];
r.f32[2] = b.f32[(imm8 >> 4) & 3];
r.f32[3] = b.f32[(imm8 >> 6) & 3];
return r;
}
#if defined(SIMDE_SSE_NATIVE) && !defined(__PGI)
# define simde_mm_shuffle_ps(a, b, imm8) SIMDE__M128_FROM_NATIVE(_mm_shuffle_ps(a.n, b.n, imm8))
#elif defined(SIMDE__SHUFFLE_VECTOR)
......@@ -2454,6 +2404,17 @@ simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) {
(((imm8) >> 2) & 3), \
(((imm8) >> 4) & 3) + 4, \
(((imm8) >> 6) & 3) + 4) }; }))
#else
/* Scalar fallback for _mm_shuffle_ps (used when neither the native intrinsic
 * nor SIMDE__SHUFFLE_VECTOR is available): each 2-bit field of imm8 picks a
 * lane — low two results from `a`, high two from `b`. */
SIMDE__FUNCTION_ATTRIBUTES
simde__m128
simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) {
simde__m128 r;
r.f32[0] = a.f32[(imm8 >> 0) & 3];
r.f32[1] = a.f32[(imm8 >> 2) & 3];
r.f32[2] = b.f32[(imm8 >> 4) & 3];
r.f32[3] = b.f32[(imm8 >> 6) & 3];
return r;
}
#endif
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_shuffle_ps(a, b, imm8) SIMDE__M128_TO_NATIVE(simde_mm_shuffle_ps(SIMDE__M128_FROM_NATIVE(a), SIMDE__M128_FROM_NATIVE(b), imm8))
......
......@@ -181,8 +181,6 @@ typedef union {
#if defined(SIMDE_SSE2_NATIVE)
HEDLEY_STATIC_ASSERT(sizeof(__m128i) == sizeof(simde__m128i), "__m128i size doesn't match simde__m128i size");
HEDLEY_STATIC_ASSERT(sizeof(__m128d) == sizeof(simde__m128d), "__m128d size doesn't match simde__m128d size");
/* Constructor helpers: wrap a native __m128i/__m128d value in the
 * corresponding simde union type (SSE2-native builds only). */
SIMDE__FUNCTION_ATTRIBUTES simde__m128i SIMDE__M128I_C(__m128i v) { simde__m128i r; r.n = v; return r; }
SIMDE__FUNCTION_ATTRIBUTES simde__m128d SIMDE__M128D_C(__m128d v) { simde__m128d r; r.n = v; return r; }
#elif defined(SIMDE_SSE_NEON)
#define SIMDE__M128I_NEON_C(T, expr) (simde__m128i) { .neon_##T = expr }
#define SIMDE__M128D_NEON_C(T, expr) (simde__m128d) { .neon_##T = expr }
......@@ -505,7 +503,7 @@ simde_mm_andnot_pd (simde__m128d a, simde__m128d b) {
simde__m128d r;
#if defined(SIMDE_SSE2_NATIVE)
return SIMDE__M128D_C(_mm_andnot_pd(a.n, b.n));
return SIMDE__M128D_FROM_NATIVE(_mm_andnot_pd(a.n, b.n));
#elif defined(SIMDE_SSE2_NEON)
r.neon_i32 = vbicq_s32(a.neon_i32, b.neon_i32);
#else
......@@ -615,7 +613,7 @@ simde_mm_bslli_si128 (simde__m128i a, const int imm8) {
return r;
}
#if defined(SIMDE_SSE2_NATIVE) && !defined(__PGI)
# define simde_mm_bslli_si128(a, imm8) SIMDE__M128I_C(_mm_slli_si128(a.n, imm8))
# define simde_mm_bslli_si128(a, imm8) SIMDE__M128I_FROM_NATIVE(_mm_slli_si128(a.n, imm8))
#elif defined(SIMDE_SSE2_NEON)
# define simde_mm_bslli_si128(a, imm8) \
SIMDE__M128I_NEON_C(i8, (((imm8) <= 0) ? ((a).neon_i8) : (((imm8) > 15) ? (vdupq_n_s8(0)) : (vextq_s8(vdupq_n_s8(0), (a).neon_i8, 16 - (imm8))))))
......@@ -654,7 +652,7 @@ simde_mm_bsrli_si128 (simde__m128i a, const int imm8) {
return r;
}
#if defined(SIMDE_SSE2_NATIVE) && !defined(__PGI)
# define simde_mm_bsrli_si128(a, imm8) SIMDE__M128I_C(_mm_srli_si128(a.n, imm8))
# define simde_mm_bsrli_si128(a, imm8) SIMDE__M128I_FROM_NATIVE(_mm_srli_si128(a.n, imm8))
#elif defined(SIMDE_SSE2_NEON)
# define simde_mm_bsrli_si128(a, imm8) \
SIMDE__M128I_NEON_C(i8, ((imm8) <= 0) ? ((a).neon_i8) : (((imm8) > 15) ? (vdupq_n_s8(0)) : (vextq_s8((a).neon_i8, vdupq_n_s8(0), (imm8)))))
......@@ -778,7 +776,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_castpd_si128 (simde__m128d a) {
#if defined(SIMDE_SSE2_NATIVE)
return SIMDE__M128I_C(_mm_castpd_si128(a.n));
return SIMDE__M128I_FROM_NATIVE(_mm_castpd_si128(a.n));
#else
union {
simde__m128d pd;
......@@ -796,7 +794,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_castps_pd (simde__m128 a) {
#if defined(SIMDE_SSE2_NATIVE)
return SIMDE__M128D_C(_mm_castps_pd(a.n));
return SIMDE__M128D_FROM_NATIVE(_mm_castps_pd(a.n));
#else
union {
simde__m128 ps;
......@@ -816,7 +814,7 @@ simde_mm_castps_si128 (simde__m128 a) {
simde__m128i r;
#if defined(SIMDE_SSE2_NATIVE)
return SIMDE__M128I_C(_mm_castps_si128(a.n));
return SIMDE__M128I_FROM_NATIVE(_mm_castps_si128(a.n));
#elif defined(SIMDE_SSE2_NEON)
r.neon_i32 = a.neon_i32;
#else
......@@ -835,7 +833,7 @@ simde_mm_castsi128_pd (simde__m128i a) {
simde__m128d r;
#if defined(SIMDE_SSE2_NATIVE)
return SIMDE__M128D_C(_mm_castsi128_pd(a.n));
return SIMDE__M128D_FROM_NATIVE(_mm_castsi128_pd(a.n));
#else
r = HEDLEY_REINTERPRET_CAST(simde__m128d, a);
#endif
......@@ -1479,7 +1477,7 @@ simde_mm_cvtepi32_pd (simde__m128i a) {
simde__m128d r;
#if defined(SIMDE_SSE2_NATIVE)
r = SIMDE__M128D_C(_mm_cvtepi32_pd(a.n));
r = SIMDE__M128D_FROM_NATIVE(_mm_cvtepi32_pd(a.n));
#elif defined(SIMDE__CONVERT_VECTOR)
SIMDE__CONVERT_VECTOR(r.f64, a.m64[0].i32);
#else
......@@ -1525,7 +1523,7 @@ simde_mm_cvtpd_epi32 (simde__m128d a) {
simde__m128i r;
#if defined(SIMDE_SSE2_NATIVE)
r = SIMDE__M128I_C(_mm_cvtpd_epi32(a.n));
r = SIMDE__M128I_FROM_NATIVE(_mm_cvtpd_epi32(a.n));
#elif defined(SIMDE__CONVERT_VECTOR)
SIMDE__CONVERT_VECTOR(r.m64[0].i32, a.f64);
r.m64[1] = simde_mm_setzero_si64();
......@@ -1655,6 +1653,8 @@ simde_mm_cvtps_pd (simde__m128 a) {
#if defined(SIMDE_SSE2_NATIVE)
r.n = _mm_cvtps_pd(a.n);
#elif defined(SIMDE__CONVERT_VECTOR)
SIMDE__CONVERT_VECTOR(r.f64, a.m64[0].f32);
#else
SIMDE__VECTORIZE
for (size_t i = 0 ; i < (sizeof(r.f64) / sizeof(r.f64[0])) ; i++) {
......@@ -1695,7 +1695,7 @@ simde_mm_cvtsd_si32 (simde__m128d a) {
#endif
SIMDE__FUNCTION_ATTRIBUTES
int32_t
int64_t
simde_mm_cvtsd_si64 (simde__m128d a) {
#if defined(SIMDE_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64)
#if defined(__PGI)
......@@ -1901,6 +1901,8 @@ simde_mm_cvttpd_pi32 (simde__m128d a) {
#if defined(SIMDE_SSE2_NATIVE)
r.n = _mm_cvttpd_pi32(a.n);
#elif defined(SIMDE__CONVERT_VECTOR)
SIMDE__CONVERT_VECTOR(r.i32, a.f64);
#else
for (size_t i = 0 ; i < (sizeof(r.i32) / sizeof(r.i32[0])) ; i++) {
r.i32[i] = SIMDE_CONVERT_FTOI(int32_t, trunc(a.f64[i]));
......@@ -1922,6 +1924,8 @@ simde_mm_cvttps_epi32 (simde__m128 a) {
r.n = _mm_cvttps_epi32(a.n);
#elif defined(SIMDE_SSE2_NEON)
r.neon_i32 = vcvtq_s32_f32(a.neon_f32);
#elif defined(SIMDE__CONVERT_VECTOR)
SIMDE__CONVERT_VECTOR(r.i32, a.f32);
#else
for (size_t i = 0 ; i < (sizeof(r.i32) / sizeof(r.i32[0])) ; i++) {
r.i32[i] = SIMDE_CONVERT_FTOI(int32_t, truncf(a.f32[i]));
......@@ -1973,6 +1977,8 @@ simde_mm_div_pd (simde__m128d a, simde__m128d b) {
#if defined(SIMDE_SSE2_NATIVE)
r.n = _mm_div_pd(a.n, b.n);
#elif defined(SIMDE__ENABLE_GCC_VEC_EXT)
r.f64 = a.f64 / b.f64;
#else
SIMDE__VECTORIZE
for (size_t i = 0 ; i < (sizeof(r.f64) / sizeof(r.f64[0])) ; i++) {
......@@ -2025,7 +2031,7 @@ simde_mm_insert_epi16 (simde__m128i a, int32_t i, const int imm8) {
return a;
}
#if defined(SIMDE_SSE2_NATIVE) && !defined(__PGI)
# define simde_mm_insert_epi16(a, i, imm8) SIMDE__M128I_C(_mm_insert_epi16((a).n, (i), (imm8)))
# define simde_mm_insert_epi16(a, i, imm8) SIMDE__M128I_FROM_NATIVE(_mm_insert_epi16((a).n, (i), (imm8)))
#elif defined(SIMDE_SSE2_NEON)
# define simde_mm_insert_epi16(a, i, imm8) SIMDE__M128I_NEON_C(i16, vsetq_lane_s16((i), a.neon_i16, (imm8)))
#endif
......@@ -3457,7 +3463,7 @@ simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) {
return r;
}
#if defined(SIMDE_SSE2_NATIVE)
# define simde_mm_shuffle_epi32(a, imm8) SIMDE__M128I_C(_mm_shuffle_epi32((a).n, (imm8)))
# define simde_mm_shuffle_epi32(a, imm8) SIMDE__M128I_FROM_NATIVE(_mm_shuffle_epi32((a).n, (imm8)))
#elif defined(SIMDE__SHUFFLE_VECTOR)
# define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \
const simde__m128i simde__tmp_a_ = a; \
......@@ -3485,7 +3491,7 @@ simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) {
return r;
}
#if defined(SIMDE_SSE2_NATIVE) && !defined(__PGI)
# define simde_mm_shuffle_pd(a, b, imm8) SIMDE__M128D_C(_mm_shuffle_pd((a).n, (b).n, (imm8)))
# define simde_mm_shuffle_pd(a, b, imm8) SIMDE__M128D_FROM_NATIVE(_mm_shuffle_pd((a).n, (b).n, (imm8)))
#elif defined(SIMDE__SHUFFLE_VECTOR)
# define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \
(simde__m128d) { .f64 = \
......@@ -3512,7 +3518,7 @@ simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) {
return r;
}
#if defined(SIMDE_SSE2_NATIVE)
# define simde_mm_shufflehi_epi16(a, imm8) SIMDE__M128I_C(_mm_shufflehi_epi16((a).n, (imm8)))
# define simde_mm_shufflehi_epi16(a, imm8) SIMDE__M128I_FROM_NATIVE(_mm_shufflehi_epi16((a).n, (imm8)))
#elif defined(SIMDE__SHUFFLE_VECTOR)
# define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \
const simde__m128i simde__tmp_a_ = a; \
......@@ -3543,7 +3549,7 @@ simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) {
return r;
}
#if defined(SIMDE_SSE2_NATIVE)
# define simde_mm_shufflelo_epi16(a, imm8) SIMDE__M128I_C(_mm_shufflelo_epi16((a).n, (imm8)))
# define simde_mm_shufflelo_epi16(a, imm8) SIMDE__M128I_FROM_NATIVE(_mm_shufflelo_epi16((a).n, (imm8)))
#elif defined(SIMDE__SHUFFLE_VECTOR)
# define simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \
const simde__m128i simde__tmp_a_ = a; \
......@@ -3757,7 +3763,7 @@ simde_mm_srai_epi16 (simde__m128i a, const int imm8) {
return r;
}
#if defined(SIMDE_SSE2_NATIVE)
# define simde_mm_srai_epi16(a, imm8) SIMDE__M128I_C(_mm_srai_epi16((a).n, (imm8)));
# define simde_mm_srai_epi16(a, imm8) SIMDE__M128I_FROM_NATIVE(_mm_srai_epi16((a).n, (imm8)));
#endif
#if defined(SIMDE_SSE2_ENABLE_NATIVE_ALIASES)
# define _mm_srai_epi16(a, imm8) SIMDE__M128I_TO_NATIVE(simde_mm_srai_epi16(SIMDE__M128I_FROM_NATIVE(a), imm8))
......@@ -3777,7 +3783,7 @@ simde_mm_srai_epi32 (simde__m128i a, int imm8) {
return r;
}
#if defined(SIMDE_SSE2_NATIVE)
# define simde_mm_srai_epi32(a, imm8) SIMDE__M128I_C(_mm_srai_epi32((a).n, (imm8)))
# define simde_mm_srai_epi32(a, imm8) SIMDE__M128I_FROM_NATIVE(_mm_srai_epi32((a).n, (imm8)))
#elif defined(SIMDE_SSE2_NEON)
# define simde_mm_srai_epi32(a, imm8) SIMDE__M128I_NEON_C(i32, ((imm8) <= 0) ? (a.neon_i32) : (((imm8) > 31) ? (vshrq_n_s32(vshrq_n_s32(a.neon_i32, 16), 16)) : (vshrq_n_s32(a.neon_i32, (imm8)))))
#endif
......@@ -3841,7 +3847,7 @@ simde_mm_slli_epi16 (simde__m128i a, const int imm8) {
return r;
}
#if defined(SIMDE_SSE2_NATIVE)
# define simde_mm_slli_epi16(a, imm8) SIMDE__M128I_C(_mm_slli_epi16(a.n, imm8));
# define simde_mm_slli_epi16(a, imm8) SIMDE__M128I_FROM_NATIVE(_mm_slli_epi16(a.n, imm8));
#elif defined(SIMDE_SSE2_NEON)
# define simde_mm_slli_epi16(a, imm8) \
SIMDE__M128I_NEON_C(i16, ((imm8) <= 0) ? ((a).neon_i16) : (((imm8) > 31) ? (vdupq_n_s16(0)) : (vshlq_n_s16((a).neon_i16, (imm8)))))
......@@ -3862,7 +3868,7 @@ simde_mm_slli_epi32 (simde__m128i a, const int imm8) {
return r;
}
#if defined(SIMDE_SSE2_NATIVE)
# define simde_mm_slli_epi32(a, imm8) SIMDE__M128I_C(_mm_slli_epi32(a.n, imm8));
# define simde_mm_slli_epi32(a, imm8) SIMDE__M128I_FROM_NATIVE(_mm_slli_epi32(a.n, imm8));
#elif defined(SIMDE_SSE2_NEON)
# define simde_mm_slli_epi32(a, imm8) \
SIMDE__M128I_NEON_C(i32, ((imm8) <= 0) ? ((a).neon_i32) : (((imm8) > 31) ? (vdupq_n_s32(0)) : (vshlq_n_s32((a).neon_i32, (imm8)))))
......@@ -3883,7 +3889,7 @@ simde_mm_slli_epi64 (simde__m128i a, const int imm8) {
return r;
}
#if defined(SIMDE_SSE2_NATIVE)
# define simde_mm_slli_epi64(a, imm8) SIMDE__M128I_C(_mm_slli_epi64(a.n, imm8));
# define simde_mm_slli_epi64(a, imm8) SIMDE__M128I_FROM_NATIVE(_mm_slli_epi64(a.n, imm8));
#endif
#if defined(SIMDE_SSE2_ENABLE_NATIVE_ALIASES)
# define _mm_slli_epi64(a, imm8) SIMDE__M128I_TO_NATIVE(simde_mm_slli_epi64(SIMDE__M128I_FROM_NATIVE(a), imm8))
......@@ -3901,7 +3907,7 @@ simde_mm_srli_epi16 (simde__m128i a, const int imm8) {
return r;
}
#if defined(SIMDE_SSE2_NATIVE)
# define simde_mm_srli_epi16(a, imm8) SIMDE__M128I_C(_mm_srli_epi16(a.n, imm8));
# define simde_mm_srli_epi16(a, imm8) SIMDE__M128I_FROM_NATIVE(_mm_srli_epi16(a.n, imm8));
#elif defined(SIMDE_SSE2_NEON)
# define simde_mm_srli_epi16(a, imm8) \
SIMDE__M128I_NEON_C(u16, ((imm8) <= 0) ? ((a).neon_u16) : (((imm8) > 31) ? (vdupq_n_u16(0)) : (vshrq_n_u16((a).neon_u16, (imm8)))))
......@@ -3922,7 +3928,7 @@ simde_mm_srli_epi32 (simde__m128i a, const int imm8) {
return r;
}
#if defined(SIMDE_SSE2_NATIVE)
# define simde_mm_srli_epi32(a, imm8) SIMDE__M128I_C(_mm_srli_epi32(a.n, imm8))
# define simde_mm_srli_epi32(a, imm8) SIMDE__M128I_FROM_NATIVE(_mm_srli_epi32(a.n, imm8))
#elif defined(SIMDE_SSE2_NEON)
# define simde_mm_srli_epi32(a, imm8) \
SIMDE__M128I_NEON_C(u32, ((imm8) <= 0) ? ((a).neon_u32) : (((imm8) > 31) ? (vdupq_n_u32(0)) : (vshrq_n_u32((a).neon_u32, (imm8)))))
......@@ -3947,7 +3953,7 @@ simde_mm_srli_epi64 (simde__m128i a, const int imm8) {
return r;
}
#if defined(SIMDE_SSE2_NATIVE)
# define simde_mm_srli_epi64(a, imm8) SIMDE__M128I_C(_mm_srli_epi64(a.n, imm8))
# define simde_mm_srli_epi64(a, imm8) SIMDE__M128I_FROM_NATIVE(_mm_srli_epi64(a.n, imm8))
#elif defined(SIMDE_SSE2_NEON)
# define simde_mm_srli_epi64(a, imm8) \
SIMDE__M128I_NEON_C(u64, (((imm8)&255) < 0 || ((imm8)&255) > 63) ? (vdupq_n_u64(0)) : ((((imm8)&255) == 0) ? (a.neon_u64) : (vshrq_n_u64((a).neon_u64, (imm8)&255))))
......
......@@ -64,7 +64,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_addsub_pd (simde__m128d a, simde__m128d b) {
#if defined(SIMDE_SSE3_NATIVE)
return SIMDE__M128D_C(_mm_addsub_pd(a.n, b.n));
return SIMDE__M128D_FROM_NATIVE(_mm_addsub_pd(a.n, b.n));
#else
simde__m128d r;
for (size_t i = 0 ; i < (sizeof(r.f64) / sizeof(r.f64[0])) ; i += 2) {
......@@ -95,7 +95,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_hadd_pd (simde__m128d a, simde__m128d b) {
#if defined(SIMDE_SSE3_NATIVE)
return SIMDE__M128D_C(_mm_hadd_pd(a.n, b.n));
return SIMDE__M128D_FROM_NATIVE(_mm_hadd_pd(a.n, b.n));
#else
simde__m128d r;
r.f64[0] = a.f64[0] + a.f64[1];
......@@ -139,7 +139,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_hsub_pd (simde__m128d a, simde__m128d b) {
#if defined(SIMDE_SSE3_NATIVE)
return SIMDE__M128D_C(_mm_hsub_pd(a.n, b.n));
return SIMDE__M128D_FROM_NATIVE(_mm_hsub_pd(a.n, b.n));
#else
simde__m128d r;
r.f64[0] = a.f64[0] - a.f64[1];
......@@ -183,7 +183,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_lddqu_si128 (simde__m128i const* mem_addr) {
#if defined(SIMDE_SSE3_NATIVE)
return SIMDE__M128I_C(_mm_lddqu_si128(&mem_addr->n));
return SIMDE__M128I_FROM_NATIVE(_mm_lddqu_si128(&mem_addr->n));
#elif defined(SIMDE_SSE3_NEON)
return SIMDE__M128I_NEON_C(i32, vld1q_s32((int32_t const*) mem_addr));
#else
......@@ -200,7 +200,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_movedup_pd (simde__m128d a) {
#if defined(SIMDE_SSE3_NATIVE)
return SIMDE__M128D_C(_mm_movedup_pd(a.n));
return SIMDE__M128D_FROM_NATIVE(_mm_movedup_pd(a.n));
#else
simde__m128d r;
r.f64[0] = a.f64[0];
......
......@@ -123,7 +123,7 @@ simde_mm_blend_epi16 (simde__m128i a, simde__m128i b, const int imm8) {
return r;
}
#if defined(SIMDE_SSE4_1_NATIVE)
# define simde_mm_blend_epi16(a, b, imm8) SIMDE__M128I_C(_mm_blend_epi16(a.n, b.n, imm8))
# define simde_mm_blend_epi16(a, b, imm8) SIMDE__M128I_FROM_NATIVE(_mm_blend_epi16(a.n, b.n, imm8))
#endif
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
# define _mm_blend_epi16(a, b, imm8) SIMDE__M128I_TO_NATIVE(simde_mm_blend_epi16(SIMDE__M128I_FROM_NATIVE(a), SIMDE__M128I_FROM_NATIVE(b), imm8))
......@@ -140,7 +140,7 @@ simde_mm_blend_pd (simde__m128d a, simde__m128d b, const int imm8) {
return r;
}
#if defined(SIMDE_SSE4_1_NATIVE)
# define simde_mm_blend_pd(a, b, imm8) SIMDE__M128D_C(_mm_blend_pd(a.n, b.n, imm8))
# define simde_mm_blend_pd(a, b, imm8) SIMDE__M128D_FROM_NATIVE(_mm_blend_pd(a.n, b.n, imm8))
#endif
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
# define _mm_blend_pd(a, b, imm8) SIMDE__M128D_TO_NATIVE(simde_mm_blend_pd(SIMDE__M128D_FROM_NATIVE(a), SIMDE__M128D_FROM_NATIVE(b), imm8))
......@@ -167,7 +167,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_blendv_epi8 (simde__m128i a, simde__m128i b, simde__m128i mask) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128I_C(_mm_blendv_epi8(a.n, b.n, mask.n));
return SIMDE__M128I_FROM_NATIVE(_mm_blendv_epi8(a.n, b.n, mask.n));
#elif defined(SIMDE_SSE4_1_NEON)
simde__m128i mask_ = simde_mm_cmplt_epi8(mask, simde_mm_set1_epi8(0));
return SIMDE__M128I_NEON_C(i8, vbslq_s8(mask_.neon_u8, b.neon_i8, a.neon_i8));
......@@ -192,7 +192,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_blendv_pd (simde__m128d a, simde__m128d b, simde__m128d mask) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128D_C(_mm_blendv_pd(a.n, b.n, mask.n));
return SIMDE__M128D_FROM_NATIVE(_mm_blendv_pd(a.n, b.n, mask.n));
#else
simde__m128d r;
SIMDE__VECTORIZE
......@@ -236,7 +236,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_ceil_pd (simde__m128d a) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128D_C(_mm_ceil_pd(a.n));
return SIMDE__M128D_FROM_NATIVE(_mm_ceil_pd(a.n));
#else
simde__m128d r;
SIMDE__VECTORIZE
......@@ -272,7 +272,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_ceil_sd (simde__m128d a, simde__m128d b) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128D_C(_mm_ceil_sd(a.n, b.n));
return SIMDE__M128D_FROM_NATIVE(_mm_ceil_sd(a.n, b.n));
#else
return simde_mm_set_pd(a.f64[1], ceil(b.f64[0]));
#endif
......@@ -298,7 +298,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cmpeq_epi64 (simde__m128i a, simde__m128i b) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128I_C(_mm_cmpeq_epi64(a.n, b.n));
return SIMDE__M128I_FROM_NATIVE(_mm_cmpeq_epi64(a.n, b.n));
#else
simde__m128i r;
SIMDE__VECTORIZE
......@@ -316,7 +316,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cvtepi8_epi16 (simde__m128i a) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128I_C(_mm_cvtepi8_epi16(a.n));
return SIMDE__M128I_FROM_NATIVE(_mm_cvtepi8_epi16(a.n));
#else
simde__m128i r;
SIMDE__VECTORIZE
......@@ -334,7 +334,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cvtepi8_epi32 (simde__m128i a) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128I_C(_mm_cvtepi8_epi32(a.n));
return SIMDE__M128I_FROM_NATIVE(_mm_cvtepi8_epi32(a.n));
#else
simde__m128i r;
SIMDE__VECTORIZE
......@@ -352,7 +352,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cvtepi8_epi64 (simde__m128i a) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128I_C(_mm_cvtepi8_epi64(a.n));
return SIMDE__M128I_FROM_NATIVE(_mm_cvtepi8_epi64(a.n));
#else
simde__m128i r;
SIMDE__VECTORIZE
......@@ -370,7 +370,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cvtepu8_epi16 (simde__m128i a) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128I_C(_mm_cvtepu8_epi16(a.n));
return SIMDE__M128I_FROM_NATIVE(_mm_cvtepu8_epi16(a.n));
#else
simde__m128i r;
SIMDE__VECTORIZE
......@@ -388,7 +388,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cvtepu8_epi32 (simde__m128i a) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128I_C(_mm_cvtepu8_epi32(a.n));
return SIMDE__M128I_FROM_NATIVE(_mm_cvtepu8_epi32(a.n));
#elif defined(SIMDE_SSE4_1_NEON)
uint8x16_t u8x16 = a.neon_u8; /* blendx blendx blendx DCBA */
uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */
......@@ -411,7 +411,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cvtepu8_epi64 (simde__m128i a) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128I_C(_mm_cvtepu8_epi64(a.n));
return SIMDE__M128I_FROM_NATIVE(_mm_cvtepu8_epi64(a.n));
#else
simde__m128i r;
SIMDE__VECTORIZE
......@@ -429,7 +429,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cvtepi16_epi32 (simde__m128i a) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128I_C(_mm_cvtepi16_epi32(a.n));
return SIMDE__M128I_FROM_NATIVE(_mm_cvtepi16_epi32(a.n));
#elif defined(SIMDE_SSE4_1_NEON)
return SIMDE__M128I_NEON_C(i32, vmovl_s16(vget_low_s16(a.neon_i16)));
#else
......@@ -449,7 +449,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cvtepu16_epi32 (simde__m128i a) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128I_C(_mm_cvtepu16_epi32(a.n));
return SIMDE__M128I_FROM_NATIVE(_mm_cvtepu16_epi32(a.n));
#else
simde__m128i r;
SIMDE__VECTORIZE
......@@ -467,7 +467,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cvtepu16_epi64 (simde__m128i a) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128I_C(_mm_cvtepu16_epi64(a.n));
return SIMDE__M128I_FROM_NATIVE(_mm_cvtepu16_epi64(a.n));
#else
simde__m128i r;
SIMDE__VECTORIZE
......@@ -485,7 +485,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cvtepi16_epi64 (simde__m128i a) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128I_C(_mm_cvtepi16_epi64(a.n));
return SIMDE__M128I_FROM_NATIVE(_mm_cvtepi16_epi64(a.n));
#else
simde__m128i r;
SIMDE__VECTORIZE
......@@ -503,7 +503,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cvtepi32_epi64 (simde__m128i a) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128I_C(_mm_cvtepi32_epi64(a.n));
return SIMDE__M128I_FROM_NATIVE(_mm_cvtepi32_epi64(a.n));
#else
simde__m128i r;
SIMDE__VECTORIZE
......@@ -521,7 +521,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cvtepu32_epi64 (simde__m128i a) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128I_C(_mm_cvtepu32_epi64(a.n));
return SIMDE__M128I_FROM_NATIVE(_mm_cvtepu32_epi64(a.n));
#else
simde__m128i r;
SIMDE__VECTORIZE
......@@ -554,7 +554,7 @@ simde_mm_dp_pd (simde__m128d a, simde__m128d b, const int imm8) {
return r;
}
#if defined(SIMDE_SSE4_1_NATIVE)
# define simde_mm_dp_pd(a, b, imm8) SIMDE__M128D_C(_mm_dp_pd(a.n, b.n, imm8))
# define simde_mm_dp_pd(a, b, imm8) SIMDE__M128D_FROM_NATIVE(_mm_dp_pd(a.n, b.n, imm8))
#endif
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
# define _mm_dp_pd(a, b, imm8) SIMDE__M128D_TO_NATIVE(simde_mm_dp_pd(SIMDE__M128D_FROM_NATIVE(a), SIMDE__M128D_FROM_NATIVE(b), imm8))
......@@ -640,7 +640,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_floor_pd (simde__m128d a) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128D_C(_mm_floor_pd(a.n));
return SIMDE__M128D_FROM_NATIVE(_mm_floor_pd(a.n));
#else
simde__m128d r;
SIMDE__VECTORIZE
......@@ -676,7 +676,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_floor_sd (simde__m128d a, simde__m128d b) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128D_C(_mm_floor_sd(a.n, b.n));
return SIMDE__M128D_FROM_NATIVE(_mm_floor_sd(a.n, b.n));
#else
simde__m128d r;
r.f64[0] = floor(b.f64[0]);
......@@ -713,7 +713,7 @@ simde_mm_insert_epi8 (simde__m128i a, int i, const int imm8) {
return a;
}
#if defined(SIMDE_SSE4_1_NATIVE)
# define simde_mm_insert_epi8(a, i, imm8) SIMDE__M128I_C(_mm_insert_epi8(a.n, i, imm8));
# define simde_mm_insert_epi8(a, i, imm8) SIMDE__M128I_FROM_NATIVE(_mm_insert_epi8(a.n, i, imm8));
#endif
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
# define _mm_insert_epi8(a, i, imm8) SIMDE__M128I_TO_NATIVE(simde_mm_insert_epi8(SIMDE__M128I_FROM_NATIVE(a), i, imm8))
......@@ -726,7 +726,7 @@ simde_mm_insert_epi32 (simde__m128i a, int i, const int imm8) {
return a;
}
#if defined(SIMDE_SSE4_1_NATIVE)
# define simde_mm_insert_epi32(a, i, imm8) SIMDE__M128I_C(_mm_insert_epi32(a.n, i, imm8));
# define simde_mm_insert_epi32(a, i, imm8) SIMDE__M128I_FROM_NATIVE(_mm_insert_epi32(a.n, i, imm8));
#endif
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
# define _mm_insert_epi32(a, i, imm8) SIMDE__M128I_TO_NATIVE(simde_mm_insert_epi32(SIMDE__M128I_FROM_NATIVE(a), i, imm8))
......@@ -739,7 +739,7 @@ simde_mm_insert_epi64 (simde__m128i a, int64_t i, const int imm8) {
return a;
}
#if defined(SIMDE_SSE4_1_NATIVE)
# define simde_mm_insert_epi64(a, i, imm8) SIMDE__M128I_C(_mm_insert_epi64(a.n, i, imm8));
# define simde_mm_insert_epi64(a, i, imm8) SIMDE__M128I_FROM_NATIVE(_mm_insert_epi64(a.n, i, imm8));
#endif
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
# define _mm_insert_epi64(a, i, imm8) SIMDE__M128I_TO_NATIVE(simde_mm_insert_epi64(SIMDE__M128I_FROM_NATIVE(a), i, imm8))
......@@ -771,7 +771,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_max_epi8 (simde__m128i a, simde__m128i b) {
#if defined(SIMDE_SSE4_1_NATIVE) && !defined(__PGI)
return SIMDE__M128I_C(_mm_max_epi8(a.n, b.n));
return SIMDE__M128I_FROM_NATIVE(_mm_max_epi8(a.n, b.n));
#elif defined(SIMDE_SSE4_1_NEON)
return SIMDE__M128I_NEON_C(i8, vmaxq_s8(a.neon_i8, b.neon_i8));
#else
......@@ -791,7 +791,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_max_epi32 (simde__m128i a, simde__m128i b) {
#if defined(SIMDE_SSE4_1_NATIVE) && !defined(__PGI)
return SIMDE__M128I_C(_mm_max_epi32(a.n, b.n));
return SIMDE__M128I_FROM_NATIVE(_mm_max_epi32(a.n, b.n));
#elif defined(SIMDE_SSE4_1_NEON)
return SIMDE__M128I_NEON_C(i32, vmaxq_s32(a.neon_i32, b.neon_i32));
#else
......@@ -811,7 +811,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_max_epu16 (simde__m128i a, simde__m128i b) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128I_C(_mm_max_epu16(a.n, b.n));
return SIMDE__M128I_FROM_NATIVE(_mm_max_epu16(a.n, b.n));
#elif defined(SIMDE_SSE4_1_NEON)
return SIMDE__M128I_NEON_C(u16, vmaxq_u16(a.neon_u16, b.neon_u16));
#else
......@@ -831,7 +831,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_max_epu32 (simde__m128i a, simde__m128i b) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128I_C(_mm_max_epu32(a.n, b.n));
return SIMDE__M128I_FROM_NATIVE(_mm_max_epu32(a.n, b.n));
#elif defined(SIMDE_SSE4_1_NEON)
return SIMDE__M128I_NEON_C(u32, vmaxq_u32(a.neon_u32, b.neon_u32));
#else
......@@ -851,7 +851,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_min_epi8 (simde__m128i a, simde__m128i b) {
#if defined(SIMDE_SSE4_1_NATIVE) && !defined(__PGI)
return SIMDE__M128I_C(_mm_min_epi8(a.n, b.n));
return SIMDE__M128I_FROM_NATIVE(_mm_min_epi8(a.n, b.n));
#elif defined(SIMDE_SSE4_1_NEON)
return SIMDE__M128I_NEON_C(i8, vminq_s8(a.neon_i8, b.neon_i8));
#else
......@@ -871,7 +871,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_min_epi32 (simde__m128i a, simde__m128i b) {
#if defined(SIMDE_SSE4_1_NATIVE) && !defined(__PGI)
return SIMDE__M128I_C(_mm_min_epi32(a.n, b.n));
return SIMDE__M128I_FROM_NATIVE(_mm_min_epi32(a.n, b.n));
#elif defined(SIMDE_SSE4_1_NEON)
return SIMDE__M128I_NEON_C(i32, vminq_s32(a.neon_i32, b.neon_i32));
#else
......@@ -891,7 +891,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_min_epu16 (simde__m128i a, simde__m128i b) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128I_C(_mm_min_epu16(a.n, b.n));
return SIMDE__M128I_FROM_NATIVE(_mm_min_epu16(a.n, b.n));
#elif defined(SIMDE_SSE4_1_NEON)
return SIMDE__M128I_NEON_C(u16, vminq_u16(a.neon_u16, b.neon_u16));
#else
......@@ -911,7 +911,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_min_epu32 (simde__m128i a, simde__m128i b) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128I_C(_mm_min_epu32(a.n, b.n));
return SIMDE__M128I_FROM_NATIVE(_mm_min_epu32(a.n, b.n));
#elif defined(SIMDE_SSE4_1_NEON)
return SIMDE__M128I_NEON_C(u32, vminq_u32(a.neon_u32, b.neon_u32));
#else
......@@ -931,7 +931,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_minpos_epu16 (simde__m128i a) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128I_C(_mm_minpos_epu16(a.n));
return SIMDE__M128I_FROM_NATIVE(_mm_minpos_epu16(a.n));
#else
simde__m128i r = simde_x_mm_set_epu16(0, 0, 0, 0, 0, 0, 0, UINT16_MAX);
......@@ -967,7 +967,7 @@ simde_mm_mpsadbw_epu8 (simde__m128i a, simde__m128i b, const int imm8) {
return r;
}
#if defined(SIMDE_SSE4_1_NATIVE)
# define simde_mm_mpsadbw_epu8(a, b, imm8) SIMDE__M128I_C(_mm_mpsadbw_epu8(a.n, b.n, imm8));
# define simde_mm_mpsadbw_epu8(a, b, imm8) SIMDE__M128I_FROM_NATIVE(_mm_mpsadbw_epu8(a.n, b.n, imm8));
#endif
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
# define _mm_mpsadbw_epu8(a, b, imm8) SIMDE__M128I_TO_NATIVE(simde_mm_mpsadbw_epu8(SIMDE__M128I_FROM_NATIVE(a), SIMDE__M128I_FROM_NATIVE(b), imm8))
......@@ -977,7 +977,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_mul_epi32 (simde__m128i a, simde__m128i b) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128I_C(_mm_mul_epi32(a.n, b.n));
return SIMDE__M128I_FROM_NATIVE(_mm_mul_epi32(a.n, b.n));
#else
simde__m128i r;
SIMDE__VECTORIZE
......@@ -997,7 +997,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_mullo_epi32 (simde__m128i a, simde__m128i b) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128I_C(_mm_mullo_epi32(a.n, b.n));
return SIMDE__M128I_FROM_NATIVE(_mm_mullo_epi32(a.n, b.n));
#elif defined(SIMDE_SSE4_1_NEON)
return SIMDE__M128I_NEON_C(i32, vmulq_s32(a.neon_i32, b.neon_i32));
#else
......@@ -1017,7 +1017,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_packus_epi32 (simde__m128i a, simde__m128i b) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128I_C(_mm_packus_epi32(a.n, b.n));
return SIMDE__M128I_FROM_NATIVE(_mm_packus_epi32(a.n, b.n));
#else
simde__m128i r;
for (size_t i = 0 ; i < (sizeof(r.i32) / sizeof(r.i32[0])) ; i++) {
......@@ -1060,7 +1060,7 @@ simde_mm_round_pd (simde__m128d a, int rounding) {
return r;
}
#if defined(SIMDE_SSE4_1_NATIVE)
# define simde_mm_round_pd(a, rounding) SIMDE__M128D_C(_mm_round_pd((a).n, rounding))
# define simde_mm_round_pd(a, rounding) SIMDE__M128D_FROM_NATIVE(_mm_round_pd((a).n, rounding))
#endif
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
# define _mm_round_pd(a, rounding) SIMDE__M128D_TO_NATIVE(simde_mm_round_pd(SIMDE__M128D_FROM_NATIVE(a), rounding))
......@@ -1128,7 +1128,7 @@ simde_mm_round_sd (simde__m128d a, simde__m128d b, int rounding) {
return r;
}
#if defined(SIMDE_SSE4_1_NATIVE)
# define simde_mm_round_sd(a, b, rounding) SIMDE__M128D_C(_mm_round_sd((a).n, (b).n, rounding))
# define simde_mm_round_sd(a, b, rounding) SIMDE__M128D_FROM_NATIVE(_mm_round_sd((a).n, (b).n, rounding))
#endif
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
# define _mm_round_sd(a, b, rounding) SIMDE__M128D_TO_NATIVE(simde_mm_round_sd(SIMDE__M128D_FROM_NATIVE(a), SIMDE__M128D_FROM_NATIVE(b), rounding))
......@@ -1171,7 +1171,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_stream_load_si128 (const simde__m128i* mem_addr) {
#if defined(SIMDE_SSE4_1_NATIVE)
return SIMDE__M128I_C(_mm_stream_load_si128((__m128i*)(void*) &(mem_addr->n)));
return SIMDE__M128I_FROM_NATIVE(_mm_stream_load_si128((__m128i*)(void*) &(mem_addr->n)));
#else
return *mem_addr;
#endif
......
......@@ -68,7 +68,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_abs_epi8 (simde__m128i a) {
#if defined(SIMDE_SSSE3_NATIVE)
return SIMDE__M128I_C(_mm_abs_epi8(a.n));
return SIMDE__M128I_FROM_NATIVE(_mm_abs_epi8(a.n));
#else
simde__m128i r;
SIMDE__VECTORIZE
......@@ -86,7 +86,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_abs_epi16 (simde__m128i a) {
#if defined(SIMDE_SSSE3_NATIVE)
return SIMDE__M128I_C(_mm_abs_epi16(a.n));
return SIMDE__M128I_FROM_NATIVE(_mm_abs_epi16(a.n));
#else
simde__m128i r;
SIMDE__VECTORIZE
......@@ -104,7 +104,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_abs_epi32 (simde__m128i a) {
#if defined(SIMDE_SSSE3_NATIVE)
return SIMDE__M128I_C(_mm_abs_epi32(a.n));
return SIMDE__M128I_FROM_NATIVE(_mm_abs_epi32(a.n));
#else
simde__m128i r;
SIMDE__VECTORIZE
......@@ -212,7 +212,7 @@ simde_mm_alignr_epi8 (simde__m128i a, simde__m128i b, int count) {
return r;
}
#if defined(SIMDE_SSSE3_NATIVE)
# define simde_mm_alignr_epi8(a, b, count) SIMDE__M128I_C(_mm_alignr_epi8(a.n, b.n, count))
# define simde_mm_alignr_epi8(a, b, count) SIMDE__M128I_FROM_NATIVE(_mm_alignr_epi8(a.n, b.n, count))
#endif
#if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
# define _mm_alignr_epi8(a, b, count) SIMDE__M128I_TO_NATIVE(simde_mm_alignr_epi8(SIMDE__M128I_FROM_NATIVE(a), SIMDE__M128I_FROM_NATIVE(b), count))
......@@ -253,7 +253,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_shuffle_epi8 (simde__m128i a, simde__m128i b) {
#if defined(SIMDE_SSSE3_NATIVE)
return SIMDE__M128I_C(_mm_shuffle_epi8(a.n, b.n));
return SIMDE__M128I_FROM_NATIVE(_mm_shuffle_epi8(a.n, b.n));
#else
simde__m128i r;
for (size_t i = 0 ; i < (sizeof(r.u8) / sizeof(r.u8[0])) ; i++) {
......@@ -287,7 +287,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_hadd_epi16 (simde__m128i a, simde__m128i b) {
#if defined(SIMDE_SSSE3_NATIVE)
return SIMDE__M128I_C(_mm_hadd_epi16(a.n, b.n));
return SIMDE__M128I_FROM_NATIVE(_mm_hadd_epi16(a.n, b.n));
#else
simde__m128i r;
......@@ -311,7 +311,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_hadd_epi32 (simde__m128i a, simde__m128i b) {
#if defined(SIMDE_SSSE3_NATIVE)
return SIMDE__M128I_C(_mm_hadd_epi32(a.n, b.n));
return SIMDE__M128I_FROM_NATIVE(_mm_hadd_epi32(a.n, b.n));
#else
simde__m128i r;
......@@ -369,7 +369,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_hadds_epi16 (simde__m128i a, simde__m128i b) {
#if defined(SIMDE_SSSE3_NATIVE)
return SIMDE__M128I_C(_mm_hadds_epi16(a.n, b.n));
return SIMDE__M128I_FROM_NATIVE(_mm_hadds_epi16(a.n, b.n));
#else
simde__m128i r;
for (size_t i = 0 ; i < ((sizeof(r.i16) / sizeof(r.i16[0])) / 2) ; i++) {
......@@ -409,7 +409,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_hsub_epi16 (simde__m128i a, simde__m128i b) {
#if defined(SIMDE_SSSE3_NATIVE)
return SIMDE__M128I_C(_mm_hsub_epi16(a.n, b.n));
return SIMDE__M128I_FROM_NATIVE(_mm_hsub_epi16(a.n, b.n));
#else
simde__m128i r;
......@@ -433,7 +433,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_hsub_epi32 (simde__m128i a, simde__m128i b) {
#if defined(SIMDE_SSSE3_NATIVE)
return SIMDE__M128I_C(_mm_hsub_epi32(a.n, b.n));
return SIMDE__M128I_FROM_NATIVE(_mm_hsub_epi32(a.n, b.n));
#else
simde__m128i r;
......@@ -491,7 +491,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_hsubs_epi16 (simde__m128i a, simde__m128i b) {
#if defined(SIMDE_SSSE3_NATIVE)
return SIMDE__M128I_C(_mm_hsubs_epi16(a.n, b.n));
return SIMDE__M128I_FROM_NATIVE(_mm_hsubs_epi16(a.n, b.n));
#else
simde__m128i r;
for (size_t i = 0 ; i < ((sizeof(r.i16) / sizeof(r.i16[0])) / 2) ; i++) {
......@@ -531,7 +531,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_maddubs_epi16 (simde__m128i a, simde__m128i b) {
#if defined(SIMDE_SSSE3_NATIVE)
return SIMDE__M128I_C(_mm_maddubs_epi16(a.n, b.n));
return SIMDE__M128I_FROM_NATIVE(_mm_maddubs_epi16(a.n, b.n));
#else
simde__m128i r;
for (size_t i = 0 ; i < (sizeof(r.i16) / sizeof(r.i16[0])) ; i++) {
......@@ -573,7 +573,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_mulhrs_epi16 (simde__m128i a, simde__m128i b) {
#if defined(SIMDE_SSSE3_NATIVE)
return SIMDE__M128I_C(_mm_mulhrs_epi16(a.n, b.n));
return SIMDE__M128I_FROM_NATIVE(_mm_mulhrs_epi16(a.n, b.n));
#else
simde__m128i r;
SIMDE__VECTORIZE
......@@ -609,7 +609,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_sign_epi8 (simde__m128i a, simde__m128i b) {
#if defined(SIMDE_SSSE3_NATIVE)
return SIMDE__M128I_C(_mm_sign_epi8(a.n, b.n));
return SIMDE__M128I_FROM_NATIVE(_mm_sign_epi8(a.n, b.n));
#else
simde__m128i r;
for (size_t i = 0 ; i < (sizeof(r.i8) / sizeof(r.i8[0])) ; i++) {
......@@ -626,7 +626,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_sign_epi16 (simde__m128i a, simde__m128i b) {
#if defined(SIMDE_SSSE3_NATIVE)
return SIMDE__M128I_C(_mm_sign_epi16(a.n, b.n));
return SIMDE__M128I_FROM_NATIVE(_mm_sign_epi16(a.n, b.n));
#else
simde__m128i r;
for (size_t i = 0 ; i < (sizeof(r.i16) / sizeof(r.i16[0])) ; i++) {
......@@ -643,7 +643,7 @@ SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_sign_epi32 (simde__m128i a, simde__m128i b) {
#if defined(SIMDE_SSSE3_NATIVE)
return SIMDE__M128I_C(_mm_sign_epi32(a.n, b.n));
return SIMDE__M128I_FROM_NATIVE(_mm_sign_epi32(a.n, b.n));
#else
simde__m128i r;
for (size_t i = 0 ; i < (sizeof(r.i32) / sizeof(r.i32[0])) ; i++) {
......
......@@ -68,11 +68,8 @@ set(TEST_SOURCES)
if(NOT DISABLE_X86)
set(TEST_SOURCES
x86/mmx/mmx.c
x86/mmx/compare.c
x86/sse/sse.c
x86/sse/compare.c
x86/sse2/sse2.c
x86/sse2/compare.c
x86/sse3/sse3.c
x86/ssse3/ssse3.c
x86/sse4.1/sse4.1.c
......
......@@ -43,15 +43,12 @@ int main(int argc, char* argv[MUNIT_ARRAY_PARAM(argc + 1)]) {
#if !defined(DISABLE_X86)
simde_mmx_test_suite,
simde_mmx_emul_test_suite,
simde_mmx_cmp_test_suite,
simde_sse_test_suite,
simde_sse_emul_test_suite,
simde_sse_cmp_test_suite,
simde_sse2_test_suite,
simde_sse2_emul_test_suite,
simde_sse2_cmp_test_suite,
simde_sse3_test_suite,
simde_sse3_emul_test_suite,
......
......@@ -133,6 +133,20 @@ simde_float32 random_f32_range(simde_float32 min, simde_float32 max);
} \
} while (0)
#define simde_assert_f64v_close(T, nmemb, a, b, precision) \
do { \
const T* simde_tmp_a_ = (a); \
const T* simde_tmp_b_ = (b); \
for (size_t simde_i_ = 0 ; simde_i_ < nmemb ; simde_i_++) { \
const T simde_tmp_diff_ = ((simde_tmp_a_[simde_i_] - simde_tmp_b_[simde_i_]) < 0) ? \
(simde_tmp_b_[simde_i_] - simde_tmp_a_[simde_i_]) : \
(simde_tmp_a_[simde_i_] - simde_tmp_b_[simde_i_]); \
if (MUNIT_UNLIKELY(simde_tmp_diff_ > precision)) { \
munit_errorf("assertion failed: (" #a ")[%" MUNIT_SIZE_MODIFIER "u] == (" #b ")[%" MUNIT_SIZE_MODIFIER "u] (%" #precision ".1f == %" #precision ".1f)", simde_i_, simde_i_, simde_tmp_a_[simde_i_], simde_tmp_b_[simde_i_]); \
} \
} \
} while (0)
/* These probably won't go into µnit; they're similar to the
simde_assert_*v macros above, but print in hex. */
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -68,7 +68,7 @@ test_simde_mm_addsub_pd(const MunitParameter params[], void* data) {
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
simde__m128d r = simde_mm_addsub_pd(test_vec[i].a, test_vec[i].b);
simde_assert_m128d_f64_equal(r, test_vec[i].r, 1);
simde_assert_m128d_close(r, test_vec[i].r, 1);
}
return MUNIT_OK;
......@@ -112,7 +112,7 @@ test_simde_mm_addsub_ps(const MunitParameter params[], void* data) {
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
simde__m128 r = simde_mm_addsub_ps(test_vec[i].a, test_vec[i].b);
simde_assert_m128_f32_equal(r, test_vec[i].r, 1);
simde_assert_m128_close(r, test_vec[i].r, 1);
}
return MUNIT_OK;
......@@ -156,7 +156,7 @@ test_simde_mm_hadd_pd(const MunitParameter params[], void* data) {
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
simde__m128d r = simde_mm_hadd_pd(test_vec[i].a, test_vec[i].b);
simde_assert_m128d_f64_equal(r, test_vec[i].r, 1);
simde_assert_m128d_close(r, test_vec[i].r, 1);
}
return MUNIT_OK;
......@@ -200,7 +200,7 @@ test_simde_mm_hadd_ps(const MunitParameter params[], void* data) {
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
simde__m128 r = simde_mm_hadd_ps(test_vec[i].a, test_vec[i].b);
simde_assert_m128_f32_equal(r, test_vec[i].r, 1);
simde_assert_m128_close(r, test_vec[i].r, 1);
}
return MUNIT_OK;
......@@ -244,7 +244,7 @@ test_simde_mm_hsub_pd(const MunitParameter params[], void* data) {
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
simde__m128d r = simde_mm_hsub_pd(test_vec[i].a, test_vec[i].b);
simde_assert_m128d_f64_equal(r, test_vec[i].r, 1);
simde_assert_m128d_close(r, test_vec[i].r, 1);
}
return MUNIT_OK;
......@@ -288,7 +288,7 @@ test_simde_mm_hsub_ps(const MunitParameter params[], void* data) {
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
simde__m128 r = simde_mm_hsub_ps(test_vec[i].a, test_vec[i].b);
simde_assert_m128_f32_equal(r, test_vec[i].r, 1);
simde_assert_m128_close(r, test_vec[i].r, 1);
}
return MUNIT_OK;
......@@ -374,7 +374,7 @@ test_simde_mm_movedup_pd(const MunitParameter params[], void* data) {
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
simde__m128d r = simde_mm_movedup_pd(test_vec[i].a);
simde_assert_m128d_f64_equal(r, test_vec[i].r, 1);
simde_assert_m128d_close(r, test_vec[i].r, 1);
}
return MUNIT_OK;
......@@ -409,7 +409,7 @@ test_simde_mm_movehdup_ps(const MunitParameter params[], void* data) {
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
simde__m128 r = simde_mm_movehdup_ps(test_vec[i].a);
simde_assert_m128_f32_equal(r, test_vec[i].r, 1);
simde_assert_m128_close(r, test_vec[i].r, 1);
}
return MUNIT_OK;
......@@ -444,7 +444,7 @@ test_simde_mm_moveldup_ps(const MunitParameter params[], void* data) {
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
simde__m128 r = simde_mm_moveldup_ps(test_vec[i].a);
simde_assert_m128_f32_equal(r, test_vec[i].r, 1);
simde_assert_m128_close(r, test_vec[i].r, 1);
}
return MUNIT_OK;
......
......@@ -265,7 +265,7 @@ test_simde_mm_blend_ps(const MunitParameter params[], void* data) {
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
simde__m128 r = simde_mm_blend_ps(test_vec[i].a, test_vec[i].b, 2);
simde_assert_m128_f32_equal(r, test_vec[i].r, 1);
simde_assert_m128_close(r, test_vec[i].r, 1);
}
return MUNIT_OK;
......@@ -371,7 +371,7 @@ test_simde_mm_blendv_ps(const MunitParameter params[], void* data) {
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
simde__m128 r = simde_mm_blendv_ps(test_vec[i].a, test_vec[i].b, *((simde__m128*) &(test_vec[i].mask)));
simde_assert_m128_f32_equal(r, test_vec[i].r, 1);
simde_assert_m128_close(r, test_vec[i].r, 1);
}
return MUNIT_OK;
......@@ -441,7 +441,7 @@ test_simde_mm_ceil_ps(const MunitParameter params[], void* data) {
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
simde__m128 r = simde_mm_ceil_ps(test_vec[i].a);
simde_assert_m128_f32_equal(r, test_vec[i].r, 1);
simde_assert_m128_close(r, test_vec[i].r, 1);
}
return MUNIT_OK;
......@@ -529,7 +529,7 @@ test_simde_mm_ceil_ss(const MunitParameter params[], void* data) {
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
simde__m128 r = simde_mm_ceil_ss(test_vec[i].a, test_vec[i].b);
simde_assert_m128_f32_equal(r, test_vec[i].r, 1);
simde_assert_m128_close(r, test_vec[i].r, 1);
}
return MUNIT_OK;
......@@ -1249,7 +1249,7 @@ test_simde_mm_dp_ps(const MunitParameter params[], void* data) {
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
simde__m128 r = simde_mm_dp_ps(test_vec[i].a, test_vec[i].b, 42);
simde_assert_m128_f32_equal(r, test_vec[i].r, 1);
simde_assert_m128_close(r, test_vec[i].r, 1);
}
return MUNIT_OK;
......@@ -1448,7 +1448,7 @@ test_simde_mm_floor_ps(const MunitParameter params[], void* data) {
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
simde__m128 r = simde_mm_floor_ps(test_vec[i].a);
simde_assert_m128_f32_equal(r, test_vec[i].r, 1);
simde_assert_m128_close(r, test_vec[i].r, 1);
}
return MUNIT_OK;
......@@ -1536,7 +1536,7 @@ test_simde_mm_floor_ss(const MunitParameter params[], void* data) {
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
simde__m128 r = simde_mm_floor_ss(test_vec[i].a, test_vec[i].b);
simde_assert_m128_f32_equal(r, test_vec[i].r, 1);
simde_assert_m128_close(r, test_vec[i].r, 1);
}
return MUNIT_OK;
......@@ -1760,7 +1760,7 @@ test_simde_mm_insert_ps(const MunitParameter params[], void* data) {
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
simde__m128 r = simde_mm_insert_ps(test_vec[i].a, test_vec[i].b, 3);
simde_assert_m128_f32_equal(r, test_vec[i].r, 1);
simde_assert_m128_close(r, test_vec[i].r, 1);
}
return MUNIT_OK;
......@@ -2599,19 +2599,19 @@ test_simde_mm_round_ps(const MunitParameter params[], void* data) {
for (size_t i = 0 ; i < 2 ; i++) {
simde__m128 r = simde_mm_round_ps(test_vec[i].a, SIMDE_MM_FROUND_TO_NEAREST_INT);
simde_assert_m128_f32_equal(r, test_vec[i].r, 1);
simde_assert_m128_close(r, test_vec[i].r, 1);
}
for (size_t i = 2 ; i < 4 ; i++) {
simde__m128 r = simde_mm_round_ps(test_vec[i].a, SIMDE_MM_FROUND_TO_NEG_INF);
simde_assert_m128_f32_equal(r, test_vec[i].r, 1);
simde_assert_m128_close(r, test_vec[i].r, 1);
}
for (size_t i = 4 ; i < 6 ; i++) {
simde__m128 r = simde_mm_round_ps(test_vec[i].a, SIMDE_MM_FROUND_TO_POS_INF);
simde_assert_m128_f32_equal(r, test_vec[i].r, 1);
simde_assert_m128_close(r, test_vec[i].r, 1);
}
for (size_t i = 6 ; i < 8 ; i++) {
simde__m128 r = simde_mm_round_ps(test_vec[i].a, SIMDE_MM_FROUND_TO_ZERO);
simde_assert_m128_f32_equal(r, test_vec[i].r, 1);
simde_assert_m128_close(r, test_vec[i].r, 1);
}
return MUNIT_OK;
......@@ -2729,19 +2729,19 @@ test_simde_mm_round_ss(const MunitParameter params[], void* data) {
for (size_t i = 0 ; i < 2 ; i++) {
simde__m128 r = simde_mm_round_ss(test_vec[i].a, test_vec[i].b, SIMDE_MM_FROUND_TO_NEAREST_INT);
simde_assert_m128_f32_equal(r, test_vec[i].r, 1);
simde_assert_m128_close(r, test_vec[i].r, 1);
}
for (size_t i = 2 ; i < 4 ; i++) {
simde__m128 r = simde_mm_round_ss(test_vec[i].a, test_vec[i].b, SIMDE_MM_FROUND_TO_NEG_INF);
simde_assert_m128_f32_equal(r, test_vec[i].r, 1);
simde_assert_m128_close(r, test_vec[i].r, 1);
}
for (size_t i = 4 ; i < 6 ; i++) {
simde__m128 r = simde_mm_round_ss(test_vec[i].a, test_vec[i].b, SIMDE_MM_FROUND_TO_POS_INF);
simde_assert_m128_f32_equal(r, test_vec[i].r, 1);
simde_assert_m128_close(r, test_vec[i].r, 1);
}
for (size_t i = 6 ; i < 8 ; i++) {
simde__m128 r = simde_mm_round_ss(test_vec[i].a, test_vec[i].b, SIMDE_MM_FROUND_TO_ZERO);
simde_assert_m128_f32_equal(r, test_vec[i].r, 1);
simde_assert_m128_close(r, test_vec[i].r, 1);
}
return MUNIT_OK;
......
......@@ -32,15 +32,12 @@
MunitSuite simde_mmx_test_suite;
MunitSuite simde_mmx_emul_test_suite;
MunitSuite simde_mmx_cmp_test_suite;
MunitSuite simde_sse_test_suite;
MunitSuite simde_sse_emul_test_suite;
MunitSuite simde_sse_cmp_test_suite;
MunitSuite simde_sse2_test_suite;
MunitSuite simde_sse2_emul_test_suite;
MunitSuite simde_sse2_cmp_test_suite;
MunitSuite simde_sse3_test_suite;
MunitSuite simde_sse3_emul_test_suite;
......@@ -60,22 +57,7 @@ MunitSuite simde_avx_emul_test_suite;
MunitSuite simde_avx2_test_suite;
MunitSuite simde_avx2_emul_test_suite;
#define simde_assert_m64_i8(a, op, b) \
simde_assert_typev(int8_t, PRId8, 8, (int8_t*) &(a), op, (int8_t*) &(b))
#define simde_assert_m64_u8(a, op, b) \
simde_assert_typev(uint8_t, PRIu8, 8, (uint8_t*) &(a), op, (uint8_t*) &(b))
#define simde_assert_m64_i16(a, op, b) \
simde_assert_int16vx(4, (int16_t*) &(a), op, (int16_t*) &(b))
#define simde_assert_m64_u16(a, op, b) \
simde_assert_uint16vx(4, (uint16_t*) &(a), op, (uint16_t*) &(b))
#define simde_assert_m64_i32(a, op, b) \
simde_assert_int32vx(2, (int32_t*) &(a), op, (int32_t*) &(b))
#define simde_assert_m64_u32(a, op, b) \
simde_assert_uint32vx(2, (uint32_t*) &(a), op, (uint32_t*) &(b))
#define simde_assert_m64_i64(a, op, b) \
simde_assert_int64vx(1, (int64_t*) &(a), op, (int64_t*) &(b))
#define simde_assert_m64_u64(a, op, b) \
simde_assert_uint64vx(1, (uint64_t*) &(a), op, (uint64_t*) &(b))
#define simde_assert_m64_f32(a, op, b) \
simde_assert_typev(simde_float32, "f", 2, (simde_float32*) &(a), op, (simde_float32*) &(b))
#define simde_assert_m64_f64(a, op, b) \
......@@ -101,40 +83,22 @@ MunitSuite simde_avx2_emul_test_suite;
simde_assert_uint64vx(2, (uint64_t*) &(a), op, (uint64_t*) &(b))
#define simde_assert_m128_f32(a, op, b) \
simde_assert_typev(simde_float32, "f", 4, (simde_float32*) &(a), op, (simde_float32*) &(b))
#define simde_assert_m128_f32_equal(a, b, precision) \
simde_assert_f32v_equal(simde_float32, 4, (simde_float32*) &(a), (simde_float32*) &(b), precision)
#define simde_assert_m128_f32_close(a, b, precision) \
simde_assert_f32v_close(simde_float32, 4, (simde_float32*) &(a), (simde_float32*) &(b), precision)
#define simde_assert_m128_f64(a, op, b) \
simde_assert_typev(simde_float64, "f", 2, (simde_float64*) &(a), op, (simde_float64*) &(b))
#define simde_assert_m128_f64_equal(a, b, precision) \
simde_assert_f32v_equal(simde_float64, 2, (simde_float64*) &(a), (simde_float64*) &(b), precision)
#define simde_assert_m128_f64_close(a, b, precision) \
simde_assert_f32v_close(simde_float64, 2, (simde_float64*) &(a), (simde_float64*) &(b), precision)
#define simde_assert_m128i_i8(a, op, b) \
simde_assert_typev(int8_t, PRId8, 16, (int8_t*) &(a), op, (int8_t*) &(b))
#define simde_assert_m128i_u8(a, op, b) \
simde_assert_typev(uint8_t, PRIu8, 16, (uint8_t*) &(a), op, (uint8_t*) &(b))
#define simde_assert_m128i_i16(a, op, b) \
simde_assert_typev(int16_t, PRId16, 8, (int16_t*) &(a), op, (int16_t*) &(b))
#define simde_assert_m128i_u16(a, op, b) \
simde_assert_typev(uint16_t, PRIu16, 8, (uint16_t*) &(a), op, (uint16_t*) &(b))
#define simde_assert_m128i_i32(a, op, b) \
simde_assert_typev(int32_t, PRId32, 4, (int32_t*) &(a), op, (int32_t*) &(b))
#define simde_assert_m128i_u32(a, op, b) \
simde_assert_typev(uint32_t, PRIu32, 4, (uint32_t*) &(a), op, (uint32_t*) &(b))
#define simde_assert_m128i_i64(a, op, b) \
simde_assert_typev(int64_t, PRId64, 2, (int64_t*) &(a), op, (int64_t*) &(b))
#define simde_assert_m128i_u64(a, op, b) \
simde_assert_typev(uint64_t, PRIu64, 2, (uint64_t*) &(a), op, (uint64_t*) &(b))
// #define simde_assert_m128i_u8(a, op, b) \
// simde_assert_typev(uint8_t, PRIu8, 16, (uint8_t*) &(a), op, (uint8_t*) &(b))
// #define simde_assert_m128i_u16(a, op, b) \
// simde_assert_typev(uint16_t, PRIu16, 8, (uint16_t*) &(a), op, (uint16_t*) &(b))
// #define simde_assert_m128i_u32(a, op, b) \
// simde_assert_typev(uint32_t, PRIu32, 4, (uint32_t*) &(a), op, (uint32_t*) &(b))
// #define simde_assert_m128i_u64(a, op, b) \
// simde_assert_typev(uint64_t, PRIu64, 2, (uint64_t*) &(a), op, (uint64_t*) &(b))
#define simde_assert_m128d_f32(a, op, b) \
simde_assert_typev(simde_float32, "f", 4, (simde_float32*) &(a), op, (simde_float32*) &(b))
#define simde_assert_m128d_f32_equal(a, b, precision) \
simde_assert_f32v_equal(simde_float32, 4, (simde_float32*) &(a), (simde_float32*) &(b), precision)
#define simde_assert_m128d_f64(a, op, b) \
simde_assert_typev(simde_float64, "f", 2, (simde_float64*) &(a), op, (simde_float64*) &(b))
#define simde_assert_m128d_f64_equal(a, b, precision) \
simde_assert_f32v_equal(simde_float64, 2, (simde_float64*) &(a), (simde_float64*) &(b), precision)
#define simde_assert_m128d_f64_close(a, b, precision) \
......@@ -169,4 +133,98 @@ MunitSuite simde_avx2_emul_test_suite;
#define simde_assert_m256_f32_equal(a, b, precision) \
simde_assert_f32v_equal(simde_float32, 8, (simde_float32*) &(a), (simde_float32*) &(b), precision)
/* Assert that two vectors are bit-identical without worring about the
underlying type. */
#define simde_assert_vec_equal(a, b, T) { \
for (int simde_i_ = 0 ; simde_i_ < (int) (sizeof(a.u32f) / sizeof(a.u32f[0])) ; simde_i_++) { \
T simde_a_ = (a), simde_b_ = (b); \
if (HEDLEY_UNLIKELY(simde_a_.u32f[simde_i_] != simde_b_.u32f[simde_i_])) { \
munit_errorf("assertion failed: " #a ".u32f[%d] (%" PRIxFAST32 ") != " #b ".u32f[%d] (%" PRIxFAST32 ")", \
simde_i_, simde_a_.u32f[simde_i_], \
simde_i_, simde_b_.u32f[simde_i_]); \
} \
} \
}
#define simde_assert_m64_equal(a, b) \
simde_assert_vec_equal(a, b, simde__m64)
#define simde_assert_m128_equal(a, b) \
simde_assert_vec_equal(a, b, simde__m128)
#define simde_assert_m128i_equal(a, b) \
simde_assert_vec_equal(a, b, simde__m128i)
#define simde_assert_m128d_equal(a, b) \
simde_assert_vec_equal(a, b, simde__m128d)
#define simde_assert_m256_equal(a, b) \
simde_assert_vec_equal(a, b, simde__m256)
#define simde_assert_m256i_equal(a, b) \
simde_assert_vec_equal(a, b, simde__m256i)
#define simde_assert_m256d_equal(a, b) \
simde_assert_vec_equal(a, b, simde__m256d)
/* Assert that every integer in two vectors are equal */
#define simde_assert_vec_i(a, op, b, T, accessor, fmt) { \
const T simde_a_ = (a), simde_b_ = (b); \
for (int simde_i_ = 0 ; simde_i_ < (int) (sizeof(a.accessor) / sizeof(a.accessor[0])) ; simde_i_++) { \
if (HEDLEY_UNLIKELY(!(simde_a_.accessor[simde_i_] op simde_b_.accessor[simde_i_]))) { \
munit_errorf("assertion failed: " #a "." #accessor "[%d] " #op " " #b "." #accessor "[%d] (%" fmt " " #op " %" fmt ")", \
simde_i_, simde_i_, simde_a_.accessor[simde_i_], simde_b_.accessor[simde_i_]); \
} \
} \
}
#define simde_assert_m128i_i8(a, op, b) \
simde_assert_vec_i(a, op, b, simde__m128i, i8, PRId8)
#define simde_assert_m128i_i16(a, op, b) \
simde_assert_vec_i(a, op, b, simde__m128i, i16, PRId16)
#define simde_assert_m128i_i32(a, op, b) \
simde_assert_vec_i(a, op, b, simde__m128i, i32, PRId32)
#define simde_assert_m128i_i64(a, op, b) \
simde_assert_vec_i(a, op, b, simde__m128i, i64, PRId64)
#define simde_assert_m128i_u8(a, op, b) \
simde_assert_vec_i(a, op, b, simde__m128i, u8, PRIu8)
#define simde_assert_m128i_u16(a, op, b) \
simde_assert_vec_i(a, op, b, simde__m128i, u16, PRIu16)
#define simde_assert_m128i_u32(a, op, b) \
simde_assert_vec_i(a, op, b, simde__m128i, u32, PRIu32)
#define simde_assert_m128i_u64(a, op, b) \
simde_assert_vec_i(a, op, b, simde__m128i, u64, PRIu64)
#define simde_assert_m64_i8(a, op, b) \
simde_assert_vec_i(a, op, b, simde__m64, i8, PRId8)
#define simde_assert_m64_i16(a, op, b) \
simde_assert_vec_i(a, op, b, simde__m64, i16, PRId16)
#define simde_assert_m64_i32(a, op, b) \
simde_assert_vec_i(a, op, b, simde__m64, i32, PRId32)
#define simde_assert_m64_i64(a, op, b) \
simde_assert_vec_i(a, op, b, simde__m64, i64, PRId64)
#define simde_assert_m64_u8(a, op, b) \
simde_assert_vec_i(a, op, b, simde__m64, u8, PRIu8)
#define simde_assert_m64_u16(a, op, b) \
simde_assert_vec_i(a, op, b, simde__m64, u16, PRIu16)
#define simde_assert_m64_u32(a, op, b) \
simde_assert_vec_i(a, op, b, simde__m64, u32, PRIu32)
#define simde_assert_m64_u64(a, op, b) \
simde_assert_vec_i(a, op, b, simde__m64, u64, PRIu64)
#if !defined(HEDLEY_PGI_VERSION)
# define SIMDE_ALMOST_EQUAL_TO "≈"
#else
# define SIMDE_ALMOST_EQUAL_TO "~=~"
#endif
/* Assert that the floating point values in each vector are approximately equal. */
#define simde_assert_vec_close(a, b, precision, T, accessor) { \
const T simde_a_ = (a), simde_b_ = (b); \
for (int simde_i_ = 0 ; simde_i_ < (int) (sizeof(a.accessor) / sizeof(a.accessor[0])) ; simde_i_++) { \
if (HEDLEY_UNLIKELY(!((simde_a_.accessor[simde_i_] < (simde_b_.accessor[simde_i_] + 1e-##precision)) && (simde_a_.accessor[simde_i_] > (simde_b_.accessor[simde_i_] - 1e-##precision))))) { \
munit_errorf("assertion failed: " #a "." #accessor "[%d] " SIMDE_ALMOST_EQUAL_TO " " #b "." #accessor "[%d] (%." #precision "f" " " SIMDE_ALMOST_EQUAL_TO " %." #precision "f)", \
simde_i_, simde_i_, simde_a_.accessor[simde_i_], simde_b_.accessor[simde_i_]); \
} \
} \
}
#define simde_assert_m128_close(a, b, precision) \
simde_assert_vec_close(a, b, precision, simde__m128, f32)
#define simde_assert_m128d_close(a, b, precision) \
simde_assert_vec_close(a, b, precision, simde__m128d, f64)
#endif