Skip to content
Commits on Source (10)
#!/bin/bash
# Run-time SIMD dispatcher.
#
# Several builds of the same program are installed next to each other as
#   <prefix>/lib/last-align/<name>-<simd-level>
# and this script, installed under <name>, runs the most capable build
# whose SIMD level is advertised by the CPU in /proc/cpuinfo.  When no
# SIMD build matches, the "-plain" build is used.

PKG=last-align
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
BASE=$(realpath "${DIR}/../lib/${PKG}/$(basename "$0")")

#######################################
# Run the build for one SIMD level if the CPU supports it.
# Globals:   BASE (read)
# Arguments: $1 - SIMD level, used as the binary suffix (e.g. avx2)
#            remaining arguments are passed to the selected binary
# Returns:   does not return when a build was run (the script exits with
#            that build's status); returns 0 otherwise.
#######################################
test_and_run() {
  local simd=$1
  local flag=${simd}
  local cmd="${BASE}-${simd}"
  shift

  # /proc/cpuinfo does not always use the marketing name of the extension.
  case "${simd}" in
    sse3) flag=pni ;;
    sse4.1) flag=sse4_1 ;;
  esac

  # -w: whole-word match, so "sse" does not match "sse2"/"ssse3" and
  # "avx" does not match "avx2".  stderr is silenced on purpose: without a
  # readable /proc/cpuinfo we simply fall through to the plain build.
  if [[ -x "${cmd}" ]] && grep -qw -- "${flag}" /proc/cpuinfo 2>/dev/null; then
    # echo "${cmd}" "$@"
    "${cmd}" "$@"
    exit
  fi
}

# Ordered from most to least capable; the first match wins.
for SIMD in avx2 avx sse4.1 ssse3 sse3 sse2 sse mmx; do
  test_and_run "${SIMD}" "$@"
done

# fallback to plain option
"${BASE}-plain" "$@"
last-align (1044-2) UNRELEASED; urgency=medium
* Team upload.
* Enable building on any architecture via simde. (Closes: #946850)
* For any-amd64 and any-i386: compile multiple times with reducing levels of
SIMD instruction support and ship a dispatching script to pick the best
one for the user at run-time.
-- Michael R. Crusoe <michael.crusoe@gmail.com> Sun, 22 Dec 2019 20:19:58 -0700
last-align (1044-1) unstable; urgency=medium
* Team upload.
......
/* Check (assertions)
* Portable Snippets - https://github.com/nemequ/portable-snippets
* Created by Evan Nemerson <evan@nemerson.com>
*
* To the extent possible under law, the authors have waived all
* copyright and related or neighboring rights to this code. For
* details, see the Creative Commons Zero 1.0 Universal license at
* https://creativecommons.org/publicdomain/zero/1.0/
*/
#if !defined(SIMDE_CHECK_H)
#define SIMDE_CHECK_H
/* Unless the includer has defined SIMDE_DEBUG (or SIMDE_NDEBUG itself),
 * default to SIMDE_NDEBUG, which selects the non-reporting assertion
 * variants further down. */
#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG)
# define SIMDE_NDEBUG 1
#endif
#include <stdint.h>
/* printf length modifiers used when building the assertion messages.
 * Outside Windows these are "z" (size_t), "hh" (char) and "h" (short).
 * On Windows the char/short modifiers are empty and the size modifier is
 * "I64" on 64-bit targets, empty otherwise. */
#if !defined(_WIN32)
# define SIMDE_SIZE_MODIFIER "z"
# define SIMDE_CHAR_MODIFIER "hh"
# define SIMDE_SHORT_MODIFIER "h"
#else
# if defined(_M_X64) || defined(__amd64__)
# define SIMDE_SIZE_MODIFIER "I64"
# else
# define SIMDE_SIZE_MODIFIER ""
# endif
# define SIMDE_CHAR_MODIFIER ""
# define SIMDE_SHORT_MODIFIER ""
#endif
/* Push/pop pair that disables MSVC warning 4127 around the `while (0)` of
 * the assertion macros (presumably the "conditional expression is
 * constant" diagnostic -- confirm against the MSVC documentation).
 * Expands to nothing on other compilers. */
#if defined(_MSC_VER) && (_MSC_VER >= 1500)
# define SIMDE__PUSH_DISABLE_MSVC_C4127 __pragma(warning(push)) __pragma(warning(disable:4127))
# define SIMDE__POP_DISABLE_MSVC_C4127 __pragma(warning(pop))
#else
# define SIMDE__PUSH_DISABLE_MSVC_C4127
# define SIMDE__POP_DISABLE_MSVC_C4127
#endif
/* Failure handler.  The default prints the formatted message to stderr
 * and aborts; define simde_errorf before including this header to
 * substitute another handler.  At least one argument after `format` is
 * needed because __VA_ARGS__ is used without a fallback. */
#if !defined(simde_errorf)
# include <stdio.h>
# include <stdlib.h>
# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort())
#endif
/* Convenience form of simde_errorf for a fixed message string. */
#define simde_error(msg) simde_errorf("%s", msg)
#if defined(SIMDE_NDEBUG)
/* SIMDE_NDEBUG variants: nothing is reported at run time.
 *
 * simde_assert() is either removed entirely (SIMDE_CHECK_FAIL_DEFINED, or
 * no suitable compiler support) or turned into an assumption for the
 * compiler: HEDLEY_ASSUME when available, otherwise
 * __builtin_unreachable() on GCC >= 4.5 or __assume() on MSVC.
 *
 * NOTE(review): the HEDLEY_* macros are expected to be provided by
 * hedley.h; no include of it is visible in this header, so it presumably
 * has to be included first. */
# if defined(SIMDE_CHECK_FAIL_DEFINED)
# define simde_assert(expr)
# else
# if defined(HEDLEY_ASSUME)
# define simde_assert(expr) HEDLEY_ASSUME(expr)
# elif HEDLEY_GCC_VERSION_CHECK(4,5,0)
# define simde_assert(expr) ((void) (!!(expr) ? 1 : (__builtin_unreachable(), 1)))
# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0)
# define simde_assert(expr) __assume(expr)
# else
# define simde_assert(expr)
# endif
# endif
/* Boolean and typed comparisons are forwarded to simde_assert(). */
# define simde_assert_true(expr) simde_assert(expr)
# define simde_assert_false(expr) simde_assert(!(expr))
# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b)))
/* The floating-point, string and memory assertions expand to nothing, so
 * their arguments are not evaluated in this mode. */
# define simde_assert_double_equal(a, b, precision)
# define simde_assert_string_equal(a, b)
# define simde_assert_string_not_equal(a, b)
# define simde_assert_memory_equal(size, a, b)
# define simde_assert_memory_not_equal(size, a, b)
#else
# define simde_assert(expr) \
do { \
if (!HEDLEY_LIKELY(expr)) { \
simde_error("assertion failed: " #expr "\n"); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) \
SIMDE__POP_DISABLE_MSVC_C4127
# define simde_assert_true(expr) \
do { \
if (!HEDLEY_LIKELY(expr)) { \
simde_error("assertion failed: " #expr " is not true\n"); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) \
SIMDE__POP_DISABLE_MSVC_C4127
# define simde_assert_false(expr) \
do { \
if (!HEDLEY_LIKELY(!(expr))) { \
simde_error("assertion failed: " #expr " is not false\n"); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) \
SIMDE__POP_DISABLE_MSVC_C4127
# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \
do { \
T simde_tmp_a_ = (a); \
T simde_tmp_b_ = (b); \
if (!(simde_tmp_a_ op simde_tmp_b_)) { \
simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \
#a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) \
SIMDE__POP_DISABLE_MSVC_C4127
# define simde_assert_double_equal(a, b, precision) \
do { \
const double simde_tmp_a_ = (a); \
const double simde_tmp_b_ = (b); \
const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \
-(simde_tmp_a_ - simde_tmp_b_) : \
(simde_tmp_a_ - simde_tmp_b_); \
if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \
simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." #precision "g)\n", \
#a, #b, simde_tmp_a_, simde_tmp_b_); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) \
SIMDE__POP_DISABLE_MSVC_C4127
# include <string.h>
# define simde_assert_string_equal(a, b) \
do { \
const char* simde_tmp_a_ = a; \
const char* simde_tmp_b_ = b; \
if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \
simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \
#a, #b, simde_tmp_a_, simde_tmp_b_); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) \
SIMDE__POP_DISABLE_MSVC_C4127
# define simde_assert_string_not_equal(a, b) \
do { \
const char* simde_tmp_a_ = a; \
const char* simde_tmp_b_ = b; \
if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \
simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \
#a, #b, simde_tmp_a_, simde_tmp_b_); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) \
SIMDE__POP_DISABLE_MSVC_C4127
/* Fails when the first `size` bytes at `a` and `b` differ; the message
 * reports the offset of the first differing byte.  The comparison is
 * placed inside HEDLEY_UNLIKELY() so the hint applies to the condition
 * that is actually tested. */
# define simde_assert_memory_equal(size, a, b) \
  do { \
    const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \
    const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \
    const size_t simde_tmp_size_ = (size); \
    if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_) != 0)) { \
      size_t simde_tmp_pos_; \
      for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \
        if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \
          simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \
            #a, #b, simde_tmp_pos_); \
          break; \
        } \
      } \
    } \
    SIMDE__PUSH_DISABLE_MSVC_C4127 \
  } while (0) \
  SIMDE__POP_DISABLE_MSVC_C4127
/* Fails when the first `size` bytes at `a` and `b` are identical.  As
 * above, the `== 0` test sits inside HEDLEY_UNLIKELY() so the failure
 * path, not the success path, is the one marked unlikely. */
# define simde_assert_memory_not_equal(size, a, b) \
  do { \
    const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \
    const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \
    const size_t simde_tmp_size_ = (size); \
    if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_) == 0)) { \
      simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \
        #a, #b, simde_tmp_size_); \
    } \
    SIMDE__PUSH_DISABLE_MSVC_C4127 \
  } while (0) \
  SIMDE__POP_DISABLE_MSVC_C4127
#endif
/* The fixed-width assertions below pass PRIi8 ... PRIu64 as printf
 * conversions.  Those macros are declared by <inttypes.h>, not by
 * <stdint.h>, so pull the header in here to keep the reporting build
 * self-contained. */
#include <inttypes.h>

/* Typed comparison without any decoration around the printed values:
 * simde_assert_type(T, fmt, a, op, b) checks `a op b` after converting
 * both sides to T and prints them with "%<fmt>" on failure. */
#define simde_assert_type(T, fmt, a, op, b) \
simde_assert_type_full("", "", T, fmt, a, op, b)

/* Character types are printed as '\xNN'. */
#define simde_assert_char(a, op, b) \
simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b)
#define simde_assert_uchar(a, op, b) \
simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b)

/* Standard integer types. */
#define simde_assert_short(a, op, b) \
simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b)
#define simde_assert_ushort(a, op, b) \
simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b)
#define simde_assert_int(a, op, b) \
simde_assert_type(int, "d", a, op, b)
#define simde_assert_uint(a, op, b) \
simde_assert_type(unsigned int, "u", a, op, b)
#define simde_assert_long(a, op, b) \
simde_assert_type(long int, "ld", a, op, b)
#define simde_assert_ulong(a, op, b) \
simde_assert_type(unsigned long int, "lu", a, op, b)
#define simde_assert_llong(a, op, b) \
simde_assert_type(long long int, "lld", a, op, b)
#define simde_assert_ullong(a, op, b) \
simde_assert_type(unsigned long long int, "llu", a, op, b)
#define simde_assert_size(a, op, b) \
simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b)

/* Floating-point and pointer types.  These use the plain `op`
 * comparison; see simde_assert_double_equal for a tolerance check. */
#define simde_assert_float(a, op, b) \
simde_assert_type(float, "f", a, op, b)
#define simde_assert_double(a, op, b) \
simde_assert_type(double, "g", a, op, b)
#define simde_assert_ptr(a, op, b) \
simde_assert_type(const void*, "p", a, op, b)

/* Fixed-width integer types. */
#define simde_assert_int8(a, op, b) \
simde_assert_type(int8_t, PRIi8, a, op, b)
#define simde_assert_uint8(a, op, b) \
simde_assert_type(uint8_t, PRIu8, a, op, b)
#define simde_assert_int16(a, op, b) \
simde_assert_type(int16_t, PRIi16, a, op, b)
#define simde_assert_uint16(a, op, b) \
simde_assert_type(uint16_t, PRIu16, a, op, b)
#define simde_assert_int32(a, op, b) \
simde_assert_type(int32_t, PRIi32, a, op, b)
#define simde_assert_uint32(a, op, b) \
simde_assert_type(uint32_t, PRIu32, a, op, b)
#define simde_assert_int64(a, op, b) \
simde_assert_type(int64_t, PRIi64, a, op, b)
#define simde_assert_uint64(a, op, b) \
simde_assert_type(uint64_t, PRIu64, a, op, b)

/* Pointer shorthands.  The *_null and *_ptr_null spellings are
 * equivalent; both compare against NULL. */
#define simde_assert_ptr_equal(a, b) \
simde_assert_ptr(a, ==, b)
#define simde_assert_ptr_not_equal(a, b) \
simde_assert_ptr(a, !=, b)
#define simde_assert_null(ptr) \
simde_assert_ptr(ptr, ==, NULL)
#define simde_assert_not_null(ptr) \
simde_assert_ptr(ptr, !=, NULL)
#define simde_assert_ptr_null(ptr) \
simde_assert_ptr(ptr, ==, NULL)
#define simde_assert_ptr_not_null(ptr) \
simde_assert_ptr(ptr, !=, NULL)
#endif /* !defined(SIMDE_CHECK_H) */
This diff is collapsed.
/* Architecture detection
* Created by Evan Nemerson <evan@nemerson.com>
*
* To the extent possible under law, the authors have waived all
* copyright and related or neighboring rights to this code. For
* details, see the Creative Commons Zero 1.0 Universal license at
* <https://creativecommons.org/publicdomain/zero/1.0/>
*
* Different compilers define different preprocessor macros for the
* same architecture. This is an attempt to provide a single
* interface which is usable on any compiler.
*
* In general, a macro named SIMDE_ARCH_* is defined for each
* architecture the CPU supports. When there are multiple possible
* versions, we try to define the macro to the target version. For
* example, if you want to check for i586+, you could do something
* like:
*
* #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5)
* ...
* #endif
*
* You could also just check that SIMDE_ARCH_X86 >= 5 without checking
* if it's defined first, but some compilers may emit a warning about
* an undefined macro being used (e.g., GCC with -Wundef).
*
* This was originally created for SIMDe
* <https://github.com/nemequ/simde> (hence the prefix), but this
* header has no dependencies and may be used anywhere. It is
* originally based on information from
* <https://sourceforge.net/p/predef/wiki/Architectures/>, though it
* has been enhanced with additional information.
*
* If you improve this file, or find a bug, please file the issue at
* <https://github.com/nemequ/simde/issues>. If you copy this into
* your project, even if you change the prefix, please keep the links
* to SIMDe intact so others know where to report issues, submit
* enhancements, and find the latest version. */
#if !defined(SIMDE_ARCH_H)
#define SIMDE_ARCH_H
/* Alpha
<https://en.wikipedia.org/wiki/DEC_Alpha> */
/* SIMDE_ARCH_ALPHA is 6, 5 or 4 when the compiler advertises the matching
   __alpha_evN__ macro, and 1 when only a generic Alpha macro is present. */
#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA)
# if defined(__alpha_ev6__)
# define SIMDE_ARCH_ALPHA 6
# elif defined(__alpha_ev5__)
# define SIMDE_ARCH_ALPHA 5
# elif defined(__alpha_ev4__)
# define SIMDE_ARCH_ALPHA 4
# else
# define SIMDE_ARCH_ALPHA 1
# endif
#endif
/* Atmel AVR
<https://en.wikipedia.org/wiki/Atmel_AVR> */
/* SIMDE_ARCH_AVR forwards the compiler's own __AVR_ARCH__ value. */
#if defined(__AVR_ARCH__)
# define SIMDE_ARCH_AVR __AVR_ARCH__
#endif
/* AMD64 / x86_64
<https://en.wikipedia.org/wiki/X86-64> */
/* SIMDE_ARCH_AMD64 is 1 on any 64-bit x86 target.  _M_X64 and _M_AMD64
   are the MSVC spellings; the remaining macros are the GCC-style ones. */
#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
# define SIMDE_ARCH_AMD64 1
#endif
/* ARM
<https://en.wikipedia.org/wiki/ARM_architecture> */
/* SIMDE_ARCH_ARM encodes the architecture as (major version * 10) plus a
   small profile index when a profile-specific macro is defined (for
   example 71 for __ARM_ARCH_7A__, 82 for __ARM_ARCH_8A__).  Compilers
   that only expose a numeric version (__ARM_ARCH, _M_ARM) yield
   version * 10, and 1 means "ARM, version unknown".  The branches are
   ordered from most to least specific; _M_ARM is handled by its own
   branch and therefore is not repeated in the generic fallback. */
#if defined(__ARM_ARCH_8A__)
# define SIMDE_ARCH_ARM 82
#elif defined(__ARM_ARCH_8R__)
# define SIMDE_ARCH_ARM 81
#elif defined(__ARM_ARCH_8__)
# define SIMDE_ARCH_ARM 80
#elif defined(__ARM_ARCH_7S__)
# define SIMDE_ARCH_ARM 74
#elif defined(__ARM_ARCH_7M__)
# define SIMDE_ARCH_ARM 73
#elif defined(__ARM_ARCH_7R__)
# define SIMDE_ARCH_ARM 72
#elif defined(__ARM_ARCH_7A__)
# define SIMDE_ARCH_ARM 71
#elif defined(__ARM_ARCH_7__)
# define SIMDE_ARCH_ARM 70
#elif defined(__ARM_ARCH)
# define SIMDE_ARCH_ARM (__ARM_ARCH * 10)
#elif defined(_M_ARM)
# define SIMDE_ARCH_ARM (_M_ARM * 10)
#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM)
# define SIMDE_ARCH_ARM 1
#endif
/* AArch64
<https://en.wikipedia.org/wiki/ARM_architecture> */
/* Always 10 when defined; no finer version information is derived. */
#if defined(__aarch64__) || defined(_M_ARM64)
# define SIMDE_ARCH_AARCH64 10
#endif
/* Blackfin
<https://en.wikipedia.org/wiki/Blackfin> */
#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__)
# define SIMDE_ARCH_BLACKFIN 1
#endif
/* CRIS
<https://en.wikipedia.org/wiki/ETRAX_CRIS> */
/* Forwards the compiler-reported version when there is one, else 1. */
#if defined(__CRIS_arch_version)
# define SIMDE_ARCH_CRIS __CRIS_arch_version
#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__)
# define SIMDE_ARCH_CRIS 1
#endif
/* Convex
<https://en.wikipedia.org/wiki/Convex_Computer> */
/* The value mirrors the number in the matching __convex_cNN__ macro, or
   is 1 when only the generic __convex__ macro is defined. */
#if defined(__convex_c38__)
# define SIMDE_ARCH_CONVEX 38
#elif defined(__convex_c34__)
# define SIMDE_ARCH_CONVEX 34
#elif defined(__convex_c32__)
# define SIMDE_ARCH_CONVEX 32
#elif defined(__convex_c2__)
# define SIMDE_ARCH_CONVEX 2
#elif defined(__convex__)
# define SIMDE_ARCH_CONVEX 1
#endif
/* Adapteva Epiphany
<https://en.wikipedia.org/wiki/Adapteva_Epiphany> */
#if defined(__epiphany__)
# define SIMDE_ARCH_EPIPHANY 1
#endif
/* Fujitsu FR-V
<https://en.wikipedia.org/wiki/FR-V_(microprocessor)> */
#if defined(__frv__)
# define SIMDE_ARCH_FRV 1
#endif
/* H8/300
<https://en.wikipedia.org/wiki/H8_Family> */
/* Defined to 1, like the other version-less architectures in this file,
   so that the macro can be used in #if arithmetic as well as with
   defined(). */
#if defined(__H8300__)
# define SIMDE_ARCH_H8300 1
#endif
/* HP/PA / PA-RISC
<https://en.wikipedia.org/wiki/PA-RISC> */
/* 20, 11 and 10 correspond to the 2.0, 1.1 and 1.0 macros tested below;
   1 means a generic PA-RISC macro was found without a version. */
#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0)
# define SIMDE_ARCH_HPPA 20
#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1)
# define SIMDE_ARCH_HPPA 11
#elif defined(_PA_RISC1_0)
# define SIMDE_ARCH_HPPA 10
#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa)
# define SIMDE_ARCH_HPPA 1
#endif
/* x86
<https://en.wikipedia.org/wiki/X86> */
/* SIMDE_ARCH_X86 is the "N" of iN86: 3 for i386 up to 6 for i686.  The
   branches run from most to least specific.  _M_IX86 is divided by 100
   to land on the same scale (presumably MSVC reports 300..600 -- confirm
   against the MSVC predefined-macro documentation); __I86__ is forwarded
   unchanged.  Generic x86 macros fall back to 3. */
#if defined(_M_IX86)
# define SIMDE_ARCH_X86 (_M_IX86 / 100)
#elif defined(__I86__)
# define SIMDE_ARCH_X86 __I86__
#elif defined(i686) || defined(__i686) || defined(__i686__)
# define SIMDE_ARCH_X86 6
#elif defined(i586) || defined(__i586) || defined(__i586__)
# define SIMDE_ARCH_X86 5
#elif defined(i486) || defined(__i486) || defined(__i486__)
# define SIMDE_ARCH_X86 4
#elif defined(i386) || defined(__i386) || defined(__i386__)
# define SIMDE_ARCH_X86 3
#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__)
# define SIMDE_ARCH_X86 3
#endif
/* Itanium
<https://en.wikipedia.org/wiki/Itanium> */
#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__)
# define SIMDE_ARCH_IA64 1
#endif
/* Renesas M32R
<https://en.wikipedia.org/wiki/M32R> */
/* Defined to 1, like the other version-less architectures in this file,
   so that the macro can be used in #if arithmetic as well as with
   defined(). */
#if defined(__m32r__) || defined(__M32R__)
# define SIMDE_ARCH_M32R 1
#endif
/* Motorola 68000
<https://en.wikipedia.org/wiki/Motorola_68000> */
/* The value is the model number taken from the matching macro name,
   checked from the newest model down.  There is no generic fallback: if
   none of these macros is defined, SIMDE_ARCH_M68K stays undefined. */
#if defined(__mc68060__) || defined(__MC68060__)
# define SIMDE_ARCH_M68K 68060
#elif defined(__mc68040__) || defined(__MC68040__)
# define SIMDE_ARCH_M68K 68040
#elif defined(__mc68030__) || defined(__MC68030__)
# define SIMDE_ARCH_M68K 68030
#elif defined(__mc68020__) || defined(__MC68020__)
# define SIMDE_ARCH_M68K 68020
#elif defined(__mc68010__) || defined(__MC68010__)
# define SIMDE_ARCH_M68K 68010
#elif defined(__mc68000__) || defined(__MC68000__)
# define SIMDE_ARCH_M68K 68000
#endif
/* Xilinx MicroBlaze
<https://en.wikipedia.org/wiki/MicroBlaze> */
/* Defined to 1, like the other version-less architectures in this file,
   so that the macro can be used in #if arithmetic as well as with
   defined(). */
#if defined(__MICROBLAZE__) || defined(__microblaze__)
# define SIMDE_ARCH_MICROBLAZE 1
#endif
/* MIPS
<https://en.wikipedia.org/wiki/MIPS_architecture> */
/* MIPS32/MIPS64 are encoded as the word size followed by the release
   digit (320/322, 640/642); MIPS I-IV use 1-4; a generic MIPS macro
   yields 1. */
#if defined(_MIPS_ISA_MIPS64R2)
# define SIMDE_ARCH_MIPS 642
#elif defined(_MIPS_ISA_MIPS64)
# define SIMDE_ARCH_MIPS 640
#elif defined(_MIPS_ISA_MIPS32R2)
# define SIMDE_ARCH_MIPS 322
#elif defined(_MIPS_ISA_MIPS32)
# define SIMDE_ARCH_MIPS 320
#elif defined(_MIPS_ISA_MIPS4)
# define SIMDE_ARCH_MIPS 4
#elif defined(_MIPS_ISA_MIPS3)
# define SIMDE_ARCH_MIPS 3
#elif defined(_MIPS_ISA_MIPS2)
# define SIMDE_ARCH_MIPS 2
#elif defined(_MIPS_ISA_MIPS1)
# define SIMDE_ARCH_MIPS 1
#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__)
# define SIMDE_ARCH_MIPS 1
#endif
/* Matsushita MN10300
<https://en.wikipedia.org/wiki/MN103> */
#if defined(__MN10300__) || defined(__mn10300__)
# define SIMDE_ARCH_MN10300 1
#endif
/* POWER
<https://en.wikipedia.org/wiki/IBM_POWER_Instruction_Set_Architecture> */
/* _M_PPC is forwarded unchanged.  Otherwise _ARCH_PWRn maps to n * 100,
   the numbered cores (440, 450, 601, ...) map to their own number, and a
   generic PowerPC macro yields 1.  Note that the two numbering schemes
   share one value space, so the values are identifiers rather than a
   strictly ordered scale. */
#if defined(_M_PPC)
# define SIMDE_ARCH_POWER _M_PPC
#elif defined(_ARCH_PWR8)
# define SIMDE_ARCH_POWER 800
#elif defined(_ARCH_PWR7)
# define SIMDE_ARCH_POWER 700
#elif defined(_ARCH_PWR6)
# define SIMDE_ARCH_POWER 600
#elif defined(_ARCH_PWR5)
# define SIMDE_ARCH_POWER 500
#elif defined(_ARCH_PWR4)
# define SIMDE_ARCH_POWER 400
#elif defined(_ARCH_440) || defined(__ppc440__)
# define SIMDE_ARCH_POWER 440
#elif defined(_ARCH_450) || defined(__ppc450__)
# define SIMDE_ARCH_POWER 450
#elif defined(_ARCH_601) || defined(__ppc601__)
# define SIMDE_ARCH_POWER 601
#elif defined(_ARCH_603) || defined(__ppc603__)
# define SIMDE_ARCH_POWER 603
#elif defined(_ARCH_604) || defined(__ppc604__)
# define SIMDE_ARCH_POWER 604
#elif defined(_ARCH_605) || defined(__ppc605__)
# define SIMDE_ARCH_POWER 605
#elif defined(_ARCH_620) || defined(__ppc620__)
# define SIMDE_ARCH_POWER 620
#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc)
# define SIMDE_ARCH_POWER 1
#endif
/* SPARC
<https://en.wikipedia.org/wiki/SPARC> */
/* The value is the version number from the matching macro name (9 down
   to 1); a generic SPARC macro also yields 1. */
#if defined(__sparc_v9__) || defined(__sparcv9)
# define SIMDE_ARCH_SPARC 9
#elif defined(__sparc_v8__) || defined(__sparcv8)
# define SIMDE_ARCH_SPARC 8
#elif defined(__sparc_v7__) || defined(__sparcv7)
# define SIMDE_ARCH_SPARC 7
#elif defined(__sparc_v6__) || defined(__sparcv6)
# define SIMDE_ARCH_SPARC 6
#elif defined(__sparc_v5__) || defined(__sparcv5)
# define SIMDE_ARCH_SPARC 5
#elif defined(__sparc_v4__) || defined(__sparcv4)
# define SIMDE_ARCH_SPARC 4
#elif defined(__sparc_v3__) || defined(__sparcv3)
# define SIMDE_ARCH_SPARC 3
#elif defined(__sparc_v2__) || defined(__sparcv2)
# define SIMDE_ARCH_SPARC 2
#elif defined(__sparc_v1__) || defined(__sparcv1)
# define SIMDE_ARCH_SPARC 1
#elif defined(__sparc__) || defined(__sparc)
# define SIMDE_ARCH_SPARC 1
#endif
/* SuperH
<https://en.wikipedia.org/wiki/SuperH> */
/* The value is the generation number from the matching macro name (5
   down to 1); a generic SuperH macro also yields 1. */
#if defined(__sh5__) || defined(__SH5__)
# define SIMDE_ARCH_SUPERH 5
#elif defined(__sh4__) || defined(__SH4__)
# define SIMDE_ARCH_SUPERH 4
#elif defined(__sh3__) || defined(__SH3__)
# define SIMDE_ARCH_SUPERH 3
#elif defined(__sh2__) || defined(__SH2__)
# define SIMDE_ARCH_SUPERH 2
#elif defined(__sh1__) || defined(__SH1__)
# define SIMDE_ARCH_SUPERH 1
#elif defined(__sh__) || defined(__SH__)
# define SIMDE_ARCH_SUPERH 1
#endif
/* IBM System z
<https://en.wikipedia.org/wiki/IBM_System_z> */
/* Defined to 1, like the other version-less architectures in this file,
   so that the macro can be used in #if arithmetic as well as with
   defined(). */
#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__)
# define SIMDE_ARCH_SYSTEMZ 1
#endif
/* TMS320 DSP
<https://en.wikipedia.org/wiki/Texas_Instruments_TMS320> */
/* C6xxx parts use their four-digit model number, with the "_PLUS"
   variants encoded as model + 1 (6701, 6401).  The C55X/C54X/C28X
   families are encoded as 550/540/280.  There is no generic fallback. */
#if defined(_TMS320C6740) || defined(__TMS320C6740__)
# define SIMDE_ARCH_TMS320 6740
#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__)
# define SIMDE_ARCH_TMS320 6701
#elif defined(_TMS320C6700) || defined(__TMS320C6700__)
# define SIMDE_ARCH_TMS320 6700
#elif defined(_TMS320C6600) || defined(__TMS320C6600__)
# define SIMDE_ARCH_TMS320 6600
#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__)
# define SIMDE_ARCH_TMS320 6401
#elif defined(_TMS320C6400) || defined(__TMS320C6400__)
# define SIMDE_ARCH_TMS320 6400
#elif defined(_TMS320C6200) || defined(__TMS320C6200__)
# define SIMDE_ARCH_TMS320 6200
#elif defined(_TMS320C55X) || defined(__TMS320C55X__)
# define SIMDE_ARCH_TMS320 550
#elif defined(_TMS320C54X) || defined(__TMS320C54X__)
# define SIMDE_ARCH_TMS320 540
#elif defined(_TMS320C28X) || defined(__TMS320C28X__)
# define SIMDE_ARCH_TMS320 280
#endif
/* Xtensa
<https://en.wikipedia.org/wiki/Tensilica> */
#if defined(__xtensa__) || defined(__XTENSA__)
# define SIMDE_ARCH_XTENSA 1
#endif
#endif /* !defined(SIMDE_ARCH_H) */
/* Copyright (c) 2017-2019 Evan Nemerson <evan@nemerson.com>
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#if !defined(SIMDE_COMMON_H)
#define SIMDE_COMMON_H
#include "hedley.h"
#include "check.h"
#include "simde-arch.h"
/* SIMDE_ALIGN(alignment): alignment specifier for a declaration.  The
   first matching spelling wins: the GNU-style attribute, MSVC's
   __declspec (skipped for 32-bit ARM MSVC targets), C11 _Alignas, C++11
   alignas.  When none applies the macro expands to nothing, i.e. no
   alignment is requested. */
#if \
HEDLEY_HAS_ATTRIBUTE(aligned) || \
HEDLEY_GCC_VERSION_CHECK(2,95,0) || \
HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \
HEDLEY_IBM_VERSION_CHECK(11,1,0) || \
HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
HEDLEY_PGI_VERSION_CHECK(19,4,0) || \
HEDLEY_ARM_VERSION_CHECK(4,1,0) || \
HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \
HEDLEY_TI_VERSION_CHECK(8,1,0)
# define SIMDE_ALIGN(alignment) __attribute__((aligned(alignment)))
#elif defined(_MSC_VER) && !(defined(_M_ARM) && !defined(_M_ARM64))
# define SIMDE_ALIGN(alignment) __declspec(align(alignment))
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
# define SIMDE_ALIGN(alignment) _Alignas(alignment)
#elif defined(__cplusplus) && (__cplusplus >= 201103L)
# define SIMDE_ALIGN(alignment) alignas(alignment)
#else
# define SIMDE_ALIGN(alignment)
#endif
/* Asserts that the address `val` is a multiple of `alignment`.  The
   remainder is checked through simde_assert_int, so it is converted to
   int for the comparison. */
#define simde_assert_aligned(alignment, val) \
simde_assert_int(HEDLEY_REINTERPRET_CAST(uintptr_t, HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, (val)))) % (alignment), ==, 0)
/* TODO: this should really do something like
HEDLEY_STATIC_CAST(T, (simde_assert_int(alignment, v), v))
but I need to think about how to handle it in all compilers...
may end up moving to Hedley, too. */
/* SIMDE_CAST_ALIGN(alignment, T, v): cast `v` to pointer type T.  With
   __builtin_assume_aligned the compiler is additionally told about the
   alignment; otherwise `alignment` is unused and, where -Wcast-align is
   known to the compiler, that warning is silenced around the cast. */
#if HEDLEY_HAS_BUILTIN(__builtin_assume_aligned)
# define SIMDE_CAST_ALIGN(alignment, T, v) HEDLEY_REINTERPRET_CAST(T, __builtin_assume_aligned(v, alignment))
#elif HEDLEY_HAS_WARNING("-Wcast-align")
# define SIMDE_CAST_ALIGN(alignment, T, v) \
HEDLEY_DIAGNOSTIC_PUSH \
_Pragma("clang diagnostic ignored \"-Wcast-align\"") \
HEDLEY_REINTERPRET_CAST(T, (v)) \
HEDLEY_DIAGNOSTIC_POP
#else
# define SIMDE_CAST_ALIGN(alignment, T, v) HEDLEY_REINTERPRET_CAST(T, (v))
#endif
/* Feature switch set when the vector_size attribute is available. */
#if HEDLEY_GCC_HAS_ATTRIBUTE(vector_size,4,6,0)
# define SIMDE__ENABLE_GCC_VEC_EXT
#endif
/* OpenMP SIMD and Cilk Plus support are enabled automatically when the
   compiler advertises them; either can also be forced by defining the
   SIMDE_ENABLE_* macro before this header is included. */
#if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L)))
# define SIMDE_ENABLE_OPENMP
#endif
#if !defined(SIMDE_ENABLE_CILKPLUS) && defined(__cilk)
# define SIMDE_ENABLE_CILKPLUS
#endif
/* Loop-vectorization hints, meant to be placed directly before a loop:
     SIMDE__VECTORIZE              plain "vectorize this loop" hint
     SIMDE__VECTORIZE_SAFELEN(l)   hint with a vector length / width
     SIMDE__VECTORIZE_REDUCTION(r) hint with a reduction clause
     SIMDE__VECTORIZE_ALIGNED(a)   hint with an alignment clause
   Back-ends are tried in this order: OpenMP, Cilk Plus, Intel, clang,
   GCC >= 4.9, Cray.  A back-end that cannot express a variant falls
   back to the plain hint or to nothing, and with no back-end all four
   macros are empty. */
#if defined(SIMDE_ENABLE_OPENMP)
# define SIMDE__VECTORIZE _Pragma("omp simd")
# define SIMDE__VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l))
# define SIMDE__VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r))
# define SIMDE__VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a))
#elif defined(SIMDE_ENABLE_CILKPLUS)
# define SIMDE__VECTORIZE _Pragma("simd")
# define SIMDE__VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l))
# define SIMDE__VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r))
# define SIMDE__VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a))
#elif defined(__INTEL_COMPILER)
# define SIMDE__VECTORIZE _Pragma("simd")
# define SIMDE__VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l))
# define SIMDE__VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r))
# define SIMDE__VECTORIZE_ALIGNED(a)
#elif defined(__clang__)
# define SIMDE__VECTORIZE _Pragma("clang loop vectorize(enable)")
# define SIMDE__VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l))
# define SIMDE__VECTORIZE_REDUCTION(r) SIMDE__VECTORIZE
# define SIMDE__VECTORIZE_ALIGNED(a)
#elif HEDLEY_GCC_VERSION_CHECK(4,9,0)
# define SIMDE__VECTORIZE _Pragma("GCC ivdep")
# define SIMDE__VECTORIZE_SAFELEN(l) SIMDE__VECTORIZE
# define SIMDE__VECTORIZE_REDUCTION(r) SIMDE__VECTORIZE
# define SIMDE__VECTORIZE_ALIGNED(a)
#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0)
# define SIMDE__VECTORIZE _Pragma("_CRI ivdep")
# define SIMDE__VECTORIZE_SAFELEN(l) SIMDE__VECTORIZE
# define SIMDE__VECTORIZE_REDUCTION(r) SIMDE__VECTORIZE
# define SIMDE__VECTORIZE_ALIGNED(a)
#else
# define SIMDE__VECTORIZE
# define SIMDE__VECTORIZE_SAFELEN(l)
# define SIMDE__VECTORIZE_REDUCTION(r)
# define SIMDE__VECTORIZE_ALIGNED(a)
#endif
/* Optional GNU attributes; each expands to nothing when unsupported. */
#if HEDLEY_GCC_HAS_ATTRIBUTE(unused,3,1,0)
# define SIMDE__UNUSED __attribute__((__unused__))
#else
# define SIMDE__UNUSED
#endif
#if HEDLEY_GCC_HAS_ATTRIBUTE(artificial,4,3,0)
# define SIMDE__ARTIFICIAL __attribute__((__artificial__))
#else
# define SIMDE__ARTIFICIAL
#endif
/* Intended for checking coverage, you should never use this in
production. */
/* Specifier prefix for function definitions: `static` in both cases,
   inline and artificial by default, never-inline and unused when
   SIMDE_NO_INLINE is defined. */
#if defined(SIMDE_NO_INLINE)
# define SIMDE__FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE SIMDE__UNUSED static
#else
# define SIMDE__FUNCTION_ATTRIBUTES HEDLEY_INLINE SIMDE__ARTIFICIAL static
#endif
/* Bracket for the declarations of a header: opens/closes the C linkage
   block and, on MSVC, additionally disables warnings 4996 and 4204 in
   between. */
#if defined(_MSC_VER)
# define SIMDE__BEGIN_DECLS HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS
# define SIMDE__END_DECLS HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS
#else
# define SIMDE__BEGIN_DECLS HEDLEY_BEGIN_C_DECLS
# define SIMDE__END_DECLS HEDLEY_END_C_DECLS
#endif
/* Pragma that silences -Wpedantic (clang or GCC spelling); used below so
   that the __int128 typedefs do not trigger pedantic diagnostics. */
#if HEDLEY_HAS_WARNING("-Wpedantic")
# define SIMDE_DIAGNOSTIC_DISABLE_INT128 _Pragma("clang diagnostic ignored \"-Wpedantic\"")
#elif defined(HEDLEY_GCC_VERSION)
# define SIMDE_DIAGNOSTIC_DISABLE_INT128 _Pragma("GCC diagnostic ignored \"-Wpedantic\"")
#else
# define SIMDE_DIAGNOSTIC_DISABLE_INT128
#endif
/* 128-bit integer typedefs, available only when the compiler defines
   __SIZEOF_INT128__; SIMDE__HAVE_INT128 tells users whether they exist. */
#if defined(__SIZEOF_INT128__)
# define SIMDE__HAVE_INT128
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DIAGNOSTIC_DISABLE_INT128
typedef __int128 simde_int128;
typedef unsigned __int128 simde_uint128;
HEDLEY_DIAGNOSTIC_POP
#endif
/* TODO: we should at least make an attempt to detect the correct
types for simde_float32/float64 instead of just assuming float and
double. */
/* simde_float32 defaults to `float`; define SIMDE_FLOAT32_TYPE before
   including this header to choose another type.  SIMDE_FLOAT32_C(value)
   builds a constant of that type: an `f`-suffixed literal for the
   default, a cast for a user-supplied type.  The static assertion
   rejects any type that is not exactly 4 bytes wide. */
#if !defined(SIMDE_FLOAT32_TYPE)
# define SIMDE_FLOAT32_TYPE float
# define SIMDE_FLOAT32_C(value) value##f
#else
# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value)
#endif
typedef SIMDE_FLOAT32_TYPE simde_float32;
HEDLEY_STATIC_ASSERT(sizeof(simde_float32) == 4, "Unable to find 32-bit floating-point type.");
/* 64-bit floating-point type and literal helper.  The default type is
   `double`, for which SIMDE_FLOAT64_C(value) is the literal itself.  When
   the includer supplies SIMDE_FLOAT64_TYPE, SIMDE_FLOAT64_C casts to that
   type; this branch must define SIMDE_FLOAT64_C (not SIMDE_FLOAT32_C,
   which belongs to the 32-bit helper). */
#if !defined(SIMDE_FLOAT64_TYPE)
# define SIMDE_FLOAT64_TYPE double
# define SIMDE_FLOAT64_C(value) value
#else
# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value)
#endif
/* The static assertion rejects any SIMDE_FLOAT64_TYPE that is not exactly
   8 bytes wide. */
typedef SIMDE_FLOAT64_TYPE simde_float64;
HEDLEY_STATIC_ASSERT(sizeof(simde_float64) == 8, "Unable to find 64-bit floating-point type.");
/* Whether to assume that the compiler can auto-vectorize reasonably
well. This will cause SIMDe to attempt to compose vector
operations using more simple vector operations instead of minimizing
serial work.
As an example, consider the _mm_add_ss(a, b) function from SSE,
which returns { a0 + b0, a1, a2, a3 }. This pattern is repeated
for other operations (sub, mul, etc.).
The naïve implementation would result in loading a0 and b0, adding
them into a temporary variable, then splicing that value into a new
vector with the remaining elements from a.
On platforms which support vectorization, it's generally faster to
simply perform the operation on the entire vector to avoid having
to move data between SIMD registers and non-SIMD registers.
Basically, instead of the temporary variable being (a0 + b0) it
would be a vector of (a + b), which is then combined with a to form
the result.
By default, SIMDe will prefer the pure-vector versions if we detect
a vector ISA extension, but this can be overridden by defining
SIMDE_NO_ASSUME_VECTORIZATION. You can also define
SIMDE_ASSUME_VECTORIZATION if you want to force SIMDe to use the
vectorized version. */
/* The automatic choice only applies when the includer has defined
   neither macro; it keys off the SSE, NEON, MSA and AltiVec feature
   macros. */
#if !defined(SIMDE_NO_ASSUME_VECTORIZATION) && !defined(SIMDE_ASSUME_VECTORIZATION)
# if defined(__SSE__) || defined(__ARM_NEON) || defined(__mips_msa) || defined(__ALTIVEC__)
# define SIMDE_ASSUME_VECTORIZATION
# endif
#endif
/* GCC and clang have built-in functions to handle shuffling of
vectors, but the implementations are slightly different. This
macro is just an abstraction over them. Note that elem_size is in
bits but vec_size is in bytes. */
/* With __builtin_shufflevector the indices are passed straight through
   and elem_size/vec_size are unused.  With __builtin_shuffle the indices
   are first collected into an integer mask vector built from elem_size
   and vec_size.  When neither builtin is available the macro is left
   undefined, so users have to test for it with defined(). */
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
# define SIMDE__SHUFFLE_VECTOR(elem_size, vec_size, a, b, ...) __builtin_shufflevector(a, b, __VA_ARGS__)
#elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER)
# define SIMDE__SHUFFLE_VECTOR(elem_size, vec_size, a, b, ...) (__extension__ ({ \
int##elem_size##_t __attribute__((__vector_size__(vec_size))) simde_shuffle_ = { __VA_ARGS__ }; \
__builtin_shuffle(a, b, simde_shuffle_); \
}))
#endif
/* SIMDE__CONVERT_VECTOR(to, from): element-wise conversion of `from`
   into the vector lvalue `to`.  Also left undefined when the builtin is
   missing. */
#if HEDLEY_GCC_HAS_BUILTIN(__builtin_convertvector,9,0,0)
# define SIMDE__CONVERT_VECTOR(to, from) ((to) = __builtin_convertvector((from), __typeof__(to)))
#endif
/* SIMDE_CONVERT_FTOI(T, v): convert `v` to type T.  Where the compiler
   knows -Wbad-function-cast, that warning is silenced around the cast;
   otherwise this is a plain C cast. */
#if HEDLEY_HAS_WARNING("-Wbad-function-cast")
# define SIMDE_CONVERT_FTOI(T,v) \
HEDLEY_DIAGNOSTIC_PUSH \
_Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \
HEDLEY_STATIC_CAST(T, (v)) \
HEDLEY_DIAGNOSTIC_POP
#else
# define SIMDE_CONVERT_FTOI(T,v) ((T) (v))
#endif
/* Pragma that silences -Wfloat-equal (clang or GCC spelling); empty on
   other compilers.  It is not paired with a push/pop here, so callers
   are expected to scope it themselves. */
#if HEDLEY_HAS_WARNING("-Wfloat-equal")
# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"")
#elif HEDLEY_GCC_VERSION_CHECK(3,0,0)
# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"")
#else
# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL
#endif
/* Some algorithms are iterative, and fewer iterations means less
accuracy. Lower values here will result in faster, but less
accurate, calculations for some functions. */
/* Default is 2; define SIMDE_ACCURACY_ITERS before including this
   header to override it. */
#if !defined(SIMDE_ACCURACY_ITERS)
# define SIMDE_ACCURACY_ITERS 2
#endif
/* SIMDE__ASSUME_ALIGNED(ptr, align): tell the optimizer that `ptr` is
   aligned to `align` bytes.  Any earlier definition is discarded first.
   Depending on the compiler this is an intrinsic call, a reassignment of
   `ptr` through __builtin_assume_aligned (so `ptr` must be a modifiable
   lvalue in that case), an assumption on the address remainder, or
   nothing at all.
   NOTE(review): the __builtin_unreachable variant expands to an
   unparenthesized conditional expression, so it is only safe when used
   as a complete statement. */
#if defined(SIMDE__ASSUME_ALIGNED)
# undef SIMDE__ASSUME_ALIGNED
#endif
#if HEDLEY_INTEL_VERSION_CHECK(9,0,0)
# define SIMDE__ASSUME_ALIGNED(ptr, align) __assume_aligned(ptr, align)
#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0)
# define SIMDE__ASSUME_ALIGNED(ptr, align) __assume((((char*) ptr) - ((char*) 0)) % (align) == 0)
#elif HEDLEY_GCC_HAS_BUILTIN(__builtin_assume_aligned,4,7,0)
# define SIMDE__ASSUME_ALIGNED(ptr, align) (ptr = (__typeof__(ptr)) __builtin_assume_aligned((ptr), align))
#elif HEDLEY_CLANG_HAS_BUILTIN(__builtin_assume)
# define SIMDE__ASSUME_ALIGNED(ptr, align) __builtin_assume((((char*) ptr) - ((char*) 0)) % (align) == 0)
#elif HEDLEY_GCC_HAS_BUILTIN(__builtin_unreachable,4,5,0)
# define SIMDE__ASSUME_ALIGNED(ptr, align) ((((char*) ptr) - ((char*) 0)) % (align) == 0) ? (1) : (__builtin_unreachable(), 0)
#else
# define SIMDE__ASSUME_ALIGNED(ptr, align)
#endif
/* This is only to help us implement functions like _mm_undefined_ps. */
/* Pragma that silences "use of uninitialized value" diagnostics, in the
   spelling of whichever compiler is detected first.  Any earlier
   definition is discarded.  There is no #else branch: on a compiler that
   matches none of the checks the macro stays undefined, so users have to
   guard its use with defined(). */
#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
# undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_
#endif
#if HEDLEY_HAS_WARNING("-Wuninitialized")
# define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"")
#elif HEDLEY_GCC_VERSION_CHECK(4,2,0)
# define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"")
#elif HEDLEY_PGI_VERSION_CHECK(19,10,0)
# define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549")
#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)
# define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)")
#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0)
# define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)")
#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus)
# define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)")
/* #elif \
HEDLEY_TI_VERSION_CHECK(16,9,9) || \
HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \
HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \
HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2)
# define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") */
#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0)
# define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)")
#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0)
# define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700))
#endif
/* Sometimes we run into problems with specific versions of compilers
which make the native versions unusable for us. Often this is due
to missing functions, sometimes buggy implementations, etc. These
macros are how we check for specific bugs. As they are fixed we'll
start only defining them for problematic compiler versions.

Defining SIMDE_IGNORE_COMPILER_BUGS skips this whole section, so none
of the SIMDE_BUG_* flags below get defined.

NOTE(review): every GCC test is written as
!HEDLEY_GCC_VERSION_CHECK(...); if that macro evaluates to 0 on
compilers other than GCC, the corresponding flag is defined there as
well -- confirm this is the intended conservative default. */
#if !defined(SIMDE_IGNORE_COMPILER_BUGS)
/* Flag set unless the compiler passes the GCC >= 4.9.0 check. */
# if !HEDLEY_GCC_VERSION_CHECK(4,9,0)
# define SIMDE_BUG_GCC_REV_208793
# endif
/* Flag set unless the compiler passes the GCC >= 5.0.0 check. */
# if !HEDLEY_GCC_VERSION_CHECK(5,0,0)
# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */
# endif
/* Flag set unless the compiler passes the GCC >= 4.6.0 check. */
# if !HEDLEY_GCC_VERSION_CHECK(4,6,0)
# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */
# endif
/* Flag set unless the compiler passes the GCC >= 10.0.0 check. */
# if !HEDLEY_GCC_VERSION_CHECK(10,0,0)
# define SIMDE_BUG_GCC_REV_274313
# endif
/* Emscripten-specific flags, keyed on HEDLEY_EMSCRIPTEN_VERSION being defined. */
# if defined(HEDLEY_EMSCRIPTEN_VERSION)
# define SIMDE_BUG_EMSCRIPTEN_MISSING_IMPL /* Placeholder for (as yet) unfiled issues. */
# define SIMDE_BUG_EMSCRIPTEN_5242
# endif
#endif
/* Reinterpret the bits of a 32-bit unsigned integer as a simde_float32.
 *
 * The value is stored through the integer member of a local union and
 * read back through the floating-point member; no numeric conversion
 * takes place.
 *
 * val: bit pattern to reinterpret.
 * Returns the simde_float32 that shares storage with `val`. */
HEDLEY_ALWAYS_INLINE static
simde_float32 simde_u32_to_f32(uint32_t val) {
  union {
    simde_float32 as_float;
    uint32_t as_bits;
  } pun;

  pun.as_bits = val;
  return pun.as_float;
}
/* Reinterpret the bits of a 64-bit unsigned integer as a simde_float64.
 *
 * The value is stored through the integer member of a local union and
 * read back through the floating-point member; no numeric conversion
 * takes place.
 *
 * val: bit pattern to reinterpret.
 * Returns the simde_float64 that shares storage with `val`. */
HEDLEY_ALWAYS_INLINE static
simde_float64 simde_u64_to_f64(uint64_t val) {
  union {
    simde_float64 as_float;
    uint64_t as_bits;
  } pun;

  pun.as_bits = val;
  return pun.as_float;
}
/* Floating-point values whose underlying bit pattern is all ones
   (*_ALL_SET, built from ~0) or all zeros (*_ALL_UNSET, built from 0),
   produced with the simde_u32_to_f32 / simde_u64_to_f64 helpers.  Each
   expands to a function call, not a literal constant. */
#define SIMDE_F32_ALL_SET (simde_u32_to_f32(~UINT32_C(0)))
#define SIMDE_F32_ALL_UNSET (simde_u32_to_f32( UINT32_C(0)))
#define SIMDE_F64_ALL_SET (simde_u64_to_f64(~UINT64_C(0)))
#define SIMDE_F64_ALL_UNSET (simde_u64_to_f64( UINT64_C(0)))
#endif /* !defined(SIMDE_COMMON_H) */
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
/* Copyright (c) 2017 Evan Nemerson <evan@nemerson.com>
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/* Include guard; the matching #endif is the last line of this header.
   The nested test of the same macro directly below is redundant but
   harmless. */
#if !defined(SIMDE__SSE3_H)
# if !defined(SIMDE__SSE3_H)
# define SIMDE__SSE3_H
# endif
# include "sse2.h"
/* Backend selection.  Start from a clean slate, then pick at most one of:
     SIMDE_SSE3_NATIVE - forced via SIMDE_SSE3_FORCE_NATIVE, or __SSE3__ is
                         defined and neither SIMDE_SSE3_NO_NATIVE nor
                         SIMDE_NO_NATIVE is;
     SIMDE_SSE3_NEON   - __ARM_NEON is defined and neither
                         SIMDE_SSE3_NO_NEON nor SIMDE_NO_NEON is.
   If neither ends up defined the functions below use their portable
   fallbacks. */
# if defined(SIMDE_SSE3_NATIVE)
# undef SIMDE_SSE3_NATIVE
# endif
# if defined(SIMDE_SSE3_FORCE_NATIVE)
# define SIMDE_SSE3_NATIVE
# elif defined(__SSE3__) && (!defined(SIMDE_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE))
# define SIMDE_SSE3_NATIVE
# elif defined(__ARM_NEON) && !defined(SIMDE_SSE3_NO_NEON) && !defined(SIMDE_NO_NEON)
# define SIMDE_SSE3_NEON
# endif
/* The SSE3 backend must match the SSE2 backend chosen by sse2.h.  A
   mismatch is a hard error only when native support was forced;
   otherwise a warning is emitted and the SSE3 backend is dropped. */
# if defined(SIMDE_SSE3_NATIVE) && !defined(SIMDE_SSE2_NATIVE)
# if defined(SIMDE_SSE3_FORCE_NATIVE)
# error Native SSE3 support requires native SSE2 support
# else
# warning Native SSE3 support requires native SSE2 support, disabling
# undef SIMDE_SSE3_NATIVE
# endif
# elif defined(SIMDE_SSE3_NEON) && !defined(SIMDE_SSE2_NEON)
# warning SSE3 NEON support requires SSE2 NEON support, disabling
# undef SIMDE_SSE3_NEON
# endif
/* The native intrinsics header is only pulled in for the native backend. */
# if defined(SIMDE_SSE3_NATIVE)
# include <pmmintrin.h>
# endif
/* When the native backend is not in use and the user asked for native
   aliases, the _mm_* names are defined as macros over the simde_mm_*
   functions further down. */
#if !defined(SIMDE_SSE3_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)
# define SIMDE_SSE3_ENABLE_NATIVE_ALIASES
#endif
SIMDE__BEGIN_DECLS
/* Alternating subtract/add of two double-precision vectors.
 *
 * Every even-indexed lane of the result is a - b and every odd-indexed
 * lane is a + b.  The native intrinsic is used when SIMDE_SSE3_NATIVE
 * is defined; otherwise the lanes are computed one pair at a time. */
SIMDE__FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_addsub_pd (simde__m128d a, simde__m128d b) {
#if defined(SIMDE_SSE3_NATIVE)
  return SIMDE__M128D_FROM_NATIVE(_mm_addsub_pd(a.n, b.n));
#else
  simde__m128d result;
  const size_t lane_count = sizeof(result.f64) / sizeof(result.f64[0]);
  size_t even = 0;

  /* Walk the lanes as (even, odd) pairs. */
  while (even < lane_count) {
    const size_t odd = even + 1;
    result.f64[even] = a.f64[even] - b.f64[even];
    result.f64[odd] = a.f64[odd] + b.f64[odd];
    even += 2;
  }

  return result;
#endif
}
#if defined(SIMDE_SSE3_ENABLE_NATIVE_ALIASES)
#  define _mm_addsub_pd(a, b) SIMDE__M128D_TO_NATIVE(simde_mm_addsub_pd(SIMDE__M128D_FROM_NATIVE(a), SIMDE__M128D_FROM_NATIVE(b)))
#endif
/* Alternating subtract/add of two single-precision vectors.
 *
 * The native intrinsic is used when SIMDE_SSE3_NATIVE is defined.  The
 * fallback multiplies b by a vector of alternating +1/-1 factors and
 * adds the product to a, built entirely from the simde SSE helpers. */
SIMDE__FUNCTION_ATTRIBUTES
simde__m128
simde_mm_addsub_ps (simde__m128 a, simde__m128 b) {
#if defined(SIMDE_SSE3_NATIVE)
  return SIMDE__M128_FROM_NATIVE(_mm_addsub_ps(a.n, b.n));
#else
  /* NOTE(review): presumably simde_mm_set_ps, like _mm_set_ps, takes its
     arguments from the highest lane down, which would make lane 0 the
     negated one -- confirm against its definition. */
  const simde__m128 sign_factors = simde_mm_set_ps( 1.0f, -1.0f, 1.0f, -1.0f);
  const simde__m128 signed_b = simde_mm_mul_ps(sign_factors, b);
  return simde_mm_add_ps(a, signed_b);
#endif
}
#if defined(SIMDE_SSE3_ENABLE_NATIVE_ALIASES)
#  define _mm_addsub_ps(a, b) SIMDE__M128_TO_NATIVE(simde_mm_addsub_ps(SIMDE__M128_FROM_NATIVE(a), SIMDE__M128_FROM_NATIVE(b)))
#endif
/* Horizontal add of double-precision pairs.
 *
 * Lane 0 of the result is the sum of the two lanes of a, lane 1 is the
 * sum of the two lanes of b.  Uses the native intrinsic when
 * SIMDE_SSE3_NATIVE is defined. */
SIMDE__FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_hadd_pd (simde__m128d a, simde__m128d b) {
#if defined(SIMDE_SSE3_NATIVE)
  return SIMDE__M128D_FROM_NATIVE(_mm_hadd_pd(a.n, b.n));
#else
  simde__m128d sums;

  /* The two lanes are independent, so the store order does not matter. */
  sums.f64[1] = b.f64[0] + b.f64[1];
  sums.f64[0] = a.f64[0] + a.f64[1];

  return sums;
#endif
}
#if defined(SIMDE_SSE3_ENABLE_NATIVE_ALIASES)
#  define _mm_hadd_pd(a, b) SIMDE__M128D_TO_NATIVE(simde_mm_hadd_pd(SIMDE__M128D_FROM_NATIVE(a), SIMDE__M128D_FROM_NATIVE(b)))
#endif
/* Horizontal add of single-precision pairs.
 *
 * Result lanes 0 and 1 are the sums of the adjacent pairs of a, lanes 2
 * and 3 the sums of the adjacent pairs of b.  Three implementations are
 * selected at compile time: the native intrinsic, a NEON version (a
 * single vpaddq_f32 when SIMDE_ARCH_AARCH64 is defined, otherwise two
 * 64-bit pairwise adds that are recombined), and a portable loop. */
SIMDE__FUNCTION_ATTRIBUTES
simde__m128
simde_mm_hadd_ps (simde__m128 a, simde__m128 b) {
#if defined(SIMDE_SSE3_NATIVE)
  return SIMDE__M128_FROM_NATIVE(_mm_hadd_ps(a.n, b.n));
#elif defined(SIMDE_SSE3_NEON)
#  if defined(SIMDE_ARCH_AARCH64)
  return SIMDE__M128_NEON_C(f32, vpaddq_f32(a.neon_f32, b.neon_f32));
#  else
  /* Pairwise-add the low and high halves of each input separately. */
  const float32x2_t a_pairs = vpadd_f32(vget_low_f32(a.neon_f32), vget_high_f32(a.neon_f32));
  const float32x2_t b_pairs = vpadd_f32(vget_low_f32(b.neon_f32), vget_high_f32(b.neon_f32));
  return SIMDE__M128_NEON_C(f32, vcombine_f32(a_pairs, b_pairs));
#  endif
#else
  simde__m128 sums;

  /* pair 0 covers input lanes 0/1, pair 1 covers input lanes 2/3. */
  for (size_t pair = 0 ; pair < 2 ; pair++) {
    sums.f32[pair]     = a.f32[2 * pair] + a.f32[2 * pair + 1];
    sums.f32[pair + 2] = b.f32[2 * pair] + b.f32[2 * pair + 1];
  }

  return sums;
#endif
}
#if defined(SIMDE_SSE3_ENABLE_NATIVE_ALIASES)
#  define _mm_hadd_ps(a, b) SIMDE__M128_TO_NATIVE(simde_mm_hadd_ps(SIMDE__M128_FROM_NATIVE(a), SIMDE__M128_FROM_NATIVE(b)))
#endif
/* Horizontal subtract of double-precision pairs.
 *
 * Lane 0 of the result is a[0] - a[1], lane 1 is b[0] - b[1].  Uses the
 * native intrinsic when SIMDE_SSE3_NATIVE is defined. */
SIMDE__FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_hsub_pd (simde__m128d a, simde__m128d b) {
#if defined(SIMDE_SSE3_NATIVE)
  return SIMDE__M128D_FROM_NATIVE(_mm_hsub_pd(a.n, b.n));
#else
  simde__m128d diffs;

  /* The two lanes are independent, so the store order does not matter. */
  diffs.f64[1] = b.f64[0] - b.f64[1];
  diffs.f64[0] = a.f64[0] - a.f64[1];

  return diffs;
#endif
}
#if defined(SIMDE_SSE3_ENABLE_NATIVE_ALIASES)
#  define _mm_hsub_pd(a, b) SIMDE__M128D_TO_NATIVE(simde_mm_hsub_pd(SIMDE__M128D_FROM_NATIVE(a), SIMDE__M128D_FROM_NATIVE(b)))
#endif
/* Horizontal subtract of single-precision pairs.
 *
 * Result lanes 0 and 1 are a[0] - a[1] and a[2] - a[3]; lanes 2 and 3
 * are the same differences taken from b.  The NEON path negates the
 * odd lanes by multiplying with {1, -1, 1, -1} and then uses pairwise
 * addition, so each "sum" is really a difference. */
SIMDE__FUNCTION_ATTRIBUTES
simde__m128
simde_mm_hsub_ps (simde__m128 a, simde__m128 b) {
#if defined(SIMDE_SSE3_NATIVE)
  return SIMDE__M128_FROM_NATIVE(_mm_hsub_ps(a.n, b.n));
#elif defined(SIMDE_SSE3_NEON)
  const float32_t sign_table[] = { 1.0f, -1.0f, 1.0f, -1.0f };
  const float32x4_t signs = vld1q_f32(sign_table);
  const float32x4_t a_signed = vmulq_f32(a.neon_f32, signs);
  const float32x4_t b_signed = vmulq_f32(b.neon_f32, signs);
  const float32x2_t a_diffs = vpadd_f32(vget_low_f32(a_signed), vget_high_f32(a_signed));
  const float32x2_t b_diffs = vpadd_f32(vget_low_f32(b_signed), vget_high_f32(b_signed));
  return SIMDE__M128_NEON_C(f32, vcombine_f32(a_diffs, b_diffs));
#else
  simde__m128 diffs;

  /* pair 0 covers input lanes 0/1, pair 1 covers input lanes 2/3. */
  for (size_t pair = 0 ; pair < 2 ; pair++) {
    diffs.f32[pair]     = a.f32[2 * pair] - a.f32[2 * pair + 1];
    diffs.f32[pair + 2] = b.f32[2 * pair] - b.f32[2 * pair + 1];
  }

  return diffs;
#endif
}
#if defined(SIMDE_SSE3_ENABLE_NATIVE_ALIASES)
#  define _mm_hsub_ps(a, b) SIMDE__M128_TO_NATIVE(simde_mm_hsub_ps(SIMDE__M128_FROM_NATIVE(a), SIMDE__M128_FROM_NATIVE(b)))
#endif
/* Load a 128-bit integer vector from memory.
 *
 * mem_addr: address of the source vector.
 * Returns the loaded vector.
 *
 * The native path forwards to the intrinsic, the NEON path loads four
 * 32-bit lanes with vld1q_s32, and the portable path copies the bytes
 * with memcpy into a local vector. */
SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_lddqu_si128 (simde__m128i const* mem_addr) {
#if defined(SIMDE_SSE3_NATIVE)
  return SIMDE__M128I_FROM_NATIVE(_mm_lddqu_si128(&mem_addr->n));
#elif defined(SIMDE_SSE3_NEON)
  const int32_t* src = (int32_t const*) mem_addr;
  return SIMDE__M128I_NEON_C(i32, vld1q_s32(src));
#else
  simde__m128i loaded;
  memcpy(&loaded, mem_addr, sizeof(loaded));
  return loaded;
#endif
}
#if defined(SIMDE_SSE3_ENABLE_NATIVE_ALIASES)
#  define _mm_lddqu_si128(mem_addr) SIMDE__M128I_TO_NATIVE(simde_mm_lddqu_si128(mem_addr))
#endif
/* Duplicate the low double-precision lane.
 *
 * Both lanes of the result hold a[0].  Uses the native intrinsic when
 * SIMDE_SSE3_NATIVE is defined. */
SIMDE__FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_movedup_pd (simde__m128d a) {
#if defined(SIMDE_SSE3_NATIVE)
  return SIMDE__M128D_FROM_NATIVE(_mm_movedup_pd(a.n));
#else
  simde__m128d dup;

  for (size_t lane = 0 ; lane < 2 ; lane++) {
    dup.f64[lane] = a.f64[0];
  }

  return dup;
#endif
}
#if defined(SIMDE_SSE3_ENABLE_NATIVE_ALIASES)
#  define _mm_movedup_pd(a) SIMDE__M128D_TO_NATIVE(simde_mm_movedup_pd(SIMDE__M128D_FROM_NATIVE(a)))
#endif
/* Duplicate the odd-indexed single-precision lanes.
 *
 * Result lanes 0 and 1 hold a[1]; lanes 2 and 3 hold a[3].  Uses the
 * native intrinsic when SIMDE_SSE3_NATIVE is defined. */
SIMDE__FUNCTION_ATTRIBUTES
simde__m128
simde_mm_movehdup_ps (simde__m128 a) {
#if defined(SIMDE_SSE3_NATIVE)
  return SIMDE__M128_FROM_NATIVE(_mm_movehdup_ps(a.n));
#else
  simde__m128 dup;

  /* For each (even, odd) pair, copy the odd lane into both slots. */
  for (size_t even = 0 ; even < 4 ; even += 2) {
    dup.f32[even]     = a.f32[even + 1];
    dup.f32[even + 1] = a.f32[even + 1];
  }

  return dup;
#endif
}
#if defined(SIMDE_SSE3_ENABLE_NATIVE_ALIASES)
#  define _mm_movehdup_ps(a) SIMDE__M128_TO_NATIVE(simde_mm_movehdup_ps(SIMDE__M128_FROM_NATIVE(a)))
#endif
/* Duplicate the even-indexed single-precision lanes.
 *
 * Result lanes 0 and 1 hold a[0]; lanes 2 and 3 hold a[2].
 *
 * The native intrinsic is selected with SIMDE_SSE3_NATIVE, the same
 * macro every other function in this header tests and the only one the
 * backend-selection logic at the top of the header defines. */
SIMDE__FUNCTION_ATTRIBUTES
simde__m128
simde_mm_moveldup_ps (simde__m128 a) {
#if defined(SIMDE_SSE3_NATIVE)
  return SIMDE__M128_FROM_NATIVE(_mm_moveldup_ps(a.n));
#else
  simde__m128 r;
  r.f32[0] = a.f32[0];
  r.f32[1] = a.f32[0];
  r.f32[2] = a.f32[2];
  r.f32[3] = a.f32[2];
  return r;
#endif
}
#if defined(SIMDE_SSE3_ENABLE_NATIVE_ALIASES)
#  define _mm_moveldup_ps(a) SIMDE__M128_TO_NATIVE(simde_mm_moveldup_ps(SIMDE__M128_FROM_NATIVE(a)))
#endif
SIMDE__END_DECLS
#endif /* !defined(SIMDE__SSE3_H) */
This diff is collapsed.
This diff is collapsed.
2to3.patch
helpMakefiles.patch
simde
This diff is collapsed.
......@@ -8,29 +8,84 @@
include /usr/share/dpkg/default.mk

# Staging locations inside the package build directory.  mandir and
# libexecdir are derived from prefix so the three cannot drift apart.
prefix=$(CURDIR)/debian/$(DEB_SOURCE)/usr
mandir=$(prefix)/share/man/man1
libexecdir=$(prefix)/lib/$(DEB_SOURCE)

# Copy upstream CXXFLAGS here because makefile enables only overriding them.
# No -m<simd> flag is set globally: override_dh_auto_build appends one per
# SIMD variant on amd64/i386, and other architectures build without any.
CXXFLAGS += -Wall -Wextra -Wcast-qual -Wswitch-enum -Wundef -Wcast-align -Wno-long-long -ansi -pedantic -std=c++11 -fopenmp-simd -O3
CPPFLAGS += -DHAS_CXX_THREADS -DSIMDE_ENABLE_OPENMP
# -Wconversion
# -fomit-frame-pointer ?
LDFLAGS += -pthread

# Honour parallel=N from DEB_BUILD_OPTIONS; default to a single job.
NUMJOBS = 1
ifneq (,$(filter parallel=%,$(DEB_BUILD_OPTIONS)))
NUMJOBS = $(patsubst parallel=%,%,$(filter parallel=%,$(DEB_BUILD_OPTIONS)))
MAKEFLAGS += -j$(NUMJOBS)
endif

export DEB_BUILD_MAINT_OPTIONS = hardening=+all

# Every target not overridden below is handed to dh.
%:
	dh $@
# Build step.
#   amd64: one build per SIMD level (avx2 ... sse2), each with SFX=-<level>
#          and -m<level> appended to CXXFLAGS/CFLAGS.
#   i386:  one build per SIMD level (ssse3 ... mmx) plus an SFX=-plain
#          build with no -m flag.
#   other: a single ordinary build.
# Object files are deleted between variants because make cannot tell that
# the compiler flags changed; for the same reason no unsuffixed build is
# run before the loops.  `set -e` makes a failed $(MAKE) abort the recipe
# instead of being masked by the `find` commands that follow it.
override_dh_auto_build:
ifeq (amd64,$(DEB_HOST_ARCH))
	mkdir -p $(prefix)
	mkdir -p $(libexecdir)
	set -e ; for SIMD in avx2 avx sse4.1 ssse3 sse3 sse2 ; do \
		$(MAKE) -j$(NUMJOBS) all SFX=-$${SIMD} CXXFLAGS="$(CXXFLAGS) -m$${SIMD}" CFLAGS="$(CFLAGS) -m$${SIMD}" CPPFLAGS="$(CPPFLAGS)" LDFLAGS="$(LDFLAGS)" ; \
		find . -name '*.o' -delete ; \
		find . -name '*.o8' -delete ; \
	done
else ifeq (i386,$(DEB_HOST_ARCH))
	mkdir -p $(prefix)
	mkdir -p $(libexecdir)
	set -e ; for SIMD in ssse3 sse3 sse2 sse mmx; do \
		$(MAKE) -j$(NUMJOBS) all SFX=-$${SIMD} CXXFLAGS="$(CXXFLAGS) -m$${SIMD}" CFLAGS="$(CFLAGS) -m$${SIMD}" CPPFLAGS="$(CPPFLAGS)" LDFLAGS="$(LDFLAGS)" ; \
		find . -name '*.o' -delete ; \
		find . -name '*.o8' -delete ; \
	done
	$(MAKE) -j$(NUMJOBS) all SFX=-plain CXXFLAGS="$(CXXFLAGS)" CFLAGS="$(CFLAGS)" CPPFLAGS="$(CPPFLAGS)" LDFLAGS="$(LDFLAGS)"
else
	$(MAKE) -j$(NUMJOBS) all CXXFLAGS="$(CXXFLAGS)" CFLAGS="$(CFLAGS)" CPPFLAGS="$(CPPFLAGS)" LDFLAGS="$(LDFLAGS)"
endif
# Common help2man command line used by the manpage-generating recipes;
# the version string is taken from the upstream part of the package version.
HELP2MAN = help2man --no-info --version-string="$(DEB_VERSION_UPSTREAM)"
# Install step.
#   amd64 / i386: install the simd-dispatch script and every suffixed
#                 binary under usr/lib, install the helper scripts into
#                 usr/bin, then create one symlink per program name in
#                 $(prefix)/bin pointing at simd-dispatch.
#   other:        run the upstream `install` target into $(prefix).
# The two x86 architectures share one branch because their install steps
# are identical.  `set -e` makes a failed cd or ln abort the recipe
# rather than being hidden by later loop iterations.
override_dh_auto_install:
ifneq (,$(filter amd64 i386,$(DEB_HOST_ARCH)))
	dh_install debian/bin/simd-dispatch /usr/lib/$(DEB_SOURCE)/
	dh_install src/lastdb-* src/lastal-* src/last-split-* src/last-merge-batches-* src/last-pair-probs-* src/lastdb8-* src/lastal8-* src/last-split8-* usr/lib/last-align
	dh_install scripts/* usr/bin
	mkdir -p $(prefix)/bin
	set -e ; cd $(prefix)/bin ; \
	for prog in lastdb lastal last-split last-merge-batches last-pair-probs lastdb8 lastal8 last-split8 ; do \
		ln -s ../lib/$(DEB_SOURCE)/simd-dispatch $${prog} ; \
	done
else
	$(MAKE) -j$(NUMJOBS) install prefix=$(prefix)
endif
# Clean step: run dh_auto_clean once per SFX value used by the build on
# amd64 and i386 (i386 additionally cleans the -plain variant), and once
# without a suffix elsewhere.  `set -e` makes a failure in any iteration
# abort the recipe; without it only the last iteration's status counts.
override_dh_auto_clean:
ifeq (amd64,$(DEB_HOST_ARCH))
	set -e ; for SIMD in avx2 avx sse4.1 ssse3 sse3 sse2 ; do \
		dh_auto_clean -- SFX=-$${SIMD} ; \
	done
else ifeq (i386,$(DEB_HOST_ARCH))
	set -e ; for SIMD in ssse3 sse3 sse2 sse mmx plain; do \
		dh_auto_clean -- SFX=-$${SIMD} ; \
	done
else
	dh_auto_clean
endif
override_dh_installman:
dh_installman
......@@ -40,16 +95,16 @@ override_dh_installman:
$(HELP2MAN) \
--name="genome-scale comparison of biological sequences" \
--help-option="-h" \
$(CURDIR)/src/lastal > $(mandir)/lastal.1
$(prefix)/bin/lastal > $(mandir)/lastal.1
$(HELP2MAN) \
--name="genome-scale comparison of biological sequences" \
--help-option="-h" \
$(CURDIR)/src/lastal8 > $(mandir)/lastal8.1
$(prefix)/bin/lastal8 > $(mandir)/lastal8.1
# help2man output needs manual postprocessing - see debian/mans
#$(HELP2MAN) \
# --name="genome-scale comparison of biological sequences" \
# --help-option="-h" \
# $(CURDIR)/src/lastdb > $(mandir)/lastdb.1
# $(prefix)/bin/lastdb > $(mandir)/lastdb.1
$(HELP2MAN) \
--name="Sort MAF-format alignments by sequence name" \
$(CURDIR)/scripts/maf-sort > $(mandir)/maf-sort.1
......@@ -73,10 +128,10 @@ override_dh_installman:
# $(CURDIR)/scripts/last-remove-dominated > $(mandir)/last-remove-dominated.1
$(HELP2MAN) \
--name='Estimates "split" or "spliced" alignments.' \
$(CURDIR)/src/last-split > $(mandir)/last-split.1
$(prefix)/bin/last-split > $(mandir)/last-split.1
$(HELP2MAN) \
--name='Estimates "split" or "spliced" alignments.' \
$(CURDIR)/src/last-split8 > $(mandir)/last-split8.1
$(prefix)/bin/last-split8 > $(mandir)/last-split8.1
$(HELP2MAN) \
--name="Join two or more sets of MAF-format multiple alignments" \
$(CURDIR)/scripts/maf-join > $(mandir)/maf-join.1
......@@ -88,11 +143,11 @@ override_dh_installman:
$(CURDIR)/scripts/fastq-interleave > $(mandir)/fastq-interleave.1
$(HELP2MAN) \
--name="Read files of lastal output, merge corresponding batches, and write them" \
$(CURDIR)/src/last-merge-batches > $(mandir)/last-merge-batches.1
$(prefix)/bin/last-merge-batches > $(mandir)/last-merge-batches.1
# help2man output needs manual postprocessing - see debian/mans
#$(HELP2MAN) \
# --name="Read alignments of paired DNA reads to a genome, and do estimations" \
# $(CURDIR)/src/last-pair-probs > $(mandir)/last-pair-probs.1
# $(prefix)/bin/last-pair-probs > $(mandir)/last-pair-probs.1
$(HELP2MAN) \
--name="Get alignments that have a segment with score >= threshold, with gentle masking of lowercase letters" \
$(CURDIR)/scripts/last-postmask > $(mandir)/last-postmask.1
......