Skip to content
Commits on Source (2)
raxml (8.2.12+dfsg-2) UNRELEASED; urgency=medium
* Team upload.
* Build on non-x86 too, via SIMDE
-- Michael R. Crusoe <michael.crusoe@gmail.com> Mon, 16 Dec 2019 10:48:02 +0100
raxml (8.2.12+dfsg-1) unstable; urgency=medium
* New upstream version
......
......@@ -11,7 +11,7 @@ Vcs-Git: https://salsa.debian.org/med-team/raxml.git
Homepage: http://www.exelixis-lab.org/
Package: raxml
Architecture: amd64 i386 kfreebsd-i386 kfreebsd-amd64
Architecture: any
Depends: ${shlibs:Depends},
${misc:Depends}
Description: Randomized Axelerated Maximum Likelihood of phylogenetic trees
......
......@@ -32,3 +32,23 @@ License: GPL-2+
On Debian systems, the complete text of the GNU General Public
License version 2 can be found in `/usr/share/common-licenses/GPL-2'.
Files: debian/include/simde/*
Copyright: 2013-2019, Evan Nemerson <evan@nemerson.com>
License: MIT
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
.
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
/* Check (assertions)
* Portable Snippets - https://github.com/nemequ/portable-snippets
* Created by Evan Nemerson <evan@nemerson.com>
*
* To the extent possible under law, the authors have waived all
* copyright and related or neighboring rights to this code. For
* details, see the Creative Commons Zero 1.0 Universal license at
* https://creativecommons.org/publicdomain/zero/1.0/
*/
#if !defined(SIMDE_CHECK_H)
#define SIMDE_CHECK_H
/* Pick a default mode: unless the includer already chose SIMDE_NDEBUG or
 * SIMDE_DEBUG, select SIMDE_NDEBUG. */
#if !(defined(SIMDE_NDEBUG) || defined(SIMDE_DEBUG))
# define SIMDE_NDEBUG 1
#endif
#include <stdint.h>
/* printf length modifiers for size_t, char and short arguments.
 *
 * Outside of Windows the C99 modifiers ("z", "hh", "h") are used.  On
 * Windows the char/short modifiers are left empty and size_t uses "I64"
 * on 64-bit targets.  _WIN64 is tested in addition to the x86-64 macros
 * so that other 64-bit Windows targets (for example ARM64) do not end
 * up printing a 64-bit size_t with a plain "%u". */
#if !defined(_WIN32)
# define SIMDE_SIZE_MODIFIER "z"
# define SIMDE_CHAR_MODIFIER "hh"
# define SIMDE_SHORT_MODIFIER "h"
#else
# if defined(_WIN64) || defined(_M_X64) || defined(__amd64__)
# define SIMDE_SIZE_MODIFIER "I64"
# else
# define SIMDE_SIZE_MODIFIER ""
# endif
# define SIMDE_CHAR_MODIFIER ""
# define SIMDE_SHORT_MODIFIER ""
#endif
/* Helpers placed around the `while (0)` of the multi-statement assertion
 * macros.  With MSVC (_MSC_VER >= 1500) they push the warning state,
 * disable warning 4127 and pop the state again; with every other
 * compiler they expand to nothing. */
#if defined(_MSC_VER) && (_MSC_VER >= 1500)
# define SIMDE__PUSH_DISABLE_MSVC_C4127 __pragma(warning(push)) __pragma(warning(disable:4127))
# define SIMDE__POP_DISABLE_MSVC_C4127 __pragma(warning(pop))
#else
# define SIMDE__PUSH_DISABLE_MSVC_C4127
# define SIMDE__POP_DISABLE_MSVC_C4127
#endif
/* simde_errorf(format, ...): report a failed check.  The includer may
 * provide its own definition before this point; the default prints the
 * formatted message to stderr and then calls abort().  At least one
 * argument after `format` is required because __VA_ARGS__ is used
 * unconditionally. */
#if !defined(simde_errorf)
# include <stdio.h>
# include <stdlib.h>
# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort())
#endif
/* simde_error(msg): report a fixed message through simde_errorf(). */
#define simde_error(msg) simde_errorf("%s", msg)
#if defined(SIMDE_NDEBUG)
/* Release mode (SIMDE_NDEBUG): simde_assert() never reports a failure.
 * It expands to nothing when SIMDE_CHECK_FAIL_DEFINED is set; otherwise
 * the expression is handed to the compiler as an optimization hint
 * (HEDLEY_ASSUME, __builtin_unreachable() or __assume) when one is
 * available, and dropped when none is. */
# if defined(SIMDE_CHECK_FAIL_DEFINED)
# define simde_assert(expr)
# else
# if defined(HEDLEY_ASSUME)
# define simde_assert(expr) HEDLEY_ASSUME(expr)
# elif HEDLEY_GCC_VERSION_CHECK(4,5,0)
# define simde_assert(expr) ((void) (!!(expr) ? 1 : (__builtin_unreachable(), 1)))
# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0)
# define simde_assert(expr) __assume(expr)
# else
# define simde_assert(expr)
# endif
# endif
/* The boolean and typed comparisons reduce to simde_assert(). */
# define simde_assert_true(expr) simde_assert(expr)
# define simde_assert_false(expr) simde_assert(!(expr))
# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b)))
/* The floating-point, string and memory comparisons expand to nothing,
 * so their arguments are not evaluated in release mode. */
# define simde_assert_double_equal(a, b, precision)
# define simde_assert_string_equal(a, b)
# define simde_assert_string_not_equal(a, b)
# define simde_assert_memory_equal(size, a, b)
# define simde_assert_memory_not_equal(size, a, b)
#else
/* Debug mode: simde_assert(), simde_assert_true() and
 * simde_assert_false() test `expr` and, when the check fails, pass an
 * "assertion failed" message containing the stringified expression to
 * simde_error().  Each body is a do { } while (0) statement; the
 * SIMDE__*_MSVC_C4127 helpers bracket the `while (0)`. */
# define simde_assert(expr) \
do { \
if (!HEDLEY_LIKELY(expr)) { \
simde_error("assertion failed: " #expr "\n"); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) \
SIMDE__POP_DISABLE_MSVC_C4127
# define simde_assert_true(expr) \
do { \
if (!HEDLEY_LIKELY(expr)) { \
simde_error("assertion failed: " #expr " is not true\n"); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) \
SIMDE__POP_DISABLE_MSVC_C4127
# define simde_assert_false(expr) \
do { \
if (!HEDLEY_LIKELY(!(expr))) { \
simde_error("assertion failed: " #expr " is not false\n"); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) \
SIMDE__POP_DISABLE_MSVC_C4127
/* simde_assert_type_full(prefix, suffix, T, fmt, a, op, b): copy `a`
 * and `b` into temporaries of type T (so each is evaluated once), then
 * check `a op b`.  On failure the message shows the source text of the
 * comparison followed by both values, each printed as
 * prefix "%" fmt suffix.  `prefix`, `fmt` and `suffix` must be string
 * literals because they are concatenated into the format string. */
# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \
do { \
T simde_tmp_a_ = (a); \
T simde_tmp_b_ = (b); \
if (!(simde_tmp_a_ op simde_tmp_b_)) { \
simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \
#a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) \
SIMDE__POP_DISABLE_MSVC_C4127
/* simde_assert_double_equal(a, b, precision): fail when |a - b| exceeds
 * 1e-<precision>.  `precision` must be a plain integer literal: it is
 * token-pasted onto `1e-` to build the tolerance and stringified into
 * the "%0.<precision>g" conversions used for the failure message. */
# define simde_assert_double_equal(a, b, precision) \
do { \
const double simde_tmp_a_ = (a); \
const double simde_tmp_b_ = (b); \
const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \
-(simde_tmp_a_ - simde_tmp_b_) : \
(simde_tmp_a_ - simde_tmp_b_); \
if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \
simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." #precision "g)\n", \
#a, #b, simde_tmp_a_, simde_tmp_b_); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) \
SIMDE__POP_DISABLE_MSVC_C4127
/* <string.h> supplies strcmp() and memcmp() for the string and memory
 * assertions that follow. */
# include <string.h>
/* simde_assert_string_equal(a, b): fail unless strcmp(a, b) == 0.  Both
 * arguments are captured once as const char*; the failure message
 * prints the source text and the two strings. */
# define simde_assert_string_equal(a, b) \
do { \
const char* simde_tmp_a_ = a; \
const char* simde_tmp_b_ = b; \
if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \
simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \
#a, #b, simde_tmp_a_, simde_tmp_b_); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) \
SIMDE__POP_DISABLE_MSVC_C4127
/* simde_assert_string_not_equal(a, b): fail when strcmp(a, b) == 0.
 * The message shows the two (equal) strings joined by "==". */
# define simde_assert_string_not_equal(a, b) \
do { \
const char* simde_tmp_a_ = a; \
const char* simde_tmp_b_ = b; \
if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \
simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \
#a, #b, simde_tmp_a_, simde_tmp_b_); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) \
SIMDE__POP_DISABLE_MSVC_C4127
/* simde_assert_memory_equal(size, a, b): fail unless the first `size`
 * bytes at `a` and `b` are identical.  On a mismatch the buffers are
 * scanned byte by byte and the offset of the first difference is
 * reported through simde_errorf().
 *
 * The complete comparison (memcmp(...) != 0) sits inside
 * HEDLEY_UNLIKELY so that the branch hint describes the failure
 * condition itself rather than the raw memcmp() result. */
# define simde_assert_memory_equal(size, a, b) \
do { \
const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \
const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \
const size_t simde_tmp_size_ = (size); \
if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_) != 0)) { \
size_t simde_tmp_pos_; \
for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \
if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \
simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \
#a, #b, simde_tmp_pos_); \
break; \
} \
} \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) \
SIMDE__POP_DISABLE_MSVC_C4127
/* simde_assert_memory_not_equal(size, a, b): fail when the first `size`
 * bytes at `a` and `b` are identical; the failure message reports the
 * compared length.
 *
 * The complete comparison (memcmp(...) == 0) sits inside
 * HEDLEY_UNLIKELY.  With the comparison outside the hint, the compiler
 * would be told that memcmp() is expected to return 0, i.e. that the
 * failure path is the likely one. */
# define simde_assert_memory_not_equal(size, a, b) \
do { \
const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \
const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \
const size_t simde_tmp_size_ = (size); \
if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_) == 0)) { \
simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \
#a, #b, simde_tmp_size_); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) \
SIMDE__POP_DISABLE_MSVC_C4127
#endif
/* simde_assert_type(T, fmt, a, op, b): simde_assert_type_full() with an
 * empty prefix and suffix around each printed value. */
#define simde_assert_type(T, fmt, a, op, b) \
simde_assert_type_full("", "", T, fmt, a, op, b)
/* char / unsigned char values are printed as a quoted two-digit hex
 * escape, e.g. '\x41'. */
#define simde_assert_char(a, op, b) \
simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b)
#define simde_assert_uchar(a, op, b) \
simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b)
/* Typed comparisons for the basic integer types; each one fixes the
 * temporary type and the printf conversion used in the message. */
#define simde_assert_short(a, op, b) \
simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b)
#define simde_assert_ushort(a, op, b) \
simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b)
#define simde_assert_int(a, op, b) \
simde_assert_type(int, "d", a, op, b)
#define simde_assert_uint(a, op, b) \
simde_assert_type(unsigned int, "u", a, op, b)
#define simde_assert_long(a, op, b) \
simde_assert_type(long int, "ld", a, op, b)
#define simde_assert_ulong(a, op, b) \
simde_assert_type(unsigned long int, "lu", a, op, b)
#define simde_assert_llong(a, op, b) \
simde_assert_type(long long int, "lld", a, op, b)
#define simde_assert_ullong(a, op, b) \
simde_assert_type(unsigned long long int, "llu", a, op, b)
#define simde_assert_size(a, op, b) \
simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b)
/* Floating-point and pointer comparisons (exact `op`, no tolerance). */
#define simde_assert_float(a, op, b) \
simde_assert_type(float, "f", a, op, b)
#define simde_assert_double(a, op, b) \
simde_assert_type(double, "g", a, op, b)
#define simde_assert_ptr(a, op, b) \
simde_assert_type(const void*, "p", a, op, b)
/* Fixed-width integer assertions.  The PRI* conversion macros used
 * below are declared by <inttypes.h>, not <stdint.h>, so the header is
 * included here to keep these macros usable without extra includes at
 * the call site. */
#include <inttypes.h>
#define simde_assert_int8(a, op, b) \
simde_assert_type(int8_t, PRIi8, a, op, b)
#define simde_assert_uint8(a, op, b) \
simde_assert_type(uint8_t, PRIu8, a, op, b)
#define simde_assert_int16(a, op, b) \
simde_assert_type(int16_t, PRIi16, a, op, b)
#define simde_assert_uint16(a, op, b) \
simde_assert_type(uint16_t, PRIu16, a, op, b)
#define simde_assert_int32(a, op, b) \
simde_assert_type(int32_t, PRIi32, a, op, b)
#define simde_assert_uint32(a, op, b) \
simde_assert_type(uint32_t, PRIu32, a, op, b)
#define simde_assert_int64(a, op, b) \
simde_assert_type(int64_t, PRIi64, a, op, b)
#define simde_assert_uint64(a, op, b) \
simde_assert_type(uint64_t, PRIu64, a, op, b)
/* Pointer convenience wrappers around simde_assert_ptr(). */
#define simde_assert_ptr_equal(a, b) \
simde_assert_ptr(a, ==, b)
#define simde_assert_ptr_not_equal(a, b) \
simde_assert_ptr(a, !=, b)
/* simde_assert_null / simde_assert_not_null and the _ptr_ spellings
 * below are identical pairs: both compare the pointer against NULL. */
#define simde_assert_null(ptr) \
simde_assert_ptr(ptr, ==, NULL)
#define simde_assert_not_null(ptr) \
simde_assert_ptr(ptr, !=, NULL)
#define simde_assert_ptr_null(ptr) \
simde_assert_ptr(ptr, ==, NULL)
#define simde_assert_ptr_not_null(ptr) \
simde_assert_ptr(ptr, !=, NULL)
#endif /* !defined(SIMDE_CHECK_H) */
This diff is collapsed.
/* Architecture detection
* Created by Evan Nemerson <evan@nemerson.com>
*
* To the extent possible under law, the authors have waived all
* copyright and related or neighboring rights to this code. For
* details, see the Creative Commons Zero 1.0 Universal license at
* <https://creativecommons.org/publicdomain/zero/1.0/>
*
* Different compilers define different preprocessor macros for the
* same architecture. This is an attempt to provide a single
* interface which is usable on any compiler.
*
* In general, a macro named SIMDE_ARCH_* is defined for each
* architecture the CPU supports. When there are multiple possible
* versions, we try to define the macro to the target version. For
* example, if you want to check for i586+, you could do something
* like:
*
* #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5)
* ...
* #endif
*
* You could also just check that SIMDE_ARCH_X86 >= 5 without checking
* if it's defined first, but some compilers may emit a warning about
* an undefined macro being used (e.g., GCC with -Wundef).
*
* This was originally created for SIMDe
* <https://github.com/nemequ/simde> (hence the prefix), but this
* header has no dependencies and may be used anywhere. It is
* originally based on information from
* <https://sourceforge.net/p/predef/wiki/Architectures/>, though it
* has been enhanced with additional information.
*
* If you improve this file, or find a bug, please file the issue at
* <https://github.com/nemequ/simde/issues>. If you copy this into
* your project, even if you change the prefix, please keep the links
* to SIMDe intact so others know where to report issues, submit
* enhancements, and find the latest version. */
#if !defined(SIMDE_ARCH_H)
#define SIMDE_ARCH_H
/* Alpha
<https://en.wikipedia.org/wiki/DEC_Alpha> */
/* SIMDE_ARCH_ALPHA is 6, 5 or 4 when the matching __alpha_evN__ macro
 * is defined, and 1 when only a generic Alpha macro is. */
#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA)
# if defined(__alpha_ev6__)
# define SIMDE_ARCH_ALPHA 6
# elif defined(__alpha_ev5__)
# define SIMDE_ARCH_ALPHA 5
# elif defined(__alpha_ev4__)
# define SIMDE_ARCH_ALPHA 4
# else
# define SIMDE_ARCH_ALPHA 1
# endif
#endif
/* Atmel AVR
<https://en.wikipedia.org/wiki/Atmel_AVR> */
/* SIMDE_ARCH_AVR mirrors the compiler-provided __AVR_ARCH__ value. */
#ifdef __AVR_ARCH__
# define SIMDE_ARCH_AVR __AVR_ARCH__
#endif
/* AMD64 / x86_64
<https://en.wikipedia.org/wiki/X86-64> */
/* SIMDE_ARCH_AMD64 is 1 on x86-64 targets.  The GCC/clang spellings and
 * both MSVC spellings (_M_X64, _M_AMD64) are recognised. */
#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
# define SIMDE_ARCH_AMD64 1
#endif
/* ARM
<https://en.wikipedia.org/wiki/ARM_architecture> */
/* SIMDE_ARCH_ARM encodes the ARM architecture as version * 10 plus a
 * profile digit where a profile-specific macro is available (for
 * example 71 for ARMv7-A, 82 for ARMv8-A).  Without such a macro the
 * numeric __ARM_ARCH or _M_ARM value is scaled by 10, and a bare
 * "this is ARM" macro yields 1.
 *
 * _M_ARM is not repeated in the last branch: it can never be reached
 * there because the preceding branch already handles it. */
#if defined(__ARM_ARCH_8A__)
# define SIMDE_ARCH_ARM 82
#elif defined(__ARM_ARCH_8R__)
# define SIMDE_ARCH_ARM 81
#elif defined(__ARM_ARCH_8__)
# define SIMDE_ARCH_ARM 80
#elif defined(__ARM_ARCH_7S__)
# define SIMDE_ARCH_ARM 74
#elif defined(__ARM_ARCH_7M__)
# define SIMDE_ARCH_ARM 73
#elif defined(__ARM_ARCH_7R__)
# define SIMDE_ARCH_ARM 72
#elif defined(__ARM_ARCH_7A__)
# define SIMDE_ARCH_ARM 71
#elif defined(__ARM_ARCH_7__)
# define SIMDE_ARCH_ARM 70
#elif defined(__ARM_ARCH)
# define SIMDE_ARCH_ARM (__ARM_ARCH * 10)
#elif defined(_M_ARM)
# define SIMDE_ARCH_ARM (_M_ARM * 10)
#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM)
# define SIMDE_ARCH_ARM 1
#endif
/* AArch64
<https://en.wikipedia.org/wiki/ARM_architecture> */
/* SIMDE_ARCH_AARCH64 is 10 when either the MSVC (_M_ARM64) or the
 * GCC/clang (__aarch64__) 64-bit ARM macro is present. */
#if defined(_M_ARM64) || defined(__aarch64__)
# define SIMDE_ARCH_AARCH64 10
#endif
/* Blackfin
<https://en.wikipedia.org/wiki/Blackfin> */
/* SIMDE_ARCH_BLACKFIN is 1 when any of the Blackfin macros is defined. */
#if defined(__bfin__) || defined(__BFIN__) || defined(__bfin)
# define SIMDE_ARCH_BLACKFIN 1
#endif
/* CRIS
<https://en.wikipedia.org/wiki/ETRAX_CRIS> */
/* SIMDE_ARCH_CRIS takes the compiler's __CRIS_arch_version when it is
 * provided, otherwise 1 if any generic CRIS macro is defined. */
#if defined(__CRIS_arch_version)
# define SIMDE_ARCH_CRIS __CRIS_arch_version
#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__)
# define SIMDE_ARCH_CRIS 1
#endif
/* Convex
<https://en.wikipedia.org/wiki/Convex_Computer> */
/* SIMDE_ARCH_CONVEX is the model number taken from the __convex_cNN__
 * macro (38, 34, 32 or 2), or 1 when only __convex__ is defined. */
#if defined(__convex_c38__)
# define SIMDE_ARCH_CONVEX 38
#elif defined(__convex_c34__)
# define SIMDE_ARCH_CONVEX 34
#elif defined(__convex_c32__)
# define SIMDE_ARCH_CONVEX 32
#elif defined(__convex_c2__)
# define SIMDE_ARCH_CONVEX 2
#elif defined(__convex__)
# define SIMDE_ARCH_CONVEX 1
#endif
/* Adapteva Epiphany
<https://en.wikipedia.org/wiki/Adapteva_Epiphany> */
/* SIMDE_ARCH_EPIPHANY is 1 when __epiphany__ is defined. */
#ifdef __epiphany__
# define SIMDE_ARCH_EPIPHANY 1
#endif
/* Fujitsu FR-V
<https://en.wikipedia.org/wiki/FR-V_(microprocessor)> */
/* SIMDE_ARCH_FRV is 1 when __frv__ is defined. */
#ifdef __frv__
# define SIMDE_ARCH_FRV 1
#endif
/* H8/300
<https://en.wikipedia.org/wiki/H8_Family> */
/* SIMDE_ARCH_H8300 is 1 on H8/300 targets.  It carries a value, like
 * the other single-version architectures in this header, so that it
 * can be used in `#if SIMDE_ARCH_H8300 >= n` style checks as well as
 * with defined(). */
#if defined(__H8300__)
# define SIMDE_ARCH_H8300 1
#endif
/* HP/PA / PA-RISC
<https://en.wikipedia.org/wiki/PA-RISC> */
/* SIMDE_ARCH_HPPA is the PA-RISC revision times ten (20, 11 or 10) when
 * a revision-specific macro is defined, or 1 when only a generic HPPA
 * macro is. */
#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0)
# define SIMDE_ARCH_HPPA 20
#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1)
# define SIMDE_ARCH_HPPA 11
#elif defined(_PA_RISC1_0)
# define SIMDE_ARCH_HPPA 10
#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa)
# define SIMDE_ARCH_HPPA 1
#endif
/* x86
<https://en.wikipedia.org/wiki/X86> */
/* SIMDE_ARCH_X86 is the x86 generation digit (3 for i386 ... 6 for
 * i686).  It is derived from _M_IX86 / 100 or __I86__ when the
 * compiler provides one of those, otherwise from the most specific
 * iN86 macro that is defined; generic x86 macros fall back to 3. */
#if defined(_M_IX86)
# define SIMDE_ARCH_X86 (_M_IX86 / 100)
#elif defined(__I86__)
# define SIMDE_ARCH_X86 __I86__
#elif defined(i686) || defined(__i686) || defined(__i686__)
# define SIMDE_ARCH_X86 6
#elif defined(i586) || defined(__i586) || defined(__i586__)
# define SIMDE_ARCH_X86 5
#elif defined(i486) || defined(__i486) || defined(__i486__)
# define SIMDE_ARCH_X86 4
#elif defined(i386) || defined(__i386) || defined(__i386__)
# define SIMDE_ARCH_X86 3
#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__)
# define SIMDE_ARCH_X86 3
#endif
/* Itanium
<https://en.wikipedia.org/wiki/Itanium> */
/* SIMDE_ARCH_IA64 is 1 when any of the Itanium macros is defined. */
#if defined(__itanium__) || defined(_M_IA64) || defined(__ia64) || defined(__IA64__) || defined(_IA64) || defined(__ia64__)
# define SIMDE_ARCH_IA64 1
#endif
/* Renesas M32R
<https://en.wikipedia.org/wiki/M32R> */
/* SIMDE_ARCH_M32R is 1 on Renesas M32R targets.  It carries a value so
 * that it can be used in `#if SIMDE_ARCH_M32R >= n` style checks as
 * well as with defined(). */
#if defined(__m32r__) || defined(__M32R__)
# define SIMDE_ARCH_M32R 1
#endif
/* Motorola 68000
<https://en.wikipedia.org/wiki/Motorola_68000> */
/* SIMDE_ARCH_M68K is the full model number (68000 ... 68060) of the
 * most capable model macro that is defined; newer models are tested
 * first.  There is no generic fallback branch. */
#if defined(__mc68060__) || defined(__MC68060__)
# define SIMDE_ARCH_M68K 68060
#elif defined(__mc68040__) || defined(__MC68040__)
# define SIMDE_ARCH_M68K 68040
#elif defined(__mc68030__) || defined(__MC68030__)
# define SIMDE_ARCH_M68K 68030
#elif defined(__mc68020__) || defined(__MC68020__)
# define SIMDE_ARCH_M68K 68020
#elif defined(__mc68010__) || defined(__MC68010__)
# define SIMDE_ARCH_M68K 68010
#elif defined(__mc68000__) || defined(__MC68000__)
# define SIMDE_ARCH_M68K 68000
#endif
/* Xilinx MicroBlaze
<https://en.wikipedia.org/wiki/MicroBlaze> */
/* SIMDE_ARCH_MICROBLAZE is 1 on Xilinx MicroBlaze targets.  It carries
 * a value so that it can be used in `#if SIMDE_ARCH_MICROBLAZE >= n`
 * style checks as well as with defined(). */
#if defined(__MICROBLAZE__) || defined(__microblaze__)
# define SIMDE_ARCH_MICROBLAZE 1
#endif
/* MIPS
<https://en.wikipedia.org/wiki/MIPS_architecture> */
/* SIMDE_ARCH_MIPS encodes the ISA level: 642/640 for MIPS64 R2/R1,
 * 322/320 for MIPS32 R2/R1, 4..1 for MIPS IV..I, and 1 when only a
 * generic MIPS macro is defined.  The most specific macro wins because
 * it is tested first. */
#if defined(_MIPS_ISA_MIPS64R2)
# define SIMDE_ARCH_MIPS 642
#elif defined(_MIPS_ISA_MIPS64)
# define SIMDE_ARCH_MIPS 640
#elif defined(_MIPS_ISA_MIPS32R2)
# define SIMDE_ARCH_MIPS 322
#elif defined(_MIPS_ISA_MIPS32)
# define SIMDE_ARCH_MIPS 320
#elif defined(_MIPS_ISA_MIPS4)
# define SIMDE_ARCH_MIPS 4
#elif defined(_MIPS_ISA_MIPS3)
# define SIMDE_ARCH_MIPS 3
#elif defined(_MIPS_ISA_MIPS2)
# define SIMDE_ARCH_MIPS 2
#elif defined(_MIPS_ISA_MIPS1)
# define SIMDE_ARCH_MIPS 1
#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__)
# define SIMDE_ARCH_MIPS 1
#endif
/* Matsushita MN10300
<https://en.wikipedia.org/wiki/MN103> */
/* SIMDE_ARCH_MN10300 is 1 when either MN10300 macro is defined. */
#if defined(__mn10300__) || defined(__MN10300__)
# define SIMDE_ARCH_MN10300 1
#endif
/* POWER
<https://en.wikipedia.org/wiki/IBM_POWER_Instruction_Set_Architecture> */
/* SIMDE_ARCH_POWER: _M_PPC is used verbatim when present; otherwise the
 * _ARCH_PWRn macros map to n * 100 (800 ... 400), the PowerPC model
 * macros map to their model number (440, 450, 601, ...), and a generic
 * POWER/PowerPC macro yields 1.
 * NOTE(review): the POWERn values and the PowerPC model numbers share
 * one numeric range (e.g. 440 lies between 400 and 500), so ordering
 * comparisons across the two families are probably not meaningful --
 * confirm before relying on `>=` checks. */
#if defined(_M_PPC)
# define SIMDE_ARCH_POWER _M_PPC
#elif defined(_ARCH_PWR8)
# define SIMDE_ARCH_POWER 800
#elif defined(_ARCH_PWR7)
# define SIMDE_ARCH_POWER 700
#elif defined(_ARCH_PWR6)
# define SIMDE_ARCH_POWER 600
#elif defined(_ARCH_PWR5)
# define SIMDE_ARCH_POWER 500
#elif defined(_ARCH_PWR4)
# define SIMDE_ARCH_POWER 400
#elif defined(_ARCH_440) || defined(__ppc440__)
# define SIMDE_ARCH_POWER 440
#elif defined(_ARCH_450) || defined(__ppc450__)
# define SIMDE_ARCH_POWER 450
#elif defined(_ARCH_601) || defined(__ppc601__)
# define SIMDE_ARCH_POWER 601
#elif defined(_ARCH_603) || defined(__ppc603__)
# define SIMDE_ARCH_POWER 603
#elif defined(_ARCH_604) || defined(__ppc604__)
# define SIMDE_ARCH_POWER 604
#elif defined(_ARCH_605) || defined(__ppc605__)
# define SIMDE_ARCH_POWER 605
#elif defined(_ARCH_620) || defined(__ppc620__)
# define SIMDE_ARCH_POWER 620
#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc)
# define SIMDE_ARCH_POWER 1
#endif
/* SPARC
<https://en.wikipedia.org/wiki/SPARC> */
/* SIMDE_ARCH_SPARC is the SPARC version (9 ... 1) taken from the most
 * recent version macro that is defined, or 1 when only a generic SPARC
 * macro is. */
#if defined(__sparc_v9__) || defined(__sparcv9)
# define SIMDE_ARCH_SPARC 9
#elif defined(__sparc_v8__) || defined(__sparcv8)
# define SIMDE_ARCH_SPARC 8
#elif defined(__sparc_v7__) || defined(__sparcv7)
# define SIMDE_ARCH_SPARC 7
#elif defined(__sparc_v6__) || defined(__sparcv6)
# define SIMDE_ARCH_SPARC 6
#elif defined(__sparc_v5__) || defined(__sparcv5)
# define SIMDE_ARCH_SPARC 5
#elif defined(__sparc_v4__) || defined(__sparcv4)
# define SIMDE_ARCH_SPARC 4
#elif defined(__sparc_v3__) || defined(__sparcv3)
# define SIMDE_ARCH_SPARC 3
#elif defined(__sparc_v2__) || defined(__sparcv2)
# define SIMDE_ARCH_SPARC 2
#elif defined(__sparc_v1__) || defined(__sparcv1)
# define SIMDE_ARCH_SPARC 1
#elif defined(__sparc__) || defined(__sparc)
# define SIMDE_ARCH_SPARC 1
#endif
/* SuperH
<https://en.wikipedia.org/wiki/SuperH> */
/* SIMDE_ARCH_SUPERH is the SuperH generation (5 ... 1) taken from the
 * most recent generation macro that is defined, or 1 when only a
 * generic SuperH macro is. */
#if defined(__sh5__) || defined(__SH5__)
# define SIMDE_ARCH_SUPERH 5
#elif defined(__sh4__) || defined(__SH4__)
# define SIMDE_ARCH_SUPERH 4
#elif defined(__sh3__) || defined(__SH3__)
# define SIMDE_ARCH_SUPERH 3
#elif defined(__sh2__) || defined(__SH2__)
# define SIMDE_ARCH_SUPERH 2
#elif defined(__sh1__) || defined(__SH1__)
# define SIMDE_ARCH_SUPERH 1
#elif defined(__sh__) || defined(__SH__)
# define SIMDE_ARCH_SUPERH 1
#endif
/* IBM System z
<https://en.wikipedia.org/wiki/IBM_System_z> */
/* SIMDE_ARCH_SYSTEMZ is 1 on IBM System z / S/390 targets.  It carries
 * a value so that it can be used in `#if SIMDE_ARCH_SYSTEMZ >= n` style
 * checks as well as with defined(). */
#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__)
# define SIMDE_ARCH_SYSTEMZ 1
#endif
/* TMS320 DSP
<https://en.wikipedia.org/wiki/Texas_Instruments_TMS320> */
/* SIMDE_ARCH_TMS320 identifies the DSP family from the first matching
 * _TMS320Cxxxx macro.  C6x families use their four-digit number, with
 * the "_PLUS" variants mapped to the base number + 1 (6701, 6401);
 * the C55x/C54x/C28x families map to 550, 540 and 280.  There is no
 * generic fallback branch. */
#if defined(_TMS320C6740) || defined(__TMS320C6740__)
# define SIMDE_ARCH_TMS320 6740
#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__)
# define SIMDE_ARCH_TMS320 6701
#elif defined(_TMS320C6700) || defined(__TMS320C6700__)
# define SIMDE_ARCH_TMS320 6700
#elif defined(_TMS320C6600) || defined(__TMS320C6600__)
# define SIMDE_ARCH_TMS320 6600
#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__)
# define SIMDE_ARCH_TMS320 6401
#elif defined(_TMS320C6400) || defined(__TMS320C6400__)
# define SIMDE_ARCH_TMS320 6400
#elif defined(_TMS320C6200) || defined(__TMS320C6200__)
# define SIMDE_ARCH_TMS320 6200
#elif defined(_TMS320C55X) || defined(__TMS320C55X__)
# define SIMDE_ARCH_TMS320 550
#elif defined(_TMS320C54X) || defined(__TMS320C54X__)
# define SIMDE_ARCH_TMS320 540
#elif defined(_TMS320C28X) || defined(__TMS320C28X__)
# define SIMDE_ARCH_TMS320 280
#endif
/* Xtensa
<https://en.wikipedia.org/wiki/Tensilica> */
/* SIMDE_ARCH_XTENSA is 1 when either Xtensa macro is defined. */
#if defined(__XTENSA__) || defined(__xtensa__)
# define SIMDE_ARCH_XTENSA 1
#endif
#endif /* !defined(SIMDE_ARCH_H) */
/* Copyright (c) 2017-2019 Evan Nemerson <evan@nemerson.com>
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#if !defined(SIMDE_COMMON_H)
#define SIMDE_COMMON_H
#include "hedley.h"
#include "check.h"
#include "simde-arch.h"
/* SIMDE_ALIGN(alignment): request `alignment`-byte alignment for a
 * declaration, using the first spelling the compiler supports: the GNU
 * aligned attribute, MSVC's __declspec(align) (every MSVC target except
 * 32-bit x86), C11 _Alignas, or C++11 alignas.  When none is available
 * the macro expands to nothing, i.e. no alignment is requested. */
#if \
HEDLEY_HAS_ATTRIBUTE(aligned) || \
HEDLEY_GCC_VERSION_CHECK(2,95,0) || \
HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \
HEDLEY_IBM_VERSION_CHECK(11,1,0) || \
HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
HEDLEY_PGI_VERSION_CHECK(19,4,0) || \
HEDLEY_ARM_VERSION_CHECK(4,1,0) || \
HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \
HEDLEY_TI_VERSION_CHECK(8,1,0)
# define SIMDE_ALIGN(alignment) __attribute__((aligned(alignment)))
#elif defined(_MSC_VER) && (!defined(_M_IX86) || defined(_M_AMD64))
# define SIMDE_ALIGN(alignment) __declspec(align(alignment))
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
# define SIMDE_ALIGN(alignment) _Alignas(alignment)
#elif defined(__cplusplus) && (__cplusplus >= 201103L)
# define SIMDE_ALIGN(alignment) alignas(alignment)
#else
# define SIMDE_ALIGN(alignment)
#endif
/* simde_assert_aligned(alignment, val): assert that the address `val`,
 * converted to uintptr_t, is a multiple of `alignment`.  The remainder
 * is compared through simde_assert_int(). */
#define simde_assert_aligned(alignment, val) \
simde_assert_int(((uintptr_t) (val)) % (alignment), ==, 0)
/* TODO: this should really do something like
HEDLEY_STATIC_CAST(T, (simde_assert_int(alignment, v), v))
but I need to think about how to handle it in all compilers...
may end up moving to Hedley, too. */
/* SIMDE_CAST_ALIGN(alignment, T, v): cast `v` to type T.
 *  - With __builtin_assume_aligned the cast also tells the compiler the
 *    result is aligned to `alignment`.
 *  - Otherwise, if the compiler knows -Wcast-align, the cast is wrapped
 *    in diagnostic push/ignore/pop pragmas; `alignment` is unused.
 *  - Otherwise it is a plain HEDLEY_STATIC_CAST; `alignment` is unused. */
#if HEDLEY_HAS_BUILTIN(__builtin_assume_aligned)
# define SIMDE_CAST_ALIGN(alignment, T, v) ((T) __builtin_assume_aligned(v, alignment))
#elif HEDLEY_HAS_WARNING("-Wcast-align")
# define SIMDE_CAST_ALIGN(alignment, T, v) \
HEDLEY_DIAGNOSTIC_PUSH \
_Pragma("clang diagnostic ignored \"-Wcast-align\"") \
HEDLEY_STATIC_CAST(T, v) \
HEDLEY_DIAGNOSTIC_POP
#else
# define SIMDE_CAST_ALIGN(alignment, T, v) HEDLEY_STATIC_CAST(T, v)
#endif
/* SIMDE__ENABLE_GCC_VEC_EXT is defined when the compiler supports the
 * vector_size attribute (GCC >= 4.6 or a compiler reporting the
 * attribute). */
#if HEDLEY_GCC_HAS_ATTRIBUTE(vector_size,4,6,0)
# define SIMDE__ENABLE_GCC_VEC_EXT
#endif
/* Enable the OpenMP SIMD backend automatically when the compiler
 * reports _OPENMP or _OPENMP_SIMD >= 201307L; the includer may also
 * define SIMDE_ENABLE_OPENMP itself. */
#if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L)))
# define SIMDE_ENABLE_OPENMP
#endif
/* Enable the Cilk Plus backend automatically when __cilk is defined;
 * the includer may also define SIMDE_ENABLE_CILKPLUS itself. */
#if !defined(SIMDE_ENABLE_CILKPLUS) && defined(__cilk)
# define SIMDE_ENABLE_CILKPLUS
#endif
/* Loop-vectorization hints.  SIMDE__VECTORIZE and its _SAFELEN(l),
 * _REDUCTION(r) and _ALIGNED(a) variants expand to the pragma of the
 * first available backend, tried in this order: OpenMP SIMD, Cilk
 * Plus, the Intel compiler, clang, GCC >= 4.9 (ivdep) and Cray (ivdep).
 * Where a backend has no equivalent for a variant, the variant either
 * falls back to plain SIMDE__VECTORIZE or expands to nothing.  With no
 * backend at all, all four macros are empty. */
#if defined(SIMDE_ENABLE_OPENMP)
# define SIMDE__VECTORIZE _Pragma("omp simd")
# define SIMDE__VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l))
# define SIMDE__VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r))
# define SIMDE__VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a))
#elif defined(SIMDE_ENABLE_CILKPLUS)
# define SIMDE__VECTORIZE _Pragma("simd")
# define SIMDE__VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l))
# define SIMDE__VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r))
# define SIMDE__VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a))
#elif defined(__INTEL_COMPILER)
# define SIMDE__VECTORIZE _Pragma("simd")
# define SIMDE__VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l))
# define SIMDE__VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r))
# define SIMDE__VECTORIZE_ALIGNED(a)
#elif defined(__clang__)
# define SIMDE__VECTORIZE _Pragma("clang loop vectorize(enable)")
# define SIMDE__VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l))
# define SIMDE__VECTORIZE_REDUCTION(r) SIMDE__VECTORIZE
# define SIMDE__VECTORIZE_ALIGNED(a)
#elif HEDLEY_GCC_VERSION_CHECK(4,9,0)
# define SIMDE__VECTORIZE _Pragma("GCC ivdep")
# define SIMDE__VECTORIZE_SAFELEN(l) SIMDE__VECTORIZE
# define SIMDE__VECTORIZE_REDUCTION(r) SIMDE__VECTORIZE
# define SIMDE__VECTORIZE_ALIGNED(a)
#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0)
# define SIMDE__VECTORIZE _Pragma("_CRI ivdep")
# define SIMDE__VECTORIZE_SAFELEN(l) SIMDE__VECTORIZE
# define SIMDE__VECTORIZE_REDUCTION(r) SIMDE__VECTORIZE
# define SIMDE__VECTORIZE_ALIGNED(a)
#else
# define SIMDE__VECTORIZE
# define SIMDE__VECTORIZE_SAFELEN(l)
# define SIMDE__VECTORIZE_REDUCTION(r)
# define SIMDE__VECTORIZE_ALIGNED(a)
#endif
/* SIMDE__UNUSED: the GNU __unused__ attribute where supported (GCC >=
 * 3.1 or a compiler reporting the attribute), otherwise empty. */
#if HEDLEY_GCC_HAS_ATTRIBUTE(unused,3,1,0)
# define SIMDE__UNUSED __attribute__((__unused__))
#else
# define SIMDE__UNUSED
#endif
/* SIMDE__ARTIFICIAL: the GNU __artificial__ attribute where supported
 * (GCC >= 4.3 or a compiler reporting the attribute), otherwise empty. */
#if HEDLEY_GCC_HAS_ATTRIBUTE(artificial,4,3,0)
# define SIMDE__ARTIFICIAL __attribute__((__artificial__))
#else
# define SIMDE__ARTIFICIAL
#endif
/* Intended for checking coverage, you should never use this in
production. */
/* SIMDE__FUNCTION_ATTRIBUTES: declaration prefix for function
 * definitions.  The default is `static` with HEDLEY_INLINE and
 * SIMDE__ARTIFICIAL; defining SIMDE_NO_INLINE switches to `static` with
 * HEDLEY_NEVER_INLINE and SIMDE__UNUSED. */
#if defined(SIMDE_NO_INLINE)
# define SIMDE__FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE SIMDE__UNUSED static
#else
# define SIMDE__FUNCTION_ATTRIBUTES HEDLEY_INLINE SIMDE__ARTIFICIAL static
#endif
/* SIMDE__BEGIN_DECLS / SIMDE__END_DECLS bracket declarations with
 * HEDLEY_BEGIN_C_DECLS / HEDLEY_END_C_DECLS.  With MSVC the opening
 * macro additionally pushes the diagnostic state and disables warnings
 * 4996 and 4204, and the closing macro pops that state. */
#if defined(_MSC_VER)
# define SIMDE__BEGIN_DECLS HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS
# define SIMDE__END_DECLS HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS
#else
# define SIMDE__BEGIN_DECLS HEDLEY_BEGIN_C_DECLS
# define SIMDE__END_DECLS HEDLEY_END_C_DECLS
#endif
/* 128-bit integer typedefs, provided (together with the
 * SIMDE__HAVE_INT128 feature macro) only when the compiler advertises
 * __int128 through __SIZEOF_INT128__. */
#ifdef __SIZEOF_INT128__
# define SIMDE__HAVE_INT128
typedef unsigned __int128 simde_uint128;
typedef __int128 simde_int128;
#endif
/* TODO: we should at least make an attempt to detect the correct
types for simde_float32/float64 instead of just assuming float and
double. */
/* simde_float32: the 32-bit floating-point type.  SIMDE_FLOAT32_TYPE
 * may be predefined by the includer and defaults to float.
 * SIMDE_FLOAT32_C(value) builds a constant of that type: with the
 * default type it appends the `f` suffix to the literal, with a
 * user-supplied type it casts.  A static assertion rejects a type that
 * is not 4 bytes wide. */
#if !defined(SIMDE_FLOAT32_TYPE)
# define SIMDE_FLOAT32_TYPE float
# define SIMDE_FLOAT32_C(value) value##f
#else
# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value)
#endif
typedef SIMDE_FLOAT32_TYPE simde_float32;
HEDLEY_STATIC_ASSERT(sizeof(simde_float32) == 4, "Unable to find 32-bit floating-point type.");
/* simde_float64: the 64-bit floating-point type.  SIMDE_FLOAT64_TYPE
 * may be predefined by the includer and defaults to double.
 * SIMDE_FLOAT64_C(value) builds a constant of that type: with the
 * default type the literal is used as written, with a user-supplied
 * type it is cast.  Both branches must define SIMDE_FLOAT64_C (and
 * must leave SIMDE_FLOAT32_C alone) so the macro exists whichever way
 * the type was chosen.  A static assertion rejects a type that is not
 * 8 bytes wide. */
#if !defined(SIMDE_FLOAT64_TYPE)
# define SIMDE_FLOAT64_TYPE double
# define SIMDE_FLOAT64_C(value) value
#else
# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value)
#endif
typedef SIMDE_FLOAT64_TYPE simde_float64;
HEDLEY_STATIC_ASSERT(sizeof(simde_float64) == 8, "Unable to find 64-bit floating-point type.");
/* Whether to assume that the compiler can auto-vectorize reasonably
well. This will cause SIMDe to attempt to compose vector
operations using more simple vector operations instead of minimize
serial work.
As an example, consider the _mm_add_ss(a, b) function from SSE,
which returns { a0 + b0, a1, a2, a3 }. This pattern is repeated
for other operations (sub, mul, etc.).
The naïve implementation would result in loading a0 and b0, adding
them into a temporary variable, then splicing that value into a new
vector with the remaining elements from a.
On platforms which support vectorization, it's generally faster to
simply perform the operation on the entire vector to avoid having
to move data between SIMD registers and non-SIMD registers.
Basically, instead of the temporary variable being (a0 + b0) it
would be a vector of (a + b), which is then combined with a to form
the result.
By default, SIMDe will prefer the pure-vector versions if we detect
a vector ISA extension, but this can be overridden by defining
SIMDE_NO_ASSUME_VECTORIZATION. You can also define
SIMDE_ASSUME_VECTORIZATION if you want to force SIMDe to use the
vectorized version. */
/* Default for SIMDE_ASSUME_VECTORIZATION: when the includer expressed
 * no preference either way, turn it on if a vector ISA extension (SSE,
 * NEON, MSA or AltiVec) is advertised by the compiler. */
#if !(defined(SIMDE_NO_ASSUME_VECTORIZATION) || defined(SIMDE_ASSUME_VECTORIZATION)) && \
    (defined(__ALTIVEC__) || defined(__mips_msa) || defined(__ARM_NEON) || defined(__SSE__))
# define SIMDE_ASSUME_VECTORIZATION
#endif
/* GCC and clang have built-in functions to handle shuffling of
vectors, but the implementations are slightly different. This
macro is just an abstraction over them. Note that elem_size is in
bits but vec_size is in bytes. */
/* SIMDE__SHUFFLE_VECTOR(elem_size, vec_size, a, b, ...): shuffle the
 * vectors a and b using the index list given as the trailing arguments.
 * __builtin_shufflevector takes the indices directly and ignores
 * elem_size/vec_size; __builtin_shuffle needs them to build an integer
 * index vector of int<elem_size>_t elements spanning vec_size bytes.
 * The macro is left undefined when neither builtin is available. */
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
# define SIMDE__SHUFFLE_VECTOR(elem_size, vec_size, a, b, ...) __builtin_shufflevector(a, b, __VA_ARGS__)
#elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER)
# define SIMDE__SHUFFLE_VECTOR(elem_size, vec_size, a, b, ...) __builtin_shuffle(a, b, (int##elem_size##_t __attribute__((__vector_size__(vec_size)))) { __VA_ARGS__ })
#endif
/* SIMDE__CONVERT_VECTOR(to, from): assign to `to` the result of
 * converting the vector `from` to the type of `to` with
 * __builtin_convertvector.  Left undefined when the builtin is not
 * available. */
#if HEDLEY_GCC_HAS_BUILTIN(__builtin_convertvector,9,0,0)
# define SIMDE__CONVERT_VECTOR(to, from) ((to) = __builtin_convertvector((from), __typeof__(to)))
#endif
/* SIMDE_CONVERT_FTOI(T,v): cast `v` to type T.  When the compiler knows
 * the -Wbad-function-cast warning, the cast is wrapped in diagnostic
 * push/ignore/pop pragmas so that warning is not reported for it;
 * otherwise it is a plain C cast. */
#if HEDLEY_HAS_WARNING("-Wbad-function-cast")
# define SIMDE_CONVERT_FTOI(T,v) \
HEDLEY_DIAGNOSTIC_PUSH \
_Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \
((T) (v)) \
HEDLEY_DIAGNOSTIC_POP
#else
# define SIMDE_CONVERT_FTOI(T,v) ((T) (v))
#endif
/* Some algorithms are iterative, and fewer iterations means less
accuracy. Lower values here will result in faster, but less
accurate, calculations for some functions. */
/* Default iteration count (2) unless the includer supplied its own
 * SIMDE_ACCURACY_ITERS. */
#ifndef SIMDE_ACCURACY_ITERS
# define SIMDE_ACCURACY_ITERS 2
#endif
/* SIMDE__ASSUME_ALIGNED(ptr, align): tell the optimizer that `ptr` is
 * aligned to `align` bytes.  Any earlier definition is discarded, then
 * the first available mechanism is used: Intel's __assume_aligned,
 * MSVC's __assume, __builtin_assume_aligned (which reassigns `ptr`, so
 * `ptr` must be a modifiable lvalue on that path), clang's
 * __builtin_assume, or a conditional __builtin_unreachable().  With
 * none of these the macro expands to nothing.
 *
 * `ptr` is parenthesized wherever it appears under a cast, and the
 * __builtin_unreachable() fallback is wrapped in parentheses, so the
 * expansion stays a single well-formed expression for any argument
 * expression and in any surrounding context. */
#if defined(SIMDE__ASSUME_ALIGNED)
# undef SIMDE__ASSUME_ALIGNED
#endif
#if HEDLEY_INTEL_VERSION_CHECK(9,0,0)
# define SIMDE__ASSUME_ALIGNED(ptr, align) __assume_aligned(ptr, align)
#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0)
# define SIMDE__ASSUME_ALIGNED(ptr, align) __assume((((char*) (ptr)) - ((char*) 0)) % (align) == 0)
#elif HEDLEY_GCC_HAS_BUILTIN(__builtin_assume_aligned,4,7,0)
# define SIMDE__ASSUME_ALIGNED(ptr, align) (ptr = (__typeof__(ptr)) __builtin_assume_aligned((ptr), align))
#elif HEDLEY_CLANG_HAS_BUILTIN(__builtin_assume)
# define SIMDE__ASSUME_ALIGNED(ptr, align) __builtin_assume((((char*) (ptr)) - ((char*) 0)) % (align) == 0)
#elif HEDLEY_GCC_HAS_BUILTIN(__builtin_unreachable,4,5,0)
# define SIMDE__ASSUME_ALIGNED(ptr, align) (((((char*) (ptr)) - ((char*) 0)) % (align) == 0) ? (1) : (__builtin_unreachable(), 0))
#else
# define SIMDE__ASSUME_ALIGNED(ptr, align)
#endif
/* This is only to help us implement functions like _mm_undefined_ps. */
/* SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_: pragma that silences the
 * compiler's "uninitialized value" diagnostic.  Any earlier definition
 * is discarded first.  Compilers are probed in order (clang-style
 * warning detection, GCC, PGI, Sun/Oracle C++ and C, Intel, MSVC) and
 * the macro is deliberately left UNDEFINED when none matches, so code
 * that relies on it has to test defined(...) before use.  The TI
 * branch is commented out and therefore inactive. */
#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
# undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_
#endif
#if HEDLEY_HAS_WARNING("-Wuninitialized")
# define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"")
#elif HEDLEY_GCC_VERSION_CHECK(4,2,0)
# define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"")
#elif HEDLEY_PGI_VERSION_CHECK(19,10,0)
# define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549")
#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)
# define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)")
#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0)
# define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)")
#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus)
# define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)")
/* #elif \
HEDLEY_TI_VERSION_CHECK(16,9,9) || \
HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \
HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \
HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2)
# define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") */
#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0)
# define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)")
#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0)
# define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700))
#endif
/* Sometimes we run into problems with specific versions of compilers
which make the native versions unusable for us. Often this is due
to missing functions, sometimes buggy implementations, etc. These
macros are how we check for specific bugs. As they are fixed we'll
start only defining them for problematic compiler versions.
Defining SIMDE_IGNORE_COMPILER_BUGS skips every definition below.
NOTE(review): each GCC test is a negated version check, so it
presumably also fires on compilers that are not GCC at all -- confirm
against the definition of HEDLEY_GCC_VERSION_CHECK. */
#if !defined(SIMDE_IGNORE_COMPILER_BUGS)
# if !HEDLEY_GCC_VERSION_CHECK(4,9,0)
# define SIMDE_BUG_GCC_REV_208793
# endif
# if !HEDLEY_GCC_VERSION_CHECK(5,0,0)
# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */
# endif
# if !HEDLEY_GCC_VERSION_CHECK(4,6,0)
# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */
# endif
# if !HEDLEY_GCC_VERSION_CHECK(10,0,0)
# define SIMDE_BUG_GCC_REV_274313
# endif
/* Emscripten-specific workarounds, keyed on the Hedley version macro. */
# if defined(HEDLEY_EMSCRIPTEN_VERSION)
# define SIMDE_BUG_EMSCRIPTEN_MISSING_IMPL /* Placeholder for (as yet) unfiled issues. */
# define SIMDE_BUG_EMSCRIPTEN_5242
# endif
#endif
/* Floating-point constants whose object representation is all one bits
   (ALL_SET) or all zero bits (ALL_UNSET), for 32-bit and 64-bit floats.
   The value is produced by writing the integer member of a union and
   reading the floating-point member.

   C uses an anonymous compound literal; C++ has no compound literals,
   so it reads from file-local constant unions instead.  The integer
   member of each union must have the same width as the float member,
   otherwise the float would only see part of the bit pattern. */
#if !defined(__cplusplus)
#define SIMDE_F64_ALL_SET (((union { uint64_t u64; simde_float64 f64; }) { .u64 = ~UINT64_C(0x0) }).f64)
#define SIMDE_F64_ALL_UNSET (((union { uint64_t u64; simde_float64 f64; }) { .u64 = UINT64_C(0x0) }).f64)
#define SIMDE_F32_ALL_SET (((union { uint32_t u32; simde_float32 f32; }) { .u32 = ~UINT32_C(0x0) }).f32)
#define SIMDE_F32_ALL_UNSET (((union { uint32_t u32; simde_float32 f32; }) { .u32 = UINT32_C(0x0) }).f32)
#else
static const union { uint64_t u64; simde_float64 f64; } simde_f64_all_set = { .u64 = ~UINT64_C(0) };
static const union { uint64_t u64; simde_float64 f64; } simde_f64_all_unset = { .u64 = UINT64_C(0) };
/* 32-bit pair: uint32_t / simde_float32, matching the C branch. */
static const union { uint32_t u32; simde_float32 f32; } simde_f32_all_set = { .u32 = ~UINT32_C(0) };
static const union { uint32_t u32; simde_float32 f32; } simde_f32_all_unset = { .u32 = UINT32_C(0) };
# define SIMDE_F64_ALL_SET (simde_f64_all_set.f64)
# define SIMDE_F64_ALL_UNSET (simde_f64_all_unset.f64)
# define SIMDE_F32_ALL_SET (simde_f32_all_set.f32)
# define SIMDE_F32_ALL_UNSET (simde_f32_all_unset.f32)
#endif
#endif /* !defined(SIMDE_COMMON_H) */
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
/* Copyright (c) 2017 Evan Nemerson <evan@nemerson.com>
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#if !defined(SIMDE__SSE3_H)
/* Mark the header as included (this inner test repeats the enclosing
   include-guard condition). */
# if !defined(SIMDE__SSE3_H)
# define SIMDE__SSE3_H
# endif
# include "sse2.h"
/* Backend selection.  Any previous SIMDE_SSE3_NATIVE is discarded, then:
     - SIMDE_SSE3_FORCE_NATIVE always selects the native backend;
     - otherwise the native backend is selected when the compiler
       defines __SSE3__ and neither SIMDE_SSE3_NO_NATIVE nor
       SIMDE_NO_NATIVE is defined;
     - otherwise the NEON backend is selected when __ARM_NEON is defined
       and neither SIMDE_SSE3_NO_NEON nor SIMDE_NO_NEON is defined.
   With none of these the portable fallbacks are used. */
# if defined(SIMDE_SSE3_NATIVE)
# undef SIMDE_SSE3_NATIVE
# endif
# if defined(SIMDE_SSE3_FORCE_NATIVE)
# define SIMDE_SSE3_NATIVE
# elif defined(__SSE3__) && (!defined(SIMDE_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE))
# define SIMDE_SSE3_NATIVE
# elif defined(__ARM_NEON) && !defined(SIMDE_SSE3_NO_NEON) && !defined(SIMDE_NO_NEON)
# define SIMDE_SSE3_NEON
# endif
/* The SSE3 backend must match the SSE2 one: a mismatch is a hard error
   when native was forced, otherwise a warning and the backend is
   dropped. */
# if defined(SIMDE_SSE3_NATIVE) && !defined(SIMDE_SSE2_NATIVE)
# if defined(SIMDE_SSE3_FORCE_NATIVE)
# error Native SSE3 support requires native SSE2 support
# else
# warning Native SSE3 support requires native SSE2 support, disabling
# undef SIMDE_SSE3_NATIVE
# endif
# elif defined(SIMDE_SSE3_NEON) && !defined(SIMDE_SSE2_NEON)
# warning SSE3 NEON support requires SSE2 NEON support, disabling
# undef SIMDE_SSE3_NEON
# endif
# if defined(SIMDE_SSE3_NATIVE)
# include <pmmintrin.h>
# endif
/* The _mm_* alias macros are only provided when the native backend is
   not in use and the user asked for them. */
#if !defined(SIMDE_SSE3_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)
# define SIMDE_SSE3_ENABLE_NATIVE_ALIASES
#endif
SIMDE__BEGIN_DECLS
/* Alternating subtract/add of packed doubles: even-indexed lanes of the
   result hold a - b, odd-indexed lanes hold a + b.  Uses the native
   intrinsic when SIMDE_SSE3_NATIVE is defined. */
SIMDE__FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_addsub_pd (simde__m128d a, simde__m128d b) {
#if defined(SIMDE_SSE3_NATIVE)
  return SIMDE__M128D_FROM_NATIVE(_mm_addsub_pd(a.n, b.n));
#else
  simde__m128d result;
  const size_t lane_count = sizeof(result.f64) / sizeof(result.f64[0]);
  /* Walk the lanes as (even, odd) pairs. */
  for (size_t even = 0, odd = 1 ; even < lane_count ; even += 2, odd += 2) {
    result.f64[even] = a.f64[even] - b.f64[even];
    result.f64[odd] = a.f64[odd] + b.f64[odd];
  }
  return result;
#endif
}
#if defined(SIMDE_SSE3_ENABLE_NATIVE_ALIASES)
# define _mm_addsub_pd(a, b) SIMDE__M128D_TO_NATIVE(simde_mm_addsub_pd(SIMDE__M128D_FROM_NATIVE(a), SIMDE__M128D_FROM_NATIVE(b)))
#endif
/* Alternating subtract/add of packed single-precision floats.  Uses the
   native intrinsic when SIMDE_SSE3_NATIVE is defined; otherwise the
   result is computed as a + (signs * b), where `signs` is the vector
   built by simde_mm_set_ps(1.0f, -1.0f, 1.0f, -1.0f). */
SIMDE__FUNCTION_ATTRIBUTES
simde__m128
simde_mm_addsub_ps (simde__m128 a, simde__m128 b) {
#if defined(SIMDE_SSE3_NATIVE)
  return SIMDE__M128_FROM_NATIVE(_mm_addsub_ps(a.n, b.n));
#else
  const simde__m128 signs = simde_mm_set_ps( 1.0f, -1.0f, 1.0f, -1.0f);
  /* Flip the sign of b in the lanes that must be subtracted. */
  const simde__m128 signed_b = simde_mm_mul_ps(signs, b);
  return simde_mm_add_ps(a, signed_b);
#endif
}
#if defined(SIMDE_SSE3_ENABLE_NATIVE_ALIASES)
# define _mm_addsub_ps(a, b) SIMDE__M128_TO_NATIVE(simde_mm_addsub_ps(SIMDE__M128_FROM_NATIVE(a), SIMDE__M128_FROM_NATIVE(b)))
#endif
/* Horizontal add of packed doubles: lane 0 of the result is the sum of
   the two lanes of `a`, lane 1 is the sum of the two lanes of `b`.
   Uses the native intrinsic when SIMDE_SSE3_NATIVE is defined. */
SIMDE__FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_hadd_pd (simde__m128d a, simde__m128d b) {
#if defined(SIMDE_SSE3_NATIVE)
  return SIMDE__M128D_FROM_NATIVE(_mm_hadd_pd(a.n, b.n));
#else
  simde__m128d sums;
  sums.f64[1] = b.f64[0] + b.f64[1];  /* upper lane: pair from b */
  sums.f64[0] = a.f64[0] + a.f64[1];  /* lower lane: pair from a */
  return sums;
#endif
}
#if defined(SIMDE_SSE3_ENABLE_NATIVE_ALIASES)
# define _mm_hadd_pd(a, b) SIMDE__M128D_TO_NATIVE(simde_mm_hadd_pd(SIMDE__M128D_FROM_NATIVE(a), SIMDE__M128D_FROM_NATIVE(b)))
#endif
/* Horizontal add of packed single-precision floats.  The result is
   { a0+a1, a2+a3, b0+b1, b2+b3 }.  Three implementations: the native
   intrinsic, NEON pairwise adds (one instruction on AArch64, two
   half-width adds combined elsewhere), and a portable scalar fallback. */
SIMDE__FUNCTION_ATTRIBUTES
simde__m128
simde_mm_hadd_ps (simde__m128 a, simde__m128 b) {
#if defined(SIMDE_SSE3_NATIVE)
  return SIMDE__M128_FROM_NATIVE(_mm_hadd_ps(a.n, b.n));
#elif defined(SIMDE_SSE3_NEON)
  #if defined(SIMDE_ARCH_AARCH64)
  return SIMDE__M128_NEON_C(f32, vpaddq_f32(a.neon_f32, b.neon_f32));
  #else
  /* Pairwise-add the low and high halves of each input separately. */
  float32x2_t a_pairs = vpadd_f32(vget_low_f32(a.neon_f32), vget_high_f32(a.neon_f32));
  float32x2_t b_pairs = vpadd_f32(vget_low_f32(b.neon_f32), vget_high_f32(b.neon_f32));
  return SIMDE__M128_NEON_C(f32, vcombine_f32(a_pairs, b_pairs));
  #endif
#else
  simde__m128 sums;
  /* Pair p of `a` lands in lane p, pair p of `b` in lane p + 2. */
  for (size_t pair = 0 ; pair < 2 ; pair++) {
    sums.f32[pair] = a.f32[2 * pair] + a.f32[2 * pair + 1];
    sums.f32[pair + 2] = b.f32[2 * pair] + b.f32[2 * pair + 1];
  }
  return sums;
#endif
}
#if defined(SIMDE_SSE3_ENABLE_NATIVE_ALIASES)
# define _mm_hadd_ps(a, b) SIMDE__M128_TO_NATIVE(simde_mm_hadd_ps(SIMDE__M128_FROM_NATIVE(a), SIMDE__M128_FROM_NATIVE(b)))
#endif
/* Horizontal subtract of packed doubles: lane 0 of the result is
   a0 - a1, lane 1 is b0 - b1.  Uses the native intrinsic when
   SIMDE_SSE3_NATIVE is defined. */
SIMDE__FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_hsub_pd (simde__m128d a, simde__m128d b) {
#if defined(SIMDE_SSE3_NATIVE)
  return SIMDE__M128D_FROM_NATIVE(_mm_hsub_pd(a.n, b.n));
#else
  simde__m128d diffs;
  diffs.f64[1] = b.f64[0] - b.f64[1];  /* upper lane: pair from b */
  diffs.f64[0] = a.f64[0] - a.f64[1];  /* lower lane: pair from a */
  return diffs;
#endif
}
#if defined(SIMDE_SSE3_ENABLE_NATIVE_ALIASES)
# define _mm_hsub_pd(a, b) SIMDE__M128D_TO_NATIVE(simde_mm_hsub_pd(SIMDE__M128D_FROM_NATIVE(a), SIMDE__M128D_FROM_NATIVE(b)))
#endif
/* Horizontal subtract of packed single-precision floats.  The result is
   { a0-a1, a2-a3, b0-b1, b2-b3 }.  The NEON path negates the odd lanes
   by multiplying with { 1, -1, 1, -1 } and then pairwise-adds, which
   turns each pair sum into the wanted difference. */
SIMDE__FUNCTION_ATTRIBUTES
simde__m128
simde_mm_hsub_ps (simde__m128 a, simde__m128 b) {
#if defined(SIMDE_SSE3_NATIVE)
  return SIMDE__M128_FROM_NATIVE(_mm_hsub_ps(a.n, b.n));
#elif defined(SIMDE_SSE3_NEON)
  const float32_t sign_pattern[] = { 1.0f, -1.0f, 1.0f, -1.0f };
  const float32x4_t signs = vld1q_f32(sign_pattern);
  float32x4_t a_signed = vmulq_f32(a.neon_f32, signs);
  float32x4_t b_signed = vmulq_f32(b.neon_f32, signs);
  return SIMDE__M128_NEON_C(f32, vcombine_f32(
    vpadd_f32(vget_low_f32(a_signed), vget_high_f32(a_signed)),
    vpadd_f32(vget_low_f32(b_signed), vget_high_f32(b_signed))));
#else
  simde__m128 diffs;
  /* Pair p of `a` lands in lane p, pair p of `b` in lane p + 2. */
  for (size_t pair = 0 ; pair < 2 ; pair++) {
    diffs.f32[pair] = a.f32[2 * pair] - a.f32[2 * pair + 1];
    diffs.f32[pair + 2] = b.f32[2 * pair] - b.f32[2 * pair + 1];
  }
  return diffs;
#endif
}
#if defined(SIMDE_SSE3_ENABLE_NATIVE_ALIASES)
# define _mm_hsub_ps(a, b) SIMDE__M128_TO_NATIVE(simde_mm_hsub_ps(SIMDE__M128_FROM_NATIVE(a), SIMDE__M128_FROM_NATIVE(b)))
#endif
/* Load a 128-bit integer vector from `mem_addr`.  Uses the native
   intrinsic or a NEON 4 x int32 load when available; the portable path
   copies sizeof(simde__m128i) bytes with memcpy. */
SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_lddqu_si128 (simde__m128i const* mem_addr) {
#if defined(SIMDE_SSE3_NATIVE)
  return SIMDE__M128I_FROM_NATIVE(_mm_lddqu_si128(&mem_addr->n));
#elif defined(SIMDE_SSE3_NEON)
  int32_t const* words = (int32_t const*) mem_addr;
  return SIMDE__M128I_NEON_C(i32, vld1q_s32(words));
#else
  simde__m128i loaded;
  memcpy(&loaded, mem_addr, sizeof(loaded));
  return loaded;
#endif
}
#if defined(SIMDE_SSE3_ENABLE_NATIVE_ALIASES)
# define _mm_lddqu_si128(mem_addr) SIMDE__M128I_TO_NATIVE(simde_mm_lddqu_si128(mem_addr))
#endif
/* Duplicate the low double of `a` into both lanes of the result.  Uses
   the native intrinsic when SIMDE_SSE3_NATIVE is defined. */
SIMDE__FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_movedup_pd (simde__m128d a) {
#if defined(SIMDE_SSE3_NATIVE)
  return SIMDE__M128D_FROM_NATIVE(_mm_movedup_pd(a.n));
#else
  simde__m128d dup;
  dup.f64[0] = dup.f64[1] = a.f64[0];
  return dup;
#endif
}
#if defined(SIMDE_SSE3_ENABLE_NATIVE_ALIASES)
# define _mm_movedup_pd(a) SIMDE__M128D_TO_NATIVE(simde_mm_movedup_pd(SIMDE__M128D_FROM_NATIVE(a)))
#endif
/* Duplicate the odd-indexed floats of `a`: the result is
   { a1, a1, a3, a3 }.  Uses the native intrinsic when SIMDE_SSE3_NATIVE
   is defined. */
SIMDE__FUNCTION_ATTRIBUTES
simde__m128
simde_mm_movehdup_ps (simde__m128 a) {
#if defined(SIMDE_SSE3_NATIVE)
  return SIMDE__M128_FROM_NATIVE(_mm_movehdup_ps(a.n));
#else
  simde__m128 dup;
  dup.f32[0] = dup.f32[1] = a.f32[1];
  dup.f32[2] = dup.f32[3] = a.f32[3];
  return dup;
#endif
}
#if defined(SIMDE_SSE3_ENABLE_NATIVE_ALIASES)
# define _mm_movehdup_ps(a) SIMDE__M128_TO_NATIVE(simde_mm_movehdup_ps(SIMDE__M128_FROM_NATIVE(a)))
#endif
/* Duplicate the even-indexed floats of `a`: the result is
   { a0, a0, a2, a2 }.  Uses the native intrinsic when SIMDE_SSE3_NATIVE
   is defined (the same macro every other function in this header tests;
   a misspelled guard would silently disable the native path). */
SIMDE__FUNCTION_ATTRIBUTES
simde__m128
simde_mm_moveldup_ps (simde__m128 a) {
#if defined(SIMDE_SSE3_NATIVE)
  return SIMDE__M128_FROM_NATIVE(_mm_moveldup_ps(a.n));
#else
  simde__m128 r;
  r.f32[0] = a.f32[0];
  r.f32[1] = a.f32[0];
  r.f32[2] = a.f32[2];
  r.f32[3] = a.f32[2];
  return r;
#endif
}
#if defined(SIMDE_SSE3_ENABLE_NATIVE_ALIASES)
# define _mm_moveldup_ps(a) SIMDE__M128_TO_NATIVE(simde_mm_moveldup_ps(SIMDE__M128_FROM_NATIVE(a)))
#endif
SIMDE__END_DECLS
#endif /* !defined(SIMDE__SSE3_H) */
This diff is collapsed.
This diff is collapsed.
debian/tmpbuild/* usr/bin
debian/bin/* usr/bin
Author: Michael R. Crusoe
Description: improve the build hardening
 In Makefile.AVX.PTHREADS.gcc, CFLAGS is appended to with "override" so
 that a CFLAGS value given on the make command line is extended with the
 makefile's own defines instead of replacing them.
--- raxml.orig/Makefile.AVX.PTHREADS.gcc
+++ raxml/Makefile.AVX.PTHREADS.gcc
@@ -3,8 +3,11 @@
CC = gcc
-CFLAGS := -D_USE_PTHREADS -D__SIM_SSE3 -O2 -D_GNU_SOURCE -msse3 -fomit-frame-pointer -funroll-loops -D__AVX #-Wall -pedantic -Wunused-parameter -Wredundant-decls -Wreturn-type -Wswitch-default -Wunused-value -Wimplicit -Wimplicit-function-declaration -Wimplicit-int -Wimport -Wunused -Wunused-function -Wunused-label -Wno-int-to-pointer-cast -Wbad-function-cast -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wold-style-definition -Wstrict-prototypes -Wdeclaration-after-statement -Wpointer-sign -Wextra -Wredundant-decls -Wunused -Wunused-function -Wunused-parameter -Wunused-value -Wunused-variable -Wformat -Wformat-nonliteral -Wparentheses -Wsequence-point -Wuninitialized -Wundef -Wbad-function-cast
+override CFLAGS += -D_USE_PTHREADS -D__SIM_SSE3 -O2 -D_GNU_SOURCE -fomit-frame-pointer -funroll-loops -D__AVX #-Wall -pedantic -Wunused-parameter -Wredundant-decls -Wreturn-type -Wswitch-default -Wunused-value -Wimplicit -Wimplicit-function-declaration -Wimplicit-int -Wimport -Wunused -Wunused-function -Wunused-label -Wno-int-to-pointer-cast -Wbad-function-cast -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wold-style-definition -Wstrict-prototypes -Wdeclaration-after-statement -Wpointer-sign -Wextra -Wredundant-decls -Wunused -Wunused-function -Wunused-parameter -Wunused-value -Wunused-variable -Wformat -Wformat-nonliteral -Wparentheses -Wsequence-point -Wuninitialized -Wundef -Wbad-function-cast
+ifeq (amd64,$(shell dpkg-architecture -q DEB_TARGET_ARCH_CPU))
+ override CFLAGS += -mavx -msse3
+endif
LIBRARIES = -lm -pthread
@@ -20,10 +23,10 @@
$(CC) -o raxmlHPC-PTHREADS-AVX $(objs) $(LIBRARIES) $(LDFLAGS)
avxLikelihood.o : avxLikelihood.c $(GLOBAL_DEPS)
- $(CC) $(CFLAGS) -mavx -c -o avxLikelihood.o avxLikelihood.c
+ $(CC) $(CPPFLAGS) $(CFLAGS) -c -o avxLikelihood.o avxLikelihood.c
fastDNAparsimony.o : fastDNAparsimony.c $(GLOBAL_DEPS)
- $(CC) $(CFLAGS) -mavx -c -o fastDNAparsimony.o fastDNAparsimony.c
+ $(CC) $(CPPFLAGS) $(CFLAGS) -c -o fastDNAparsimony.o fastDNAparsimony.c
#rev_functions.o : rev_functions.c $(GLOBAL_DEPS)
rmqs.o : rmqs.c $(GLOBAL_DEPS)
@@ -49,7 +52,7 @@
mem_alloc.o eigen.o : mem_alloc.c $(GLOBAL_DEPS)
eigen.o : eigen.c $(GLOBAL_DEPS)
- $(CC) -c -o eigen.o eigen.c
+ $(CC) $(CPPFLAGS) $(CFLAGS) -c -o eigen.o eigen.c
clean :
$(RM) *.o raxmlHPC-PTHREADS-AVX
--- raxml.orig/Makefile.PTHREADS.gcc
+++ raxml/Makefile.PTHREADS.gcc
@@ -4,7 +4,7 @@
CC = gcc
-CFLAGS = -D_GNU_SOURCE -fomit-frame-pointer -funroll-loops -O2 -D_USE_PTHREADS -msse #-Wall -Wunused-parameter -Wredundant-decls -Wreturn-type -Wswitch-default -Wunused-value -Wimplicit -Wimplicit-function-declaration -Wimplicit-int -Wimport -Wunused -Wunused-function -Wunused-label -Wno-int-to-pointer-cast -Wbad-function-cast -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wold-style-definition -Wstrict-prototypes -Wpointer-sign -Wextra -Wredundant-decls -Wunused -Wunused-function -Wunused-parameter -Wunused-value -Wunused-variable -Wformat -Wformat-nonliteral -Wparentheses -Wsequence-point -Wuninitialized -Wundef -Wbad-function-cast
+CFLAGS += -D_GNU_SOURCE -fomit-frame-pointer -funroll-loops -O2 -D_USE_PTHREADS -msse #-Wall -Wunused-parameter -Wredundant-decls -Wreturn-type -Wswitch-default -Wunused-value -Wimplicit -Wimplicit-function-declaration -Wimplicit-int -Wimport -Wunused -Wunused-function -Wunused-label -Wno-int-to-pointer-cast -Wbad-function-cast -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wold-style-definition -Wstrict-prototypes -Wpointer-sign -Wextra -Wredundant-decls -Wunused -Wunused-function -Wunused-parameter -Wunused-value -Wunused-variable -Wformat -Wformat-nonliteral -Wparentheses -Wsequence-point -Wuninitialized -Wundef -Wbad-function-cast
LIBRARIES = -lm -pthread
@@ -45,8 +45,8 @@
mem_alloc.o eigen.o : mem_alloc.c $(GLOBAL_DEPS)
eigen.o : eigen.c $(GLOBAL_DEPS)
- $(CC) -c -o eigen.o eigen.c
+ $(CC) $(CPPFLAGS) $(CFLAGS) -c -o eigen.o eigen.c
clean :
$(RM) *.o raxmlHPC-PTHREADS
-dev : raxmlHPC-PTHREADS
\ No newline at end of file
+dev : raxmlHPC-PTHREADS
--- raxml.orig/Makefile.SSE3.PTHREADS.gcc
+++ raxml/Makefile.SSE3.PTHREADS.gcc
@@ -3,7 +3,7 @@
CC = gcc
-CFLAGS = -D_USE_PTHREADS -D__SIM_SSE3 -D_GNU_SOURCE -msse3 -O2 -fomit-frame-pointer -funroll-loops #-Wall -pedantic -Wunused-parameter -Wredundant-decls -Wreturn-type -Wswitch-default -Wunused-value -Wimplicit -Wimplicit-function-declaration -Wimplicit-int -Wimport -Wunused -Wunused-function -Wunused-label -Wno-int-to-pointer-cast -Wbad-function-cast -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wold-style-definition -Wstrict-prototypes -Wdeclaration-after-statement -Wpointer-sign -Wextra -Wredundant-decls -Wunused -Wunused-function -Wunused-parameter -Wunused-value -Wunused-variable -Wformat -Wformat-nonliteral -Wparentheses -Wsequence-point -Wuninitialized -Wundef -Wbad-function-cast
+CFLAGS += -D_USE_PTHREADS -D__SIM_SSE3 -D_GNU_SOURCE -msse3 -O2 -fomit-frame-pointer -funroll-loops #-Wall -pedantic -Wunused-parameter -Wredundant-decls -Wreturn-type -Wswitch-default -Wunused-value -Wimplicit -Wimplicit-function-declaration -Wimplicit-int -Wimport -Wunused -Wunused-function -Wunused-label -Wno-int-to-pointer-cast -Wbad-function-cast -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wold-style-definition -Wstrict-prototypes -Wdeclaration-after-statement -Wpointer-sign -Wextra -Wredundant-decls -Wunused -Wunused-function -Wunused-parameter -Wunused-value -Wunused-variable -Wformat -Wformat-nonliteral -Wparentheses -Wsequence-point -Wuninitialized -Wundef -Wbad-function-cast
LIBRARIES = -lm -pthread
@@ -41,11 +41,11 @@
fastSearch.o : fastSearch.c $(GLOBAL_DEPS)
leaveDropping.o : leaveDropping.c $(GLOBAL_DEPS)
ancestralStates.o : ancestralStates.c $(GLOBAL_DEPS)
- mem_alloc.o eigen.o : mem_alloc.c $(GLOBAL_DEPS)
+mem_alloc.o eigen.o : mem_alloc.c $(GLOBAL_DEPS)
eigen.o : eigen.c $(GLOBAL_DEPS)
- $(CC) -c -o eigen.o eigen.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) -c -o eigen.o eigen.c
clean :
$(RM) *.o raxmlHPC-PTHREADS-SSE3
-dev : raxmlHPC-PTHREADS-SSE3
\ No newline at end of file
+dev : raxmlHPC-PTHREADS-SSE3
This diff is collapsed.
......@@ -8,10 +8,16 @@ export DEB_BUILD_MAINT_OPTIONS = hardening=+all
include /usr/share/dpkg/default.mk
exe=raxmlHPC
ifeq (amd64,$(DEB_TARGET_ARCH_CPU))
##MFILES:=Makefile.gcc \ # we use a wrapper to the faster versions instead of shipping the "plain" version
MFILES:=Makefile.PTHREADS.gcc \
MFILES =Makefile.PTHREADS.gcc \
Makefile.SSE3.PTHREADS.gcc \
Makefile.AVX.PTHREADS.gcc
MFLAGS=
else
MFILES = Makefile.AVX.PTHREADS.gcc
MFLAGS = CFLAGS="$(shell dpkg-buildflags --get CFLAGS) -DSIMDE_ENABLE_OPENMP -fopenmp-simd -O3"
endif
BINDIR=$(CURDIR)/debian/tmpbuild
......@@ -24,8 +30,8 @@ override_dh_auto_clean:
override_dh_auto_build:
mkdir -p $(BINDIR)
for mfile in $(MFILES); do \
$(MAKE) -f $${mfile} ; \
set -e ; for mfile in $(MFILES); do \
$(MAKE) -f $${mfile} $(MFLAGS); \
curexe=`ls $(exe)* | head -n 1` ; \
if [ -e $(BINDIR)/$${curexe} ] ; then \
echo "**** ERROR: $${curexe} was created before!!" && false ; \
......@@ -35,6 +41,14 @@ override_dh_auto_build:
$(MAKE) -f $${mfile} clean ; \
done
override_dh_install:
dh_install
ifeq (amd64,$(DEB_TARGET_ARCH_CPU))
dh_install debian/bin/raxmlHPC usr/bin
else
mv debian/raxml/usr/bin/raxmlHPC-PTHREADS-AVX debian/raxml/usr/bin/raxmlHPC
endif
override_dh_installman:
help2man --no-info --help-option="-h" --version-option="-v" \
--name="Randomized Axelerated Maximum Likelihood" \
......