Commits on Source (10)
libssw (1.1-6) unstable; urgency=medium
* Release to unstable.
-- Michael R. Crusoe <michael.crusoe@gmail.com> Fri, 13 Dec 2019 13:24:05 +0100
libssw (1.1-6~0expsimde2) experimental; urgency=medium
* Fix some simde c/c++ compat
-- Michael R. Crusoe <michael.crusoe@gmail.com> Fri, 13 Dec 2019 12:11:05 +0100
libssw (1.1-6~0expsimde1) experimental; urgency=medium
* Include patch from upstream SIMDE for SH4
-- Michael R. Crusoe <michael.crusoe@gmail.com> Fri, 13 Dec 2019 08:12:10 +0100
libssw (1.1-6~0expsimde0) experimental; urgency=medium
* Enable for all architectures using simde
* Add myself as an uploader
-- Michael R. Crusoe <michael.crusoe@gmail.com> Wed, 11 Dec 2019 15:14:03 +0100
libssw (1.1-4) unstable; urgency=medium
* Source upload to enable testing migration.
......
Source: libssw
Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.org>
-Uploaders: Sascha Steinbiss <satta@debian.org>
+Uploaders: Sascha Steinbiss <satta@debian.org>,
+           Michael R. Crusoe <michael.crusoe@gmail.com>
Section: science
Priority: optional
Build-Depends: debhelper (>= 12~),
@@ -16,7 +17,7 @@ Vcs-Git: https://salsa.debian.org/med-team/libssw.git
Homepage: https://github.com/mengyao/Complete-Striped-Smith-Waterman-Library
Package: libssw0
-Architecture: any-amd64 x32 ppc64el
+Architecture: any
Multi-Arch: same
Section: libs
Depends: ${shlibs:Depends},
@@ -35,11 +36,10 @@ Description: fast SIMD parallelized implementation of the Smith-Waterman algorit
the sub-optimal alignment score and location heuristically.
Package: libssw-dev
-Architecture: any-amd64 x32 ppc64el
+Architecture: any
Multi-Arch: same
Section: libdevel
-Depends: ${shlibs:Depends},
-         ${misc:Depends},
+Depends: ${misc:Depends},
libssw0 (= ${binary:Version})
Pre-Depends: ${misc:Pre-Depends}
Provides: libssw-dev
@@ -50,7 +50,7 @@ Description: Development headers and static libraries for libssw
algorithm at the instruction level.
Package: libssw-java
-Architecture: any-amd64 x32 ppc64el
+Architecture: any
Section: java
Depends: ${java:Depends},
${shlibs:Depends},
@@ -63,7 +63,7 @@ Description: Java bindings for libssw
instruction level.
Package: ssw-align
-Architecture: any-amd64 x32 ppc64el
+Architecture: any
Depends: ${shlibs:Depends},
${misc:Depends},
libssw0 (= ${binary:Version})
......
@@ -15,6 +15,10 @@ Files: debian/*
Copyright: © 2016 Sascha Steinbiss <satta@debian.org>
License: MIT
+Files: debian/include/simde/*
+Copyright: 2013-2019, Evan Nemerson <evan@nemerson.com>
+License: MIT
License: MIT
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
......
/* Check (assertions)
* Portable Snippets - https://github.com/nemequ/portable-snippets
* Created by Evan Nemerson <evan@nemerson.com>
*
* To the extent possible under law, the authors have waived all
* copyright and related or neighboring rights to this code. For
* details, see the Creative Commons Zero 1.0 Universal license at
* https://creativecommons.org/publicdomain/zero/1.0/
*/
#if !defined(SIMDE_CHECK_H)
#define SIMDE_CHECK_H
#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG)
# define SIMDE_NDEBUG 1
#endif
#include <stdint.h>
#if !defined(_WIN32)
# define SIMDE_SIZE_MODIFIER "z"
# define SIMDE_CHAR_MODIFIER "hh"
# define SIMDE_SHORT_MODIFIER "h"
#else
# if defined(_M_X64) || defined(__amd64__)
# define SIMDE_SIZE_MODIFIER "I64"
# else
# define SIMDE_SIZE_MODIFIER ""
# endif
# define SIMDE_CHAR_MODIFIER ""
# define SIMDE_SHORT_MODIFIER ""
#endif
#if defined(_MSC_VER) && (_MSC_VER >= 1500)
# define SIMDE__PUSH_DISABLE_MSVC_C4127 __pragma(warning(push)) __pragma(warning(disable:4127))
# define SIMDE__POP_DISABLE_MSVC_C4127 __pragma(warning(pop))
#else
# define SIMDE__PUSH_DISABLE_MSVC_C4127
# define SIMDE__POP_DISABLE_MSVC_C4127
#endif
#if !defined(simde_errorf)
# include <stdio.h>
# include <stdlib.h>
# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort())
#endif
#define simde_error(msg) simde_errorf("%s", msg)
#if defined(SIMDE_NDEBUG)
# if defined(SIMDE_CHECK_FAIL_DEFINED)
# define simde_assert(expr)
# else
# if defined(HEDLEY_ASSUME)
# define simde_assert(expr) HEDLEY_ASSUME(expr)
# elif HEDLEY_GCC_VERSION_CHECK(4,5,0)
# define simde_assert(expr) ((void) (!!(expr) ? 1 : (__builtin_unreachable(), 1)))
# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0)
# define simde_assert(expr) __assume(expr)
# else
# define simde_assert(expr)
# endif
# endif
# define simde_assert_true(expr) simde_assert(expr)
# define simde_assert_false(expr) simde_assert(!(expr))
# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b)))
# define simde_assert_double_equal(a, b, precision)
# define simde_assert_string_equal(a, b)
# define simde_assert_string_not_equal(a, b)
# define simde_assert_memory_equal(size, a, b)
# define simde_assert_memory_not_equal(size, a, b)
#else
# define simde_assert(expr) \
do { \
if (!HEDLEY_LIKELY(expr)) { \
simde_error("assertion failed: " #expr "\n"); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) \
SIMDE__POP_DISABLE_MSVC_C4127
# define simde_assert_true(expr) \
do { \
if (!HEDLEY_LIKELY(expr)) { \
simde_error("assertion failed: " #expr " is not true\n"); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) \
SIMDE__POP_DISABLE_MSVC_C4127
# define simde_assert_false(expr) \
do { \
if (!HEDLEY_LIKELY(!(expr))) { \
simde_error("assertion failed: " #expr " is not false\n"); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) \
SIMDE__POP_DISABLE_MSVC_C4127
# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \
do { \
T simde_tmp_a_ = (a); \
T simde_tmp_b_ = (b); \
if (!(simde_tmp_a_ op simde_tmp_b_)) { \
simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \
#a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) \
SIMDE__POP_DISABLE_MSVC_C4127
# define simde_assert_double_equal(a, b, precision) \
do { \
const double simde_tmp_a_ = (a); \
const double simde_tmp_b_ = (b); \
const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \
-(simde_tmp_a_ - simde_tmp_b_) : \
(simde_tmp_a_ - simde_tmp_b_); \
if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \
simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." #precision "g)\n", \
#a, #b, simde_tmp_a_, simde_tmp_b_); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) \
SIMDE__POP_DISABLE_MSVC_C4127
# include <string.h>
# define simde_assert_string_equal(a, b) \
do { \
const char* simde_tmp_a_ = a; \
const char* simde_tmp_b_ = b; \
if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \
simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \
#a, #b, simde_tmp_a_, simde_tmp_b_); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) \
SIMDE__POP_DISABLE_MSVC_C4127
# define simde_assert_string_not_equal(a, b) \
do { \
const char* simde_tmp_a_ = a; \
const char* simde_tmp_b_ = b; \
if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \
simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \
#a, #b, simde_tmp_a_, simde_tmp_b_); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) \
SIMDE__POP_DISABLE_MSVC_C4127
# define simde_assert_memory_equal(size, a, b) \
do { \
const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \
const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \
const size_t simde_tmp_size_ = (size); \
if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \
size_t simde_tmp_pos_; \
for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \
if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \
simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \
#a, #b, simde_tmp_pos_); \
break; \
} \
} \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) \
SIMDE__POP_DISABLE_MSVC_C4127
# define simde_assert_memory_not_equal(size, a, b) \
do { \
const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \
const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \
const size_t simde_tmp_size_ = (size); \
if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \
simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \
#a, #b, simde_tmp_size_); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) \
SIMDE__POP_DISABLE_MSVC_C4127
#endif
#define simde_assert_type(T, fmt, a, op, b) \
simde_assert_type_full("", "", T, fmt, a, op, b)
#define simde_assert_char(a, op, b) \
simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b)
#define simde_assert_uchar(a, op, b) \
simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b)
#define simde_assert_short(a, op, b) \
simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b)
#define simde_assert_ushort(a, op, b) \
simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b)
#define simde_assert_int(a, op, b) \
simde_assert_type(int, "d", a, op, b)
#define simde_assert_uint(a, op, b) \
simde_assert_type(unsigned int, "u", a, op, b)
#define simde_assert_long(a, op, b) \
simde_assert_type(long int, "ld", a, op, b)
#define simde_assert_ulong(a, op, b) \
simde_assert_type(unsigned long int, "lu", a, op, b)
#define simde_assert_llong(a, op, b) \
simde_assert_type(long long int, "lld", a, op, b)
#define simde_assert_ullong(a, op, b) \
simde_assert_type(unsigned long long int, "llu", a, op, b)
#define simde_assert_size(a, op, b) \
simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b)
#define simde_assert_float(a, op, b) \
simde_assert_type(float, "f", a, op, b)
#define simde_assert_double(a, op, b) \
simde_assert_type(double, "g", a, op, b)
#define simde_assert_ptr(a, op, b) \
simde_assert_type(const void*, "p", a, op, b)
#define simde_assert_int8(a, op, b) \
simde_assert_type(int8_t, PRIi8, a, op, b)
#define simde_assert_uint8(a, op, b) \
simde_assert_type(uint8_t, PRIu8, a, op, b)
#define simde_assert_int16(a, op, b) \
simde_assert_type(int16_t, PRIi16, a, op, b)
#define simde_assert_uint16(a, op, b) \
simde_assert_type(uint16_t, PRIu16, a, op, b)
#define simde_assert_int32(a, op, b) \
simde_assert_type(int32_t, PRIi32, a, op, b)
#define simde_assert_uint32(a, op, b) \
simde_assert_type(uint32_t, PRIu32, a, op, b)
#define simde_assert_int64(a, op, b) \
simde_assert_type(int64_t, PRIi64, a, op, b)
#define simde_assert_uint64(a, op, b) \
simde_assert_type(uint64_t, PRIu64, a, op, b)
#define simde_assert_ptr_equal(a, b) \
simde_assert_ptr(a, ==, b)
#define simde_assert_ptr_not_equal(a, b) \
simde_assert_ptr(a, !=, b)
#define simde_assert_null(ptr) \
simde_assert_ptr(ptr, ==, NULL)
#define simde_assert_not_null(ptr) \
simde_assert_ptr(ptr, !=, NULL)
#define simde_assert_ptr_null(ptr) \
simde_assert_ptr(ptr, ==, NULL)
#define simde_assert_ptr_not_null(ptr) \
simde_assert_ptr(ptr, !=, NULL)
#endif /* !defined(SIMDE_CHECK_H) */
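A minimal usage sketch of the assertion macros above — not part of the upstream header. The include paths are assumptions (in this packaging the files live under debian/include/simde/), and the program must be built with -DSIMDE_DEBUG, otherwise the header defaults to SIMDE_NDEBUG and the checks compile away:
#include <inttypes.h>
#include <stddef.h>
#include <stdint.h>
#include "hedley.h"   /* provides HEDLEY_LIKELY/HEDLEY_UNLIKELY used by check.h */
#include "check.h"
int main(void) {
  size_t n = 16;
  int8_t score = -3;
  simde_assert_size(n, ==, 16);                 /* printed with SIMDE_SIZE_MODIFIER on failure */
  simde_assert_int8(score, <, 0);               /* printed with PRIi8 from <inttypes.h> */
  simde_assert_double_equal(0.1 + 0.2, 0.3, 9); /* tolerated difference: 1e-9 */
  simde_assert_string_equal("ssw", "ssw");
  return 0;
}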
/* Architecture detection
* Created by Evan Nemerson <evan@nemerson.com>
*
* To the extent possible under law, the authors have waived all
* copyright and related or neighboring rights to this code. For
* details, see the Creative Commons Zero 1.0 Universal license at
* <https://creativecommons.org/publicdomain/zero/1.0/>
*
* Different compilers define different preprocessor macros for the
* same architecture. This is an attempt to provide a single
* interface which is usable on any compiler.
*
* In general, a macro named SIMDE_ARCH_* is defined for each
* architecture the CPU supports. When there are multiple possible
* versions, we try to define the macro to the target version. For
* example, if you want to check for i586+, you could do something
* like:
*
* #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5)
* ...
* #endif
*
* You could also just check that SIMDE_ARCH_X86 >= 5 without checking
* if it's defined first, but some compilers may emit a warning about
* an undefined macro being used (e.g., GCC with -Wundef).
*
* This was originally created for SIMDe
* <https://github.com/nemequ/simde> (hence the prefix), but this
* header has no dependencies and may be used anywhere. It is
* originally based on information from
* <https://sourceforge.net/p/predef/wiki/Architectures/>, though it
* has been enhanced with additional information.
*
* If you improve this file, or find a bug, please file the issue at
* <https://github.com/nemequ/simde/issues>. If you copy this into
* your project, even if you change the prefix, please keep the links
* to SIMDe intact so others know where to report issues, submit
* enhancements, and find the latest version. */
#if !defined(SIMDE_ARCH_H)
#define SIMDE_ARCH_H
/* Alpha
<https://en.wikipedia.org/wiki/DEC_Alpha> */
#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA)
# if defined(__alpha_ev6__)
# define SIMDE_ARCH_ALPHA 6
# elif defined(__alpha_ev5__)
# define SIMDE_ARCH_ALPHA 5
# elif defined(__alpha_ev4__)
# define SIMDE_ARCH_ALPHA 4
# else
# define SIMDE_ARCH_ALPHA 1
# endif
#endif
/* Atmel AVR
<https://en.wikipedia.org/wiki/Atmel_AVR> */
#if defined(__AVR_ARCH__)
# define SIMDE_ARCH_AVR __AVR_ARCH__
#endif
/* AMD64 / x86_64
<https://en.wikipedia.org/wiki/X86-64> */
#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
# define SIMDE_ARCH_AMD64 1
#endif
/* ARM
<https://en.wikipedia.org/wiki/ARM_architecture> */
#if defined(__ARM_ARCH_8A__)
# define SIMDE_ARCH_ARM 82
#elif defined(__ARM_ARCH_8R__)
# define SIMDE_ARCH_ARM 81
#elif defined(__ARM_ARCH_8__)
# define SIMDE_ARCH_ARM 80
#elif defined(__ARM_ARCH_7S__)
# define SIMDE_ARCH_ARM 74
#elif defined(__ARM_ARCH_7M__)
# define SIMDE_ARCH_ARM 73
#elif defined(__ARM_ARCH_7R__)
# define SIMDE_ARCH_ARM 72
#elif defined(__ARM_ARCH_7A__)
# define SIMDE_ARCH_ARM 71
#elif defined(__ARM_ARCH_7__)
# define SIMDE_ARCH_ARM 70
#elif defined(__ARM_ARCH)
# define SIMDE_ARCH_ARM (__ARM_ARCH * 10)
#elif defined(_M_ARM)
# define SIMDE_ARCH_ARM (_M_ARM * 10)
#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM)
# define SIMDE_ARCH_ARM 1
#endif
/* AArch64
<https://en.wikipedia.org/wiki/ARM_architecture> */
#if defined(__aarch64__) || defined(_M_ARM64)
# define SIMDE_ARCH_AARCH64 10
#endif
/* Blackfin
<https://en.wikipedia.org/wiki/Blackfin> */
#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__)
# define SIMDE_ARCH_BLACKFIN 1
#endif
/* CRIS
<https://en.wikipedia.org/wiki/ETRAX_CRIS> */
#if defined(__CRIS_arch_version)
# define SIMDE_ARCH_CRIS __CRIS_arch_version
#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__)
# define SIMDE_ARCH_CRIS 1
#endif
/* Convex
<https://en.wikipedia.org/wiki/Convex_Computer> */
#if defined(__convex_c38__)
# define SIMDE_ARCH_CONVEX 38
#elif defined(__convex_c34__)
# define SIMDE_ARCH_CONVEX 34
#elif defined(__convex_c32__)
# define SIMDE_ARCH_CONVEX 32
#elif defined(__convex_c2__)
# define SIMDE_ARCH_CONVEX 2
#elif defined(__convex__)
# define SIMDE_ARCH_CONVEX 1
#endif
/* Adapteva Epiphany
<https://en.wikipedia.org/wiki/Adapteva_Epiphany> */
#if defined(__epiphany__)
# define SIMDE_ARCH_EPIPHANY 1
#endif
/* Fujitsu FR-V
<https://en.wikipedia.org/wiki/FR-V_(microprocessor)> */
#if defined(__frv__)
# define SIMDE_ARCH_FRV 1
#endif
/* H8/300
<https://en.wikipedia.org/wiki/H8_Family> */
#if defined(__H8300__)
# define SIMDE_ARCH_H8300
#endif
/* HP/PA / PA-RISC
<https://en.wikipedia.org/wiki/PA-RISC> */
#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0)
# define SIMDE_ARCH_HPPA 20
#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1)
# define SIMDE_ARCH_HPPA 11
#elif defined(_PA_RISC1_0)
# define SIMDE_ARCH_HPPA 10
#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa)
# define SIMDE_ARCH_HPPA 1
#endif
/* x86
<https://en.wikipedia.org/wiki/X86> */
#if defined(_M_IX86)
# define SIMDE_ARCH_X86 (_M_IX86 / 100)
#elif defined(__I86__)
# define SIMDE_ARCH_X86 __I86__
#elif defined(i686) || defined(__i686) || defined(__i686__)
# define SIMDE_ARCH_X86 6
#elif defined(i586) || defined(__i586) || defined(__i586__)
# define SIMDE_ARCH_X86 5
#elif defined(i486) || defined(__i486) || defined(__i486__)
# define SIMDE_ARCH_X86 4
#elif defined(i386) || defined(__i386) || defined(__i386__)
# define SIMDE_ARCH_X86 3
#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__)
# define SIMDE_ARCH_X86 3
#endif
/* Itanium
<https://en.wikipedia.org/wiki/Itanium> */
#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__)
# define SIMDE_ARCH_IA64 1
#endif
/* Renesas M32R
<https://en.wikipedia.org/wiki/M32R> */
#if defined(__m32r__) || defined(__M32R__)
# define SIMDE_ARCH_M32R
#endif
/* Motorola 68000
<https://en.wikipedia.org/wiki/Motorola_68000> */
#if defined(__mc68060__) || defined(__MC68060__)
# define SIMDE_ARCH_M68K 68060
#elif defined(__mc68040__) || defined(__MC68040__)
# define SIMDE_ARCH_M68K 68040
#elif defined(__mc68030__) || defined(__MC68030__)
# define SIMDE_ARCH_M68K 68030
#elif defined(__mc68020__) || defined(__MC68020__)
# define SIMDE_ARCH_M68K 68020
#elif defined(__mc68010__) || defined(__MC68010__)
# define SIMDE_ARCH_M68K 68010
#elif defined(__mc68000__) || defined(__MC68000__)
# define SIMDE_ARCH_M68K 68000
#endif
/* Xilinx MicroBlaze
<https://en.wikipedia.org/wiki/MicroBlaze> */
#if defined(__MICROBLAZE__) || defined(__microblaze__)
# define SIMDE_ARCH_MICROBLAZE
#endif
/* MIPS
<https://en.wikipedia.org/wiki/MIPS_architecture> */
#if defined(_MIPS_ISA_MIPS64R2)
# define SIMDE_ARCH_MIPS 642
#elif defined(_MIPS_ISA_MIPS64)
# define SIMDE_ARCH_MIPS 640
#elif defined(_MIPS_ISA_MIPS32R2)
# define SIMDE_ARCH_MIPS 322
#elif defined(_MIPS_ISA_MIPS32)
# define SIMDE_ARCH_MIPS 320
#elif defined(_MIPS_ISA_MIPS4)
# define SIMDE_ARCH_MIPS 4
#elif defined(_MIPS_ISA_MIPS3)
# define SIMDE_ARCH_MIPS 3
#elif defined(_MIPS_ISA_MIPS2)
# define SIMDE_ARCH_MIPS 2
#elif defined(_MIPS_ISA_MIPS1)
# define SIMDE_ARCH_MIPS 1
#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__)
# define SIMDE_ARCH_MIPS 1
#endif
/* Matsushita MN10300
<https://en.wikipedia.org/wiki/MN103> */
#if defined(__MN10300__) || defined(__mn10300__)
# define SIMDE_ARCH_MN10300 1
#endif
/* POWER
<https://en.wikipedia.org/wiki/IBM_POWER_Instruction_Set_Architecture> */
#if defined(_M_PPC)
# define SIMDE_ARCH_POWER _M_PPC
#elif defined(_ARCH_PWR8)
# define SIMDE_ARCH_POWER 800
#elif defined(_ARCH_PWR7)
# define SIMDE_ARCH_POWER 700
#elif defined(_ARCH_PWR6)
# define SIMDE_ARCH_POWER 600
#elif defined(_ARCH_PWR5)
# define SIMDE_ARCH_POWER 500
#elif defined(_ARCH_PWR4)
# define SIMDE_ARCH_POWER 400
#elif defined(_ARCH_440) || defined(__ppc440__)
# define SIMDE_ARCH_POWER 440
#elif defined(_ARCH_450) || defined(__ppc450__)
# define SIMDE_ARCH_POWER 450
#elif defined(_ARCH_601) || defined(__ppc601__)
# define SIMDE_ARCH_POWER 601
#elif defined(_ARCH_603) || defined(__ppc603__)
# define SIMDE_ARCH_POWER 603
#elif defined(_ARCH_604) || defined(__ppc604__)
# define SIMDE_ARCH_POWER 604
#elif defined(_ARCH_605) || defined(__ppc605__)
# define SIMDE_ARCH_POWER 605
#elif defined(_ARCH_620) || defined(__ppc620__)
# define SIMDE_ARCH_POWER 620
#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc)
# define SIMDE_ARCH_POWER 1
#endif
/* SPARC
<https://en.wikipedia.org/wiki/SPARC> */
#if defined(__sparc_v9__) || defined(__sparcv9)
# define SIMDE_ARCH_SPARC 9
#elif defined(__sparc_v8__) || defined(__sparcv8)
# define SIMDE_ARCH_SPARC 8
#elif defined(__sparc_v7__) || defined(__sparcv7)
# define SIMDE_ARCH_SPARC 7
#elif defined(__sparc_v6__) || defined(__sparcv6)
# define SIMDE_ARCH_SPARC 6
#elif defined(__sparc_v5__) || defined(__sparcv5)
# define SIMDE_ARCH_SPARC 5
#elif defined(__sparc_v4__) || defined(__sparcv4)
# define SIMDE_ARCH_SPARC 4
#elif defined(__sparc_v3__) || defined(__sparcv3)
# define SIMDE_ARCH_SPARC 3
#elif defined(__sparc_v2__) || defined(__sparcv2)
# define SIMDE_ARCH_SPARC 2
#elif defined(__sparc_v1__) || defined(__sparcv1)
# define SIMDE_ARCH_SPARC 1
#elif defined(__sparc__) || defined(__sparc)
# define SIMDE_ARCH_SPARC 1
#endif
/* SuperH
<https://en.wikipedia.org/wiki/SuperH> */
#if defined(__sh5__) || defined(__SH5__)
# define SIMDE_ARCH_SUPERH 5
#elif defined(__sh4__) || defined(__SH4__)
# define SIMDE_ARCH_SUPERH 4
#elif defined(__sh3__) || defined(__SH3__)
# define SIMDE_ARCH_SUPERH 3
#elif defined(__sh2__) || defined(__SH2__)
# define SIMDE_ARCH_SUPERH 2
#elif defined(__sh1__) || defined(__SH1__)
# define SIMDE_ARCH_SUPERH 1
#elif defined(__sh__) || defined(__SH__)
# define SIMDE_ARCH_SUPERH 1
#endif
/* IBM System z
<https://en.wikipedia.org/wiki/IBM_System_z> */
#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__)
# define SIMDE_ARCH_SYSTEMZ
#endif
/* TMS320 DSP
<https://en.wikipedia.org/wiki/Texas_Instruments_TMS320> */
#if defined(_TMS320C6740) || defined(__TMS320C6740__)
# define SIMDE_ARCH_TMS320 6740
#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__)
# define SIMDE_ARCH_TMS320 6701
#elif defined(_TMS320C6700) || defined(__TMS320C6700__)
# define SIMDE_ARCH_TMS320 6700
#elif defined(_TMS320C6600) || defined(__TMS320C6600__)
# define SIMDE_ARCH_TMS320 6600
#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__)
# define SIMDE_ARCH_TMS320 6401
#elif defined(_TMS320C6400) || defined(__TMS320C6400__)
# define SIMDE_ARCH_TMS320 6400
#elif defined(_TMS320C6200) || defined(__TMS320C6200__)
# define SIMDE_ARCH_TMS320 6200
#elif defined(_TMS320C55X) || defined(__TMS320C55X__)
# define SIMDE_ARCH_TMS320 550
#elif defined(_TMS320C54X) || defined(__TMS320C54X__)
# define SIMDE_ARCH_TMS320 540
#elif defined(_TMS320C28X) || defined(__TMS320C28X__)
# define SIMDE_ARCH_TMS320 280
#endif
/* Xtensa
<https://en.wikipedia.org/wiki/> */
#if defined(__xtensa__) || defined(__XTENSA__)
# define SIMDE_ARCH_XTENSA 1
#endif
#endif /* !defined(SIMDE_ARCH_H) */
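A short, hypothetical example of consuming these macros, following the defined()-before-compare pattern the header comment recommends so that -Wundef stays quiet on architectures where a macro is absent (the include path is an assumption):
#include <stdio.h>
#include "simde-arch.h"
static const char* guess_isa(void) {
#if defined(SIMDE_ARCH_AMD64)
  return "x86-64";
#elif defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 6)
  return "i686 or newer";
#elif defined(SIMDE_ARCH_ARM) && (SIMDE_ARCH_ARM >= 80)
  return "ARMv8";
#elif defined(SIMDE_ARCH_POWER)
  return "POWER";
#else
  return "something else";
#endif
}
int main(void) {
  printf("compiled for: %s\n", guess_isa());
  return 0;
}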
/* Copyright (c) 2017-2019 Evan Nemerson <evan@nemerson.com>
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#if !defined(SIMDE_COMMON_H)
#define SIMDE_COMMON_H
#include "hedley.h"
#include "check.h"
#include "simde-arch.h"
#if \
HEDLEY_HAS_ATTRIBUTE(aligned) || \
HEDLEY_GCC_VERSION_CHECK(2,95,0) || \
HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \
HEDLEY_IBM_VERSION_CHECK(11,1,0) || \
HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
HEDLEY_PGI_VERSION_CHECK(19,4,0) || \
HEDLEY_ARM_VERSION_CHECK(4,1,0) || \
HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \
HEDLEY_TI_VERSION_CHECK(8,1,0)
# define SIMDE_ALIGN(alignment) __attribute__((aligned(alignment)))
#elif defined(_MSC_VER) && (!defined(_M_IX86) || defined(_M_AMD64))
# define SIMDE_ALIGN(alignment) __declspec(align(alignment))
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
# define SIMDE_ALIGN(alignment) _Alignas(alignment)
#elif defined(__cplusplus) && (__cplusplus >= 201103L)
# define SIMDE_ALIGN(alignment) alignas(alignment)
#else
# define SIMDE_ALIGN(alignment)
#endif
#define simde_assert_aligned(alignment, val) \
simde_assert_int(((uintptr_t) (val)) % (alignment), ==, 0)
/* TODO: this should really do something like
HEDLEY_STATIC_CAST(T, (simde_assert_int(alignment, v), v))
but I need to think about how to handle it in all compilers...
may end up moving to Hedley, too. */
#if HEDLEY_HAS_BUILTIN(__builtin_assume_aligned)
# define SIMDE_CAST_ALIGN(alignment, T, v) ((T) __builtin_assume_aligned(v, alignment))
#elif HEDLEY_HAS_WARNING("-Wcast-align")
# define SIMDE_CAST_ALIGN(alignment, T, v) \
HEDLEY_DIAGNOSTIC_PUSH \
_Pragma("clang diagnostic ignored \"-Wcast-align\"") \
HEDLEY_STATIC_CAST(T, v) \
HEDLEY_DIAGNOSTIC_POP
#else
# define SIMDE_CAST_ALIGN(alignment, T, v) HEDLEY_STATIC_CAST(T, v)
#endif
#if HEDLEY_GCC_HAS_ATTRIBUTE(vector_size,4,6,0)
# define SIMDE__ENABLE_GCC_VEC_EXT
#endif
#if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L)))
# define SIMDE_ENABLE_OPENMP
#endif
#if !defined(SIMDE_ENABLE_CILKPLUS) && defined(__cilk)
# define SIMDE_ENABLE_CILKPLUS
#endif
#if defined(SIMDE_ENABLE_OPENMP)
# define SIMDE__VECTORIZE _Pragma("omp simd")
# define SIMDE__VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l))
# define SIMDE__VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r))
# define SIMDE__VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a))
#elif defined(SIMDE_ENABLE_CILKPLUS)
# define SIMDE__VECTORIZE _Pragma("simd")
# define SIMDE__VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l))
# define SIMDE__VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r))
# define SIMDE__VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a))
#elif defined(__INTEL_COMPILER)
# define SIMDE__VECTORIZE _Pragma("simd")
# define SIMDE__VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l))
# define SIMDE__VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r))
# define SIMDE__VECTORIZE_ALIGNED(a)
#elif defined(__clang__)
# define SIMDE__VECTORIZE _Pragma("clang loop vectorize(enable)")
# define SIMDE__VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l))
# define SIMDE__VECTORIZE_REDUCTION(r) SIMDE__VECTORIZE
# define SIMDE__VECTORIZE_ALIGNED(a)
#elif HEDLEY_GCC_VERSION_CHECK(4,9,0)
# define SIMDE__VECTORIZE _Pragma("GCC ivdep")
# define SIMDE__VECTORIZE_SAFELEN(l) SIMDE__VECTORIZE
# define SIMDE__VECTORIZE_REDUCTION(r) SIMDE__VECTORIZE
# define SIMDE__VECTORIZE_ALIGNED(a)
#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0)
# define SIMDE__VECTORIZE _Pragma("_CRI ivdep")
# define SIMDE__VECTORIZE_SAFELEN(l) SIMDE__VECTORIZE
# define SIMDE__VECTORIZE_REDUCTION(r) SIMDE__VECTORIZE
# define SIMDE__VECTORIZE_ALIGNED(a)
#else
# define SIMDE__VECTORIZE
# define SIMDE__VECTORIZE_SAFELEN(l)
# define SIMDE__VECTORIZE_REDUCTION(r)
# define SIMDE__VECTORIZE_ALIGNED(a)
#endif
#if HEDLEY_GCC_HAS_ATTRIBUTE(unused,3,1,0)
# define SIMDE__UNUSED __attribute__((__unused__))
#else
# define SIMDE__UNUSED
#endif
#if HEDLEY_GCC_HAS_ATTRIBUTE(artificial,4,3,0)
# define SIMDE__ARTIFICIAL __attribute__((__artificial__))
#else
# define SIMDE__ARTIFICIAL
#endif
/* Intended for checking coverage, you should never use this in
production. */
#if defined(SIMDE_NO_INLINE)
# define SIMDE__FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE SIMDE__UNUSED static
#else
# define SIMDE__FUNCTION_ATTRIBUTES HEDLEY_INLINE SIMDE__ARTIFICIAL static
#endif
#if defined(_MSC_VER)
# define SIMDE__BEGIN_DECLS HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS
# define SIMDE__END_DECLS HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS
#else
# define SIMDE__BEGIN_DECLS HEDLEY_BEGIN_C_DECLS
# define SIMDE__END_DECLS HEDLEY_END_C_DECLS
#endif
#if defined(__SIZEOF_INT128__)
# define SIMDE__HAVE_INT128
typedef __int128 simde_int128;
typedef unsigned __int128 simde_uint128;
#endif
/* TODO: we should at least make an attempt to detect the correct
types for simde_float32/float64 instead of just assuming float and
double. */
#if !defined(SIMDE_FLOAT32_TYPE)
# define SIMDE_FLOAT32_TYPE float
# define SIMDE_FLOAT32_C(value) value##f
#else
# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value)
#endif
typedef SIMDE_FLOAT32_TYPE simde_float32;
HEDLEY_STATIC_ASSERT(sizeof(simde_float32) == 4, "Unable to find 32-bit floating-point type.");
#if !defined(SIMDE_FLOAT64_TYPE)
# define SIMDE_FLOAT64_TYPE double
# define SIMDE_FLOAT64_C(value) value
#else
# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value)
#endif
typedef SIMDE_FLOAT64_TYPE simde_float64;
HEDLEY_STATIC_ASSERT(sizeof(simde_float64) == 8, "Unable to find 64-bit floating-point type.");
/* Whether to assume that the compiler can auto-vectorize reasonably
well. This will cause SIMDe to attempt to compose vector
operations from simpler vector operations instead of minimizing
serial work.
As an example, consider the _mm_add_ss(a, b) function from SSE,
which returns { a0 + b0, a1, a2, a3 }. This pattern is repeated
for other operations (sub, mul, etc.).
The naïve implementation would result in loading a0 and b0, adding
them into a temporary variable, then splicing that value into a new
vector with the remaining elements from a.
On platforms which support vectorization, it's generally faster to
simply perform the operation on the entire vector to avoid having
to move data between SIMD registers and non-SIMD registers.
Basically, instead of the temporary variable being (a0 + b0) it
would be a vector of (a + b), which is then combined with a to form
the result.
By default, SIMDe will prefer the pure-vector versions if we detect
a vector ISA extension, but this can be overridden by defining
SIMDE_NO_ASSUME_VECTORIZATION. You can also define
SIMDE_ASSUME_VECTORIZATION if you want to force SIMDe to use the
vectorized version. */
#if !defined(SIMDE_NO_ASSUME_VECTORIZATION) && !defined(SIMDE_ASSUME_VECTORIZATION)
# if defined(__SSE__) || defined(__ARM_NEON) || defined(__mips_msa) || defined(__ALTIVEC__)
# define SIMDE_ASSUME_VECTORIZATION
# endif
#endif
/* GCC and clang have built-in functions to handle shuffling of
vectors, but the implementations are slightly different. This
macro is just an abstraction over them. Note that elem_size is in
bits but vec_size is in bytes. */
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
# define SIMDE__SHUFFLE_VECTOR(elem_size, vec_size, a, b, ...) __builtin_shufflevector(a, b, __VA_ARGS__)
#elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER)
# define SIMDE__SHUFFLE_VECTOR(elem_size, vec_size, a, b, ...) __builtin_shuffle(a, b, (int##elem_size##_t __attribute__((__vector_size__(vec_size)))) { __VA_ARGS__ })
#endif
#if HEDLEY_GCC_HAS_BUILTIN(__builtin_convertvector,9,0,0)
# define SIMDE__CONVERT_VECTOR(to, from) ((to) = __builtin_convertvector((from), __typeof__(to)))
#endif
#if HEDLEY_HAS_WARNING("-Wbad-function-cast")
# define SIMDE_CONVERT_FTOI(T,v) \
HEDLEY_DIAGNOSTIC_PUSH \
_Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \
((T) (v)) \
HEDLEY_DIAGNOSTIC_POP
#else
# define SIMDE_CONVERT_FTOI(T,v) ((T) (v))
#endif
/* Some algorithms are iterative, and fewer iterations means less
accuracy. Lower values here will result in faster, but less
accurate, calculations for some functions. */
#if !defined(SIMDE_ACCURACY_ITERS)
# define SIMDE_ACCURACY_ITERS 2
#endif
#if defined(SIMDE__ASSUME_ALIGNED)
# undef SIMDE__ASSUME_ALIGNED
#endif
#if HEDLEY_INTEL_VERSION_CHECK(9,0,0)
# define SIMDE__ASSUME_ALIGNED(ptr, align) __assume_aligned(ptr, align)
#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0)
# define SIMDE__ASSUME_ALIGNED(ptr, align) __assume((((char*) ptr) - ((char*) 0)) % (align) == 0)
#elif HEDLEY_GCC_HAS_BUILTIN(__builtin_assume_aligned,4,7,0)
# define SIMDE__ASSUME_ALIGNED(ptr, align) (ptr = (__typeof__(ptr)) __builtin_assume_aligned((ptr), align))
#elif HEDLEY_CLANG_HAS_BUILTIN(__builtin_assume)
# define SIMDE__ASSUME_ALIGNED(ptr, align) __builtin_assume((((char*) ptr) - ((char*) 0)) % (align) == 0)
#elif HEDLEY_GCC_HAS_BUILTIN(__builtin_unreachable,4,5,0)
# define SIMDE__ASSUME_ALIGNED(ptr, align) ((((char*) ptr) - ((char*) 0)) % (align) == 0) ? (1) : (__builtin_unreachable(), 0)
#else
# define SIMDE__ASSUME_ALIGNED(ptr, align)
#endif
/* This is only to help us implement functions like _mm_undefined_ps. */
#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
# undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_
#endif
#if HEDLEY_HAS_WARNING("-Wuninitialized")
# define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"")
#elif HEDLEY_GCC_VERSION_CHECK(4,2,0)
# define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"")
#elif HEDLEY_PGI_VERSION_CHECK(19,10,0)
# define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549")
#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)
# define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)")
#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0)
# define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)")
#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus)
# define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)")
/* #elif \
HEDLEY_TI_VERSION_CHECK(16,9,9) || \
HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \
HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \
HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2)
# define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") */
#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0)
# define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)")
#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0)
# define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700))
#endif
/* Sometimes we run into problems with specific versions of compilers
which make the native versions unusable for us. Often this is due
to missing functions, sometimes buggy implementations, etc. These
macros are how we check for specific bugs. As they are fixed we'll
start only defining them for problematic compiler versions. */
#if !defined(SIMDE_IGNORE_COMPILER_BUGS)
# if !HEDLEY_GCC_VERSION_CHECK(4,9,0)
# define SIMDE_BUG_GCC_REV_208793
# endif
# if !HEDLEY_GCC_VERSION_CHECK(5,0,0)
# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */
# endif
# if !HEDLEY_GCC_VERSION_CHECK(4,6,0)
# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */
# endif
# if !HEDLEY_GCC_VERSION_CHECK(10,0,0)
# define SIMDE_BUG_GCC_REV_274313
# endif
# if defined(HEDLEY_EMSCRIPTEN_VERSION)
# define SIMDE_BUG_EMSCRIPTEN_MISSING_IMPL /* Placeholder for (as yet) unfiled issues. */
# define SIMDE_BUG_EMSCRIPTEN_5242
# endif
#endif
#if !defined(__cplusplus)
#define SIMDE_F64_ALL_SET (((union { uint64_t u64; simde_float64 f64; }) { .u64 = ~UINT64_C(0x0) }).f64)
#define SIMDE_F64_ALL_UNSET (((union { uint64_t u64; simde_float64 f64; }) { .u64 = UINT64_C(0x0) }).f64)
#define SIMDE_F32_ALL_SET (((union { uint32_t u32; simde_float32 f32; }) { .u32 = ~UINT32_C(0x0) }).f32)
#define SIMDE_F32_ALL_UNSET (((union { uint32_t u32; simde_float32 f32; }) { .u32 = UINT32_C(0x0) }).f32)
#else
static const union { uint64_t u64; simde_float64 f64; } simde_f64_all_set = { .u64 = ~UINT64_C(0) };
static const union { uint64_t u64; simde_float64 f64; } simde_f64_all_unset = { .u64 = UINT64_C(0) };
static const union { uint32_t u32; simde_float32 f32; } simde_f32_all_set = { .u32 = ~UINT32_C(0) };
static const union { uint32_t u32; simde_float32 f32; } simde_f32_all_unset = { .u32 = UINT32_C(0) };
# define SIMDE_F64_ALL_SET (simde_f64_all_set.f64)
# define SIMDE_F64_ALL_UNSET (simde_f64_all_unset.f64)
# define SIMDE_F32_ALL_SET (simde_f32_all_set.f32)
# define SIMDE_F32_ALL_UNSET (simde_f32_all_unset.f32)
#endif
#endif /* !defined(SIMDE_COMMON_H) */
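To illustrate the SIMDE_ASSUME_VECTORIZATION trade-off described in the comment above, here is a hypothetical sketch; the simde_float32x4 type and both helpers are illustrative, not SIMDe's real implementation, and the code relies on the GCC/Clang vector_size extension:
#include <stdio.h>
typedef float simde_float32x4 __attribute__((vector_size(16)));
/* Scalar route: touch only lane 0, keep the rest of 'a' as-is. */
static simde_float32x4 add_ss_scalar(simde_float32x4 a, simde_float32x4 b) {
  a[0] += b[0];
  return a;
}
/* Whole-vector route: one SIMD add over all four lanes, then keep lane 0
   of the sum and lanes 1-3 of 'a'.  On SIMD hardware this tends to avoid
   moving data between vector and scalar registers. */
static simde_float32x4 add_ss_vector(simde_float32x4 a, simde_float32x4 b) {
  simde_float32x4 sum = a + b;
  a[0] = sum[0];
  return a;
}
int main(void) {
  simde_float32x4 a = { 1, 2, 3, 4 };
  simde_float32x4 b = { 10, 20, 30, 40 };
  simde_float32x4 r1 = add_ss_scalar(a, b);
  simde_float32x4 r2 = add_ss_vector(a, b);
  printf("%g %g\n", (double) r1[0], (double) r2[0]);  /* both print 11 */
  return 0;
}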
@@ -2,8 +2,8 @@ Description: build command line aligner
This patch ensures that 'ssw_test' is called 'ssw-align' in Debian
and also links against libssw.
Author: Sascha Steinbiss <satta@debian.org>
--- a/src/Makefile
+++ b/src/Makefile
--- libssw.orig/src/Makefile
+++ libssw/src/Makefile
@@ -4,9 +4,10 @@
#CXXFLAGS := $(CFLAGS)
LOBJS = ssw.o
@@ -37,8 +37,8 @@ Author: Sascha Steinbiss <satta@debian.org>
$(CC) -o $@ $(filter-out %.h,$^) $(CPPFLAGS) $(CFLAGS) -lm -lz $(LDFLAGS)
$(EXAMPLE_CPP): example.cpp $(LOBJS) $(LCPPOBJS)
--- a/src/main.c
+++ b/src/main.c
--- libssw.orig/src/main.c
+++ libssw/src/main.c
@@ -297,7 +297,7 @@
}
if (optind + 2 > argc) {
......
build_all_libs.patch
hardening.patch
rename_tool.patch
+simde.patch
Author: Michael R. Crusoe <michael.crusoe@gmail.com>
Description: use the simde header library for greater compatibility
Forwarded: https://github.com/mengyao/Complete-Striped-Smith-Waterman-Library/pull/69
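The porting pattern the patch applies is mechanical: swap <emmintrin.h> for SIMDe's sse2.h header and prefix the SSE2 types and intrinsics with simde_/simde__. A small, hypothetical example of the resulting style, using only intrinsics that also appear in the diff below (the include path is the one used in this packaging):
#include <stdio.h>
#include "../debian/include/simde/x86/sse2.h"
/* Count how many of the 16 byte lanes of a and b are equal. */
static int count_equal_bytes(simde__m128i a, simde__m128i b) {
  simde__m128i eq = simde_mm_cmpeq_epi8(a, b);  /* 0xFF in each matching lane */
  int mask = simde_mm_movemask_epi8(eq);        /* one bit per byte lane */
  int n = 0;
  while (mask) { n += mask & 1; mask >>= 1; }
  return n;
}
int main(void) {
  simde__m128i a = simde_mm_set1_epi8(7);
  simde__m128i b = simde_mm_set1_epi8(7);
  printf("%d\n", count_equal_bytes(a, b));      /* prints 16 */
  return 0;
}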
--- libssw.orig/src/ssw.c
+++ libssw/src/ssw.c
@@ -35,7 +35,7 @@
*
*/
-#include <emmintrin.h>
+#include "../debian/include/simde/x86/sse2.h"
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
@@ -76,8 +76,8 @@
} cigar;
struct _profile{
- __m128i* profile_byte; // 0: none
- __m128i* profile_word; // 0: none
+ simde__m128i* profile_byte; // 0: none
+ simde__m128i* profile_word; // 0: none
const int8_t* read;
const int8_t* mat;
int32_t readLen;
@@ -86,7 +86,7 @@
};
/* Generate query profile rearrange query sequence & calculate the weight of match/mismatch. */
-static __m128i* qP_byte (const int8_t* read_num,
+static simde__m128i* qP_byte (const int8_t* read_num,
const int8_t* mat,
const int32_t readLen,
const int32_t n, /* the edge length of the squre matrix mat */
@@ -96,7 +96,7 @@
Each piece is 8 bit. Split the read into 16 segments.
Calculat 16 segments in parallel.
*/
- __m128i* vProfile = (__m128i*)malloc(n * segLen * sizeof(__m128i));
+ simde__m128i* vProfile = (simde__m128i*)malloc(n * segLen * sizeof(simde__m128i));
int8_t* t = (int8_t*)vProfile;
int32_t nt, i, j, segNum;
@@ -126,7 +126,7 @@
int32_t readLen,
const uint8_t weight_gapO, /* will be used as - */
const uint8_t weight_gapE, /* will be used as - */
- const __m128i* vProfile,
+ const simde__m128i* vProfile,
uint8_t terminate, /* the best alignment score: used to terminate
the matrix calculation when locating the
alignment beginning point. If this score
@@ -134,11 +134,11 @@
uint8_t bias, /* Shift 0 point to a positive value. */
int32_t maskLen) {
-#define max16(m, vm) (vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 8)); \
- (vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 4)); \
- (vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 2)); \
- (vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 1)); \
- (m) = _mm_extract_epi16((vm), 0)
+#define max16(m, vm) (vm) = simde_mm_max_epu8((vm), simde_mm_srli_si128((vm), 8)); \
+ (vm) = simde_mm_max_epu8((vm), simde_mm_srli_si128((vm), 4)); \
+ (vm) = simde_mm_max_epu8((vm), simde_mm_srli_si128((vm), 2)); \
+ (vm) = simde_mm_max_epu8((vm), simde_mm_srli_si128((vm), 1)); \
+ (m) = simde_mm_extract_epi16((vm), 0)
uint8_t max = 0; /* the max alignment score */
int32_t end_read = readLen - 1;
@@ -152,26 +152,26 @@
int32_t* end_read_column = (int32_t*) calloc(refLen, sizeof(int32_t));
/* Define 16 byte 0 vector. */
- __m128i vZero = _mm_set1_epi32(0);
+ simde__m128i vZero = simde_mm_set1_epi32(0);
- __m128i* pvHStore = (__m128i*) calloc(segLen, sizeof(__m128i));
- __m128i* pvHLoad = (__m128i*) calloc(segLen, sizeof(__m128i));
- __m128i* pvE = (__m128i*) calloc(segLen, sizeof(__m128i));
- __m128i* pvHmax = (__m128i*) calloc(segLen, sizeof(__m128i));
+ simde__m128i* pvHStore = (simde__m128i*) calloc(segLen, sizeof(simde__m128i));
+ simde__m128i* pvHLoad = (simde__m128i*) calloc(segLen, sizeof(simde__m128i));
+ simde__m128i* pvE = (simde__m128i*) calloc(segLen, sizeof(simde__m128i));
+ simde__m128i* pvHmax = (simde__m128i*) calloc(segLen, sizeof(simde__m128i));
int32_t i, j;
/* 16 byte insertion begin vector */
- __m128i vGapO = _mm_set1_epi8(weight_gapO);
+ simde__m128i vGapO = simde_mm_set1_epi8(weight_gapO);
/* 16 byte insertion extension vector */
- __m128i vGapE = _mm_set1_epi8(weight_gapE);
+ simde__m128i vGapE = simde_mm_set1_epi8(weight_gapE);
/* 16 byte bias vector */
- __m128i vBias = _mm_set1_epi8(bias);
+ simde__m128i vBias = simde_mm_set1_epi8(bias);
- __m128i vMaxScore = vZero; /* Trace the highest score of the whole SW matrix. */
- __m128i vMaxMark = vZero; /* Trace the highest score till the previous column. */
- __m128i vTemp;
+ simde__m128i vMaxScore = vZero; /* Trace the highest score of the whole SW matrix. */
+ simde__m128i vMaxMark = vZero; /* Trace the highest score till the previous column. */
+ simde__m128i vTemp;
int32_t edge, begin = 0, end = refLen, step = 1;
/* outer loop to process the reference sequence */
@@ -182,84 +182,84 @@
}
for (i = begin; LIKELY(i != end); i += step) {
int32_t cmp;
- __m128i e, vF = vZero, vMaxColumn = vZero; /* Initialize F value to 0.
+ simde__m128i e, vF = vZero, vMaxColumn = vZero; /* Initialize F value to 0.
Any errors to vH values will be corrected in the Lazy_F loop.
*/
- __m128i vH = pvHStore[segLen - 1];
- vH = _mm_slli_si128 (vH, 1); /* Shift the 128-bit value in vH left by 1 byte. */
- const __m128i* vP = vProfile + ref[i] * segLen; /* Right part of the vProfile */
+ simde__m128i vH = pvHStore[segLen - 1];
+ vH = simde_mm_slli_si128 (vH, 1); /* Shift the 128-bit value in vH left by 1 byte. */
+ const simde__m128i* vP = vProfile + ref[i] * segLen; /* Right part of the vProfile */
/* Swap the 2 H buffers. */
- __m128i* pv = pvHLoad;
+ simde__m128i* pv = pvHLoad;
pvHLoad = pvHStore;
pvHStore = pv;
/* inner loop to process the query sequence */
for (j = 0; LIKELY(j < segLen); ++j) {
- vH = _mm_adds_epu8(vH, _mm_load_si128(vP + j));
- vH = _mm_subs_epu8(vH, vBias); /* vH will be always > 0 */
+ vH = simde_mm_adds_epu8(vH, simde_mm_load_si128(vP + j));
+ vH = simde_mm_subs_epu8(vH, vBias); /* vH will be always > 0 */
/* Get max from vH, vE and vF. */
- e = _mm_load_si128(pvE + j);
- vH = _mm_max_epu8(vH, e);
- vH = _mm_max_epu8(vH, vF);
- vMaxColumn = _mm_max_epu8(vMaxColumn, vH);
+ e = simde_mm_load_si128(pvE + j);
+ vH = simde_mm_max_epu8(vH, e);
+ vH = simde_mm_max_epu8(vH, vF);
+ vMaxColumn = simde_mm_max_epu8(vMaxColumn, vH);
/* Save vH values. */
- _mm_store_si128(pvHStore + j, vH);
+ simde_mm_store_si128(pvHStore + j, vH);
/* Update vE value. */
- vH = _mm_subs_epu8(vH, vGapO); /* saturation arithmetic, result >= 0 */
- e = _mm_subs_epu8(e, vGapE);
- e = _mm_max_epu8(e, vH);
- _mm_store_si128(pvE + j, e);
+ vH = simde_mm_subs_epu8(vH, vGapO); /* saturation arithmetic, result >= 0 */
+ e = simde_mm_subs_epu8(e, vGapE);
+ e = simde_mm_max_epu8(e, vH);
+ simde_mm_store_si128(pvE + j, e);
/* Update vF value. */
- vF = _mm_subs_epu8(vF, vGapE);
- vF = _mm_max_epu8(vF, vH);
+ vF = simde_mm_subs_epu8(vF, vGapE);
+ vF = simde_mm_max_epu8(vF, vH);
/* Load the next vH. */
- vH = _mm_load_si128(pvHLoad + j);
+ vH = simde_mm_load_si128(pvHLoad + j);
}
/* Lazy_F loop: has been revised to disallow adjecent insertion and then deletion, so don't update E(i, j), learn from SWPS3 */
/* reset pointers to the start of the saved data */
j = 0;
- vH = _mm_load_si128 (pvHStore + j);
+ vH = simde_mm_load_si128 (pvHStore + j);
/* the computed vF value is for the given column. since */
/* we are at the end, we need to shift the vF value over */
/* to the next column. */
- vF = _mm_slli_si128 (vF, 1);
- vTemp = _mm_subs_epu8 (vH, vGapO);
- vTemp = _mm_subs_epu8 (vF, vTemp);
- vTemp = _mm_cmpeq_epi8 (vTemp, vZero);
- cmp = _mm_movemask_epi8 (vTemp);
+ vF = simde_mm_slli_si128 (vF, 1);
+ vTemp = simde_mm_subs_epu8 (vH, vGapO);
+ vTemp = simde_mm_subs_epu8 (vF, vTemp);
+ vTemp = simde_mm_cmpeq_epi8 (vTemp, vZero);
+ cmp = simde_mm_movemask_epi8 (vTemp);
while (cmp != 0xffff)
{
- vH = _mm_max_epu8 (vH, vF);
- vMaxColumn = _mm_max_epu8(vMaxColumn, vH);
- _mm_store_si128 (pvHStore + j, vH);
- vF = _mm_subs_epu8 (vF, vGapE);
+ vH = simde_mm_max_epu8 (vH, vF);
+ vMaxColumn = simde_mm_max_epu8(vMaxColumn, vH);
+ simde_mm_store_si128 (pvHStore + j, vH);
+ vF = simde_mm_subs_epu8 (vF, vGapE);
j++;
if (j >= segLen)
{
j = 0;
- vF = _mm_slli_si128 (vF, 1);
+ vF = simde_mm_slli_si128 (vF, 1);
}
- vH = _mm_load_si128 (pvHStore + j);
+ vH = simde_mm_load_si128 (pvHStore + j);
- vTemp = _mm_subs_epu8 (vH, vGapO);
- vTemp = _mm_subs_epu8 (vF, vTemp);
- vTemp = _mm_cmpeq_epi8 (vTemp, vZero);
- cmp = _mm_movemask_epi8 (vTemp);
+ vTemp = simde_mm_subs_epu8 (vH, vGapO);
+ vTemp = simde_mm_subs_epu8 (vF, vTemp);
+ vTemp = simde_mm_cmpeq_epi8 (vTemp, vZero);
+ cmp = simde_mm_movemask_epi8 (vTemp);
}
- vMaxScore = _mm_max_epu8(vMaxScore, vMaxColumn);
- vTemp = _mm_cmpeq_epi8(vMaxMark, vMaxScore);
- cmp = _mm_movemask_epi8(vTemp);
+ vMaxScore = simde_mm_max_epu8(vMaxScore, vMaxColumn);
+ vTemp = simde_mm_cmpeq_epi8(vMaxMark, vMaxScore);
+ cmp = simde_mm_movemask_epi8(vTemp);
if (cmp != 0xffff) {
uint8_t temp;
vMaxMark = vMaxScore;
@@ -327,13 +327,13 @@
return bests;
}
-static __m128i* qP_word (const int8_t* read_num,
+static simde__m128i* qP_word (const int8_t* read_num,
const int8_t* mat,
const int32_t readLen,
const int32_t n) {
int32_t segLen = (readLen + 7) / 8;
- __m128i* vProfile = (__m128i*)malloc(n * segLen * sizeof(__m128i));
+ simde__m128i* vProfile = (simde__m128i*)malloc(n * segLen * sizeof(simde__m128i));
int16_t* t = (int16_t*)vProfile;
int32_t nt, i, j;
int32_t segNum;
@@ -357,14 +357,14 @@
int32_t readLen,
const uint8_t weight_gapO, /* will be used as - */
const uint8_t weight_gapE, /* will be used as - */
- const __m128i* vProfile,
+ const simde__m128i* vProfile,
uint16_t terminate,
int32_t maskLen) {
-#define max8(m, vm) (vm) = _mm_max_epi16((vm), _mm_srli_si128((vm), 8)); \
- (vm) = _mm_max_epi16((vm), _mm_srli_si128((vm), 4)); \
- (vm) = _mm_max_epi16((vm), _mm_srli_si128((vm), 2)); \
- (m) = _mm_extract_epi16((vm), 0)
+#define max8(m, vm) (vm) = simde_mm_max_epi16((vm), simde_mm_srli_si128((vm), 8)); \
+ (vm) = simde_mm_max_epi16((vm), simde_mm_srli_si128((vm), 4)); \
+ (vm) = simde_mm_max_epi16((vm), simde_mm_srli_si128((vm), 2)); \
+ (m) = simde_mm_extract_epi16((vm), 0)
uint16_t max = 0; /* the max alignment score */
int32_t end_read = readLen - 1;
@@ -378,23 +378,23 @@
int32_t* end_read_column = (int32_t*) calloc(refLen, sizeof(int32_t));
/* Define 16 byte 0 vector. */
- __m128i vZero = _mm_set1_epi32(0);
+ simde__m128i vZero = simde_mm_set1_epi32(0);
- __m128i* pvHStore = (__m128i*) calloc(segLen, sizeof(__m128i));
- __m128i* pvHLoad = (__m128i*) calloc(segLen, sizeof(__m128i));
- __m128i* pvE = (__m128i*) calloc(segLen, sizeof(__m128i));
- __m128i* pvHmax = (__m128i*) calloc(segLen, sizeof(__m128i));
+ simde__m128i* pvHStore = (simde__m128i*) calloc(segLen, sizeof(simde__m128i));
+ simde__m128i* pvHLoad = (simde__m128i*) calloc(segLen, sizeof(simde__m128i));
+ simde__m128i* pvE = (simde__m128i*) calloc(segLen, sizeof(simde__m128i));
+ simde__m128i* pvHmax = (simde__m128i*) calloc(segLen, sizeof(simde__m128i));
int32_t i, j, k;
/* 16 byte insertion begin vector */
- __m128i vGapO = _mm_set1_epi16(weight_gapO);
+ simde__m128i vGapO = simde_mm_set1_epi16(weight_gapO);
/* 16 byte insertion extension vector */
- __m128i vGapE = _mm_set1_epi16(weight_gapE);
+ simde__m128i vGapE = simde_mm_set1_epi16(weight_gapE);
- __m128i vMaxScore = vZero; /* Trace the highest score of the whole SW matrix. */
- __m128i vMaxMark = vZero; /* Trace the highest score till the previous column. */
- __m128i vTemp;
+ simde__m128i vMaxScore = vZero; /* Trace the highest score of the whole SW matrix. */
+ simde__m128i vMaxMark = vZero; /* Trace the highest score till the previous column. */
+ simde__m128i vTemp;
int32_t edge, begin = 0, end = refLen, step = 1;
/* outer loop to process the reference sequence */
@@ -405,66 +405,66 @@
}
for (i = begin; LIKELY(i != end); i += step) {
int32_t cmp;
- __m128i e, vF = vZero; /* Initialize F value to 0.
+ simde__m128i e, vF = vZero; /* Initialize F value to 0.
Any errors to vH values will be corrected in the Lazy_F loop.
*/
- __m128i vH = pvHStore[segLen - 1];
- vH = _mm_slli_si128 (vH, 2); /* Shift the 128-bit value in vH left by 2 byte. */
+ simde__m128i vH = pvHStore[segLen - 1];
+ vH = simde_mm_slli_si128 (vH, 2); /* Shift the 128-bit value in vH left by 2 byte. */
/* Swap the 2 H buffers. */
- __m128i* pv = pvHLoad;
+ simde__m128i* pv = pvHLoad;
- __m128i vMaxColumn = vZero; /* vMaxColumn is used to record the max values of column i. */
+ simde__m128i vMaxColumn = vZero; /* vMaxColumn is used to record the max values of column i. */
- const __m128i* vP = vProfile + ref[i] * segLen; /* Right part of the vProfile */
+ const simde__m128i* vP = vProfile + ref[i] * segLen; /* Right part of the vProfile */
pvHLoad = pvHStore;
pvHStore = pv;
/* inner loop to process the query sequence */
for (j = 0; LIKELY(j < segLen); j ++) {
- vH = _mm_adds_epi16(vH, _mm_load_si128(vP + j));
+ vH = simde_mm_adds_epi16(vH, simde_mm_load_si128(vP + j));
/* Get max from vH, vE and vF. */
- e = _mm_load_si128(pvE + j);
- vH = _mm_max_epi16(vH, e);
- vH = _mm_max_epi16(vH, vF);
- vMaxColumn = _mm_max_epi16(vMaxColumn, vH);
+ e = simde_mm_load_si128(pvE + j);
+ vH = simde_mm_max_epi16(vH, e);
+ vH = simde_mm_max_epi16(vH, vF);
+ vMaxColumn = simde_mm_max_epi16(vMaxColumn, vH);
/* Save vH values. */
- _mm_store_si128(pvHStore + j, vH);
+ simde_mm_store_si128(pvHStore + j, vH);
/* Update vE value. */
- vH = _mm_subs_epu16(vH, vGapO); /* saturation arithmetic, result >= 0 */
- e = _mm_subs_epu16(e, vGapE);
- e = _mm_max_epi16(e, vH);
- _mm_store_si128(pvE + j, e);
+ vH = simde_mm_subs_epu16(vH, vGapO); /* saturation arithmetic, result >= 0 */
+ e = simde_mm_subs_epu16(e, vGapE);
+ e = simde_mm_max_epi16(e, vH);
+ simde_mm_store_si128(pvE + j, e);
/* Update vF value. */
- vF = _mm_subs_epu16(vF, vGapE);
- vF = _mm_max_epi16(vF, vH);
+ vF = simde_mm_subs_epu16(vF, vGapE);
+ vF = simde_mm_max_epi16(vF, vH);
/* Load the next vH. */
- vH = _mm_load_si128(pvHLoad + j);
+ vH = simde_mm_load_si128(pvHLoad + j);
}
/* Lazy_F loop: has been revised to disallow adjecent insertion and then deletion, so don't update E(i, j), learn from SWPS3 */
for (k = 0; LIKELY(k < 8); ++k) {
- vF = _mm_slli_si128 (vF, 2);
+ vF = simde_mm_slli_si128 (vF, 2);
for (j = 0; LIKELY(j < segLen); ++j) {
- vH = _mm_load_si128(pvHStore + j);
- vH = _mm_max_epi16(vH, vF);
- vMaxColumn = _mm_max_epi16(vMaxColumn, vH); //newly added line
- _mm_store_si128(pvHStore + j, vH);
- vH = _mm_subs_epu16(vH, vGapO);
- vF = _mm_subs_epu16(vF, vGapE);
- if (UNLIKELY(! _mm_movemask_epi8(_mm_cmpgt_epi16(vF, vH)))) goto end;
+ vH = simde_mm_load_si128(pvHStore + j);
+ vH = simde_mm_max_epi16(vH, vF);
+ vMaxColumn = simde_mm_max_epi16(vMaxColumn, vH); //newly added line
+ simde_mm_store_si128(pvHStore + j, vH);
+ vH = simde_mm_subs_epu16(vH, vGapO);
+ vF = simde_mm_subs_epu16(vF, vGapE);
+ if (UNLIKELY(! simde_mm_movemask_epi8(simde_mm_cmpgt_epi16(vF, vH)))) goto end;
}
}
end:
- vMaxScore = _mm_max_epi16(vMaxScore, vMaxColumn);
- vTemp = _mm_cmpeq_epi16(vMaxMark, vMaxScore);
- cmp = _mm_movemask_epi8(vTemp);
+ vMaxScore = simde_mm_max_epi16(vMaxScore, vMaxColumn);
+ vTemp = simde_mm_cmpeq_epi16(vMaxMark, vMaxScore);
+ cmp = simde_mm_movemask_epi8(vTemp);
if (cmp != 0xffff) {
uint16_t temp;
vMaxMark = vMaxScore;
@@ -801,7 +801,7 @@
const int32_t maskLen) {
alignment_end* bests = 0, *bests_reverse = 0;
- __m128i* vP = 0;
+ simde__m128i* vP = 0;
int32_t word = 0, band_width = 0, readLen = prof->readLen;
int8_t* read_reverse = 0;
cigar* path;
--- libssw.orig/src/ssw.h
+++ libssw/src/ssw.h
@@ -14,7 +14,6 @@
#include <stdio.h>
#include <stdint.h>
#include <string.h>
-#include <emmintrin.h>
#ifdef __cplusplus
extern "C" {
--- libssw.orig/src/main.c
+++ libssw/src/main.c
@@ -6,7 +6,6 @@
#include <stdlib.h>
#include <stdint.h>
-#include <emmintrin.h>
#include <zlib.h>
#include <stdio.h>
#include <time.h>
@@ -2,6 +2,9 @@
# DH_VERBOSE := 1
export DEB_BUILD_MAINT_OPTIONS = hardening=+bindnow
+export DEB_CFLAGS_MAINT_APPEND += -DSIMDE_ENABLE_OPENMP -fopenmp-simd -O3
+export DEB_CXXFLAGS_MAINT_APPEND += -DSIMDE_ENABLE_OPENMP -fopenmp-simd -O3
ifeq ($(DEB_BUILD_ARCH), ppc64el)
export DEB_CFLAGS_MAINT_APPEND = -DNO_WARN_X86_INTRINSICS
export DEB_CXXFLAGS_MAINT_APPEND = -DNO_WARN_X86_INTRINSICS
......
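For context on the rules change above: the -DSIMDE_ENABLE_OPENMP / -fopenmp-simd pair switches the SIMDE__VECTORIZE macros in simde-common.h (shown earlier in this diff) to emit omp simd pragmas, so SIMDe's serial fallback loops become vectorization hints. A minimal sketch of the effect, with the macro re-declared locally purely for illustration:
#include <stdint.h>
#include <stdio.h>
/* Mirrors the SIMDE__VECTORIZE definition from simde-common.h above. */
#if defined(SIMDE_ENABLE_OPENMP)
#  define SIMDE__VECTORIZE _Pragma("omp simd")
#else
#  define SIMDE__VECTORIZE
#endif
/* Serial fallback in the spirit of _mm_adds_epu8: with -fopenmp-simd the
   pragma tells the compiler the loop is safe to vectorize. */
static void saturating_add_u8(uint8_t *r, const uint8_t *a, const uint8_t *b, int n) {
  SIMDE__VECTORIZE
  for (int i = 0; i < n; i++) {
    unsigned int s = (unsigned int) a[i] + b[i];
    r[i] = (uint8_t) (s > 255 ? 255 : s);
  }
}
int main(void) {
  uint8_t a[16], b[16], r[16];
  for (int i = 0; i < 16; i++) { a[i] = (uint8_t) (i * 10); b[i] = 200; }
  saturating_add_u8(r, a, b, 16);
  for (int i = 0; i < 16; i++) printf("%u ", (unsigned) r[i]);
  printf("\n");
  return 0;
}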