Skip to content
Commits on Source (3)
......@@ -146,7 +146,8 @@ AM_SH_LOG_FLAGS =
TESTS = tests/generate_sequence.sh tests/parallel_hashing.sh \
tests/merge.sh tests/bloom_filter.sh tests/big.sh \
tests/subset_hashing.sh tests/multi_file.sh \
tests/bloom_counter.sh tests/large_key.sh tests/sam.sh
tests/bloom_counter.sh tests/large_key.sh tests/sam.sh \
tests/small_mers.sh
EXTRA_DIST += $(TESTS)
clean-local: clean-local-check
......@@ -164,6 +165,7 @@ tests/min_qual.log: tests/generate_fastq_sequence.log
tests/large_key.log: tests/generate_sequence.log
tests/quality_filter.log: tests/generate_sequence.log
tests/sam.log: tests/generate_sequence.log
tests/small_mers.log: tests/generate_sequence.log
# SWIG tests
TESTS += tests/swig_python.sh tests/swig_ruby.sh tests/swig_perl.sh
......
AC_INIT([jellyfish], [2.2.7], [gmarcais@umd.edu])
AC_INIT([jellyfish], [2.2.8], [gmarcais@umd.edu])
AC_CANONICAL_HOST
AC_CONFIG_MACRO_DIR([m4])
AM_INIT_AUTOMAKE([subdir-objects foreign parallel-tests color-tests])
......@@ -10,18 +10,23 @@ AC_LIB_RPATH
PKG_PROG_PKG_CONFIG
# Change default compilation flags
AC_SUBST([ALL_CXXFLAGS], [-std=c++0x])
CXXFLAGS="-std=c++0x $CXXFLAGS"
AC_LANG(C++)
AC_PROG_CXX
# Major version of the library
AC_SUBST([PACKAGE_LIB], [2.0])
# Check if gnu++11 is necessary
save_CXXFLAGS=$CXXFLAGS
AC_CANONICAL_HOST
case "${host_os}" in
cygwin*) CXXFLAGS="-std=gnu++11 $save_CXXFLAGS" ;;
*) CXXFLAGS="-std=c++11 $save_CXXFLAGS" ;;
esac
# Try to find htslib to read SAM/BAM/CRAM files
AC_ARG_ENABLE([htslib],
[AS_HELP_STRING([--enable-htslib], [Look for the HTS library (default=yes)])])
echo "enable_htslib $enable_htslib"
AS_IF([test "x$enable_htslib" = "xyes" -o "x$enable_htslib" = "x"],
[PKG_CHECK_MODULES([HTSLIB], [htslib], [AC_DEFINE([HAVE_HTSLIB], [1], [Defined if htslib is available])], [true])]
[AC_LIB_LINKFLAGS_FROM_LIBS([HTSLIB_RPATH], [$HTSLIB_LIBS], [LIBTOOL])])
......@@ -88,8 +93,7 @@ AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <mach-o/dyld.h>]],
[AC_DEFINE([HAVE_NSGETEXECUTABLEPATH], [1], [Used to find executable path on MacOS X])],
[AC_MSG_RESULT([no])])
# Check the version of strerror_r
AC_CHECK_HEADERS_ONCE([execinfo.h ext/stdio_filebuf.h])
AC_CHECK_HEADERS_ONCE([execinfo.h ext/stdio_filebuf.h sys/syscall.h])
AC_CHECK_MEMBER([siginfo_t.si_int],
[AC_DEFINE([HAVE_SI_INT], [1], [Define if siginfo_t.si_int exists])],
[], [[#include <signal.h>]])
......@@ -134,6 +138,9 @@ AM_CONDITIONAL(PYTHON_BINDING, [test -n "$enable_python_binding" -a x$enable_pyt
AM_COND_IF([PYTHON_BINDING],
[AS_IF([test x$enable_python_binding != xyes], [PYTHON_SITE_PKG=$enable_python_binding])]
[AX_PYTHON_DEVEL([], [$prefix])])
AC_ARG_ENABLE([python-deprecated],
[AC_HELP_STRING([--enable-python-deprecated], [enable the deprecated 'jellyfish' module (in addition to 'dna_jellyfish')])])
AM_CONDITIONAL([PYTHON_DEPRECATED], [test -z "$enable_python_deprecated" -o x$enable_python_deprecated != xno])
# Ruby binding setup
AS_IF([test -z "$enable_ruby_binding"], [enable_ruby_binding="$enable_all_binding"])
......
jellyfish (2.2.8-1) unstable; urgency=medium
* New upstream version
* Dropped rename-python-module-to-dna_jellyfish.patch as upstream has
incorporated it.
-- Michael R. Crusoe <michael.crusoe@gmail.com> Sun, 11 Feb 2018 09:21:51 -0800
jellyfish (2.2.7-1) unstable; urgency=medium
* New upstream version
......
......@@ -2,7 +2,7 @@ Source: jellyfish
Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.org>
Uploaders: Shaun Jackman <sjackman@debian.org>,
Andreas Tille <tille@debian.org>,
Michael R. Crusoe <crusoe@ucdavis.edu>
Michael R. Crusoe <michael.crusoe@gmail.com>
Section: science
Priority: optional
Build-Depends: debhelper (>= 11~),
......
......@@ -17,7 +17,7 @@ Author: Michael R. Crusoe <crusoe@ucdavis.edu>
- extra_link_args = jf_ldflags + jf_rpath,
+ extra_link_args = jf_ldflags,
language = "c++")
setup(name = 'jellyfish',
setup(name = 'dna_jellyfish',
version = '0.0.1',
--- jellyfish.orig/swig/Tuprules.tup
+++ jellyfish/swig/Tuprules.tup
......
......@@ -4,9 +4,9 @@ Bug-Debian: https://bugs.debian.org/871697
Description: Replace asm statements by C code to increase portability
Note: Does not work as described - needs verification
--- a/include/jellyfish/rectangular_binary_matrix.hpp
+++ b/include/jellyfish/rectangular_binary_matrix.hpp
@@ -276,13 +276,20 @@ namespace jellyfish {
--- jellyfish.orig/include/jellyfish/rectangular_binary_matrix.hpp
+++ jellyfish/include/jellyfish/rectangular_binary_matrix.hpp
@@ -302,13 +302,20 @@
// #pragma GCC diagnostic pop
// i is the lower 2 bits of x, and an index into the smear array. Compute res ^= smear[i] & p[j].
......@@ -29,7 +29,7 @@ Note: Does not work as described - needs verification
uint64_t i, j = 0, x = 0;
for(unsigned int w = 0; w < nb_words(); ++w) {
@@ -294,16 +301,16 @@ namespace jellyfish {
@@ -320,16 +327,16 @@
}
for( ; j > 7; j -= 8, p -= 8) {
i = (x & (uint64_t)0x3) << 4;
......@@ -50,7 +50,7 @@ Note: Does not work as described - needs verification
x >>= 2;
}
}
@@ -313,18 +320,19 @@ namespace jellyfish {
@@ -339,18 +346,19 @@
switch(j) {
case 6:
i = (x & (uint64_t)0x3) << 4;
......@@ -73,7 +73,7 @@ Note: Does not work as described - needs verification
uint64_t res1, res2;
asm("movd %[acc], %[res1]\n\t"
"psrldq $8, %[acc]\n\t"
@@ -332,6 +340,10 @@ namespace jellyfish {
@@ -358,6 +366,10 @@
: [res1]"=r"(res1), [res2]"=r"(res2)
: [acc]"x"(acc));
return res1 ^ res2;
......
Author: "Diego M. Rodriguez" <diego.plan9@gmail.com>
Last-Update: Tue, 22 Mar 2016 23:19:15 +0100
Bug-Debian: https://bugs.debian.org/819016
Subject: Rename python bindings module name
--- a/swig/python/setup.py
+++ b/swig/python/setup.py
@@ -12,13 +12,13 @@ from distutils.core import setup, Extens
swig_time = os.path.getmtime('../jellyfish.i')
older = True
try:
- older = os.path.getmtime('jellyfish_wrap.cxx') < swig_time or os.path.getmtime('jellyfish.py') < swig_time
+ older = os.path.getmtime('jellyfish_wrap.cxx') < swig_time or os.path.getmtime('dna_jellyfish.py') < swig_time
except FileNotFoundError:
older = True
if older:
- print("Running swig: swig -c++ -python -o jellyfish_wrap.cxx ../jellyfish.i")
- os.system("swig -c++ -python -o jellyfish_wrap.cxx ../jellyfish.i")
+ print("Running swig: swig -c++ -python -module dna_jellyfish -o jellyfish_wrap.cxx ../jellyfish.i")
+ os.system("swig -c++ -python -module dna_jellyfish -o jellyfish_wrap.cxx ../jellyfish.i")
jf_include = [re.sub(r'-I', '', x) for x in os.popen("pkg-config --cflags-only-I jellyfish-2.0").read().rstrip().split()]
jf_cflags = os.popen("pkg-config --cflags-only-other jellyfish-2.0").read().rstrip().split()
@@ -28,7 +28,7 @@ jf_libdir = [re.sub(r'-L', '', x) for x
jf_ldflags = os.popen("pkg-config --libs-only-other jellyfish-2.0").read().rstrip().split()
-jellyfish_module = Extension('_jellyfish',
+jellyfish_module = Extension('_dna_jellyfish',
sources = ['jellyfish_wrap.cxx'],
include_dirs = jf_include,
libraries = jf_libs,
@@ -36,9 +36,9 @@ jellyfish_module = Extension('_jellyfish
extra_compile_args = ["-std=c++0x"] + jf_cflags,
extra_link_args = jf_ldflags,
language = "c++")
-setup(name = 'jellyfish',
+setup(name = 'dna_jellyfish',
version = '0.0.1',
author = 'Guillaume Marcais',
description = 'Access to jellyfish k-mer counting',
ext_modules = [jellyfish_module],
- py_modules = ["jellyfish"])
+ py_modules = ["dna_jellyfish"])
--- a/swig/python/test_hash_counter.py
+++ b/swig/python/test_hash_counter.py
@@ -1,20 +1,20 @@
import unittest
import sys
import random
-import jellyfish
+import dna_jellyfish
class TestHashCounter(unittest.TestCase):
def setUp(self):
- jellyfish.MerDNA.k(100)
- self.hash = jellyfish.HashCounter(1024, 5)
+ dna_jellyfish.MerDNA.k(100)
+ self.hash = dna_jellyfish.HashCounter(1024, 5)
def test_info(self):
- self.assertEqual(100, jellyfish.MerDNA.k())
+ self.assertEqual(100, dna_jellyfish.MerDNA.k())
self.assertEqual(1024, self.hash.size())
self.assertEqual(5, self.hash.val_len())
def test_add(self):
- mer = jellyfish.MerDNA()
+ mer = dna_jellyfish.MerDNA()
good = True
for i in range(1000):
mer.randomize()
--- a/swig/python/test_mer_file.py
+++ b/swig/python/test_mer_file.py
@@ -1,4 +1,4 @@
-import jellyfish
+import dna_jellyfish
import unittest
import sys
import os
@@ -6,7 +6,7 @@ from collections import Counter
class TestMerFile(unittest.TestCase):
def setUp(self):
- self.mf = jellyfish.ReadMerFile(os.path.join(data, "swig_python.jf"))
+ self.mf = dna_jellyfish.ReadMerFile(os.path.join(data, "swig_python.jf"))
def test_histo(self):
histo = Counter()
@@ -46,7 +46,7 @@ class TestMerFile(unittest.TestCase):
def test_query(self):
good = True
- qf = jellyfish.QueryMerFile(os.path.join(data, "swig_python.jf"))
+ qf = dna_jellyfish.QueryMerFile(os.path.join(data, "swig_python.jf"))
for mer, count in self.mf:
good = good and count == qf[mer]
if not good: break
--- a/swig/python/test_string_mers.py
+++ b/swig/python/test_string_mers.py
@@ -1,21 +1,21 @@
import unittest
import sys
import random
-import jellyfish
+import dna_jellyfish
class TestStringMers(unittest.TestCase):
def setUp(self):
bases = "ACGTacgt"
self.str = ''.join(random.choice(bases) for _ in range(1000))
self.k = random.randint(10, 110)
- jellyfish.MerDNA.k(self.k)
+ dna_jellyfish.MerDNA.k(self.k)
def test_all_mers(self):
count = 0
good = True
- mers = jellyfish.string_mers(self.str)
+ mers = dna_jellyfish.string_mers(self.str)
for m in mers:
- m2 = jellyfish.MerDNA(self.str[count:count+self.k])
+ m2 = dna_jellyfish.MerDNA(self.str[count:count+self.k])
good = good and m == m2
count += 1
self.assertTrue(good)
@@ -23,9 +23,9 @@ class TestStringMers(unittest.TestCase):
def test_canonical_mers(self):
good = True
- mers = jellyfish.string_canonicals(self.str)
+ mers = dna_jellyfish.string_canonicals(self.str)
for count, m in enumerate(mers):
- m2 = jellyfish.MerDNA(self.str[count:count+self.k])
+ m2 = dna_jellyfish.MerDNA(self.str[count:count+self.k])
rm2 = m2.get_reverse_complement()
good = good and (m == m2 or m == rm2)
good = good and (not (m > m2)) and (not (m > rm2))
......@@ -3,7 +3,7 @@ pkg-config-with-name
drop-rpath
modern-g++
spelling
rename-python-module-to-dna_jellyfish.patch
gcc-7.patch
reproducible.patch
portability.patch
skip_small_mers.sh
Author: Michael R. Crusoe <michael.crusoe@gmail.com>
Description: Skip missing test file from upstream
--- jellyfish.orig/Makefile.am
+++ jellyfish/Makefile.am
@@ -146,8 +146,7 @@
TESTS = tests/generate_sequence.sh tests/parallel_hashing.sh \
tests/merge.sh tests/bloom_filter.sh tests/big.sh \
tests/subset_hashing.sh tests/multi_file.sh \
- tests/bloom_counter.sh tests/large_key.sh tests/sam.sh \
- tests/small_mers.sh
+ tests/bloom_counter.sh tests/large_key.sh tests/sam.sh
EXTRA_DIST += $(TESTS)
clean-local: clean-local-check
@@ -165,7 +164,6 @@
tests/large_key.log: tests/generate_sequence.log
tests/quality_filter.log: tests/generate_sequence.log
tests/sam.log: tests/generate_sequence.log
-tests/small_mers.log: tests/generate_sequence.log
# SWIG tests
TESTS += tests/swig_python.sh tests/swig_ruby.sh tests/swig_perl.sh
version=4
https://github.com/gmarcais/Jellyfish/releases .*/archive/v(\d[\d.-]+)\.(?:tar(?:\.gz|\.bz2)?|tgz)
https://github.com/gmarcais/Jellyfish/releases .*/jellyfish-(\d[\d.-]+)\.(?:tar(?:\.gz|\.bz2)?|tgz)
......@@ -45,6 +45,9 @@ public:
name += std::to_string((long long int)i); // Cast to make gcc4.4 happy!
const unsigned int r = root_[name]["r"].asUInt();
const unsigned int c = root_[name]["c"].asUInt();
if(root_[name]["identity"].asBool())
return RectangularBinaryMatrix::identity(r, c);
std::vector<uint64_t> raw(c, (uint64_t)0);
for(unsigned int i = 0; i < c; ++i)
raw[i] = root_[name]["columns"][i].asUInt64();
......@@ -57,11 +60,16 @@ public:
root_[name].clear();
root_[name]["r"] = m.r();
root_[name]["c"] = m.c();
if(m.is_low_identity()) {
root_[name]["identity"] = true;
} else {
root_[name]["identity"] = false;
for(unsigned int i = 0; i < m.c(); ++i) {
Json::UInt64 x = m[i];
root_[name]["columns"].append(x);
}
}
}
size_t size() const { return root_["size"].asLargestUInt(); }
void size(size_t s) { root_["size"] = (Json::UInt64)s; }
......
......@@ -204,8 +204,15 @@ protected:
bool double_size(bool serial_thread) {
if(serial_thread) {// Allocate new array for size doubling
try {
if(ary_->key_len() >= sizeof(size_t) * 8 || ary_->size() < ((size_t)1 << ary_->key_len())) {
// Increase number of keys
new_ary_ = new array(ary_->size() * 2, ary_->key_len(), ary_->val_len(),
ary_->max_reprobe(), ary_->reprobes());
} else {
// Array is already maximum compared to key len, increase val_len
new_ary_ = new array(ary_->size(), ary_->key_len(), ary_->val_len() + 1,
ary_->max_reprobe(), ary_->reprobes());
}
} catch(typename array::ErrorAllocation e) {
new_ary_ = 0;
}
......@@ -219,10 +226,6 @@ protected:
// Copy data from old to new
uint16_t id = atomic_t::fetch_add(&size_thid_, (uint16_t)1);
// Why doesn't the following work? Seems like a bug to
// me. Equivalent call works in test_large_hash_array. Or am I
// missing something?
// eager_iterator it = ary_->iterator_slice<eager_iterator>(id, nb_threads_);
eager_iterator it = ary_->eager_slice(id, nb_threads_);
while(it.next())
my_ary->add(it.key(), it.val());
......
......@@ -930,25 +930,37 @@ public:
};
template<typename Key, typename word = uint64_t, typename atomic_t = ::atomic::gcc, typename mem_block_t = ::allocators::mmap>
class array :
// Large array. Memory managed by the mmap allocator. Do not check the
// relation between the size of the array and key_len.
template<typename Key, typename word = uint64_t,
typename atomic_t = ::atomic::gcc, typename mem_block_t = ::allocators::mmap>
class unbounded_array :
protected mem_block_t,
public array_base<Key, word, atomic_t, array<Key, word, atomic_t, mem_block_t> >
public array_base<Key, word, atomic_t, unbounded_array<Key, word, atomic_t, mem_block_t> >
{
typedef array_base<Key, word, atomic_t, array<Key, word, atomic_t, mem_block_t> > super;
friend class array_base<Key, word, atomic_t, array<Key, word, atomic_t, mem_block_t> >;
typedef array_base<Key, word, atomic_t, unbounded_array<Key, word, atomic_t, mem_block_t> > super;
friend class array_base<Key, word, atomic_t, unbounded_array<Key, word, atomic_t, mem_block_t> >;
public:
array(size_t size, // Size of hash. To be rounded up to a power of 2
unbounded_array(size_t size, // Size of hash. To be rounded up to a power of 2
uint16_t key_len, // Size of key in bits
uint16_t val_len, // Size of val in bits
uint16_t reprobe_limit, // Maximum reprobe
const size_t* reprobes = quadratic_reprobes) : // Reprobing policy
mem_block_t(),
super(size, key_len, val_len, reprobe_limit, RectangularBinaryMatrix(ceilLog2(size), key_len).randomize_pseudo_inverse(),
const size_t* reprobes = quadratic_reprobes) // Reprobing policy
: super(size, key_len, val_len, reprobe_limit,
RectangularBinaryMatrix(ceilLog2(size), key_len).randomize_pseudo_inverse(),
reprobes)
{ }
unbounded_array(size_t size, // Size of hash. To be rounded up to a power of 2
uint16_t key_len, // Size of key in bits
uint16_t val_len, // Size of val in bits
uint16_t reprobe_limit, // Maximum reprobe
RectangularBinaryMatrix&& m, // Hashing matrix
const size_t* reprobes = quadratic_reprobes) // Reprobing policy
: super(size, key_len, val_len, reprobe_limit, m, reprobes)
{ }
protected:
word* alloc_data(size_t s) {
mem_block_t::realloc(s);
......@@ -956,6 +968,35 @@ protected:
}
};
// Large array. Memory managed by the mmap allocator, bound the size
// of the array if the key_len is small.
template<typename Key, typename word = uint64_t,
typename atomic_t = ::atomic::gcc, typename mem_block_t = ::allocators::mmap>
class array : public unbounded_array<Key, word, atomic_t, mem_block_t>
{
typedef unbounded_array<Key, word, atomic_t, mem_block_t> super;
static size_t key_len_size(uint16_t key_len) {
return key_len >= std::numeric_limits<size_t>::digits ? std::numeric_limits<size_t>::max() / 2 : (size_t)1 << key_len;
}
public:
array(size_t size, // Size of hash. To be rounded up to a power of 2
uint16_t key_len, // Size of key in bits
uint16_t val_len, // Size of val in bits
uint16_t reprobe_limit, // Maximum reprobe
const size_t* reprobes = quadratic_reprobes) : // Reprobing policy
super(std::min(size, key_len_size(key_len)), key_len, val_len, reprobe_limit,
(size < key_len_size(key_len))
? RectangularBinaryMatrix(ceilLog2(size), key_len).randomize_pseudo_inverse()
: RectangularBinaryMatrix::identity(key_len),
reprobes)
{
// std::cerr << this->size() << ' ' << this->val_len() << '\n';
}
};
struct ptr_info {
void* ptr_;
size_t bytes_;
......
......@@ -131,7 +131,7 @@ protected:
// streams_iterator_ noticed that we closed that stream before
// requesting a new one.
st.stream.reset();
st.stream = streams_iterator_.next();
st.stream = std::move(streams_iterator_.next());
if(!st.stream.good()) {
st.type = DONE_TYPE;
return false;
......
......@@ -41,19 +41,33 @@
// bits of each word are set to 0).
//
// Multiplication between a matrix and vector of size _c x 1 gives a
// vector of size _r x 1 stored as one 64 bit word.
// vector of size _r x 1 stored as one 64 bit word. A matrix with a
// NULL _columns pointer behaves like the identity.
namespace jellyfish {
class RectangularBinaryMatrix {
explicit RectangularBinaryMatrix(unsigned int c)
: _columns(NULL)
, _r(c)
, _c(c)
{ }
public:
RectangularBinaryMatrix(unsigned int r, unsigned c)
: _columns(alloc(r, c)), _r(r), _c(c) { }
RectangularBinaryMatrix(const RectangularBinaryMatrix &rhs)
: _columns(alloc(rhs._r, rhs._c)), _r(rhs._r), _c(rhs._c) {
: _columns(rhs._columns ? alloc(rhs._r, rhs._c) : NULL)
, _r(rhs._r)
, _c(rhs._c)
{
if(_columns)
memcpy(_columns, rhs._columns, sizeof(uint64_t) * _c);
}
RectangularBinaryMatrix(RectangularBinaryMatrix&& rhs) :
_columns(rhs._columns), _r(rhs._r), _c(rhs._c) {
RectangularBinaryMatrix(RectangularBinaryMatrix&& rhs)
: _columns(rhs._columns)
, _r(rhs._r)
, _c(rhs._c)
{
rhs._columns = 0;
}
// Initialize from raw data. raw must contain at least c words.
......@@ -67,6 +81,16 @@ namespace jellyfish {
free(_columns);
}
static RectangularBinaryMatrix identity(unsigned c) {
return RectangularBinaryMatrix(c);
}
static RectangularBinaryMatrix identity(unsigned r, unsigned c) {
RectangularBinaryMatrix res(r, c);
res.init_low_identity();
return res;
}
RectangularBinaryMatrix &operator=(const RectangularBinaryMatrix &rhs) {
if(_r != rhs._r || _c != rhs._c)
throw std::invalid_argument("RHS matrix dimensions do not match");
......@@ -90,7 +114,7 @@ namespace jellyfish {
}
// Get i-th column. No check on range
const uint64_t & operator[](unsigned int i) const { return _columns[i]; }
uint64_t operator[](unsigned int i) const { return _columns ? _columns[i] : ((uint64_t)1 << i); }
unsigned int r() const { return _r; }
unsigned int c() const { return _c; }
......@@ -112,8 +136,8 @@ namespace jellyfish {
// Make and check that the matrix the lower right corner of the
// identity.
void init_low_identity();
bool is_low_identity();
void init_low_identity(bool simplify = true);
bool is_low_identity() const;
// Left matrix vector multiplication. Type T supports the operator
// v[i] to return the i-th 64 bit word of v.
......@@ -204,6 +228,7 @@ namespace jellyfish {
template<typename T>
uint64_t RectangularBinaryMatrix::times_loop(const T &v) const {
if(!_columns) return v[0] & cmask();
uint64_t *p = _columns + _c - 1;
uint64_t res = 0, x = 0, j = 0;
const uint64_t one = (uint64_t)1;
......@@ -244,6 +269,7 @@ namespace jellyfish {
#ifdef HAVE_SSE
template<typename T>
uint64_t RectangularBinaryMatrix::times_sse(const T &v) const {
if(!_columns) return v[0] & cmask();
#define FFs ((uint64_t)-1)
static const uint64_t smear[8] asm("smear") __attribute__ ((aligned(16),used)) =
{0, 0, 0, FFs, FFs, 0, FFs, FFs};
......@@ -338,6 +364,7 @@ namespace jellyfish {
#ifdef HAVE_INT128
template<typename T>
uint64_t RectangularBinaryMatrix::times_128(const T &v) const {
if(!_columns) return v[0] & cmask();
typedef unsigned __int128 u128;
static const u128 smear[4] =
{ (u128)0,
......
......@@ -92,7 +92,7 @@ public:
protected:
void open_next_file(stream_status& st) {
st.stream.reset();
st.stream = streams_iterator_.next();
st.stream = std::move(streams_iterator_.next());
if(!st.stream.good()) {
st.type = DONE_TYPE;
return;
......
......@@ -16,7 +16,14 @@
#include <jellyfish/dbg.hpp>
#include <jellyfish/time.hpp>
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#ifdef HAVE_SYS_SYSCALL_H
#include <sys/syscall.h>
#endif
namespace dbg {
pthread_mutex_t print_t::_lock = PTHREAD_MUTEX_INITIALIZER;
......
......@@ -31,13 +31,20 @@ uint64_t *jellyfish::RectangularBinaryMatrix::alloc(unsigned int r, unsigned int
// Make sure the number of words allocated is a multiple of
// 8. Necessary for loop unrolling of vector multiplication
size_t alloc_columns = (c / 8 + (c % 8 != 0)) * 8;
if(posix_memalign(&mem, sizeof(uint64_t) * 2, alloc_columns * sizeof(uint64_t)))
// if(posix_memalign(&mem, sizeof(uint64_t) * 2, alloc_columns * sizeof(uint64_t)))
if(!(mem = aligned_alloc(sizeof(uint64_t) * 2, alloc_columns * sizeof(uint64_t))))
throw std::bad_alloc();
memset(mem, '\0', sizeof(uint64_t) * alloc_columns);
return (uint64_t *)mem;
}
void jellyfish::RectangularBinaryMatrix::init_low_identity() {
void jellyfish::RectangularBinaryMatrix::init_low_identity(bool simplify) {
if(!_columns) return;
if(_c == _r && simplify) {
free(_columns);
_columns = NULL;
return;
}
memset(_columns, '\0', sizeof(uint64_t) * _c);
unsigned int row = std::min(_c, _r);
unsigned int col = _c - row;
......@@ -46,7 +53,8 @@ void jellyfish::RectangularBinaryMatrix::init_low_identity() {
_columns[i] = _columns[i - 1] >> 1;
}
bool jellyfish::RectangularBinaryMatrix::is_low_identity() {
bool jellyfish::RectangularBinaryMatrix::is_low_identity() const {
if(!_columns) return true;
unsigned int row = std::min(_c, _r);
unsigned int col = _c - row;
......@@ -64,6 +72,9 @@ bool jellyfish::RectangularBinaryMatrix::is_low_identity() {
jellyfish::RectangularBinaryMatrix jellyfish::RectangularBinaryMatrix::pseudo_multiplication(const jellyfish::RectangularBinaryMatrix &rhs) const {
if(_r != rhs._r || _c != rhs._c)
throw std::domain_error("Matrices of different size");
if(!_columns) return rhs;
if(!rhs._columns) return *this;
RectangularBinaryMatrix res(_r, _c);
// v is a vector. The lower part is equal to the given column of rhs
......@@ -102,6 +113,8 @@ jellyfish::RectangularBinaryMatrix jellyfish::RectangularBinaryMatrix::pseudo_mu
}
unsigned int jellyfish::RectangularBinaryMatrix::pseudo_rank() const {
if(!_columns) return _c;
unsigned int rank = _c;
RectangularBinaryMatrix pivot(*this);
......@@ -136,8 +149,10 @@ unsigned int jellyfish::RectangularBinaryMatrix::pseudo_rank() const {
}
jellyfish::RectangularBinaryMatrix jellyfish::RectangularBinaryMatrix::pseudo_inverse() const {
if(!_columns) return *this;
RectangularBinaryMatrix pivot(*this);
RectangularBinaryMatrix res(_r, _c); res.init_low_identity();
RectangularBinaryMatrix res(_r, _c); res.init_low_identity(false);
unsigned int i, j;
uint64_t mask;
......@@ -186,12 +201,19 @@ jellyfish::RectangularBinaryMatrix jellyfish::RectangularBinaryMatrix::pseudo_in
}
void jellyfish::RectangularBinaryMatrix::print(std::ostream &os) const {
if(!_columns) {
for(unsigned int i = 0; i < _c; ++i) {
for(unsigned int j = 0; j < _c; ++j)
os << (i == j ? '1' : '0');
os << '\n';
}
} else {
uint64_t mask = (uint64_t)1 << (_r - 1);
for( ; mask; mask >>= 1) {
for(unsigned int j = 0; j < _c; ++j) {
os << (mask & _columns[j] ? "1" : "0");
for(unsigned int j = 0; j < _c; ++j)
os << (mask & _columns[j] ? '1' : '0');
os << '\n';
}
os << "\n";
}
}
......
......@@ -16,23 +16,27 @@ endif
# Python support
if PYTHON_BINDING
PYTHON_BUILT = swig/python/swig_wrap.cpp swig/python/jellyfish.py
PYTHON_BUILT = swig/python/swig_wrap.cpp swig/python/dna_jellyfish.py
BUILT_SOURCES += $(PYTHON_BUILT)
pythonextdir = $(PYTHON_SITE_PKG)/jellyfish
if PYTHON_DEPRECATED
pythonglobaldir = $(PYTHON_SITE_PKG)
pythonglobal_SCRIPTS = swig/python/jellyfish.py
endif
pythonextdir = $(PYTHON_SITE_PKG)/dna_jellyfish
pythonext_SCRIPTS = swig/python/__init__.pyc
pythonext_LTLIBRARIES = swig/python/_jellyfish.la
swig_python__jellyfish_la_SOURCES = swig/python/swig_wrap.cpp $(SWIG_SRC)
swig_python__jellyfish_la_CPPFLAGS = $(PYTHON_CPPFLAGS) -I$(srcdir)/include
swig_python__jellyfish_la_LDFLAGS = -module
swig_python__jellyfish_la_LIBADD = libjellyfish-2.0.la
pythonext_LTLIBRARIES = swig/python/_dna_jellyfish.la
swig_python__dna_jellyfish_la_SOURCES = swig/python/swig_wrap.cpp $(SWIG_SRC)
swig_python__dna_jellyfish_la_CPPFLAGS = $(PYTHON_CPPFLAGS) -I$(srcdir)/include
swig_python__dna_jellyfish_la_LDFLAGS = -module
swig_python__dna_jellyfish_la_LIBADD = libjellyfish-2.0.la
CLEANFILES += $(PYTHON_BUILT) $(pythonext_SCRIPTS)
PYTHONC_V_GEN = $(pythonc_v_GEN_$(V))
pythonc_v_GEN_ = $(pythonc_v_GEN_$(AM_DEFAULT_VERBOSITY))
pythonc_v_GEN_0 = @echo " PYTHONC " $@;
%/__init__.pyc: %/jellyfish.py
%/__init__.pyc: %/dna_jellyfish.py
$(PYTHONC_V_GEN)$(PYTHON) -c 'import py_compile, sys; py_compile.compile(sys.argv[1], sys.argv[2])' $< $@
swig/python/jellyfish.py: swig/python/swig_wrap.cpp
swig/python/dna_jellyfish.py: swig/python/swig_wrap.cpp
EXTRA_DIST += $(PYTHON_BUILT)
endif
......
#ifdef SWIGPYTHON
// Default Python loading code does not seem to work. Use our own.
%define MODULEIMPORT
"
import os
if os.path.basename(__file__) == \"__init__.pyc\" or os.path.basename(__file__) == \"__init__.py\":
import dna_jellyfish.$module
else:
import $module
"
%enddef
%module(docstring="Jellyfish binding", moduleimport=MODULEIMPORT) dna_jellyfish
#else
%module(docstring="Jellyfish binding") jellyfish
#endif
%naturalvar; // Use const reference instead of pointers
%include "std_string.i"
%include "exception.i"
%include "std_except.i"
%include "typemaps.i"
......@@ -8,7 +24,6 @@
%{
#ifdef SWIGPYTHON
#define SWIG_FILE_WITH_INIT
#endif
#ifdef SWIGPERL
......