Skip to content
Commits on Source (7)
gatb-core (1.4.1+git20181225.44d5a44+dfsg-1) unstable; urgency=medium
* New upstream checkout
* Adapt symbols file to new upstream code
* Bump SOVERSION due to ABI changes
-- Andreas Tille <tille@debian.org> Thu, 24 Jan 2019 11:50:33 +0100
gatb-core (1.4.1+git20180206.6f8fce8+dfsg-2) unstable; urgency=medium
* Provide symbols for amd64 only
......
......@@ -35,7 +35,7 @@ Description: Genome Analysis Toolbox with de-Bruijn graph
exist a set of ready-to-use tools relying on GATB-CORE
library: see https://gatb.inria.fr/software/
Package: libgatbcore1
Package: libgatbcore2
Architecture: any
Section: libs
Depends: ${shlibs:Depends},
......@@ -60,7 +60,7 @@ Architecture: any
Section: libdevel
Depends: ${shlibs:Depends},
${misc:Depends},
libgatbcore1 (= ${binary:Version})
libgatbcore2 (= ${binary:Version})
Description: development library of the Genome Analysis Toolbox
The GATB-CORE project provides a set of highly efficient
algorithms to analyse NGS data sets. These methods enable
......
From 0c580ab9e5f4a848c9c245060415e9acfe1655c7 Mon Sep 17 00:00:00 2001
From: rchikhi <rayan.chikhi@univ-lille1.fr>
Date: Mon, 24 Dec 2018 14:57:48 +0100
Origin: https://github.com/GATB/gatb-core/commit/0c580ab.patch
Subject: [PATCH] fixed bug #37 where unitigs are not output with consecutive
IDs, and also made sure that even if they're not, links are robust to that
---
gatb-core/src/gatb/bcalm2/bglue_algo.cpp | 19 ++++----------
gatb-core/src/gatb/bcalm2/bglue_algo.hpp | 15 +++++++++--
gatb-core/src/gatb/debruijn/impl/LinkTigs.cpp | 25 +++++++++++--------
3 files changed, 32 insertions(+), 27 deletions(-)
diff --git a/gatb-core/src/gatb/bcalm2/bglue_algo.cpp b/gatb-core/src/gatb/bcalm2/bglue_algo.cpp
index 6256282e..c12d1f79 100644
--- a/gatb-core/src/gatb/bcalm2/bglue_algo.cpp
+++ b/gatb-core/src/gatb/bcalm2/bglue_algo.cpp
@@ -904,9 +904,7 @@ void bglue(Storage *storage,
// setup output file
string output_prefix = prefix;
- std::atomic<unsigned long> out_id; // identified for output sequences
- out_id = 0;
- BufferedFasta out (output_prefix, 100000);
+ BufferedFasta out (output_prefix, 100000, true);
auto get_UFclass = [&modelCanon, &ufkmers_vector, &hasher, &uf_mphf]
(const string &kmerBegin, const string &kmerEnd,
@@ -966,7 +964,7 @@ void bglue(Storage *storage,
std::mutex mtx; // lock to avoid a nasty bug when calling output()
auto partitionGlue = [k, &modelCanon /* crashes if copied!*/, \
&get_UFclass, &gluePartitions,
- &out, &outLock, &nb_seqs_in_partition, &out_id, nbGluePartitions, &mtx]
+ &out, &outLock, &nb_seqs_in_partition, nbGluePartitions, &mtx]
(const Sequence& sequence)
{
const string &seq = sequence.toString();
@@ -992,12 +990,8 @@ void bglue(Storage *storage,
float mean_abundance = get_mean_abundance(abundances);
uint32_t sum_abundances = get_sum_abundance(abundances);
- // for some reason i do need that lock_guard here.. even though output is itself lock guarded. maybe some lazyness in the evauation of the to_string(out_id++)? who kon
- // anyway this fixes the problem, i'll understand it some other time.
- std::lock_guard<std::mutex> lock(mtx);
- output(seq, out, std::to_string(out_id++) + " LN:i:" + to_string(seq.size()) + " KC:i:" + to_string(sum_abundances) + " km:f:" + to_string_with_precision(mean_abundance));
// km is not a standard GFA field so i'm putting it in lower case as per the spec
- // maybe could optimize by writing to disk using queues, if that's ever a bottleneck
+ output(seq, out, "LN:i:" + to_string(seq.size()) + " KC:i:" + to_string(sum_abundances) + " km:f:" + to_string_with_precision(mean_abundance));
return;
}
@@ -1051,7 +1045,7 @@ void bglue(Storage *storage,
for (int partition = 0; partition < nbGluePartitions; partition++)
{
auto glue_partition = [&modelCanon, &ufkmers, partition, &gluePartition_prefix, nbGluePartitions, &copy_nb_seqs_in_partition,
- &get_UFclass, &out, &outLock, &out_id, kmerSize, &mtx]( int thread_id)
+ &get_UFclass, &out, &outLock, kmerSize, &mtx]( int thread_id)
{
int k = kmerSize;
@@ -1140,10 +1134,7 @@ void bglue(Storage *storage,
float mean_abundance = get_mean_abundance(abs);
uint32_t sum_abundances = get_sum_abundance(abs);
{
- // for some reason i do need that lock_guard here.. even though output is itself lock guarded. maybe some lazyness in the evauation of the to_string(out_id++)? who kon
- // anyway this fixes the problem, i'll understand it some other time.
- std::lock_guard<std::mutex> lock(mtx);
- output(seq, out, std::to_string(out_id++) + " LN:i:" + to_string(seq.size()) + " KC:i:" + to_string(sum_abundances) + " km:f:" + to_string_with_precision(mean_abundance));
+ output(seq, out, "LN:i:" + to_string(seq.size()) + " KC:i:" + to_string(sum_abundances) + " km:f:" + to_string_with_precision(mean_abundance));
}
}
diff --git a/gatb-core/src/gatb/bcalm2/bglue_algo.hpp b/gatb-core/src/gatb/bcalm2/bglue_algo.hpp
index 50ca9176..7783215f 100644
--- a/gatb-core/src/gatb/bcalm2/bglue_algo.hpp
+++ b/gatb-core/src/gatb/bcalm2/bglue_algo.hpp
@@ -43,16 +43,21 @@ class BufferedFasta
std::string buffer;
unsigned long buffer_length;
FILE* _insertHandle;
+ bool needs_consecutive_ids;
+ unsigned long consecutive_id;
public:
unsigned long max_buffer;
bool threadsafe;
- BufferedFasta(const std::string filename, unsigned long given_max_buffer = 50000)
+ BufferedFasta(const std::string filename, unsigned long given_max_buffer = 50000, bool given_needs_consecutive_ids = false)
{
max_buffer = given_max_buffer; // that much of buffering will be written to the file at once (in bytes)
threadsafe = true;
buffer_length = 0;
+ consecutive_id = 0;
+ needs_consecutive_ids = given_needs_consecutive_ids;
+ if ((!threadsafe) && needs_consecutive_ids) { std::cout << "error, non-threadsafe bufferedfasta with consecutive ids asked" << std::endl; exit(1);}
_insertHandle = fopen (filename.c_str(), "w");
if (!_insertHandle) { std::cout << "error opening " << filename << " for writing." << std::endl; exit(1);}
buffer.reserve(max_buffer+1000/*security*/);
@@ -73,7 +78,13 @@ class BufferedFasta
if (buffer_length + insert_size > max_buffer)
flush();
buffer_length += insert_size;
- buffer += ">" + comment + "\n" + seq + "\n";
+ buffer += ">";
+ if (needs_consecutive_ids)
+ {
+ buffer += std::to_string(consecutive_id) + " ";
+ consecutive_id++;
+ }
+ buffer += comment + "\n" + seq + "\n";
if (threadsafe)
mtx.unlock();
}
diff --git a/gatb-core/src/gatb/debruijn/impl/LinkTigs.cpp b/gatb-core/src/gatb/debruijn/impl/LinkTigs.cpp
index a13668ac..b21d164e 100644
--- a/gatb-core/src/gatb/debruijn/impl/LinkTigs.cpp
+++ b/gatb-core/src/gatb/debruijn/impl/LinkTigs.cpp
@@ -41,6 +41,9 @@ namespace gatb { namespace core { namespace debruijn { namespace impl {
* it uses the disk to store the links for extremities until they're merged into the final unitigs file.
*
* so the memory usage is just that of the hash tables that record kmers, not of the links
+ *
+ * Assumption: FASTA header of unitigs starts with a unique number (unitig ID).
+ * Normally bcalm outputs consecutive unitig ID's but LinkTigs could also work with non-consecutive, non-sorted IDs
*/
template<size_t span>
void link_tigs(string unitigs_filename, int kmerSize, int nb_threads, uint64_t &nb_unitigs, bool verbose)
@@ -219,7 +222,6 @@ void link_unitigs_pass(const string unitigs_filename, bool verbose, const int pa
BankFasta inputBank (unitigs_filename);
BankFasta::Iterator itSeq (inputBank);
- uint64_t utig_counter = 0;
Model modelKminusOne(kmerSize - 1); // it's canonical (defined in the .hpp file)
@@ -235,12 +237,15 @@ void link_unitigs_pass(const string unitigs_filename, bool verbose, const int pa
const string& seq = itSeq->toString();
if (debug) std::cout << "unitig: " << seq << std::endl;
+ const string& comment = itSeq->getComment();
+ unsigned long utig_id = std::stoul(comment.substr(0, comment.find(' ')));
+
if (is_in_pass(seq, pass, UNITIG_BEGIN, kmerSize))
{
if (debug) std::cout << "pass " << pass << " examining beginning" << std::endl;
typename Model::Kmer kmerBegin = modelKminusOne.codeSeed(seq.substr(0, kmerSize-1).c_str(), Data::ASCII);
bool beginInSameOrientation = modelKminusOne.toString(kmerBegin.value()) == seq.substr(0,kmerSize-1);
- ExtremityInfo eBegin(utig_counter, !beginInSameOrientation /* because we record rc*/, UNITIG_BEGIN);
+ ExtremityInfo eBegin(utig_id, !beginInSameOrientation /* because we record rc*/, UNITIG_BEGIN);
utigs_links_map[kmerBegin.value()].push_back(eBegin.pack());
}
if (is_in_pass(seq, pass, UNITIG_END, kmerSize))
@@ -248,11 +253,10 @@ void link_unitigs_pass(const string unitigs_filename, bool verbose, const int pa
if (debug) std::cout << "pass " << pass << " examining end" << std::endl;
typename Model::Kmer kmerEnd = modelKminusOne.codeSeed(seq.substr(seq.size() - kmerSize+1).c_str(), Data::ASCII);
bool endInSameOrientation = modelKminusOne.toString(kmerEnd.value()) == seq.substr(seq.size() - kmerSize+1);
- ExtremityInfo eEnd( utig_counter, !endInSameOrientation, UNITIG_END);
+ ExtremityInfo eEnd( utig_id, !endInSameOrientation, UNITIG_END);
utigs_links_map[kmerEnd.value()].push_back(eEnd.pack());
// there is no UNITIG_BOTH here because we're taking (k-1)-mers.
}
- utig_counter++;
}
std::ofstream links_file(unitigs_filename+".links." +to_string(pass));
@@ -262,12 +266,13 @@ void link_unitigs_pass(const string unitigs_filename, bool verbose, const int pa
nb_hashed_entries += v.second.size();
logging("step 2 (" + to_string(utigs_links_map.size()) + "kmers/" + to_string(nb_hashed_entries) + "extremities)");
- utig_counter = 0;
for (itSeq.first(); !itSeq.isDone(); itSeq.next())
{
const string& seq = itSeq->toString();
-
- if (debug) std::cout << "unitig: " << seq << std::endl;
+ const string& comment = itSeq->getComment();
+ unsigned long utig_id = std::stoul(comment.substr(0, comment.find(' ')));
+
+ if (debug) std::cout << "unitig " << std::to_string(utig_id) << " : " << seq << std::endl;
if (is_in_pass(seq, pass, UNITIG_BEGIN, kmerSize))
{
@@ -324,7 +329,7 @@ void link_unitigs_pass(const string unitigs_filename, bool verbose, const int pa
if (debug) std::cout << std::endl;
}
- record_links(utig_counter, pass, in_links, links_file);
+ record_links(utig_id, pass, in_links, links_file);
}
@@ -371,10 +376,8 @@ void link_unitigs_pass(const string unitigs_filename, bool verbose, const int pa
}
if (debug) std::cout << std::endl;
}
- record_links(utig_counter, pass, out_links, links_file);
+ record_links(utig_id, pass, out_links, links_file);
}
-
- utig_counter++;
}
}
From 44d5a444f7d8b921b66065ef2d6270d3eb0f1446 Mon Sep 17 00:00:00 2001
From: rchikhi <rayan.chikhi@univ-lille1.fr>
Date: Tue, 25 Dec 2018 01:05:20 +0100
Origin: https://github.com/GATB/gatb-core/commit/44d5a44.patch
Subject: [PATCH] fixed kmer counting of IUPAC nucleotides
---
gatb-core/src/gatb/tools/misc/api/Data.cpp | 4 ++++
gatb-core/src/gatb/tools/misc/api/Data.hpp | 21 +++++++++++++++++----
2 files changed, 21 insertions(+), 4 deletions(-)
create mode 100644 gatb-core/src/gatb/tools/misc/api/Data.cpp
diff --git a/gatb-core/src/gatb/tools/misc/api/Data.cpp b/gatb-core/src/gatb/tools/misc/api/Data.cpp
new file mode 100644
index 00000000..cddc7e38
--- /dev/null
+++ b/gatb-core/src/gatb/tools/misc/api/Data.cpp
@@ -0,0 +1,4 @@
+#include <gatb/tools/misc/api/Data.hpp>
+
+const unsigned char gatb::core::tools::misc::Data::validNucleotide[]= {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+
diff --git a/gatb-core/src/gatb/tools/misc/api/Data.hpp b/gatb-core/src/gatb/tools/misc/api/Data.hpp
index 0c39e726..22e174a3 100644
--- a/gatb-core/src/gatb/tools/misc/api/Data.hpp
+++ b/gatb-core/src/gatb/tools/misc/api/Data.hpp
@@ -170,17 +170,30 @@ class Data : public Vector<char>
* - second : 0 if valid, 1 if invalid (in case of N character for instance) */
typedef std::pair<char,char> ConvertChar;
- /** Note for the ASCII conversion: the 4th bit is used to tell whether it is invalid or not.
- * => it finds out that 'N' character has this 4th bit equals to 1, which is not the case
- * for 'A', 'C', 'G' and 'T'. */
- struct ConvertASCII { static ConvertChar get (const char* buffer, size_t idx) { return ConvertChar((buffer[idx]>>1) & 3, (buffer[idx]>>3) & 1); }};
+ /** Used to have the following trick:
+ * (buffer[idx]>>3)& 1
+ * was equal to 1 for 'A', 'C', 'G' and 'T' (and also the lower case version)
+ * and was equal to 0 for 'N' and 'n'
+ * but unfortunately it doesn't work for some of the IUPAC codes, like 'R'
+ * */
+ struct ConvertASCII { static ConvertChar get (const char* buffer, size_t idx) { return ConvertChar((buffer[idx]>>1) & 3, validNucleotide[(unsigned char)(buffer[idx])]); }};
struct ConvertInteger { static ConvertChar get (const char* buffer, size_t idx) { return ConvertChar(buffer[idx],0); } };
struct ConvertBinary { static ConvertChar get (const char* buffer, size_t idx) { return ConvertChar(((buffer[idx>>2] >> ((3-(idx&3))*2)) & 3),0); } };
+ static const unsigned char validNucleotide[];
private:
/** Encoding scheme of the data instance. */
Encoding_e encoding;
+
+ /* generated using:
+ * codes=[1]*256
+ * codes[ord('A')] = codes[ord('a')] = 0;
+ * codes[ord('C')] = codes[ord('c')] = 0;
+ * codes[ord('G')] = codes[ord('g')] = 0;
+ * codes[ord('T')] = codes[ord('t')] = 0;
+ * print(codes)
+ */
};
/********************************************************************************/
From 5d4566430e0f708b602d26e0b54bd21dd4d7852b Mon Sep 17 00:00:00 2001
From: rchikhi <rayan.chikhi@univ-lille1.fr>
Date: Sat, 20 Oct 2018 10:30:53 +0200
Origin: https://github.com/GATB/gatb-core/commit/5d45664.patch
Subject: [PATCH] fix typo gatb/minia#11
---
gatb-core/src/gatb/kmer/impl/SortingCountAlgorithm.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gatb-core/src/gatb/kmer/impl/SortingCountAlgorithm.cpp b/gatb-core/src/gatb/kmer/impl/SortingCountAlgorithm.cpp
index 96cf3b2f..a86969f5 100644
--- a/gatb-core/src/gatb/kmer/impl/SortingCountAlgorithm.cpp
+++ b/gatb-core/src/gatb/kmer/impl/SortingCountAlgorithm.cpp
@@ -212,7 +212,7 @@ IOptionsParser* SortingCountAlgorithm<span>::getOptionsParser (bool mandatory)
parser->push_back (new OptionOneParam (STR_KMER_ABUNDANCE_MIN_THRESHOLD,"min abundance hard threshold (only used when min abundance is \"auto\")",false, "2"));
parser->push_back (new OptionOneParam (STR_HISTOGRAM_MAX, "max number of values in kmers histogram", false, "10000"));
parser->push_back (new OptionOneParam (STR_SOLIDITY_KIND, "way to compute counts of several files (sum, min, max, one, all, custom)",false, "sum"));
- parser->push_back (new OptionOneParam (STR_SOLIDITY_CUSTOM, "when solidity-kind is cutom, specifies list of files where kmer must be present",false, ""));
+ parser->push_back (new OptionOneParam (STR_SOLIDITY_CUSTOM, "when solidity-kind is custom, specifies list of files where kmer must be present",false, ""));
parser->push_back (new OptionOneParam (STR_MAX_MEMORY, "max memory (in MBytes)", false, "5000"));
parser->push_back (new OptionOneParam (STR_MAX_DISK, "max disk (in MBytes)", false, "0"));
parser->push_back (new OptionOneParam (STR_URI_SOLID_KMERS, "output file for solid kmers (only when constructing a graph)", false));
From 61c5ee19d44812f506258c22177c684936f8fd6f Mon Sep 17 00:00:00 2001
From: rchikhi <rayan.chikhi@univ-lille1.fr>
Date: Mon, 22 Oct 2018 11:48:08 +0200
Origin: https://github.com/GATB/gatb-core/commit/61c5ee1.patch
Subject: [PATCH] small bugfix
---
gatb-core/test/unit/src/debruijn/TestDebruijn.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gatb-core/test/unit/src/debruijn/TestDebruijn.cpp b/gatb-core/test/unit/src/debruijn/TestDebruijn.cpp
index 1be96659..8d12e1fa 100644
--- a/gatb-core/test/unit/src/debruijn/TestDebruijn.cpp
+++ b/gatb-core/test/unit/src/debruijn/TestDebruijn.cpp
@@ -1298,7 +1298,7 @@ class TestDebruijn : public Test
Node n2 = graph.reverse (n1);
CPPUNIT_ASSERT (n2.strand == STRAND_REVCOMP);
- CPPUNIT_ASSERT (graph.toString(n2).compare ("TCAG") == 0 || graph.toString(n1).compare ("TGGA") == 0);
+ CPPUNIT_ASSERT (graph.toString(n2).compare ("TCAG") == 0 || graph.toString(n2).compare ("TGGA") == 0);
}
From da4ceed164c761ad264b5b0b4e6745a9d9e33ea0 Mon Sep 17 00:00:00 2001
From: rchikhi <rayan.chikhi@univ-lille1.fr>
Date: Mon, 24 Dec 2018 15:12:41 +0100
Origin: https://github.com/GATB/gatb-core/commit/da4ceed.patch
Subject: [PATCH] minor cleanup
---
gatb-core/src/gatb/bcalm2/bglue_algo.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/gatb-core/src/gatb/bcalm2/bglue_algo.cpp b/gatb-core/src/gatb/bcalm2/bglue_algo.cpp
index c12d1f79..6c1ec575 100644
--- a/gatb-core/src/gatb/bcalm2/bglue_algo.cpp
+++ b/gatb-core/src/gatb/bcalm2/bglue_algo.cpp
@@ -961,10 +961,9 @@ void bglue(Storage *storage,
// partition the glue into many files, à la dsk
- std::mutex mtx; // lock to avoid a nasty bug when calling output()
auto partitionGlue = [k, &modelCanon /* crashes if copied!*/, \
&get_UFclass, &gluePartitions,
- &out, &outLock, &nb_seqs_in_partition, nbGluePartitions, &mtx]
+ &out, &outLock, &nb_seqs_in_partition, nbGluePartitions]
(const Sequence& sequence)
{
const string &seq = sequence.toString();
@@ -1045,7 +1044,7 @@ void bglue(Storage *storage,
for (int partition = 0; partition < nbGluePartitions; partition++)
{
auto glue_partition = [&modelCanon, &ufkmers, partition, &gluePartition_prefix, nbGluePartitions, &copy_nb_seqs_in_partition,
- &get_UFclass, &out, &outLock, kmerSize, &mtx]( int thread_id)
+ &get_UFclass, &out, &outLock, kmerSize]( int thread_id)
{
int k = kmerSize;
......@@ -6,8 +6,3 @@ set_soversion.patch
dynamic_linking_of_tools.patch
multiarch.patch
spelling.patch
#44d5a44.patch
#da4ceed.patch
#0c580ab.patch
#61c5ee1.patch
#5d45664.patch
......@@ -9,7 +9,7 @@ Description: Invent soversion 0 since upstream does not set soversion
set_target_properties (gatbcore-static PROPERTIES OUTPUT_NAME gatbcore clean_direct_output 1)
-set_target_properties (gatbcore-dynamic PROPERTIES OUTPUT_NAME gatbcore clean_direct_output 1)
+set_target_properties (gatbcore-dynamic PROPERTIES OUTPUT_NAME gatbcore SOVERSION 1 clean_direct_output 1)
+set_target_properties (gatbcore-dynamic PROPERTIES OUTPUT_NAME gatbcore SOVERSION 2 clean_direct_output 1)
################################################################################
# INSTALLATION
version=4
# New files on GitHub
opts="repacksuffix=+dfsg,dversionmangle=s/\+dfsg//g,repack,compression=xz" \
https://github.com/GATB/gatb-core/releases .*/archive/v(\d[\d.-]+)\.(?:tar(?:\.gz|\.bz2)?|tgz)
# Minia does not work with the official release - try HEAD
opts="mode=git,pretty=1.4.1+git%cd.%h,repacksuffix=+dfsg,dversionmangle=s/\+dfsg//g,repack,compression=xz" \
https://github.com/GATB/gatb-core.git HEAD
# Regular releases
#opts="repacksuffix=+dfsg,dversionmangle=s/\+dfsg//g,repack,compression=xz" \
# https://github.com/GATB/gatb-core/releases .*/archive/v(\d[\d.-]+)\.(?:tar(?:\.gz|\.bz2)?|tgz)
CMakeLists.txt.user
/build*
/.settings/
/.cproject
*.leon
.DS_Store
......@@ -3,6 +3,7 @@
################################################################################
FIND_PATH (CPPUNIT_INCLUDE_DIR cppunit/extensions/HelperMacros.h
$ENV{HOME}/include
$ENV{HOME}/.linuxbrew/include
/local/include
/usr/include
......@@ -18,10 +19,12 @@ FIND_LIBRARY (CPPUNIT_LIBRARY cppunit
)
# A little hack here... We check whether the static library is reachable too.
IF (EXISTS "${CPPUNIT_INCLUDE_DIR}/cppunit/extensions/HelperMacros.h")
if (NOT EXISTS "${CPPUNIT_INCLUDE_DIR}/../lib/libcppunit.a")
message ("-- CppUnit: found headers (in ${CPPUNIT_INCLUDE_DIR}) but not the static library (${CPPUNIT_INCLUDE_DIR}/../lib/libcppunit.a)")
SET (CPPUNIT_NO_STATIC_LIB_FOUND 1)
endif()
endif()
IF (CPPUNIT_INCLUDE_DIR)
IF (CPPUNIT_LIBRARY)
......
/main.aux
/main.log
/main.pdf
......@@ -42,12 +42,16 @@ int main (int argc, char* argv[])
IBank* bank2 = Bank::open (options->getStr(STR_BANK2)); LOCAL (bank2);
// We iterate the two banks. Note how we provide two iterators from the two banks.
PairedIterator<Sequence> itPair (bank1->iterator(), bank2->iterator());
PairedIterator<Sequence> * itPair = new PairedIterator<Sequence> (bank1->iterator(), bank2->iterator());
LOCAL(itPair);
for (itPair.first(); !itPair.isDone(); itPair.next())
ProgressIterator< std::pair <Sequence, Sequence>> progress_iter (itPair, "coucou la paire", bank1->estimateNbItems());
for (progress_iter.first(); !progress_iter.isDone(); progress_iter.next())
{
Sequence& s1 = itPair.item().first;
Sequence& s2 = itPair.item().second;
Sequence& s1 = itPair->item().first;
Sequence& s2 = itPair->item().second;
std::cout << "seq1.len=" << s1.getDataSize() << " seq2.len=" << s2.getDataSize() << std::endl;
}
......
/*
 * KAT-like analysis of a genome assembly.
 *
 * Given two input datasets — first the read set, then the genome, in that
 * order — outputs the 2D histogram of kmer abundances.
 *
 * Sample command: histo2D -in readset.fasta,genome.fasta -kmer-size 25 > resultfile
 */
#include <gatb/gatb_core.hpp>
using namespace std;
int dim1 = 10000; // max kmer occurence in read set
int dim2 = 10; // max kmer occurence in genome
#define IDX(i,j) ((i) + (j)*dim1)
/********************************************************************************/
template<size_t span>
class CountProcessorCustom : public CountProcessorAbstract<span>
{
public:
// We need the kmer size to dump kmers values as nucl strings
CountProcessorCustom (size_t kmerSize, ISynchronizer* synchro, u_int64_t * histo_table_2D) : kmerSize(kmerSize), synchro(synchro), _histo_table_2D(histo_table_2D) {}
virtual ~CountProcessorCustom () {}
CountProcessorAbstract<span>* clone () { return new CountProcessorCustom<span>(kmerSize, synchro,_histo_table_2D); }
virtual bool process (size_t partId, const typename Kmer<span>::Type& kmer, const CountVector& count, CountNumber sum)
{
if( count[0] < dim1 && count[1] < dim2)
__sync_fetch_and_add ( & _histo_table_2D[IDX( count[0] , count[1] )], 1);
return true;
}
private:
size_t kmerSize;
ISynchronizer *synchro;
u_int64_t * _histo_table_2D;
};
/********************************************************************************/
template<size_t span> struct MainLoop { void operator () (IProperties* options)
{
//check that the user gave 2 input banks
std::string banknames = options->getStr(STR_URI_INPUT);
size_t n = std::count(banknames.begin(), banknames.end(), ',');
if( (n+1) != 2)
{
printf("There must be 2 input banks.\n");
exit(1);
}
u_int64_t * histo_table_2D = (u_int64_t *) calloc( dim1 * dim2 , sizeof(u_int64_t *));
// We force the solidity kind (otherwise default value "sum" will consider N banks as a single one)
options->setStr(STR_SOLIDITY_KIND, "all");
// We create a SortingCountAlgorithm instance.
SortingCountAlgorithm<span> algo (options);
// global synchronization
ISynchronizer* synchro = System::thread().newSynchronizer();
// We create a custom count processor and give it to the sorting count algorithm
algo.addProcessor (new CountProcessorCustom<span> (options->getInt(STR_KMER_SIZE), synchro,histo_table_2D));
// We launch the algorithm
algo.execute();
//outptut the 2D histogram
for(int ii=0; ii< dim1; ii++)
{
printf("%5i:\t",ii);
for(int jj=0; jj< dim2; jj++)
{
printf("\t%6lli",histo_table_2D[IDX(ii,jj)]);
}
printf("\n");
}
free(histo_table_2D);
}};
/********************************************************************************/
int main (int argc, char* argv[])
{
// We create a command line parser for the sorting count algorithm
IOptionsParser* parser = SortingCountAlgorithm<>::getOptionsParser ();
parser->push_back (new OptionOneParam (STR_NB_CORES, "nb cores", false, "1"));
parser->push_back (new OptionOneParam (STR_VERBOSE, "verbosity", false, "1"));
// We launch our functor
return Algorithm::mainloop <MainLoop> (parser, argc, argv);
}
......@@ -138,6 +138,30 @@ struct Sequence
* \param[in] qual : quality string of the sequence. */
void setQuality (const std::string& qual) { _quality = qual; }
/** Returns a string that is the reverse complement of the sequence
* The Sequence object needs to be in ASCII Format
**/
std::string getRevcomp () const {
std::string rc;
std::string s = toString();
for (signed int i = s.length() - 1; i >= 0; i--) {
unsigned char c;
switch(s[i]){
case 'A': c='T';break;
case 'C': c='G';break;
case 'G': c='C';break;
case 'T': c='A';break;
case 'a': c='t';break;
case 'c': c='g';break;
case 'g': c='c';break;
case 't': c='a';break;
default: c='X';break;
}
rc += c;
}
return rc;
}
/** Comment attribute (note: should be private with a setter and getter). */
std::string _comment;
......
......@@ -253,8 +253,9 @@ static void determine_order_sequences(vector<vector<uint32_t>> &res, const vecto
kmerIndex[markedSequences[i].ke].insert(i);
}
auto glue_from_extremity = [&](markedSeq<SPAN> current, uint32_t chain_index, uint32_t markedSequence_index)
auto glue_from_extremity = [&](markedSeq<SPAN> current, uint32_t chain_index, uint32_t markedSequence_index, bool expect_circular=false)
{
uint32_t first_index = markedSequence_index;
vector<uint32_t> chain;
chain.push_back(chain_index);
......@@ -308,10 +309,23 @@ static void determine_order_sequences(vector<vector<uint32_t>> &res, const vecto
current = successor;
markedSequence_index = successor_index;
chain.push_back(chain_index);
rmark = current.rmark;
if (expect_circular)
{
if (usedSeq.find(markedSequence_index) != usedSeq.end())
{
assert(markedSequence_index == first_index);
break;
}
}
else
{
assert((usedSeq.find(markedSequence_index) == usedSeq.end()));
}
usedSeq.insert(markedSequence_index);
chain.push_back(chain_index);
rmark = current.rmark;
}
res.push_back(chain);
......@@ -351,23 +365,26 @@ static void determine_order_sequences(vector<vector<uint32_t>> &res, const vecto
}
/* // attempt to fix circular contigs, but I was in a hurry, so not finished
while (nb_chained < sequences.size())
{
// there might be a special case: a circular unitig, to be glued with multiple sequences all containing doubled kmers at extremities
// special case: a circular unitig, to be glued with multiple sequences all containing doubled kmers at extremities
// my fix plan: we pick an extremity at random, and chop the last nucleotide and mark it to not be glued. also find the corresponding kmer in other extremity, and mark it as not to be glued
while (nb_chained < markedSequences.size())
{
//std::cout << "nb chained " << nb_chained << " markedseq size" << markedSequences.size() << std::endl;
vector<int> remaining_indices;
for (uint32_t i = 0; i < markedSequences.size(); i++)
{
if (usedSeq.find(i) != usedSeq.end())
if (usedSeq.find(i) == usedSeq.end())
remaining_indices.push_back(i);
}
uint32_t chain_index = remaining_indices[0];
string kmer = markedSequences[chain_index].substr(0,kmerSize);
// markedSequences[chain_index] = // TODO continue
assert(remaining_indices.size()>0);
markedSeq<SPAN> current = markedSequences[remaining_indices[0]];
uint32_t chain_index = markedSequences[remaining_indices[0]].index;
glue_from_extremity(current, chain_index, remaining_indices[0], true);
}
*/
if (nb_chained < markedSequences.size())
{
std::cout << " Note: " << markedSequences.size() - nb_chained << " sequence chunks not returned in output unitigs (likely small circular contigs)" << std::endl;
......@@ -732,14 +749,13 @@ void bglue(Storage *storage,
}
if (uf_hashes.size() == 0) // prevent an edge case when there's nothing to glue, boophf doesn't like it
uf_hashes.push_back(0);
for (int pass = 0; pass < nb_prepare_passes; pass++)
System::file().remove (prefix+".glue.hashes." + to_string(pass));
unsigned long nb_uf_keys = uf_hashes.size();
logging("loaded all unique UF elements (" + std::to_string(nb_uf_keys) + ") into a single file vector of size " + to_string(nb_uf_keys* sizeof(partition_t) / 1024/1024) + " MB");
logging("loaded all unique UF elements (" + std::to_string(nb_uf_keys) + ") into a single vector of size " + to_string(nb_uf_keys* sizeof(partition_t) / 1024/1024) + " MB");
int gamma = 3; // make it even faster.
// TODO: why not iterate the UF hashes from disk instead of loading them in memory?
boomphf::mphf<partition_t , /*TODO we don't need hasher_t here now that we're not hashing kmers, but I forgot to change*/ hasher_t< partition_t> > uf_mphf(nb_uf_keys, uf_hashes, nb_threads, gamma, verbose);
free_memory_vector(uf_hashes);
......@@ -750,9 +766,9 @@ void bglue(Storage *storage,
logging("UF MPHF constructed (" + std::to_string(uf_mphf_memory/8/1024/1024) + " MB)" );
}
// create a UF data structure
unionFind<uint32_t> ufkmers(nb_uf_keys);
// this one stores nb_uf_keys * uint64_t (actually, atomic's). so it's bigger than uf_hashes
unionFind ufkmers(nb_uf_keys);
#if 0
unionFind<unsigned int> ufmin;
......@@ -843,6 +859,11 @@ void bglue(Storage *storage,
logging("UF constructed");
// remove the UF hashes, could have done that earlier (right before bbhash), but i'd like to keep them in case of segfault
for (int pass = 0; pass < nb_prepare_passes; pass++)
System::file().remove (prefix+".glue.hashes." + to_string(pass));
if (debug_uf_stats) // for debugging
{
ufkmers.printStats("uf kmers");
......@@ -883,9 +904,7 @@ void bglue(Storage *storage,
// setup output file
string output_prefix = prefix;
std::atomic<unsigned long> out_id; // identified for output sequences
out_id = 0;
BufferedFasta out (output_prefix, 100000);
BufferedFasta out (output_prefix, 100000, true);
auto get_UFclass = [&modelCanon, &ufkmers_vector, &hasher, &uf_mphf]
(const string &kmerBegin, const string &kmerEnd,
......@@ -942,10 +961,9 @@ void bglue(Storage *storage,
// partition the glue into many files, à la dsk
std::mutex mtx; // lock to avoid a nasty bug when calling output()
auto partitionGlue = [k, &modelCanon /* crashes if copied!*/, \
&get_UFclass, &gluePartitions,
&out, &outLock, &nb_seqs_in_partition, &out_id, nbGluePartitions, &mtx]
&out, &outLock, &nb_seqs_in_partition, nbGluePartitions]
(const Sequence& sequence)
{
const string &seq = sequence.toString();
......@@ -971,12 +989,8 @@ void bglue(Storage *storage,
float mean_abundance = get_mean_abundance(abundances);
uint32_t sum_abundances = get_sum_abundance(abundances);
// for some reason i do need that lock_guard here.. even though output is itself lock guarded. maybe some lazyness in the evauation of the to_string(out_id++)? who kon
// anyway this fixes the problem, i'll understand it some other time.
std::lock_guard<std::mutex> lock(mtx);
output(seq, out, std::to_string(out_id++) + " LN:i:" + to_string(seq.size()) + " KC:i:" + to_string(sum_abundances) + " km:f:" + to_string_with_precision(mean_abundance));
// km is not a standard GFA field so i'm putting it in lower case as per the spec
// maybe could optimize by writing to disk using queues, if that's ever a bottleneck
output(seq, out, "LN:i:" + to_string(seq.size()) + " KC:i:" + to_string(sum_abundances) + " km:f:" + to_string_with_precision(mean_abundance));
return;
}
......@@ -1030,7 +1044,7 @@ void bglue(Storage *storage,
for (int partition = 0; partition < nbGluePartitions; partition++)
{
auto glue_partition = [&modelCanon, &ufkmers, partition, &gluePartition_prefix, nbGluePartitions, &copy_nb_seqs_in_partition,
&get_UFclass, &out, &outLock, &out_id, kmerSize, &mtx]( int thread_id)
&get_UFclass, &out, &outLock, kmerSize]( int thread_id)
{
int k = kmerSize;
......@@ -1078,7 +1092,7 @@ void bglue(Storage *storage,
markedSeq<SPAN> ms(seq_index, lmark, rmark, kmmerBegin.value(), kmmerEnd.value());
//if (ufclass == 38145) std::cout << " ufclass " << ufclass << " seq " << seq << " seq index " << seq_index << " " << lmark << rmark << " ks " << kmmerBegin.value() << " ke " << kmmerEnd.value() << std::endl; // debug specific partition
//std::cout << " ufclass " << ufclass << " seq " << seq << " seq index " << seq_index << " " << lmark << rmark << " ks " << kmmerBegin.value() << " ke " << kmmerEnd.value() << std::endl; // debug specific partition
msInPart[ufclass].push_back(ms);
seq_index++;
}
......@@ -1119,10 +1133,7 @@ void bglue(Storage *storage,
float mean_abundance = get_mean_abundance(abs);
uint32_t sum_abundances = get_sum_abundance(abs);
{
// for some reason i do need that lock_guard here.. even though output is itself lock guarded. maybe some lazyness in the evauation of the to_string(out_id++)? who kon
// anyway this fixes the problem, i'll understand it some other time.
std::lock_guard<std::mutex> lock(mtx);
output(seq, out, std::to_string(out_id++) + " LN:i:" + to_string(seq.size()) + " KC:i:" + to_string(sum_abundances) + " km:f:" + to_string_with_precision(mean_abundance));
output(seq, out, "LN:i:" + to_string(seq.size()) + " KC:i:" + to_string(sum_abundances) + " km:f:" + to_string_with_precision(mean_abundance));
}
}
......
......@@ -43,16 +43,21 @@ class BufferedFasta
std::string buffer;
unsigned long buffer_length;
FILE* _insertHandle;
bool needs_consecutive_ids;
unsigned long consecutive_id;
public:
unsigned long max_buffer;
bool threadsafe;
BufferedFasta(const std::string filename, unsigned long given_max_buffer = 50000)
BufferedFasta(const std::string filename, unsigned long given_max_buffer = 50000, bool given_needs_consecutive_ids = false)
{
max_buffer = given_max_buffer; // that much of buffering will be written to the file at once (in bytes)
threadsafe = true;
buffer_length = 0;
consecutive_id = 0;
needs_consecutive_ids = given_needs_consecutive_ids;
if ((!threadsafe) && needs_consecutive_ids) { std::cout << "error, non-threadsafe bufferedfasta with consecutive ids asked" << std::endl; exit(1);}
_insertHandle = fopen (filename.c_str(), "w");
if (!_insertHandle) { std::cout << "error opening " << filename << " for writing." << std::endl; exit(1);}
buffer.reserve(max_buffer+1000/*security*/);
......@@ -73,7 +78,13 @@ class BufferedFasta
if (buffer_length + insert_size > max_buffer)
flush();
buffer_length += insert_size;
buffer += ">" + comment + "\n" + seq + "\n";
buffer += ">";
if (needs_consecutive_ids)
{
buffer += std::to_string(consecutive_id) + " ";
consecutive_id++;
}
buffer += comment + "\n" + seq + "\n";
if (threadsafe)
mtx.unlock();
}
......