Skip to content
Commits on Source (9)
gatb-core (1.4.1+git20190813.a73b6dd+dfsg-1) unstable; urgency=medium
* New upstream version
* debhelper-compat 12
* Standards-Version: 4.4.0
* Fix symbols file to match output from gcc9
Closes: #925690
* List missing dependencies explicitly
Closes: #931403
-- Andreas Tille <tille@debian.org> Tue, 20 Aug 2019 12:51:53 +0200
gatb-core (1.4.1+git20181225.44d5a44+dfsg-3) unstable; urgency=medium
* Fix symbols
......
......@@ -4,7 +4,7 @@ Uploaders: Nadiya Sitdykova <rovenskasa@gmail.com>,
Andreas Tille <tille@debian.org>
Section: science
Priority: optional
Build-Depends: debhelper (>= 12~),
Build-Depends: debhelper-compat (= 12),
d-shlibs,
cmake,
libcppunit-dev,
......@@ -13,7 +13,7 @@ Build-Depends: debhelper (>= 12~),
libjsoncpp-dev,
doxygen,
graphviz
Standards-Version: 4.3.0
Standards-Version: 4.4.0
Vcs-Browser: https://salsa.debian.org/med-team/gatb-core
Vcs-Git: https://salsa.debian.org/med-team/gatb-core.git
Homepage: https://github.com/GATB/gatb-core
......@@ -21,7 +21,9 @@ Homepage: https://github.com/GATB/gatb-core
Package: gatb-core
Architecture: any
Depends: ${shlibs:Depends},
${misc:Depends}
${misc:Depends},
libhdf5-103,
libcppunit-1.14-0
Description: Genome Analysis Toolbox with de-Bruijn graph
The GATB-CORE project provides a set of highly efficient
algorithms to analyse NGS data sets. These methods enable
......
This diff is collapsed.
......@@ -4,7 +4,7 @@ Description: Install examples into correct dir
--- a/gatb-core/CMakeLists.txt
+++ b/gatb-core/CMakeLists.txt
@@ -286,7 +286,7 @@ ENDIF()
@@ -283,7 +283,7 @@ ENDIF()
IF (NOT DEFINED GATB_CORE_EXCLUDE_EXAMPLES)
# add example snippets into binary archive (use by CPack directive)
......@@ -13,7 +13,7 @@ Description: Install examples into correct dir
ENDIF()
################################################################################
@@ -294,9 +294,10 @@ ENDIF()
@@ -291,9 +291,10 @@ ENDIF()
################################################################################
IF (NOT DEFINED GATB_CORE_INSTALL_EXCLUDE)
......
......@@ -69,11 +69,11 @@ Description: Use Debian packaged hdf5
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
set (debug 1)
@@ -211,16 +211,23 @@ set (gatb-core-flags ${LIBRARY_COMPILE_D
@@ -211,16 +211,22 @@ set (gatb-core-flags ${LIBRARY_COMPILE_D
set (gatb-core-includes ${PROJECT_BINARY_DIR}/include ${PROJECT_BINARY_DIR}/include/${CMAKE_BUILD_TYPE} ${PROJECT_SOURCE_DIR}/src ${PROJECT_SOURCE_DIR}/thirdparty ${gatb-core-extra-libraries-inc})
# We define the libraries used for linking binary based on gatb core
-set (gatb-core-libraries gatbcore-static dl pthread z hdf5 ${gatb-core-extra-libraries})
-set (gatb-core-libraries gatbcore-static dl pthread z hdf5-static ${gatb-core-extra-libraries})
+set (gatb-core-libraries gatbcore-static dl pthread z ${gatb-core-extra-libraries})
# We define the directory where to find cmake helpers
......@@ -95,11 +95,10 @@ Description: Use Debian packaged hdf5
+# target_link_libraries(name_of_program_or_library ${HDF5_LIBRARIES})
+set (gatb-core-libraries ${gatb-core-libraries} ${HDF5_LIBRARIES})
+MESSAGE(STATUS HDF5_LIBRARIES="${HDF5_LIBRARIES}")
+
################################################################################
# LIBRARY GENERATION
@@ -253,11 +260,13 @@ ENDIF()
@@ -253,11 +259,11 @@ ENDIF()
################################################################################
ADD_SUBDIRECTORY(thirdparty)
......@@ -107,18 +106,16 @@ Description: Use Debian packaged hdf5
-# DEPENDENCIES
-################################################################################
-# we must be sure that hdf5 is built and installed before building gatb-core
-ADD_DEPENDENCIES (gatbcore-static hdf5 hdf5_postbuild)
+
-ADD_DEPENDENCIES (gatbcore-static hdf5-static hdf5_postbuild)
+# NOTE... we have to duplicate the variables for the other scopes (in particular for sub directories)
+set (gatb-core-flags ${gatb-core-flags} PARENT_SCOPE)
+set (gatb-core-includes ${gatb-core-includes} PARENT_SCOPE)
+set (gatb-core-libraries ${gatb-core-libraries} PARENT_SCOPE)
+set (gatb-core-cmake ${gatb-core-cmake} PARENT_SCOPE)
+
################################################################################
# DOCUMENTATION GENERATION
@@ -288,7 +297,7 @@ IF (NOT DEFINED GATB_CORE_INSTALL_EXCLUD
@@ -288,7 +294,7 @@ IF (NOT DEFINED GATB_CORE_INSTALL_EXCLUD
INSTALL (FILES ${PROJECT_SOURCE_DIR}/doc/misc/README.txt DESTINATION . OPTIONAL)
INSTALL (FILES ${PROJECT_SOURCE_DIR}/LICENCE DESTINATION . OPTIONAL)
INSTALL (FILES ${PROJECT_SOURCE_DIR}/THIRDPARTIES.md DESTINATION . OPTIONAL)
......
......@@ -4,4 +4,8 @@ CMakeLists.txt.user
/.cproject
*.leon
.DS_Store
thirdparty/hdf5/src/H5Edefin.h
thirdparty/hdf5/src/H5Einit.h
thirdparty/hdf5/src/H5Epubgen.h
thirdparty/hdf5/src/H5Eterm.h
project(gatb-core)
# We set the required version
cmake_minimum_required (VERSION 3.1.0)
cmake_minimum_required (VERSION 3.10.0)
################################################################################
# The version number.
......@@ -211,7 +211,7 @@ set (gatb-core-flags ${LIBRARY_COMPILE_DEFINITIONS})
set (gatb-core-includes ${PROJECT_BINARY_DIR}/include ${PROJECT_BINARY_DIR}/include/${CMAKE_BUILD_TYPE} ${PROJECT_SOURCE_DIR}/src ${PROJECT_SOURCE_DIR}/thirdparty ${gatb-core-extra-libraries-inc})
# We define the libraries used for linking binary based on gatb core
set (gatb-core-libraries gatbcore-static dl pthread z hdf5 ${gatb-core-extra-libraries})
set (gatb-core-libraries gatbcore-static dl pthread z hdf5-static ${gatb-core-extra-libraries})
# We define the directory where to find cmake helpers
set (gatb-core-cmake ${CMAKE_MODULE_PATH})
......@@ -257,7 +257,7 @@ ADD_SUBDIRECTORY(thirdparty)
# DEPENDENCIES
################################################################################
# we must be sure that hdf5 is built and installed before building gatb-core
ADD_DEPENDENCIES (gatbcore-static hdf5 hdf5_postbuild)
ADD_DEPENDENCIES (gatbcore-static hdf5-static hdf5_postbuild)
################################################################################
# DOCUMENTATION GENERATION
......
......@@ -8,7 +8,7 @@ GATB=$(shell pwd)/../
GATB_LIB=$(GATB)/build/lib
CXXFLAGS = -std=c++0x -O3 -I$(GATB)/src -I$(GATB)/build/include -I$(GATB)/thirdparty
LDFLAGS= -L$(GATB_LIB) -lgatbcore -lpthread -lz -lhdf5 -std=c++0x -ldl -static
LDFLAGS= -L$(GATB_LIB) -lgatbcore -lpthread -lhdf5 -lz -std=c++0x -ldl -static
SRCS = $(wildcard *.cpp)
......
......@@ -58,7 +58,7 @@ typedef struct
{
gzFile stream;
unsigned char *buffer;
int buffer_start, buffer_end;
uint64_t buffer_start, buffer_end;
bool eof;
char last_char;
......@@ -79,7 +79,7 @@ struct variable_string_t
variable_string_t () : length(0), max(0), string(0) {}
~variable_string_t () { if (string!=0) { FREE(string); } }
int length, max;
uint64_t length, max;
char *string;
};
......@@ -433,7 +433,7 @@ inline signed int buffered_gets (
if (bf->buffer_start >= bf->buffer_end && bf->eof) return -1;
while (1)
{
int i;
uint64_t i;
if (bf->buffer_start >= bf->buffer_end) if (!rebuffer (bf)) break;
if (allow_spaces)
{
......@@ -452,6 +452,7 @@ inline signed int buffered_gets (
if (is_power_of_2(s->max)) { s->max ++; }
nearest_power_of_2(s->max);
s->string = (char*) REALLOC (s->string, s->max);
//std::cout << "realloc of size " << s->max << " res: " << (uint64_t)(s->string) << std::endl;
}
memcpy (s->string + s->length, bf->buffer + bf->buffer_start, i - bf->buffer_start);
s->length += i - bf->buffer_start;
......@@ -765,7 +766,7 @@ void BankFasta::Iterator::estimate (u_int64_t& number, u_int64_t& totalSize, u_i
{
// linear extrapolation
number = (number * _ref.getSize()) / actualPosition;
totalSize = (totalSize * _ref.getSize()) / actualPosition;
totalSize = totalSize * ( (float)_ref.getSize() / (float)actualPosition) ;
}
}
......
This diff is collapsed.
......@@ -233,7 +233,7 @@ void graph3<span>::debruijn(){
minusone.setVal(-1);
left.push_back({0,minusone, SEQ_LEFT}); // dummy kmer so that we dont need to check bounds.. clever..
right.push_back({0,minusone, SEQ_LEFT});
uint debug_index = 0;
//uint debug_index = 0;
for (uint32_t i = 0; i< indiceUnitigs; i++)
{
......@@ -255,7 +255,7 @@ void graph3<span>::debruijn(){
//~ if (debug_index > 0) if (kL.indice == debug_index || kR.indice == debug_index ) std::cout << " identical, kl / kR " << kL.indice << " " << kR.indice << " unitigs " << unitigs[kL.indice] << " " << unitigs[kR.indice] << " positions " << kL.position << " " << kR.position << std::endl;
//~ if(isNumber (unitigs[kL.indice][0])){
//~ }
if(not kL.indice==kR.indice){
if(kL.indice!=kR.indice){
update_connected(kL);
update_connected(kR);
}
......@@ -265,7 +265,7 @@ void graph3<span>::debruijn(){
++iL;++iR;
if(left[iL].kmmer==kL.kmmer){
go=false;
if(not left[iL].indice==right[iR].indice){
if(left[iL].indice!=right[iR].indice){
update_connected(left[iL]);
}
//~ while(left[++iL].kmmer<=kR.kmmer ){}
......@@ -277,7 +277,7 @@ void graph3<span>::debruijn(){
}
if(right[iR].kmmer==kL.kmmer){
go=false;
if(not left[iL].indice==right[iR].indice){
if(left[iL].indice!=right[iR].indice){
update_connected(right[iR]);
}
//~ while(right[++iR].kmmer<=kL.kmmer ){}
......
......@@ -151,8 +151,8 @@ struct FunctorNodes
void operator() (Node& node)
{
// We get branching nodes neighbors for the current node.
GraphVector<Node> successors = graph->template successors (node);
GraphVector<Node> predecessors = graph->template predecessors (node);
GraphVector<Node> successors = graph->successors (node);
GraphVector<Node> predecessors = graph->predecessors (node);
if ( ! (successors.size()==1 && predecessors.size()==1) )
{
......
......@@ -888,7 +888,9 @@ GraphUnitigsTemplate<span>::GraphUnitigsTemplate (tools::misc::IProperties* para
/** We create a storage instance. */
/* (this is actually loading, not creating, the storage at "uri") */
BaseGraph::_storageMode = load_from_hdf5 ? STORAGE_HDF5 : STORAGE_FILE;
BaseGraph::setStorage (StorageFactory(BaseGraph::_storageMode).create (input, false, false));
bool append = true; // in principle we wouldn't need to modify the .h5 file when building unitigs, but at some very rare locations in the code, we do (like nb_unitigs)
BaseGraph::setStorage (StorageFactory(BaseGraph::_storageMode).create (input, false, false, false, append));
/** We get some properties. */
BaseGraph::_state = (typename GraphUnitigsTemplate<span>::StateMask) atol (BaseGraph::getGroup().getProperty ("state").c_str());
......@@ -1742,6 +1744,10 @@ simplePathLongest_avance(const NodeGU& node, Direction dir, int& seqLength, int&
GraphVector<EdgeGU> in_neighbors_vec = this->neighborsEdge (neighbors[0].to, reverse(dir));
int in_neighbors = in_neighbors_vec.size();
if (in_neighbors == 0)
std:: cout << "simplePathLongest_avance stopped at " << toString(cur_node) << " because of surprising lack of in-neighbor given that we had to come from somewhere" << std::endl;
assert(in_neighbors >= 1);
/** We check we have no in-branching. */
if (in_neighbors > 1)
......
......@@ -80,7 +80,7 @@ struct NodeDepth
template<size_t span, typename Node, typename Edge, typename Graph>
bool IterativeExtensions<span, Node, Edge, Graph>::compare_and_mark_last_k_minus_one_mer (const string& node, set<kmer_type>& kmers_set)
{
KmerModel leftKmer = modelMinusOne.codeSeed (node.c_str(), Data::ASCII, node.size() - modelMinusOne.getKmerSize());
KmerModelDirect leftKmer = modelMinusOne.codeSeed (node.c_str(), Data::ASCII, node.size() - modelMinusOne.getKmerSize());
kmer_type kmer = leftKmer.value();
if (kmers_set.find(kmer) != kmers_set.end())
......@@ -115,7 +115,7 @@ IterativeExtensions<span, Node, Edge, Graph>::IterativeExtensions (
max_depth(max_depth), max_nodes(max_nodes)
{
model = Model (graph.getKmerSize() );
modelMinusOne = Model (graph.getKmerSize()-1);
modelMinusOne = ModelDirect (graph.getKmerSize()-1);
}
/*********************************************************************
......@@ -168,7 +168,7 @@ void IterativeExtensions<span, Node, Edge, Graph>::construct_linear_seqs (
#ifndef DONTMARK
set<kmer_type> already_extended_from;
// compare_and_mark_last_k_minus_one_mer(L, already_extended_from); // mark first kmer to never extend from it again, // L will be marked at first iteration below
compare_and_mark_last_k_minus_one_mer(L, already_extended_from); // mark first kmer to never extend from it again
#endif
/** We will need a Path object and a Sequence object during the extension. */
......@@ -271,13 +271,15 @@ void IterativeExtensions<span, Node, Edge, Graph>::construct_linear_seqs (
}
#ifndef DONTMARK
// make sure this is the only time we see this (k-1)-overlap
bool already_seen = compare_and_mark_last_k_minus_one_mer (graph.toString(ksd.node), already_extended_from);
if(len_right>0){ //==0 particular case of the first contig, contig already marked (before the loop), do not mark again, this will stop extension
// make sure this is the only time we see this (k-1)-overlap (end of the just built contig)
bool already_seen = compare_and_mark_last_k_minus_one_mer (graph.toString(endNode), already_extended_from);
if (already_seen)
{
INFO (("... XXX not extending node %s becaues last k-1-mer was already seen\n", seq.toString().c_str()));
continue;
}
}
#endif
// continue extending from immediately overlapping kmers
......
......@@ -46,6 +46,8 @@ public:
/** ShortcutS. */
typedef typename kmer::impl::Kmer<span>::ModelCanonical Model;
typedef typename kmer::impl::Kmer<span>::ModelCanonical::Kmer KmerModel;
typedef typename kmer::impl::Kmer<span>::ModelDirect ModelDirect;
typedef typename kmer::impl::Kmer<span>::ModelDirect::Kmer KmerModelDirect;
typedef typename kmer::impl::Kmer<span>::Type kmer_type;
/** Constructor.
......@@ -109,7 +111,7 @@ private:
int max_nodes;
Model model;
Model modelMinusOne;
ModelDirect modelMinusOne;
/** Fill a Sequence instance from the results of the current graph traversal.
* \param[in] node : starting node of the path
......
......@@ -30,7 +30,7 @@ using namespace gatb::core::system::impl;
namespace gatb { namespace core { namespace debruijn { namespace impl {
static constexpr int nb_passes = 8;
static void write_final_output(const string& unitigs_filename, bool verbose, BankFasta* out, uint64_t &nb_unitigs);
static void write_final_output(const string& unitigs_filename, bool verbose, BankFasta* out, uint64_t &nb_unitigs, bool renumber_unitigs);
static bool get_link_from_file(std::ifstream& input, std::string &link, uint64_t &unitig_id);
/* this procedure finds the overlaps between unitigs, using a hash table of all extremity (k-1)-mers
......@@ -42,30 +42,68 @@ namespace gatb { namespace core { namespace debruijn { namespace impl {
*
* so the memory usage is just that of the hash tables that record kmers, not of the links
*
* Assumption: FASTA header of unitigs starts with a unique number (unitig ID).
* Normally bcalm outputs consecutive unitig ID's but LinkTigs could also work with non-consecutive, non-sorted IDs
* Two modes of operation:
*
* renumber_unitigs == true: FASTA header can be anything. Useful for any program that has removed some unitigs, e.g. merci.
* LinkTigs will take the header and split it into space-separated fields, remove the first field and keep the remaining ones.
* The first field will be replaced by numbered IDs in consecutive order, between 0 and |nb_unitigs|-1.
*
* renumber_unitigs == false: then FASTA headers of unitigs _needs_ to start with a unique number (unitig ID).
* Normally bcalm outputs consecutive unitig ID's but LinkTigs can also work with non-consecutive, non-sorted IDs
*/
template<size_t span>
void link_tigs(string unitigs_filename, int kmerSize, int nb_threads, uint64_t &nb_unitigs, bool verbose)
void link_tigs(string unitigs_filename, int kmerSize, int nb_threads, uint64_t &nb_unitigs, bool verbose, bool renumber_unitigs)
{
bcalm_logging = verbose;
BankFasta* out = new BankFasta(unitigs_filename+".indexed");
if (kmerSize < 4) { std::cout << "error, recent optimizations (specifically link_unitigs) don't support k<5 for now" << std::endl; exit(1); }
BankFasta* out = new BankFasta(unitigs_filename+".linked");
if (kmerSize < 4) { std::cout << "error, link_unitigs doesn't support k<5, sorry. Contact a developer if you really need k<4 support (alternatively: construct that tiny dBG using Python :)" << std::endl; exit(1); }
logging("Finding links between unitigs");
for (int pass = 0; pass < nb_passes; pass++)
link_unitigs_pass<span>(unitigs_filename, verbose, pass, kmerSize);
link_unitigs_pass<span>(unitigs_filename, verbose, pass, kmerSize, renumber_unitigs);
write_final_output(unitigs_filename, verbose, out, nb_unitigs);
write_final_output(unitigs_filename, verbose, out, nb_unitigs, renumber_unitigs);
delete out;
system::impl::System::file().remove (unitigs_filename);
system::impl::System::file().rename (unitigs_filename+".indexed", unitigs_filename);
system::impl::System::file().rename (unitigs_filename+".linked", unitigs_filename);
logging("Done finding links between unitigs");
}
/* strip L:'s from a comment line*/
static string remove_previous_links(string &header)
{
bool debug = false;
if (debug) std::cout << "parsing unitig links for " << header << std::endl;
string res = "";
std::stringstream stream(header);
while(1) {
string tok;
stream >> tok;
if(!stream)
break;
string field = tok.substr(0,2);
if (field != "L:")
{
res += tok + " ";
}
}
//res = res.substr(0,res.size()-2);
if (debug) std::cout << "returning " << res<< std::endl;
return res;
}
/* keep all the comment line except the first field*/
static string strip_first_field(string &header)
{
return header.substr(header.find(' ')+1);
}
/*
* takes all the prefix.links.* files, sorted by unitigs.
* do a n-way merge to gather the links for each unitig in unitig order
......@@ -73,7 +111,7 @@ void link_tigs(string unitigs_filename, int kmerSize, int nb_threads, uint64_t &
*/
static void write_final_output(const string& unitigs_filename, bool verbose, BankFasta* out, uint64_t &nb_unitigs)
static void write_final_output(const string& unitigs_filename, bool verbose, BankFasta* out, uint64_t &nb_unitigs, bool renumber_unitigs)
{
logging("gathering links from disk");
std::ifstream* inputLinks[nb_passes];
......@@ -88,6 +126,7 @@ static void write_final_output(const string& unitigs_filename, bool verbose, Ban
string cur_links, seq, comment;
seq = itSeq->toString();
comment = itSeq->getComment();
comment = remove_previous_links(comment);
for (int pass = 0; pass < nb_passes; pass++)
{
......@@ -102,6 +141,10 @@ static void write_final_output(const string& unitigs_filename, bool verbose, Ban
uint64_t last_unitig = 0;
nb_unitigs = 0; // passed variable
bool first_one = true;
if (renumber_unitigs)
comment = to_string(nb_unitigs) + " " + strip_first_field(comment);
// nb_passes-way merge sort
while ((!finished.all()) || pq.size() > 0)
......@@ -110,6 +153,12 @@ static void write_final_output(const string& unitigs_filename, bool verbose, Ban
int pass = get<1>(cur);
uint64_t unitig = get<0>(cur);
if (first_one) // handles the case where first unitig isn't labeled 0
{
last_unitig = unitig;
first_one = false;
}
if (unitig != last_unitig)
{
Sequence s (Data::ASCII);
......@@ -123,6 +172,9 @@ static void write_final_output(const string& unitigs_filename, bool verbose, Ban
itSeq.next();
seq = itSeq->toString();
comment = itSeq->getComment();
comment = remove_previous_links(comment);
if (renumber_unitigs)
comment = to_string(nb_unitigs) + " " + strip_first_field(comment);
}
cur_links += get<2>(cur);
......@@ -213,12 +265,13 @@ static void record_links(uint64_t utig_id, int pass, const string &link, std::of
template<size_t span>
void link_unitigs_pass(const string unitigs_filename, bool verbose, const int pass, const int kmerSize)
void link_unitigs_pass(const string unitigs_filename, bool verbose, const int pass, const int kmerSize, const bool renumber_unitigs)
{
typedef typename kmer::impl::Kmer<span>::ModelCanonical Model;
typedef typename kmer::impl::Kmer<span>::Type Type;
bool debug = false;
uint64_t utig_counter = 0;
BankFasta inputBank (unitigs_filename);
BankFasta::Iterator itSeq (inputBank);
......@@ -240,6 +293,8 @@ void link_unitigs_pass(const string unitigs_filename, bool verbose, const int pa
const string& comment = itSeq->getComment();
unsigned long utig_id = std::stoul(comment.substr(0, comment.find(' ')));
if (renumber_unitigs) utig_id = utig_counter;
if (is_in_pass(seq, pass, UNITIG_BEGIN, kmerSize))
{
if (debug) std::cout << "pass " << pass << " examining beginning" << std::endl;
......@@ -257,6 +312,8 @@ void link_unitigs_pass(const string unitigs_filename, bool verbose, const int pa
utigs_links_map[kmerEnd.value()].push_back(eEnd.pack());
// there is no UNITIG_BOTH here because we're taking (k-1)-mers.
}
utig_counter++;
}
std::ofstream links_file(unitigs_filename+".links." +to_string(pass));
......@@ -264,6 +321,9 @@ void link_unitigs_pass(const string unitigs_filename, bool verbose, const int pa
uint64_t nb_hashed_entries = 0;
for (auto v : utigs_links_map)
nb_hashed_entries += v.second.size();
utig_counter = 0;
logging("step 2 (" + to_string(utigs_links_map.size()) + "kmers/" + to_string(nb_hashed_entries) + "extremities)");
for (itSeq.first(); !itSeq.isDone(); itSeq.next())
......@@ -272,6 +332,8 @@ void link_unitigs_pass(const string unitigs_filename, bool verbose, const int pa
const string& comment = itSeq->getComment();
unsigned long utig_id = std::stoul(comment.substr(0, comment.find(' ')));
if (renumber_unitigs) utig_id = utig_counter;
if (debug) std::cout << "unitig " << std::to_string(utig_id) << " : " << seq << std::endl;
if (is_in_pass(seq, pass, UNITIG_BEGIN, kmerSize))
......@@ -378,6 +440,8 @@ void link_unitigs_pass(const string unitigs_filename, bool verbose, const int pa
}
record_links(utig_id, pass, out_links, links_file);
}
utig_counter++;
}
}
......
......@@ -30,10 +30,10 @@ namespace gatb { namespace core { namespace debruijn { namespace impl {
template<size_t SPAN>
void link_tigs( std::string prefix, int kmerSize, int nb_threads, uint64_t &nb_unitigs, bool verbose);
void link_tigs( std::string prefix, int kmerSize, int nb_threads, uint64_t &nb_unitigs, bool verbose, bool renumber_unitigs = false);
template<size_t span>
void link_unitigs_pass(const std::string unitigs_filename, bool verbose, const int pass, const int kmerSize);
void link_unitigs_pass(const std::string unitigs_filename, bool verbose, const int pass, const int kmerSize, const bool renumber_unitigs);
}}}}
......
......@@ -1108,7 +1108,8 @@ void Simplifications<GraphType,Node,Edge>::heuristic_most_covered_path_unitigs(
current_node = neighbors[0].to;
if (_graph.degree(current_node, reverse(dir)) <= 1)
{
std::cout << "Weird, there was supposed to be an in-neighbor. Maybe there's a loop. Remove this print if it never happens" << std::endl;
// actually this happens for kmers that are linked to themselves (e.g. k=5, AAATT, is connected to its own revcomp AATTT, so it's fine to end the simple path here on those cases)
//std::cout << "Weird, there was supposed to be an in-neighbor. Maybe there's a loop. Remove this print if it never happens" << "in degree " << _graph.degree(current_node, reverse(dir)) << " node" << _graph.toString(current_node) << std::endl;
return;
}
......@@ -1280,7 +1281,7 @@ unsigned long Simplifications<GraphType,Node,Edge>::removeBulges()
unsigned int k = _graph.getKmerSize();
unsigned int maxBulgeLength = std::max((unsigned int)((double)k * _bulgeLen_kMult), (unsigned int)(k + _bulgeLen_kAdd)); // SPAdes, exactly
unsigned int backtrackingLimit = k+_bulgeAltPath_kAdd;//maxBulgeLength; // arbitrary, but if too high it will take much time; // with unitigs, no reason that it has to depend on k, but for some reason, setting it to just "k" doesnt remove nearly as many bulges as k=20. todo investigate that someday.
unsigned int altPathCovMult = _bulgeAltPath_covMult;
double altPathCovMult = _bulgeAltPath_covMult;
// stats
//
......
......@@ -66,7 +66,7 @@ public:
double _bulgeLen_kMult;
unsigned int _bulgeLen_kAdd;
unsigned int _bulgeAltPath_kAdd;
unsigned int _bulgeAltPath_covMult;
double _bulgeAltPath_covMult;
double _ecLen_kMult;
double _ecRCTCcutoff;
......