Skip to content
Commits on Source (7)
gatb-core (1.4.0+dfsg-1) UNRELEASED; urgency=medium
gatb-core (1.4.1+dfsg-1) UNRELEASED; urgency=medium
* Initial release (Closes: #873044)
-- Andreas Tille <tille@debian.org> Tue, 28 Nov 2017 10:37:22 +0100
-- Andreas Tille <tille@debian.org> Mon, 14 May 2018 21:36:32 +0200
......@@ -4,7 +4,7 @@ Uploaders: Nadiya Sitdykova <rovenskasa@gmail.com>,
Andreas Tille <tille@debian.org>
Section: science
Priority: optional
Build-Depends: debhelper (>= 10),
Build-Depends: debhelper (>= 11~),
d-shlibs,
cmake,
libcppunit-dev,
......@@ -13,9 +13,9 @@ Build-Depends: debhelper (>= 10),
libjsoncpp-dev,
doxygen,
graphviz
Standards-Version: 4.1.1
Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/gatb-core.git
Vcs-Git: https://anonscm.debian.org/git/debian-med/gatb-core.git
Standards-Version: 4.1.4
Vcs-Browser: https://salsa.debian.org/med-team/gatb-core
Vcs-Git: https://salsa.debian.org/med-team/gatb-core.git
Homepage: https://github.com/GATB/gatb-core
Package: gatb-core
......@@ -60,7 +60,7 @@ Architecture: any
Section: libdevel
Depends: ${shlibs:Depends},
${misc:Depends},
libgatbcore0 (= ${binary:Version}),
libgatbcore0 (= ${binary:Version})
Description: development library of the Genome Analysis Toolbox
The GATB-CORE project provides a set of highly efficient
algorithms to analyse NGS data sets. These methods enable
......
......@@ -9,7 +9,7 @@ cmake_minimum_required (VERSION 3.1.0)
# The default version number is the latest official build
SET (gatb-core_VERSION_MAJOR 1)
SET (gatb-core_VERSION_MINOR 4)
SET (gatb-core_VERSION_PATCH 0)
SET (gatb-core_VERSION_PATCH 1)
# But, it is possible to define another release number during a local build
IF (DEFINED MAJOR)
......
--------------------------------------------------------------------------------
# RELEASE 1.4.1
* This is a bugfix release
* fixed a segfault in some multi-threaded situations
* removed some spurious large files in the distrib
* fixed a bug with the -storage-type file option
--------------------------------------------------------------------------------
# RELEASE 1.4.0
......
......@@ -791,7 +791,10 @@ void bglue(Storage *storage,
const typename ModelCanon::Kmer kmmerBegin = modelCanon.codeSeed(kmerBegin.c_str(), Data::ASCII);
const typename ModelCanon::Kmer kmmerEnd = modelCanon.codeSeed(kmerEnd.c_str(), Data::ASCII);
ufkmers.union_(uf_mphf.lookup(hasher(kmmerBegin)), uf_mphf.lookup(hasher(kmmerEnd)));
uint32_t v1 = uf_mphf.lookup(hasher(kmmerBegin));
uint32_t v2 = uf_mphf.lookup(hasher(kmmerEnd));
ufkmers.union_(v1,v2);
//ufkmers.union_((hasher(kmmerBegin)), (hasher(kmmerEnd)));
#if 0
......@@ -818,7 +821,6 @@ void bglue(Storage *storage,
};
//setDispatcher (new SerialDispatcher()); // force single thread
Dispatcher dispatcher (nb_threads);
dispatcher.iterate (in->iterator(), createUF);
......@@ -836,6 +838,7 @@ void bglue(Storage *storage,
if (debug_uf_stats) // for debugging
{
ufkmers.printStats("uf kmers");
//ufkmers.dumpUF("uf.dump");
logging("after computing UF stats");
}
......@@ -929,6 +932,7 @@ void bglue(Storage *storage,
logging( "Allowed " + to_string((max_buffer * nbGluePartitions) /1024 /1024) + " MB memory for buffers");
// partition the glue into many files, à la dsk
auto partitionGlue = [k, &modelCanon /* crashes if copied!*/, \
&get_UFclass, &gluePartitions,
......@@ -1010,10 +1014,11 @@ void bglue(Storage *storage,
// glue all partitions using a thread pool
ThreadPool pool(nb_threads);
std::mutex mtx; // lock to avoid a nasty bug when calling output()
for (int partition = 0; partition < nbGluePartitions; partition++)
{
auto glue_partition = [&modelCanon, &ufkmers, partition, &gluePartition_prefix, nbGluePartitions, &copy_nb_seqs_in_partition,
&get_UFclass, &out, &outLock, &out_id, kmerSize]( int thread_id)
&get_UFclass, &out, &outLock, &out_id, kmerSize, &mtx]( int thread_id)
{
int k = kmerSize;
......@@ -1101,8 +1106,13 @@ void bglue(Storage *storage,
float mean_abundance = get_mean_abundance(abs);
uint32_t sum_abundances = get_sum_abundance(abs);
{
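// For some reason I do need that lock_guard here, even though output() is itself lock-guarded. Maybe some laziness in the evaluation of the to_string(out_id++)? Who knows.
// NOTE(review): out_id++ is part of the argument expression, so it is evaluated before output() is entered (hence before any lock taken inside it); that is presumably the cause - to be confirmed.
// Anyway this fixes the problem; I'll understand it some other time.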
std::lock_guard<std::mutex> lock(mtx);
output(seq, out, std::to_string(out_id++) + " LN:i:" + to_string(seq.size()) + " KC:i:" + to_string(sum_abundances) + " km:f:" + to_string_with_precision(mean_abundance));
}
}
free_memory_vector(ordered_sequences_idxs);
......
......@@ -126,7 +126,7 @@ template<size_t span>
void graph3<span>::compaction(uint iL, uint iR,typename graph3<span>::kmerType kmmer){
if(iR!=iL){
typename graph3<span>::kmerType RC=rcb(kmmer);
uint s1(unitigs[iL].size()),s2(unitigs[iR].size());
//uint s1(unitigs[iL].size()),s2(unitigs[iR].size());
bool b1(isNumber(unitigs[iL][0])),b2(isNumber(unitigs[iR][0]));
if(b1 and b2){return compaction(stoi(unitigs[iL]),stoi(unitigs[iR]),kmmer);}
if(b1){return compaction(stoi(unitigs[iL]),iR,kmmer);}
......
......@@ -5,6 +5,7 @@
#include <set>
#include <atomic>
#include <iostream>
#include <fstream>
#include <unordered_map>
/**
......@@ -130,6 +131,15 @@ public:
std::cout << "raw space of UF hash data: " << ( 2*getNumKeys * sizeof(T) ) /1024/1024 << " MB" << std::endl; // 2x because each key of type T is associated to a value of type T
}
// Debug function: writes the content of mData to a text file, one line per
// slot, formatted as "<index> <value>".
//
// \param[in] file : path of the dump file, opened with std::ofstream's default mode.
//                   Nothing is written when the file cannot be opened.
void dumpUF(std::string file)
{
    std::ofstream dumpfile(file);
    if (!dumpfile.is_open())
        return;

    // A 64-bit index cannot wrap around if size() exceeds the 32-bit range.
    // '\n' is used instead of std::endl so the stream is not flushed after every line.
    for (uint64_t i = 0; i < size(); ++i)
        dumpfile << i << " " << mData[i] << '\n';

    // the stream is flushed and closed by its destructor
}
mutable std::vector<std::atomic<uint64_t>> mData;
......
......@@ -309,7 +309,6 @@ void build_visitor_solid<Node,Edge,GraphDataVariant>::operator() (GraphData<span
DEBUG ((cout << "builGraph for bank '" << bank->getId() << "'"
<< " kmerSize=" << kmerSize
<< " nksMin=" << nksMin
<< " output='" << output << "'"
<< endl
));
......
......@@ -1277,6 +1277,7 @@ public:
const std::string& kmerSizeStr
)
{
//std::cout << "custom createbloom, name=" << name << " size=" << sizeStr << " nbHash=" << nbHashStr << " k=" << kmerSizeStr << std::endl;
tools::misc::BloomKind kind; parse (name, kind);
return createBloom<T> (kind, (u_int64_t)atol (sizeStr.c_str()), (size_t)atol (nbHashStr.c_str()), atol (kmerSizeStr.c_str()));
}
......
......@@ -113,7 +113,9 @@ public:
void flush () { _bag->flush(); }
/** \copydoc Collection::addProperty */
void addProperty (const std::string& key, const std::string value) {}
// No property is stored here: both arguments are ignored and a warning is
// printed on std::cout so that the missing implementation gets reported.
void addProperty (const std::string& key, const std::string value) {
std::cout << "warning: collectionAbstract.addProperty() called without an implementation (notify a developer)" << std::endl;
}
/** \copydoc Collection::addProperty */
void addProperty (const std::string& key, const char* format ...)
......
......@@ -41,6 +41,7 @@ namespace gatb { namespace core { namespace tools { namespace misc { namespac
*********************************************************************/
// Default constructor: binds to the system time provider and creates the
// synchronizer locked by operator+= and operator/=.
TimeInfo::TimeInfo ()
    : _time    (system::impl::System::time()),
      _synchro (System::thread().newSynchronizer())
{
}
/*********************************************************************
......@@ -53,6 +54,7 @@ TimeInfo::TimeInfo () : _time(system::impl::System::time())
*********************************************************************/
// Constructor taking the time provider to use; also creates the
// synchronizer locked by operator+= and operator/=.
TimeInfo::TimeInfo (system::ITime& aTime)
    : _time    (aTime),
      _synchro (System::thread().newSynchronizer())
{
}
/*********************************************************************
......@@ -68,6 +70,16 @@ void TimeInfo::start (const char* name)
_entriesT0 [name] = _time.getTimeStamp();
}
// Destructor: releases the synchronizer created by the constructors.
TimeInfo::~TimeInfo()
{
    // deleting a null pointer is a no-op, so no explicit check is required
    delete _synchro;
    _synchro = 0;
}
/*********************************************************************
** METHOD :
** PURPOSE :
......@@ -88,9 +100,12 @@ void TimeInfo::stop (const char* name)
** OUTPUT :
** RETURN :
** REMARKS :
GR: this must be thread safe ! because multiple threads may call this on the same TimeInfo object at the same time !
*********************************************************************/
TimeInfo& TimeInfo::operator+= (TimeInfo& ti)
{
_synchro->lock();
const std::map <std::string, u_int32_t>& entries = ti.getEntries();
for (map <string, u_int32_t>::const_iterator it = entries.begin(); it != ti.getEntries().end(); ++it)
......@@ -98,6 +113,7 @@ TimeInfo& TimeInfo::operator+= (TimeInfo& ti)
_entries[it->first] += it->second;
}
_synchro->unlock();
return *this;
}
......@@ -108,14 +124,18 @@ TimeInfo& TimeInfo::operator+= (TimeInfo& ti)
** OUTPUT :
** RETURN :
** REMARKS :
GR: this must be thread safe ! because multiple threads may call this on the same TimeInfo object at the same time !
*********************************************************************/
// Divides every recorded entry by nb (the quotient is computed in float and
// truncated back to u_int32_t). The update is done while holding _synchro.
//
// \param[in] nb : divisor; when 0 the entries are left untouched.
// \return the current object.
TimeInfo& TimeInfo::operator/= (size_t nb)
{
    // A zero divisor would produce a non-finite float, and converting such a
    // value to an unsigned integer is undefined behaviour.
    if (nb == 0)  { return *this; }

    _synchro->lock();

    const float divisor = (float)nb;

    // update the mapped values in place instead of looking each key up again
    for (std::map<std::string, u_int32_t>::iterator it = _entries.begin(); it != _entries.end(); ++it)
    {
        it->second = (u_int32_t) ((float)it->second / divisor);
    }

    _synchro->unlock();

    return *this;
}
......
......@@ -30,6 +30,7 @@
#include <gatb/tools/misc/api/IProperty.hpp>
#include <gatb/system/api/ITime.hpp>
#include <gatb/system/impl/System.hpp>
#include <map>
......@@ -75,6 +76,8 @@ public:
/** Default constructor. */
TimeInfo ();
~TimeInfo();
/** Constructor taking a time factory.
* \param[in] aTime : the time factory to be used.
*/
......@@ -126,6 +129,7 @@ private:
system::ITime& _time;
std::map <std::string, u_int32_t> _entriesT0;
std::map <std::string, u_int32_t> _entries;
gatb::core::system::ISynchronizer* _synchro;
};
/********************************************************************************/
......
......@@ -35,9 +35,11 @@
#include <gatb/tools/collections/impl/IteratorFile.hpp>
#include <gatb/tools/collections/impl/CollectionAbstract.hpp>
#include <gatb/system/impl/System.hpp>
#include <json/json.hpp>
#include <string>
#include <vector>
#include <fstream>
/********************************************************************************/
namespace gatb {
......@@ -63,18 +65,76 @@ public:
/* Note (Rayan): this isn't very clean. Two file objects are opened, one by BagFile (in write mode) and one in IterableFile (in read mode).
* With Clang/OSX, turns out the IterableFile was created before BagFile, causing some troubles.
* Also this is opening the file twice, not nice. Anyway until I think of a better system, it's kept as it is, and IterableFile does a small hack*/
), _name(filename)
), _name(filename), _propertiesName(filename+".props")
{}
/** Destructor. */
virtual ~CollectionFile() {}
/** \copydoc tools::collections::Collection::remove */
void remove () { gatb::core::system::impl::System::file().remove (_name); }
// Removes the collection file and its companion properties file
// (_propertiesName, i.e. the file name followed by ".props").
void remove () {
gatb::core::system::impl::System::file().remove (_name);
gatb::core::system::impl::System::file().remove (_propertiesName);
}
/* R: some code duplication with GroupFile, but it's the same in the HDF5 case. not the best design.
* so, collections can hold properties, so can groups.. */
/** \copydoc tools::collections::Collection::addProperty */
void addProperty (const std::string& key, const std::string value)
{
//std::cout << "CollectionFile addProperty called, key=" << key << " value=" << value<< std::endl;
std::ifstream myfile (_propertiesName);
std::string data, line;
if (myfile.is_open())
{
while ( getline (myfile,line) )
data += line;
myfile.close();
}
json::JSON j;
if (data.size() > 0)
j = json::LoadJson(data);
// otherwise json is empty and we create it
j[key] = value;
std::string s = j.dump();
std::ofstream myfile2;
myfile2.open (_propertiesName);
myfile2 << s;
myfile2.close();
}
/** \copydoc tools::collections::Collection::getProperty */
std::string getProperty (const std::string& key)
{
//std::cout << "CollectionFile getProperty called, key=" << key << std::endl;
std::string result;
std::ifstream myfile (_propertiesName);
std::string data, line;
if (myfile.is_open())
{
while ( getline (myfile,line) )
data += line;
myfile.close();
}
json::JSON j;
if (data.size() > 0)
{
j = json::LoadJson(data);
result = j[key].ToString();
}
return result;
}
private:
std::string _name;
std::string _propertiesName;
};
/********************************************************************************/
......
......@@ -63,7 +63,7 @@ namespace impl {
int ok = system::impl::System::file().mkdir(folder, 0755);
if(ok != 0){
std::cout << "Error: can't create output directory (" << folder<< ")\n" << " debug, doesexist:" << system::impl::System::file().doesExistDirectory(folder);
std::cout << "created directory " << folder << std::endl;
std::cout << "created directory " << folder << std::endl; // doesn't seem to be ever printed
}
}
/** We may need to create the HDF5 group. Empty name means root group, which is constructed by default. */
......@@ -221,7 +221,7 @@ public:
std::string filename = file_folder + parent->getFullId('.') + std::string(".") + name;
std::string folder = system::impl::System::file().getDirectory(filename);
std::string prefix = system::impl::System::file().getBaseName(filename);
std::string prefix = system::impl::System::file().getBaseName(filename) + std::string(".") + name; // because gatb's getBaseName is stupid and cuts after the last dot
if (nb == 0)
{ // if nb is 0, it means we're opening partitions and not creating them, thus we need to get the number of partitions.
......
......@@ -118,6 +118,7 @@ public:
bloomCollection->addProperty ("nb_hash", ss2.str());
bloomCollection->addProperty ("type", bloom->getName());
bloomCollection->addProperty ("kmer_size", ss3.str());
bloomCollection->flush (); // R: wasn't there before but I guess this can't hurt
}
/** Load a Bloom filter from a group
......
......@@ -62,14 +62,15 @@ class TestLeon : public Test
CPPUNIT_TEST_SUITE_GATB (TestLeon);
CPPUNIT_TEST_GATB(bank_checkLeon1);
/*
CPPUNIT_TEST_GATB(bank_checkLeon2);
CPPUNIT_TEST_GATB(bank_checkLeon3);
CPPUNIT_TEST_GATB(bank_checkLeon4);
CPPUNIT_TEST_GATB(bank_checkLeon5);
CPPUNIT_TEST_GATB(bank_checkLeon6);
CPPUNIT_TEST_GATB(bank_checkLeon7);
CPPUNIT_TEST_GATB(bank_checkLeon8);*/
//removed some large files from distrib
// CPPUNIT_TEST_GATB(bank_checkLeon7);
// CPPUNIT_TEST_GATB(bank_checkLeon8);
CPPUNIT_TEST_SUITE_GATB_END();
......@@ -155,10 +156,12 @@ public:
// STEP 2: compare reference and compressed version
// we open the files in read mode
IBank* fasBank = Bank::open (fastqFile); //BankFasta
IBank* leonBank = Bank::open (leonFile); //BankLeon
bank_compare_banks_equality(fasBank, leonBank);
}
/**
......