Skip to content
Commits on Source (4)
biobambam2 (2.0.147-1) unstable; urgency=medium
* fix heap comparisons in bamconsensus
-- German Tischler-Höhle <germant@miltenyibiotec.de> Tue, 05 Nov 2019 10:19:07 +0100
biobambam2 (2.0.146-1) unstable; urgency=medium
* add explicit flush in bamtofastq
* bump libmaus2 version
* explicitely call flush on BamToFastqOutputFileSet objects in bamtofastq (try to avoid triggering exceptions in object destructor)
* allow disabling adapter detection/clipping in fastqtobam2
* use dynamic scheduling in bamfilterk
-- German Tischler-Höhle <germant@miltenyibiotec.de> Wed, 30 Oct 2019 10:48:48 +0100
biobambam2 (2.0.145-1) unstable; urgency=medium
* refactor vcfsort
* add kmer end position on reference in bamconsensus
* fixes after libmaus2 changes
-- German Tischler-Höhle <germant@miltenyibiotec.de> Fri, 11 Oct 2019 12:46:33 +0200
biobambam2 (2.0.144-1) unstable; urgency=medium
* update list of uncommon programs
-- German Tischler-Höhle <germant@miltenyibiotec.de> Fri, 27 Sep 2019 13:37:44 +0200
biobambam2 (2.0.143-1) unstable; urgency=medium
* fix newline issue in Makefile
-- German Tischler-Höhle <germant@miltenyibiotec.de> Fri, 27 Sep 2019 13:11:19 +0200
biobambam2 (2.0.142-1) unstable; urgency=medium
* add install-uncommon configure option to deselect some programs from standard installation
-- German Tischler-Höhle <germant@miltenyibiotec.de> Fri, 27 Sep 2019 12:58:23 +0200
biobambam2 (2.0.141-1) unstable; urgency=medium
* bump libmaus2 version
-- German Tischler-Höhle <germant@miltenyibiotec.de> Fri, 27 Sep 2019 11:24:14 +0200
biobambam2 (2.0.140-1) unstable; urgency=medium
* update libmaus2 version
-- German Tischler-Höhle <germant@miltenyibiotec.de> Fri, 27 Sep 2019 08:57:03 +0200
biobambam2 (2.0.139-1) unstable; urgency=medium
* add bamheap3 program
-- German Tischler-Höhle <germant@miltenyibiotec.de> Thu, 19 Sep 2019 14:09:13 +0200
biobambam2 (2.0.138-1) unstable; urgency=medium
* add exportcdna option in bamfeaturecount
-- German Tischler-Höhle <germant@miltenyibiotec.de> Wed, 28 Aug 2019 15:50:12 +0200
biobambam2 (2.0.137-1) unstable; urgency=medium
* add manual page for bamfeaturecount
......
AC_INIT(biobambam2,2.0.137,[tischler@mpi-cbg.de],[biobambam2],[http://www.sanger.ac.uk])
AC_INIT(biobambam2,2.0.147,[germant@miltenyibiotec.de],[biobambam2],[https://gitlab.com/german.tischler/biobambam2])
AC_CANONICAL_SYSTEM
AC_PROG_LIBTOOL
......@@ -165,9 +165,7 @@ if test ! -z "${with_libmaus2}" ; then
fi
fi
PKG_CHECK_MODULES([libmaus2],[libmaus2 >= 2.0.663])
PKG_CHECK_MODULES([libmaus2digests],[libmaus2digests >= 2.0.663])
PKG_CHECK_MODULES([libmaus2seqchksumsfactory],[libmaus2seqchksumsfactory >= 2.0.663])
PKG_CHECK_MODULES([libmaus2],[libmaus2 >= 2.0.683])
if test ! -z "${with_libmaus2}" ; then
if test ! -z "${PKGCONFIGPATHSAVE}" ; then
......@@ -175,8 +173,8 @@ if test ! -z "${with_libmaus2}" ; then
fi
fi
LIBMAUS2CPPFLAGS="${libmaus2_CFLAGS} ${libmaus2seqchksumsfactory_CFLAGS} ${libmaus2digests_CFLAGS}"
LIBMAUS2LIBS="${libmaus2_LIBS} ${libmaus2seqchksumsfactory_LIBS} ${libmaus2digests_LIBS}"
LIBMAUS2CPPFLAGS="${libmaus2_CFLAGS}"
LIBMAUS2LIBS="${libmaus2_LIBS}"
CPPFLAGS_SAVE="${CPPFLAGS}"
LDFLAGS_SAVE="${LDFLAGS}"
......@@ -302,7 +300,7 @@ if test "${have_libmaus2_irods}" = "yes" ; then
fi
fi
PKG_CHECK_MODULES([libmaus2irods],[libmaus2irods >= 2.0.663])
PKG_CHECK_MODULES([libmaus2irods],[libmaus2irods >= 2.0.683])
LIBMAUS2IRODSCPPFLAGS="${libmaus2irods_CFLAGS}"
LIBMAUS2IRODSLIBS="${libmaus2irods_LIBS}"
......@@ -461,6 +459,19 @@ else
BLASTXMLTOBAMNOINSTEXP=${BLASTNXMLTOBAM}
fi
AC_ARG_ENABLE(install_uncommon,
AS_HELP_STRING([--enable-install-uncommon],[enable installation of some uncommon programs (default no)]),
[install_uncommon=${enableval}],[install_uncommon=no])
UNCOMMON="bamfilter bamfixmatecoordinates bamfixmatecoordinatesnamesorted bamtoname bamdisthist fastabgzfextract bamheap bamfrontback bamrandomtag bamheap2 bamheap3 bamtagconversion fastqtobampar bambisect vcffilterinfo vcfpatchcontigprepend vcfconcat vcfsort filtergtf bamconsensus"
UNCOMMONINSTALLED=
UNCOMMONUNINSTALLED=
if test "${install_uncommon}" = "yes" ; then
UNCOMMONINSTALLED="${UNCOMMON}"
else
UNCOMMONUNINSTALLED="${UNCOMMON}"
fi
AC_MSG_NOTICE([Using flags ${CFLAGS} for C compiler ${CC}])
AC_MSG_NOTICE([Using flags ${CXXFLAGS} for C++ compiler ${CXX}])
......@@ -497,4 +508,8 @@ AC_SUBST([GMPLIBS])
AC_SUBST([HAVE_PTHREAD_MUTEX_RECURSIVE_NP])
AC_SUBST([HAVE_PTHREAD_MUTEX_RECURSIVE])
#
AC_SUBST([UNCOMMON])
AC_SUBST([UNCOMMONINSTALLED])
AC_SUBST([UNCOMMONUNINSTALLED])
#
AC_OUTPUT(Makefile src/Makefile test/Makefile src/biobambam2/BamBamConfig.hpp)
biobambam2 (2.0.137+dfsg-2) UNRELEASED; urgency=medium
biobambam2 (2.0.147+dfsg-1) unstable; urgency=medium
* Just a reminder to close that bug also in the changelog.
* Team upload.
* New upstream version
* Standards-Version: 4.4.1
* Upstream renamed conflicting binary (Closes: #933937)
-- Steffen Moeller <moeller@debian.org> Wed, 28 Aug 2019 14:49:00 +0200
-- Steffen Moeller <moeller@debian.org> Wed, 06 Nov 2019 19:16:45 +0100
biobambam2 (2.0.137+dfsg-1) unstable; urgency=medium
......
......@@ -5,8 +5,8 @@ Section: science
Priority: optional
Build-Depends: debhelper-compat (= 12),
pkg-config,
libmaus2-dev (>= 2.0.663)
Standards-Version: 4.4.0
libmaus2-dev (>= 2.0.284)
Standards-Version: 4.4.1
Vcs-Browser: https://salsa.debian.org/med-team/biobambam2
Vcs-Git: https://salsa.debian.org/med-team/biobambam2.git
Homepage: https://gitlab.com/german.tischler/biobambam2
......
......@@ -39,7 +39,12 @@ man_MANS = ${MANPAGES}
EXTRA_DIST = ${MANPAGES}
bin_PROGRAMS = bamtofastq bammarkduplicates bamsort bammaskflags bamrecompress \
bin_PROGRAMS = \
bamtofastq \
bammarkduplicates \
bamsort \
bammaskflags \
bamrecompress \
bamadapterfind \
bamfilteraux \
bamauxsort \
......@@ -77,7 +82,6 @@ bin_PROGRAMS = bamtofastq bammarkduplicates bamsort bammaskflags bamrecompress \
bamintervalcomment \
bamintervalcommenthist \
bamstreamingmarkduplicates \
bamheap2 \
bamalignfrac \
bamfilternames \
bamsormadup \
......@@ -85,9 +89,7 @@ bin_PROGRAMS = bamtofastq bammarkduplicates bamsort bammaskflags bamrecompress \
bamexploderef \
bamfastexploderef \
bamfastnumextract \
fastqtobampar \
bamranksort \
bamtagconversion \
fastaexplod \
bamrecalculatecigar \
bamfiltermc \
......@@ -106,28 +108,37 @@ bin_PROGRAMS = bamtofastq bammarkduplicates bamsort bammaskflags bamrecompress \
bamreheader \
bamreplacechecksums \
bamfastcat \
vcffilterinfo \
vcfpatchcontigprepend \
populaterefcache \
bambisect \
vcfconcat \
vcfsort \
bamfiltereofblocks \
bamdepth \
bamdepthintersect \
bamfilterk \
bamconsensus \
bamfixpairinfo \
filtergtf \
bamfeaturecount \
@BLASTXMLTOBAMINSTEXP@
noinst_PROGRAMS = bamfilter bamfixmatecoordinates bamfixmatecoordinatesnamesorted bamtoname \
bamdisthist fastabgzfextract @BAMREFDEPTHPEAKS@ \
bamheap bamfrontback \
@BLASTXMLTOBAMNOINSTEXP@ bamrandomtag
EXTRA_PROGRAMS = blastnxmltobam
bamfeaturecount @BLASTXMLTOBAMINSTEXP@ @UNCOMMONINSTALLED@
noinst_PROGRAMS = @BAMREFDEPTHPEAKS@ @BLASTXMLTOBAMNOINSTEXP@ @UNCOMMONUNINSTALLED@
EXTRA_PROGRAMS = blastnxmltobam \
bamfilter \
bamfixmatecoordinates \
bamfixmatecoordinatesnamesorted \
bamtoname \
bamdisthist \
fastabgzfextract \
bamheap \
bamfrontback \
bamrandomtag \
bamheap2 \
bamheap3 \
bamtagconversion \
fastqtobampar \
bambisect \
bamconsensus \
vcffilterinfo \
vcfpatchcontigprepend \
vcfconcat \
vcfsort \
filtergtf
populaterefcache_SOURCES = programs/populaterefcache.cpp biobambam2/Licensing.cpp
populaterefcache_LDADD = ${LIBMAUS2LIBS}
......@@ -459,6 +470,11 @@ bamheap2_LDADD = ${LIBMAUS2LIBS}
bamheap2_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} ${AM_LDFLAGS}
bamheap2_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS}
bamheap3_SOURCES = programs/bamheap3.cpp biobambam2/Licensing.cpp
bamheap3_LDADD = ${LIBMAUS2LIBS}
bamheap3_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} ${AM_LDFLAGS}
bamheap3_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS}
bamalignfrac_SOURCES = programs/bamalignfrac.cpp biobambam2/Licensing.cpp
bamalignfrac_LDADD = ${LIBMAUS2LIBS}
bamalignfrac_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} ${AM_LDFLAGS}
......
This diff is collapsed.
......@@ -74,6 +74,44 @@ given in column 6 of the transcript output).
maximum fraction of bases allowed to be uncovered in the unique region of a transcript so the
transcript will be reported (i.e. minimum value allowed for the first number
given in column 4 of the transcript output).
.PP
.B exclude=<SECONDARY>:
Do not include reads in the output that have any of the given flags set. The
flags are given separated by commas. Valid flags are:
.IP PAIRED:
read was paired in sequencing
.IP PROPER_PAIR:
read has been mapped as part of a proper pair
.IP UNMAP:
read was not mapped
.IP MUNMAP:
mate of read was not mapped
.IP REVERSE:
read was mapped to the reverse strand
.IP MREVERSE:
mate of read was mapped to the reverse strand
.IP READ1:
read was first read of a pair during sequencing
.IP READ2:
read was second read of a pair during sequencing
.IP SECONDARY:
alignment is secondary, i.e. an alternative mapping to the primary alignment in the same file
.IP QCFAIL:
read as marked as having failed quality control
.IP DUP:
read is marked as a duplicate of another read in the same file (see bammarkduplicates)
.IP SUPPLEMENTARY:
read is marked as supplementary alignment
.PP
.B exportcdna=<0>:
instead of feature counting generate a FastA file containing the CDNA as
designated by the GTF annotation file. The second parameter (BAM file) needs
to be specified, but will not be read. This option requires a reference
FastA file suitable for the GTF file to be provided via the reference
key.
.PP
.B reference=<>:
name of a reference FastA file. This is required for the exportcdna option.
.SH AUTHOR
Written by German Tischler-Höhle.
.SH "REPORTING BUGS"
......
This diff is collapsed.
......@@ -528,7 +528,7 @@ int bamfilterk(libmaus2::util::ArgInfo const & arginfo)
Filter const filter(KH,AKB,AK,KB,k,thresval);
#if defined(_OPENMP)
#pragma omp parallel for num_threads(numthreads)
#pragma omp parallel for num_threads(numthreads) schedule(dynamic,1)
#endif
for ( uint64_t i = 0; i < numpacks; ++i )
{
......
This diff is collapsed.
......@@ -27,14 +27,6 @@
#include <libmaus2/digest/Digests.hpp>
#if defined(LIBMAUS2_HAVE_SHA2_ASSEMBLY)
#include <libmaus2/digest/DigestFactory_SHA2_ASM.hpp>
#endif
#if defined(LIBMAUS2_HAVE_SMMINTRIN_H) && defined(HAVE_SSE4)
#include <libmaus2/digest/DigestFactory_CRC32C_SSE42.hpp>
#endif
#include <libmaus2/digest/DigestFactoryContainer.hpp>
#include <libmaus2/parallel/NumCpus.hpp>
......@@ -416,13 +408,6 @@ int main(int argc, char * argv[])
{
try
{
#if defined(LIBMAUS2_HAVE_SHA2_ASSEMBLY)
libmaus2::digest::DigestFactoryContainer::addFactories(libmaus2::digest::DigestFactory_SHA2_ASM());
#endif
#if defined(LIBMAUS2_HAVE_SMMINTRIN_H) && defined(HAVE_SSE4)
libmaus2::digest::DigestFactoryContainer::addFactories(libmaus2::digest::DigestFactory_CRC32C_SSE42());
#endif
::libmaus2::util::ArgInfo const arginfo(argc,argv);
for ( uint64_t i = 0; i < arginfo.restargs.size(); ++i )
......
......@@ -760,6 +760,7 @@ void bamtofastqCollating(
{
for ( uint64_t i = 0; i < numoutputfiles; ++i )
{
AOS[i]->flush();
ALSPFDOS[i].reset();
ALSGZOS[i].reset();
}
......@@ -783,6 +784,8 @@ void bamtofastqCollating(
{
libmaus2::bambam::BamToFastqOutputFileSet OFS(arginfo);
try
{
while ( (ob = CHCBD.process()) )
{
uint64_t const precnt = cnt;
......@@ -927,6 +930,14 @@ void bamtofastqCollating(
<< "\t" << static_cast<double>(cnt)/rtc.getElapsedSeconds() << std::endl;
}
}
OFS.flush();
}
catch(...)
{
OFS.flush();
throw;
}
}
std::cerr << "[V] " << cnt << std::endl;
......@@ -1027,6 +1038,8 @@ void bamtofastqCollatingRanking(
libmaus2::bambam::BamToFastqOutputFileSet OFS(arginfo);
try
{
libmaus2::bambam::CircularHashCollatingBamDecoder::OutputBufferEntry const * ob = 0;
// number of alignments written to files
......@@ -1106,6 +1119,14 @@ void bamtofastqCollatingRanking(
if ( arginfo.getValue<unsigned int>("combs",0) )
std::cerr << combs;
OFS.flush();
}
catch(...)
{
OFS.flush();
throw;
}
}
void bamtofastqCollatingRanking(libmaus2::util::ArgInfo const & arginfo)
......
......@@ -1851,6 +1851,7 @@ struct BlockProcessBamEncodePackageDispatcher : public libmaus2::parallel::Simpl
AdapterMatchParameters const & AMP;
NumberEncodeTable const NET;
bool const zz;
bool const adaptercheck;
BlockProcessBamEncodePackageDispatcher(
libmaus2::parallel::LockedGrowingFreeList<
......@@ -1862,10 +1863,11 @@ struct BlockProcessBamEncodePackageDispatcher : public libmaus2::parallel::Simpl
libmaus2::bambam::AdapterFilter const & rAF,
AdapterMatchParameters const & rAMP,
bool const rzz,
bool const radaptercheck,
BlockReadStreamInfoVector * rBRSIV,
BlockProcessBamEncodePackageReturnEvent & rBPBEPRE,
BlockProcessBamEncodePackageFinishedEvent & rBPBEPFE
) : BRSIV(rBRSIV), BPBEPRE(rBPBEPRE), BPBEPFE(rBPBEPFE), rginfo(rrginfo), rgid(rginfo.ID), checksumsFreeList(rchecksumsFreeList), AF(rAF), AMP(rAMP), NET(), zz(rzz)
) : BRSIV(rBRSIV), BPBEPRE(rBPBEPRE), BPBEPFE(rBPBEPFE), rginfo(rrginfo), rgid(rginfo.ID), checksumsFreeList(rchecksumsFreeList), AF(rAF), AMP(rAMP), NET(), zz(rzz), adaptercheck(radaptercheck)
{
}
......@@ -1999,7 +2001,7 @@ struct BlockProcessBamEncodePackageDispatcher : public libmaus2::parallel::Simpl
);
}
bool const foundadapter = AF.searchAdapters(
bool const foundadapter = adaptercheck && AF.searchAdapters(
reinterpret_cast<uint8_t const *>(ptr->c + FQP.basestart),
FQP.baseend-FQP.basestart,
2 /* maximum number of mismatches */,
......@@ -2172,6 +2174,8 @@ struct BamStreamMergePackageDispatcher : public libmaus2::parallel::SimpleThread
uint64_t const mmask;
libmaus2::bambam::BamSeqEncodeTable const seqenc;
bool const adaptercheck;
static uint64_t getMMask()
{
static uint64_t const mmask =
......@@ -2205,8 +2209,9 @@ struct BamStreamMergePackageDispatcher : public libmaus2::parallel::SimpleThread
BlockReadStreamInfoVector * rBRSIV,
BamStreamMergePackageReturnEvent & rBSMPR,
BamStreamMergePackageFinishedEvent & rBSMPF,
BamStreamMergePackageStatsUpdate & rBSMPSU
) : BRSIV(rBRSIV), BSMPR(rBSMPR), BSMPF(rBSMPF), BSMPSU(rBSMPSU), AOP(rAOP), S(256,false), R(256,false), mmask(getMMask())
BamStreamMergePackageStatsUpdate & rBSMPSU,
bool const radaptercheck
) : BRSIV(rBRSIV), BSMPR(rBSMPR), BSMPF(rBSMPF), BSMPSU(rBSMPSU), AOP(rAOP), S(256,false), R(256,false), mmask(getMMask()), adaptercheck(radaptercheck)
{
std::fill(S.begin(),S.end(),4);
S['a'] = S['A'] = 0;
......@@ -2274,9 +2279,11 @@ struct BamStreamMergePackageDispatcher : public libmaus2::parallel::SimpleThread
int64_t a30 = std::numeric_limits<int64_t>::min();
int64_t a31 = std::numeric_limits<int64_t>::min();
if ( ito - ifrom == 2 )
if ( adaptercheck && (ito - ifrom == 2) )
{
// get read1 data pointers
std::pair<uint8_t const *,uint8_t const *> const UP0(merge[ifrom+0].first,merge[ifrom+0].second);
// get read2 data pointers
std::pair<uint8_t const *,uint8_t const *> const UP1(merge[ifrom+1].first,merge[ifrom+1].second);
uint64_t const l0 = libmaus2::bambam::BamAlignmentDecoderBase::getLseq(UP0.first);
......@@ -2903,6 +2910,7 @@ struct BlockReadControl :
libmaus2::bambam::AdapterFilter const & rAF,
AdapterMatchParameters const & rAMP,
bool const rzz,
bool const radaptercheck,
AdapterOverlapParams const & rAOP,
uint64_t const newlineputback,
uint64_t const rblocksize,
......@@ -2939,8 +2947,8 @@ struct BlockReadControl :
BPNCP(&BRSIV,*this,*this),
BPNSP(&BRSIV,*this,*this),
BPLPP(&BRSIV,*this,*this),
BPBEP(checksumsFreeList,rginfo,rAF,rAMP,rzz,&BRSIV,*this,*this),
BSMP(rAOP,&BRSIV,*this,*this,*this),
BPBEP(checksumsFreeList,rginfo,rAF,rAMP,rzz,radaptercheck,&BRSIV,*this,*this),
BSMP(rAOP,&BRSIV,*this,*this,*this,radaptercheck),
BBCP(deflateFreeList,&BRSIV,*this,*this),
eofcnt(0),
eoflock(),
......@@ -3951,6 +3959,7 @@ int fastqtobam2(libmaus2::util::ArgInfo const & arginfo)
std::string const numerical = arginfo.getValue<std::string>("numerical",std::string());
bool const zz = arginfo.getValue<int>("zz",0);
bool const adaptercheck = arginfo.getValue<int>("adaptercheck",1);
uint64_t const adpmatchminscore = arginfo.getValue<uint64_t>("adpmatchminscore",getDefaultMatchMinScore());
double const adpmatchminfrac = arginfo.getValue<double>("adpmatchminfrac",getDefaultMatchMinFrac());
......@@ -4094,7 +4103,7 @@ int fastqtobam2(libmaus2::util::ArgInfo const & arginfo)
reinterpret_cast<uint8_t const *>(bgzfhead.c_str()),bgzfhead.size()
);
BlockReadControl BRC(out,STP,Vfn,level /* level */,rginfo,hash,&header,filedigest,numindexgen,AF,AMP,zz,AOP,16 /* newline putback */, 1024*1024,100);
BlockReadControl BRC(out,STP,Vfn,level /* level */,rginfo,hash,&header,filedigest,numindexgen,AF,AMP,zz,adaptercheck,AOP,16 /* newline putback */, 1024*1024,100);
while ( ! BRC.isEOF() && ! STP.isInPanicMode() )
{
......
......@@ -18,6 +18,8 @@
#include <config.h>
#include <libmaus2/vcf/VCFParser.hpp>
#include <libmaus2/vcf/VCFSortEntry.hpp>
#include <libmaus2/vcf/VCFSorter.hpp>
#include <libmaus2/util/ArgInfo.hpp>
#include <libmaus2/lz/BgzfDeflate.hpp>
......@@ -32,325 +34,13 @@
static int getDefaultGZ() { return 0; }
static int getDefaultDedup() { return 0; }
libmaus2::util::AutoArrayCharBaseAllocator AFLA(0);
libmaus2::util::GrowingFreeList< libmaus2::autoarray::AutoArray<char>, libmaus2::util::AutoArrayCharBaseAllocator, libmaus2::util::AutoArrayCharBaseTypeInfo > AFL(AFLA);
struct VCFEntry
{
public:
libmaus2::autoarray::AutoArray<char>::shared_ptr_type A;
uint64_t n;
uint64_t s_a;
uint64_t s_b;
uint64_t c_3_a;
uint64_t c_3_e;
uint64_t c_4_a;
uint64_t c_4_e;
private:
public:
VCFEntry() : A(AFL.get()), n(0), s_a(0), s_b(0), c_3_a(0), c_3_e(0), c_4_a(0), c_4_e(0) {}
~VCFEntry()
{
AFL.put(A);
}
VCFEntry(VCFEntry const & O) : A(AFL.get()), n(0), s_a(0), s_b(0), c_3_a(0), c_3_e(0), c_4_a(0), c_4_e(0)
{
*this = O;
}
void swap(VCFEntry & O)
{
std::swap(A ,O.A);
std::swap(n ,O.n);
std::swap(s_a ,O.s_a);
std::swap(s_b ,O.s_b);
std::swap(c_3_a,O.c_3_a);
std::swap(c_3_e,O.c_3_e);
std::swap(c_4_a,O.c_4_a);
std::swap(c_4_e,O.c_4_e);
}
std::ostream & print(std::ostream & out) const
{
out.write(A->begin(),n);
out.put('\n');
return out;
}
void set(char const * a, char const * e, uint64_t const r_s_a, uint64_t const r_s_b,
uint64_t r_c_3_a,
uint64_t r_c_3_e,
uint64_t r_c_4_a,
uint64_t r_c_4_e
)
{
n = e-a;
A->ensureSize(e-a);
std::copy(a,e,A->begin());
s_a = r_s_a;
s_b = r_s_b;
c_3_a = r_c_3_a;
c_3_e = r_c_3_e;
c_4_a = r_c_4_a;
c_4_e = r_c_4_e;
}
VCFEntry & operator=(VCFEntry const & O)
{
if ( this != &O )
{
A->ensureSize(O.A->size());
std::copy(
O.A->begin(),
O.A->begin() + O.n,
A->begin()
);
n = O.n;
s_a = O.s_a;
s_b = O.s_b;
c_3_a = O.c_3_a;
c_3_e = O.c_3_e;
c_4_a = O.c_4_a;
c_4_e = O.c_4_e;
}
return *this;
}
std::ostream & serialise(std::ostream & out) const
{
libmaus2::util::NumberSerialisation::serialiseNumber(out,n);
out.write(A->begin(),n);
libmaus2::util::NumberSerialisation::serialiseNumber(out,s_a);
libmaus2::util::NumberSerialisation::serialiseNumber(out,s_b);
libmaus2::util::NumberSerialisation::serialiseNumber(out,c_3_a);
libmaus2::util::NumberSerialisation::serialiseNumber(out,c_3_e);
libmaus2::util::NumberSerialisation::serialiseNumber(out,c_4_a);
libmaus2::util::NumberSerialisation::serialiseNumber(out,c_4_e);
return out;
}
std::istream & deserialise(std::istream & in)
{
n = libmaus2::util::NumberSerialisation::deserialiseNumber(in);
A->ensureSize(n);
in.read(A->begin(),n);
if ( !in || in.gcount() != static_cast<std::streamsize>(n) )
{
libmaus2::exception::LibMausException lme;
lme.getStream() << "[E] VCFEntry::deserialise: failed to read data" << std::endl;
lme.finish();
throw lme;
}
s_a = libmaus2::util::NumberSerialisation::deserialiseNumber(in);
s_b = libmaus2::util::NumberSerialisation::deserialiseNumber(in);
c_3_a = libmaus2::util::NumberSerialisation::deserialiseNumber(in);
c_3_e = libmaus2::util::NumberSerialisation::deserialiseNumber(in);
c_4_a = libmaus2::util::NumberSerialisation::deserialiseNumber(in);
c_4_e = libmaus2::util::NumberSerialisation::deserialiseNumber(in);
return in;
}
bool operator<(VCFEntry const & O) const
{
if ( s_a != O.s_a )
{
return s_a < O.s_a;
}
else if ( s_b != O.s_b )
{
return s_b < O.s_b;
}
else
{
uint64_t const l_3 = c_3_e-c_3_a;
uint64_t const O_l_3 = O.c_3_e-O.c_3_a;
int const c3 =
strncmp(
A->begin()+c_3_a,
O.A->begin()+O.c_3_a,
std::min(l_3,O_l_3)
);
if ( c3 != 0 )
return c3 < 0;
else if ( l_3 != O_l_3 )
return l_3 < O_l_3;
uint64_t const l_4 = c_4_e-c_4_a;
uint64_t const O_l_4 = O.c_4_e-O.c_4_a;
int const c4 =
strncmp(
A->begin()+c_4_a,
O.A->begin()+O.c_4_a,
std::min(l_4,O_l_4)
);
if ( c4 != 0 )
return c4 < 0;
else if ( l_4 != O_l_4 )
return l_4 < O_l_4;
return false;
}
}
};
static int64_t parseUInt(char const * a, char const * e)
{
uint64_t v = 0;
while ( a != e )
{
char const c = *(a++);
if ( c >= '0' && c <= '9' )
{
v *= 10;
v += c-'0';
}
else
{
return -1;
}
}
return v;
}
int vcfsort(libmaus2::util::ArgInfo const & arginfo, std::istream & in, std::ostream & out)
{
bool const gz = arginfo.getValue<int>("gz",getDefaultGZ());
int const dedup = arginfo.getValue<int>("dedup",getDefaultDedup());
std::string const tmpfilename = arginfo.getUnparsedValue("T",arginfo.getDefaultTmpFileName());
libmaus2::util::TempFileRemovalContainer::addTempFile(tmpfilename);
libmaus2::autoarray::AutoArray< char > contig_A;
libmaus2::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > contig_O;
libmaus2::vcf::VCFParser vcf(in);
// uint64_t n_contig = vcf.getContigNames(contig_A,contig_O);
std::vector<std::string> const Vcontig = vcf.getContigNames();
#if 0
for ( uint64_t i = 0; i < n_contig; ++i )
{
std::string const name(contig_A.begin() + contig_O[i].first,contig_A.begin() + contig_O[i].second);
Vcontig.push_back(name);
}
#endif
::libmaus2::trie::Trie<char> trienofailure;
trienofailure.insertContainer(Vcontig);
::libmaus2::trie::LinearHashTrie<char,uint32_t>::unique_ptr_type LHTnofailure(trienofailure.toLinearHashTrie<uint32_t>());
libmaus2::util::TabEntry<> T;
VCFEntry VE;
std::pair<bool, char const *> P;
libmaus2::sorting::SerialisingSortingBufferedOutputFile<VCFEntry>::unique_ptr_type Psorter(
new libmaus2::sorting::SerialisingSortingBufferedOutputFile<VCFEntry>(tmpfilename,64*1024)
);
while ( (P=vcf.readEntry(T)).first )
{
uint64_t const s = T.size();
if ( s >= 5 )
{
char const * a = P.second;
char const * e = T.get(s-1,a).second;
std::pair<char const *,char const *> P0(T.get(0,a));
std::pair<char const *,char const *> P1(T.get(1,a));
std::pair<char const *,char const *> P3(T.get(3,a));
std::pair<char const *,char const *> P4(T.get(4,a));
int64_t const contid = LHTnofailure->searchCompleteNoFailure(P0.first,P0.second);
int64_t const pos = parseUInt(P1.first,P1.second);
if ( contid >= 0 && pos >= 0 )
{
VE.set(
a,e,contid,pos,
P3.first-a,
P3.second-a,
P4.first-a,
P4.second-a
);
Psorter->put(VE);
}
else
{
libmaus2::exception::LibMausException lme;
lme.getStream() << "[E] unable to parse " << std::string(a,e) << std::endl;
lme.finish();
throw lme;
}
}
}
libmaus2::lz::BgzfOutputStream::unique_ptr_type pBOS;
if ( gz )
{
libmaus2::lz::BgzfOutputStream::unique_ptr_type tBOS(new libmaus2::lz::BgzfOutputStream(out));
pBOS = UNIQUE_PTR_MOVE(tBOS);
}
std::ostream & vout = gz ? *pBOS : out;
VCFEntry VEprev;
bool VEprevvalid = false;
vcf.printText(vout);
libmaus2::sorting::SerialisingSortingBufferedOutputFile<VCFEntry>::merger_ptr_type Pmerger(Psorter->getMerger());
while ( Pmerger->getNext(VE) )
{
if (
(!dedup)
||
(!VEprevvalid)
||
(VEprev < VE)
)
{
VE.print(vout);
#if 0
if ( VEprevvalid && VE.s_a == VEprev.s_a && VE.s_b == VEprev.s_b )
{
std::cerr << std::string(80,'=') << std::endl;
VEprev.print(std::cerr);
VE.print(std::cerr);
}
#endif
}
VEprev.swap(VE);
VEprevvalid = true;
}
Pmerger.reset();
Psorter.reset();
if ( gz )
{
pBOS->flush();
pBOS->addEOFBlock();
pBOS.reset();
}
libmaus2::aio::FileRemoval::removeFile(tmpfilename);
libmaus2::vcf::VCFSorter::sort(in,out,gz,dedup,tmpfilename);
return EXIT_SUCCESS;
}
......