Skip to content
Commits on Source (4)
......@@ -65,3 +65,4 @@ src/vcfconcat
src/vcfsort
removespace.sh
src/filtergtf
src/vcfreplacecontigs
biobambam2 (2.0.152-1) unstable; urgency=medium
* update libmaus2 version to 2.0.694
-- German Tischler-Höhle <germant@miltenyibiotec.de> Wed, 22 Jan 2020 09:58:27 +0100
biobambam2 (2.0.151-1) unstable; urgency=medium
* bump libmaus2 version
-- German Tischler-Höhle <germant@miltenyibiotec.de> Tue, 21 Jan 2020 16:41:38 +0100
biobambam2 (2.0.150-1) unstable; urgency=medium
* Add cols (line wrapping) option in bamtofastq
-- German Tischler-Höhle <germant@miltenyibiotec.de> Tue, 07 Jan 2020 16:37:02 +0100
biobambam2 (2.0.149-1) unstable; urgency=medium
* use kmerCallbackPosFR instead of kmerCallbackPos in bamfilterk
* Add vcfreplacecontigs program
-- German Tischler-Höhle <germant@miltenyibiotec.de> Tue, 07 Jan 2020 09:02:29 +0100
biobambam2 (2.0.148-1) unstable; urgency=medium
* compute maximum depth in bamfeaturecount
* fix output of wrong reference id in bamfeaturecount if GTF chrom set is a subset of BAM chrom set
-- German Tischler-Höhle <germant@miltenyibiotec.de> Wed, 13 Nov 2019 12:53:00 +0100
biobambam2 (2.0.147-1) unstable; urgency=medium
* fix heap comparisons in bamconsensus
......
This diff is collapsed.
This diff is collapsed.
AC_INIT(biobambam2,2.0.147,[germant@miltenyibiotec.de],[biobambam2],[https://gitlab.com/german.tischler/biobambam2])
AC_INIT(biobambam2,2.0.152,[germant@miltenyibiotec.de],[biobambam2],[https://gitlab.com/german.tischler/biobambam2])
AC_CANONICAL_SYSTEM
AC_PROG_LIBTOOL
......@@ -165,7 +165,7 @@ if test ! -z "${with_libmaus2}" ; then
fi
fi
PKG_CHECK_MODULES([libmaus2],[libmaus2 >= 2.0.683])
PKG_CHECK_MODULES([libmaus2],[libmaus2 >= 2.0.694])
if test ! -z "${with_libmaus2}" ; then
if test ! -z "${PKGCONFIGPATHSAVE}" ; then
......@@ -300,7 +300,7 @@ if test "${have_libmaus2_irods}" = "yes" ; then
fi
fi
PKG_CHECK_MODULES([libmaus2irods],[libmaus2irods >= 2.0.683])
PKG_CHECK_MODULES([libmaus2irods],[libmaus2irods >= 2.0.694])
LIBMAUS2IRODSCPPFLAGS="${libmaus2irods_CFLAGS}"
LIBMAUS2IRODSLIBS="${libmaus2irods_LIBS}"
......@@ -463,7 +463,7 @@ AC_ARG_ENABLE(install_uncommon,
AS_HELP_STRING([--enable-install-uncommon],[enable installation of some uncommon programs (default no)]),
[install_uncommon=${enableval}],[install_uncommon=no])
UNCOMMON="bamfilter bamfixmatecoordinates bamfixmatecoordinatesnamesorted bamtoname bamdisthist fastabgzfextract bamheap bamfrontback bamrandomtag bamheap2 bamheap3 bamtagconversion fastqtobampar bambisect vcffilterinfo vcfpatchcontigprepend vcfconcat vcfsort filtergtf bamconsensus"
UNCOMMON="bamfilter bamfixmatecoordinates bamfixmatecoordinatesnamesorted bamtoname bamdisthist fastabgzfextract bamheap bamfrontback bamrandomtag bamheap2 bamheap3 bamtagconversion fastqtobampar bambisect vcffilterinfo vcfpatchcontigprepend vcfconcat vcfsort filtergtf bamconsensus vcfreplacecontigs"
UNCOMMONINSTALLED=
UNCOMMONUNINSTALLED=
if test "${install_uncommon}" = "yes" ; then
......
biobambam2 (2.0.152+dfsg-1) unstable; urgency=medium
* Team upload.
* New upstream version
-- Steffen Moeller <moeller@debian.org> Fri, 24 Jan 2020 12:37:00 +0100
biobambam2 (2.0.147+dfsg-1) unstable; urgency=medium
* Team upload.
......
......@@ -5,7 +5,7 @@ Section: science
Priority: optional
Build-Depends: debhelper-compat (= 12),
pkg-config,
libmaus2-dev (>= 2.0.284)
libmaus2-dev
Standards-Version: 4.4.1
Vcs-Browser: https://salsa.debian.org/med-team/biobambam2
Vcs-Git: https://salsa.debian.org/med-team/biobambam2.git
......
......@@ -138,6 +138,7 @@ EXTRA_PROGRAMS = blastnxmltobam \
vcfpatchcontigprepend \
vcfconcat \
vcfsort \
vcfreplacecontigs \
filtergtf
populaterefcache_SOURCES = programs/populaterefcache.cpp biobambam2/Licensing.cpp
......@@ -600,6 +601,11 @@ vcfsort_LDADD = ${LIBMAUS2LIBS}
vcfsort_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} ${AM_LDFLAGS}
vcfsort_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS}
vcfreplacecontigs_SOURCES = programs/vcfreplacecontigs.cpp biobambam2/Licensing.cpp
vcfreplacecontigs_LDADD = ${LIBMAUS2LIBS}
vcfreplacecontigs_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} ${AM_LDFLAGS}
vcfreplacecontigs_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS}
bamfiltereofblocks_SOURCES = programs/bamfiltereofblocks.cpp biobambam2/Licensing.cpp biobambam2/RunEOFFilter.cpp
bamfiltereofblocks_LDADD = ${LIBMAUS2LIBS}
bamfiltereofblocks_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} ${AM_LDFLAGS}
......
......@@ -47,6 +47,7 @@
#include <libmaus2/util/LELoad.hpp>
#include <libmaus2/bambam/ProgramHeaderLineSet.hpp>
#include <libmaus2/fastx/FastAReader.hpp>
#include <libmaus2/bambam/StrCmpNum.hpp>
#include <biobambam2/BamBamConfig.hpp>
#include <biobambam2/Licensing.hpp>
......
......@@ -1854,7 +1854,6 @@ std::vector<ConsensusPart> handleConsensus(
}
#endif
// get set of branch points
std::set<BranchPoint> SBP;
for ( uint64_t i = 0; i < VBL.size(); ++i )
......@@ -1873,17 +1872,22 @@ std::vector<ConsensusPart> handleConsensus(
BranchPoint const & from = cur.from;
BranchPoint const & to = cur.to;
// get from id in branch point vector
uint64_t const fromid = std::lower_bound(VBP.begin(),VBP.begin()+VBP.size(),from) - VBP.begin();
assert ( fromid < VBP.size() && VBP[fromid] == from );
// get to id in branch point vector
uint64_t const toid = std::lower_bound(VBP.begin(),VBP.begin()+VBP.size(),to) - VBP.begin();
//
assert ( toid < VBP.size() && VBP[toid] == to );
// store link (1 based)
edgeMap[fromid+1].push_back(toid+1);
}
std::vector< std::vector<uint64_t> > Vcomp;
std::map<uint64_t,uint64_t> Vcompmap;
// compute connected components
{
std::set<uint64_t> unseen;
std::map<uint64_t,std::vector<uint64_t> > redgeMap;
......@@ -1900,7 +1904,6 @@ std::vector<ConsensusPart> handleConsensus(
}
}
while ( unseen.size() )
{
#if 0
......@@ -1945,9 +1948,11 @@ std::vector<ConsensusPart> handleConsensus(
std::vector<uint64_t> const V(component.begin(),component.end());
// set branch point id to component map
for ( uint64_t i = 0; i < V.size(); ++i )
Vcompmap[V[i]] = Vcomp.size();
// push connected component
Vcomp.push_back(V);
#if 0
......@@ -1959,6 +1964,7 @@ std::vector<ConsensusPart> handleConsensus(
}
}
// consider connected components
for ( uint64_t z = 0; z < Vcomp.size(); ++z )
{
std::vector<uint64_t> const & V = Vcomp[z];
......@@ -1968,13 +1974,16 @@ std::vector<ConsensusPart> handleConsensus(
int64_t maxx = -1;
int64_t maxu = -1;
// iterate over branch points in component
for ( uint64_t y = 0; y < V.size(); ++y )
{
assert ( V[y] > 0 );
// get branch point
BranchPoint const & BP = VBP[V[y]-1];
typedef std::vector<BranchLink>::const_iterator it;
// get branch links originating from BP
std::pair<it,it> const P = std::equal_range(
VBL.begin(),
VBL.end(),
......@@ -1982,9 +1991,12 @@ std::vector<ConsensusPart> handleConsensus(
BranchLinkFromComparator()
);
// iterate over branch links
for ( it x = P.first; x != P.second; ++x )
{
// get link
BranchLink const & BL = *x;
// get kmer links
KmerLink const * KL_A = AKLBL.begin() + BL.link_start;
KmerLink const * KL_E = AKLBL.begin() + BL.link_end;
......@@ -1993,8 +2005,10 @@ std::vector<ConsensusPart> handleConsensus(
// std::cerr << BL << std::endl;
// iterate over kmer links on branch link
for ( KmerLink const * KL_C = KL_A; KL_C != KL_E; ++KL_C )
{
// get frequency of from and to
int64_t const fromfreq = KL_C->fromfreq;
int64_t const tofreq = KL_C->tofreq;
......@@ -2021,10 +2035,12 @@ std::vector<ConsensusPart> handleConsensus(
assert ( maxfreq > 0 );
// get branch point carying maximum frequency link
BranchPoint const & BP = VBP[V[maxy]-1];
typedef std::vector<BranchLink>::const_iterator it;
// get vector of links originating from BP
std::pair<it,it> const P = std::equal_range(
VBL.begin(),
VBL.end(),
......@@ -2048,8 +2064,10 @@ std::vector<ConsensusPart> handleConsensus(
BranchLink curBL = P.first[maxx];
VBLmax.push_back(curBL);
// follow backward links from curBL
while ( true )
{
// look for links
std::pair<it,it> const LP =
std::equal_range(
VBLto.begin(),
......@@ -2058,8 +2076,10 @@ std::vector<ConsensusPart> handleConsensus(
BranchLinkToComparator()
);
// if any
if ( LP.second-LP.first )
{
// look for maximum weight
double weight = -std::numeric_limits<double>::max();
int64_t maxi = -1;
for ( int64_t i = 0; i < LP.second-LP.first; ++i )
......@@ -2079,6 +2099,7 @@ std::vector<ConsensusPart> handleConsensus(
}
}
// reverse path (we were going backward)
std::reverse(VBLmax.begin(),VBLmax.end());
curBL = P.first[maxx];
......@@ -2086,8 +2107,10 @@ std::vector<ConsensusPart> handleConsensus(
std::vector<BranchLink> VBLfrom(VBL);
std::sort(VBLfrom.begin(),VBLfrom.end(),BranchLinkFromComparator());
// follow forward
while ( true )
{
// look for extensions
std::pair<it,it> const LP =
std::equal_range(
VBLfrom.begin(),
......@@ -2096,8 +2119,10 @@ std::vector<ConsensusPart> handleConsensus(
BranchLinkFromComparator()
);
// if any
if ( LP.second-LP.first )
{
// look for maximum weight
double weight = -std::numeric_limits<double>::max();
int64_t maxi = -1;
for ( int64_t i = 0; i < LP.second-LP.first; ++i )
......@@ -2123,6 +2148,7 @@ std::vector<ConsensusPart> handleConsensus(
std::cerr << "full path" << std::endl;
#endif
// decode kmer path
std::ostringstream fullpathstr;
if ( VBLmax.size() )
{
......
......@@ -137,6 +137,8 @@ int bamfeaturecount(libmaus2::util::ArgInfo const & arginfo)
libmaus2::gtf::GTFData::unique_ptr_type pGTF(libmaus2::gtf::GTFData::obtain(annofn,verbose));
libmaus2::gtf::GTFData const & gtfdata = *pGTF;
// gtfdata.print(std::cerr);
if ( exportcdna )
{
if ( !arginfo.hasArg("reference") )
......@@ -651,6 +653,10 @@ int bamfeaturecount(libmaus2::util::ArgInfo const & arginfo)
}
#endif
}
else
{
// std::cerr << "[W] no GTF data found for " << algn.formatAlignment(bamheader) << std::endl;
}
}
#if 0
......@@ -672,7 +678,6 @@ int bamfeaturecount(libmaus2::util::ArgInfo const & arginfo)
{
libmaus2::autoarray::AutoArray<libmaus2::gtf::ExonInfo> AEI(gtfdata.Aexon.size());
libmaus2::sorting::SerialisingSortingBufferedOutputFileArray<libmaus2::gtf::ExonInterval>::merger_ptr_type merger(
......@@ -695,9 +700,10 @@ int bamfeaturecount(libmaus2::util::ArgInfo const & arginfo)
uint64_t uncovered;
uint64_t basecover;
uint64_t total;
uint64_t maxdepth;
DepthCallback()
: uncovered(0), basecover(0), total(0) {}
: uncovered(0), basecover(0), total(0), maxdepth(0) {}
void operator()(uint64_t const from, uint64_t const to, uint64_t const depth)
{
......@@ -705,6 +711,9 @@ int bamfeaturecount(libmaus2::util::ArgInfo const & arginfo)
uint64_t const l = to-from;
if ( depth > maxdepth )
maxdepth = depth;
if ( depth )
{
basecover += l * depth;
......@@ -841,13 +850,14 @@ int bamfeaturecount(libmaus2::util::ArgInfo const & arginfo)
info.unique.uncovered += DC.uncovered;
info.unique.basecount += DC.basecover;
info.unique.total += DC.total;
info.unique.maxdepth = std::max(info.unique.maxdepth,DC.maxdepth);
}
else
{
info.ambig.uncovered += DC.uncovered;
info.ambig.basecount += DC.basecover;
info.ambig.total += DC.total;
info.ambig.maxdepth = std::max(info.ambig.maxdepth,DC.maxdepth);
}
}
......@@ -902,8 +912,7 @@ int bamfeaturecount(libmaus2::util::ArgInfo const & arginfo)
char const * name = gtfdata.Aid.begin() + G.name_offset;
uint64_t o = 0;
std::string const refidname = bamheader.getRefIDName(G.chr_id);
std::string const refidname = gtfdata.Vchr[G.chr_id]; //bamheader.getRefIDName(G.chr_id);
for ( uint64_t ti = G.start; ti < G.end; ++ti )
if (
......
......@@ -451,13 +451,14 @@ int bamfilterk(libmaus2::util::ArgInfo const & arginfo)
void operator()(uint64_t const kmer, uint64_t const pos, bool const z)
{
//std::cerr << "kmer " << kmer << " seqid " << seqid << " pos " << pos << " z " << z << std::endl;
AK.push(o_AK,KMer(kmer,seqid,pos,z));
}
};
Callback CB(i,AK,o_AK);
KB.kmerCallbackPos(
KB.kmerCallbackPosFR(
Vref[i].c_str(),
Vref[i].size(),
CB,
......
......@@ -204,6 +204,10 @@ The suffixes k, m, g, K, M and G can be used to denote that the argument is
to be multiplied by 1024, 1024^2, 1024^3, 1000, 1000^2 or 1000^3
respectively.
.PP
.B cols=<>:
If set to an unsigned number then wrap the sequence and quality lines at
this number of columns. By default no wrapping is performed.
.PP
.B splitprefix=<bamtofastq_split>:
file prefix if split>0 and collate=0.
.PP
......
This diff is collapsed.
/**
bambam
Copyright (C) 2019 German Tischler
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**/
#include <config.h>
#include <libmaus2/vcf/VCFParser.hpp>
#include <libmaus2/util/ArgInfo.hpp>
#include <libmaus2/lz/BgzfDeflate.hpp>
#include <libmaus2/fastx/FastAReader.hpp>
#include <biobambam2/Licensing.hpp>
static int getDefaultVerbose() { return 0; }
int vcfreplacecontigs(libmaus2::util::ArgInfo const & arginfo, std::istream & in, std::ostream & out)
{
//unsigned int const verbose = arginfo.getValue<unsigned int>("verbose",getDefaultVerbose());
bool const gz = arginfo.getValue<int>("gz",0);
libmaus2::lz::BgzfOutputStream::unique_ptr_type pBOS;
if ( gz )
{
libmaus2::lz::BgzfOutputStream::unique_ptr_type tBOS(new libmaus2::lz::BgzfOutputStream(out));
pBOS = UNIQUE_PTR_MOVE(tBOS);
}
std::ostream & vout = gz ? *pBOS : out;
libmaus2::vcf::VCFParser vcf(in);
if ( ! ( 0 < arginfo.getNumRestArgs() ) )
{
std::cerr << "[E] usage: " << arginfo.progname << " <in.fa> <in.vcf" << std::endl;
return EXIT_FAILURE;
}
std::string const fafn = arginfo.getUnparsedRestArg(0);
std::vector<libmaus2::bambam::Chromosome> V;
std::set<std::string> S;
{
libmaus2::fastx::FastAReader FA(fafn);
libmaus2::fastx::FastAReader::pattern_type pat;
while ( FA.getNextPatternUnlocked(pat) )
{
std::string const name = pat.getShortStringId();
V.push_back(libmaus2::bambam::Chromosome(name,pat.spattern.size()));
S.insert(name);
std::cerr << "[V] pushed " << name << "[" << pat.spattern.size() << "]" << std::endl;
}
}
vcf.setChromosomeVector(V);
vcf.printText(vout);
std::pair<
libmaus2::util::TabEntry<> const *,
char const *
> P;
std::pair<
std::vector<std::string>,
::libmaus2::trie::LinearHashTrie<char,uint32_t>::shared_ptr_type
> const Pcontig = vcf.getContigNamesAndTrie();
uint64_t line = 0;
while ( (P = vcf.readEntry()).first )
{
if ( P.first->size() )
{
std::pair<char const *, char const *> const Pchr = P.first->get(0,P.second);
if ( Pcontig.second->searchCompleteNoFailure(Pchr.first,Pchr.second) != -1 )
{
char const * a = P.first->get(0,P.second).first;
char const * e = P.first->get(P.first->size()-1,P.second).second;
vout.write(a,e-a);
vout.put('\n');
line += 1;
if ( line % (1024*1024) == 0 )
std::cerr << "[V] " << line << std::endl;
}
else
{
libmaus2::exception::LibMausException lme;
lme.getStream() << "[E] sequence " << std::string(Pchr.first,Pchr.second) << " does not appear in FastA file provided" << std::endl;
lme.finish();
throw lme;
}
}
}
// vcf.unparsedCopy(vout);
#if 0
if ( Vin.size() )
{
std::pair<bool, char const *> E;
{
libmaus2::aio::InputStreamInstance ISI(Vin[0]);
libmaus2::vcf::VCFParser vcf(ISI);
vcf.printText(vout);
vcf.unparsedCopy(vout);
}
for ( uint64_t j = 1; j < Vin.size(); ++j )
{
libmaus2::aio::InputStreamInstance ISI(Vin[j]);
libmaus2::vcf::VCFParser vcf(ISI);
vcf.unparsedCopy(vout);
}
}
#endif
if ( gz )
{
pBOS->flush();
pBOS->addEOFBlock();
pBOS.reset();
}
return EXIT_SUCCESS;
}
int main(int argc, char * argv[])
{
try
{
::libmaus2::util::ArgInfo const arginfo(argc,argv);
for ( uint64_t i = 0; i < arginfo.restargs.size(); ++i )
if (
arginfo.restargs[i] == "-v"
||
arginfo.restargs[i] == "--version"
)
{
std::cerr << ::biobambam2::Licensing::license();
return EXIT_SUCCESS;
}
else if (
arginfo.restargs[i] == "-h"
||
arginfo.restargs[i] == "--help"
)
{
std::cerr << ::biobambam2::Licensing::license();
std::cerr << std::endl;
std::cerr << "Key=Value pairs:" << std::endl;
std::cerr << std::endl;
std::vector< std::pair<std::string,std::string> > V;
V.push_back ( std::pair<std::string,std::string> ( "verbose=<["+::biobambam2::Licensing::formatNumber(getDefaultVerbose())+"]>", "print progress report (default: 1)" ) );
::biobambam2::Licensing::printMap(std::cerr,V);
std::cerr << std::endl;
return EXIT_SUCCESS;
}
return vcfreplacecontigs(arginfo,std::cin,std::cout);
}
catch(std::exception const & ex)
{
std::cerr << ex.what() << std::endl;
return EXIT_FAILURE;
}
}