Skip to content
Commits on Source (4)
......@@ -2,7 +2,7 @@ cmake_minimum_required( VERSION 2.8.2 )
project( FLEXBAR )
set( SEQAN_APP_VERSION "3.1.0" )
set( SEQAN_APP_VERSION "3.2.0" )
include_directories( ${FLEXBAR_SOURCE_DIR}/include )
# link_directories( ${FLEXBAR_SOURCE_DIR}/lib )
......
......@@ -28,20 +28,20 @@ Flexbar source code as well as binaries for Linux and Mac OS can be downloaded o
Make sure that `cmake` is available, as well as development and runtime files of the TBB library 4.0 or later (Intel Threading Building Blocks). For example on Debian systems, install the packages `libtbb-dev` and `libtbb2`. Furthermore, the SeqAn library and a compiler that supports C++14 is required:
* Get SeqAn library version 2.2.0 [here](https://github.com/seqan/seqan/releases/download/seqan-v2.2.0/seqan-library-2.2.0.tar.xz)
* Download Flexbar 3.1 source code [release](https://github.com/seqan/flexbar/releases)
* Download Flexbar 3.2 source code [release](https://github.com/seqan/flexbar/releases)
Decompress both files:
tar xzf flexbar-3.1.0.tar.gz
tar xzf flexbar-3.2.0.tar.gz
tar xJf seqan-library-2.2.0.tar.xz
Move SeqAn include folder to Flexbar:
mv seqan-library-2.2.0/include flexbar-3.1.0
mv seqan-library-2.2.0/include flexbar-3.2.0
Use these commands for building:
cd flexbar-3.1.0
cd flexbar-3.2.0
cmake .
make
......@@ -77,9 +77,9 @@ In this example, reads that are barcoded on left side are demultiplexed by speci
flexbar -r reads.fq -t target -b brc.fa -be LTAIL -a adp.fa
The second example shows how to trim compressed reads based on their quality scores in illumina version 1.8 format. Afterwards, provided adapters are removed in right trim-end mode, only if the overlap of adapter and read has at least length five with at most 40% errors.
The second example shows how to trim compressed reads based on their quality scores in illumina version 1.8 format. Afterwards, provided adapters are removed in right trim-end mode, only if the overlap of adapter and read has at least length 5 with at most 20% errors.
flexbar -r reads.fq.gz -q TAIL -qf i1.8 -a adp.fa -ao 5 -at 0.4
flexbar -r reads.fq.gz -q TAIL -qf i1.8 -a adp.fa -ao 5 -at 0.2
For further examples visit the [manual](https://github.com/seqan/flexbar/wiki) page.
flexbar (1:3.2.0-1) unstable; urgency=medium
* New upstream version
-- Andreas Tille <tille@debian.org> Tue, 22 May 2018 15:54:36 +0200
flexbar (1:3.1.0-1) unstable; urgency=medium
[ Steffen Moeller ]
......
......@@ -2,7 +2,9 @@
Flexbar - flexible barcode and adapter removal
Version 3.1.0
Version 3.2.0
BSD 3-Clause License
uses SeqAn library release 2.2.0
and TBB library 4.0 or later
......@@ -27,8 +29,8 @@ int main(int argc, const char* argv[]){
using namespace std;
using namespace seqan;
const string version = "3.1";
const string date = "April 2018";
const string version = "3.2";
const string date = "May 2018";
ArgumentParser parser("flexbar");
......
......@@ -98,17 +98,18 @@ void loadAdapters(Options &o, const bool secondSet, const bool useAdapterFile){
}
}
else{
if(o.rcMode == RCOFF || o.rcMode == RCON){
TBar bar;
bar.id = "cmdline";
bar.seq = o.adapterSeq;
o.adapters.push_back(bar);
if(o.revCompAdapter){
}
if(o.rcMode == RCON || o.rcMode == RCONLY){
TSeqStr adapterSeqRC = o.adapterSeq;
seqan::reverseComplement(adapterSeqRC);
TBar barRC;
barRC.id = "cmdline revcomp";
barRC.id = "cmdline_rc";
barRC.seq = adapterSeqRC;
o.adapters.push_back(barRC);
}
......
......@@ -11,17 +11,21 @@ class SeqRead {
TSeqStr seq;
TString id, qual, umi;
bool rmAdapter, rmAdapterRC;
SeqRead(TSeqStr& sequence, TString& seqID) :
seq(sequence),
id(seqID),
umi(""){
rmAdapter(false),
rmAdapterRC(false){
}
SeqRead(TSeqStr& sequence, TString& seqID, TString& quality) :
seq(sequence),
id(seqID),
qual(quality),
umi(""){
rmAdapter(false),
rmAdapterRC(false){
}
};
......@@ -60,7 +64,7 @@ struct AlignResults{
int overlapLength, queryLength, tailLength;
float allowedErrors;
TSeqStr randTag;
TSeqStr umiTag;
std::string alString;
AlignResults(){}
......@@ -116,16 +120,30 @@ namespace flexbar{
FString id;
FSeqStr seq;
bool rcAdapter;
tbb::atomic<unsigned long> rmOverlap, rmFull;
TBar() :
rmOverlap(0),
rmFull(0){
rmFull(0),
rcAdapter(false){
}
};
enum RevCompMode {
RCOFF,
RCON,
RCONLY
};
enum AlignmentMode {
ALIGNALL,
ALIGNRCOFF,
ALIGNRC
};
enum ComputeCycle {
PRELOAD,
COMPUTE,
......
......@@ -12,16 +12,16 @@ private:
std::ostream *out;
tbb::concurrent_vector<flexbar::TBar> bars;
bool m_revComp, m_isAdapter;
const bool m_isAdapter;
const flexbar::RevCompMode m_rcMode;
public:
LoadFasta(const Options &o, const bool isAdapter) :
out(o.out),
m_rcMode(o.rcMode),
m_isAdapter(isAdapter){
m_revComp = o.revCompAdapter && isAdapter;
};
......@@ -64,21 +64,24 @@ public:
}
else idMap[ids[i]] = 1;
if(! m_isAdapter || m_rcMode == RCOFF || m_rcMode == RCON){
TBar bar;
bar.id = ids[i];
bar.seq = seqs[i];
bars.push_back(bar);
}
if(m_revComp){
TSeqStr seq = seqs[i];
if(m_isAdapter && (m_rcMode == RCON || m_rcMode == RCONLY)){
TString id = ids[i];
TSeqStr seq = seqs[i];
append(id, " revcomp");
append(id, "_rc");
seqan::reverseComplement(seq);
TBar barRC;
barRC.id = id;
barRC.seq = seq;
barRC.rcAdapter = true;
bars.push_back(barRC);
}
}
......
This diff is collapsed.
......@@ -12,12 +12,22 @@ class PairedAlign : public tbb::filter {
private:
const bool m_writeUnassigned, m_twoBarcodes;
const bool m_writeUnassigned, m_twoBarcodes, m_umiTags, m_useRcTrimEnd;
const bool m_htrim, m_htrimAdapterRm, m_htrimMaxFirstOnly;
const std::string m_htrimLeft, m_htrimRight;
const int m_htrimMinLength, m_htrimMaxLength;
const float m_htrimErrorRate;
const unsigned int m_arTimes;
const flexbar::FileFormat m_format;
const flexbar::LogAlign m_log;
const flexbar::RunType m_runType;
const flexbar::BarcodeDetect m_barType;
const flexbar::AdapterRemoval m_adapRem;
const flexbar::TrimEnd m_aTrimEnd, m_arcTrimEnd, m_bTrimEnd;
tbb::atomic<unsigned long> m_unassigned;
tbb::concurrent_vector<flexbar::TBar> *m_adapters, *m_adapters2;
......@@ -33,11 +43,26 @@ public:
PairedAlign(Options &o) :
filter(parallel),
m_format(o.format),
m_log(o.logAlign),
m_runType(o.runType),
m_barType(o.barDetect),
m_adapRem(o.adapRm),
m_aTrimEnd(o.a_end),
m_arcTrimEnd(o.arc_end),
m_bTrimEnd(o.b_end),
m_arTimes(o.a_cycles),
m_umiTags(o.umiTags),
m_useRcTrimEnd(o.useRcTrimEnd),
m_writeUnassigned(o.writeUnassigned),
m_htrimLeft(o.htrimLeft),
m_htrimRight(o.htrimRight),
m_htrimMinLength(o.htrimMinLength),
m_htrimMaxLength(o.htrimMaxLength),
m_htrimMaxFirstOnly(o.htrimMaxFirstOnly),
m_htrimErrorRate(o.h_errorRate),
m_htrimAdapterRm(o.htrimAdapterRm),
m_htrim(o.htrimLeft != "" || o.htrimRight != ""),
m_twoBarcodes(o.barDetect == flexbar::WITHIN_READ_REMOVAL2 || o.barDetect == flexbar::WITHIN_READ2),
out(o.out),
m_unassigned(0){
......@@ -47,11 +72,11 @@ public:
m_barcodes2 = &o.barcodes2;
m_adapters2 = &o.adapters2;
m_b1 = new TSeqAlign(m_barcodes, o, o.b_min_overlap, o.b_errorRate, o.b_tail_len, o.b_match, o.b_mismatch, o.b_gapCost, o.b_end, true);
m_a1 = new TSeqAlign(m_adapters, o, o.a_min_overlap, o.a_errorRate, o.a_tail_len, o.match, o.mismatch, o.gapCost, o.end, false);
m_b1 = new TSeqAlign(m_barcodes, o, o.b_min_overlap, o.b_errorRate, o.b_tail_len, o.b_match, o.b_mismatch, o.b_gapCost, true);
m_a1 = new TSeqAlign(m_adapters, o, o.a_min_overlap, o.a_errorRate, o.a_tail_len, o.match, o.mismatch, o.gapCost, false);
m_b2 = new TSeqAlign(m_barcodes2, o, o.b_min_overlap, o.b_errorRate, o.b_tail_len, o.b_match, o.b_mismatch, o.b_gapCost, o.b_end, true);
m_a2 = new TSeqAlign(m_adapters2, o, o.a_min_overlap, o.a_errorRate, o.a_tail_len, o.match, o.mismatch, o.gapCost, o.end, false);
m_b2 = new TSeqAlign(m_barcodes2, o, o.b_min_overlap, o.b_errorRate, o.b_tail_len, o.b_match, o.b_mismatch, o.b_gapCost, true);
m_a2 = new TSeqAlign(m_adapters2, o, o.a_min_overlap, o.a_errorRate, o.a_tail_len, o.match, o.mismatch, o.gapCost, false);
if(m_log == flexbar::TAB)
*out << "ReadTag\tQueryTag\tQueryStart\tQueryEnd\tOverlapLength\tMismatches\tIndels\tAllowedErrors" << std::endl;
......@@ -66,18 +91,18 @@ public:
};
void alignPairedReadBarcode(flexbar::TPairedRead* pRead, flexbar::TAlignBundle &alBundle, std::vector<flexbar::ComputeCycle> &cycle, std::vector<unsigned int> &idxAl){
void alignPairedReadToBarcodes(flexbar::TPairedRead* pRead, flexbar::TAlignBundle &alBundle, std::vector<flexbar::ComputeCycle> &cycle, std::vector<unsigned int> &idxAl, const flexbar::AlignmentMode &alMode){
using namespace flexbar;
if(m_barType != BOFF){
switch(m_barType){
case BARCODE_READ: pRead->barID = m_b1->alignSeqRead(pRead->b, false, alBundle[0], cycle[0], idxAl[0]); break;
case WITHIN_READ_REMOVAL2: pRead->barID2 = m_b2->alignSeqRead(pRead->r2, true, alBundle[2], cycle[2], idxAl[2]);
case WITHIN_READ_REMOVAL: pRead->barID = m_b1->alignSeqRead(pRead->r1, true, alBundle[1], cycle[1], idxAl[1]); break;
case WITHIN_READ2: pRead->barID2 = m_b2->alignSeqRead(pRead->r2, false, alBundle[2], cycle[2], idxAl[2]);
case WITHIN_READ: pRead->barID = m_b1->alignSeqRead(pRead->r1, false, alBundle[1], cycle[1], idxAl[1]); break;
case BARCODE_READ: pRead->barID = m_b1->alignSeqRead(pRead->b, false, alBundle[0], cycle[0], idxAl[0], alMode, m_bTrimEnd); break;
case WITHIN_READ_REMOVAL2: pRead->barID2 = m_b2->alignSeqRead(pRead->r2, true, alBundle[2], cycle[2], idxAl[2], alMode, m_bTrimEnd);
case WITHIN_READ_REMOVAL: pRead->barID = m_b1->alignSeqRead(pRead->r1, true, alBundle[1], cycle[1], idxAl[1], alMode, m_bTrimEnd); break;
case WITHIN_READ2: pRead->barID2 = m_b2->alignSeqRead(pRead->r2, false, alBundle[2], cycle[2], idxAl[2], alMode, m_bTrimEnd);
case WITHIN_READ: pRead->barID = m_b1->alignSeqRead(pRead->r1, false, alBundle[1], cycle[1], idxAl[1], alMode, m_bTrimEnd); break;
case BOFF: break;
}
......@@ -89,18 +114,113 @@ public:
}
void alignPairedReadAdapter(flexbar::TPairedRead* pRead, flexbar::TAlignBundle &alBundle, std::vector<flexbar::ComputeCycle> &cycle, std::vector<unsigned int> &idxAl){
void alignPairedReadToAdapters(flexbar::TPairedRead* pRead, flexbar::TAlignBundle &alBundle, std::vector<flexbar::ComputeCycle> &cycle, std::vector<unsigned int> &idxAl, const flexbar::AlignmentMode &alMode, const flexbar::TrimEnd trimEnd){
using namespace flexbar;
if(m_adapRem != AOFF){
if(m_adapRem != ATWO)
m_a1->alignSeqRead(pRead->r1, true, alBundle[0], cycle[0], idxAl[0]);
m_a1->alignSeqRead(pRead->r1, true, alBundle[0], cycle[0], idxAl[0], alMode, trimEnd);
if(pRead->r2 != NULL && m_adapRem != AONE){
if(m_adapRem != NORMAL2) m_a1->alignSeqRead(pRead->r2, true, alBundle[1], cycle[1], idxAl[1]);
else m_a2->alignSeqRead(pRead->r2, true, alBundle[1], cycle[1], idxAl[1]);
if(m_adapRem != NORMAL2) m_a1->alignSeqRead(pRead->r2, true, alBundle[1], cycle[1], idxAl[1], alMode, trimEnd);
else m_a2->alignSeqRead(pRead->r2, true, alBundle[1], cycle[1], idxAl[1], alMode, trimEnd);
}
}
}
void trimLeftHPS(flexbar::TSeqRead* seqRead){
using namespace std;
using namespace flexbar;
if(m_htrimAdapterRm && m_useRcTrimEnd){
if (seqRead->rmAdapter && (m_aTrimEnd == RIGHT || m_aTrimEnd == RTAIL)) return;
else if(seqRead->rmAdapterRC && (m_arcTrimEnd == RIGHT || m_arcTrimEnd == RTAIL)) return;
}
else if(m_htrimAdapterRm && ! m_useRcTrimEnd){
if(m_aTrimEnd == RIGHT || m_aTrimEnd == RTAIL) return;
}
if(! m_htrimAdapterRm || seqRead->rmAdapter || seqRead->rmAdapterRC){
for(unsigned int s = 0; s < m_htrimLeft.length(); ++s){
char nuc = m_htrimLeft[s];
unsigned int cutPos = 0;
unsigned int notNuc = 0;
for(unsigned int i = 0; i < length(seqRead->seq); ++i){
if(seqRead->seq[i] != nuc){
notNuc++;
}
else if(notNuc <= m_htrimErrorRate * (i+1)){
if(m_htrimMaxLength != 0 && i+1 > m_htrimMaxLength && (! m_htrimMaxFirstOnly || s == 0)) break;
cutPos = i+1;
}
}
if(cutPos > 0 && cutPos >= m_htrimMinLength){
erase(seqRead->seq, 0, cutPos);
if(m_format == FASTQ){
erase(seqRead->qual, 0, cutPos);
}
}
}
}
}
void trimRightHPS(flexbar::TSeqRead* seqRead){
using namespace std;
using namespace flexbar;
if(m_htrimAdapterRm && m_useRcTrimEnd){
if (seqRead->rmAdapter && (m_aTrimEnd == LEFT || m_aTrimEnd == LTAIL)) return;
else if(seqRead->rmAdapterRC && (m_arcTrimEnd == LEFT || m_arcTrimEnd == LTAIL)) return;
}
else if(m_htrimAdapterRm && ! m_useRcTrimEnd){
if(m_aTrimEnd == LEFT || m_aTrimEnd == LTAIL) return;
}
if(! m_htrimAdapterRm || seqRead->rmAdapter || seqRead->rmAdapterRC){
for(unsigned int s = 0; s < m_htrimRight.length(); ++s){
char nuc = m_htrimRight[s];
unsigned int seqLen = length(seqRead->seq);
unsigned int cutPos = seqLen;
unsigned int notNuc = 0;
for(int i = seqLen - 1; i >= 0; --i){
if(seqRead->seq[i] != nuc){
notNuc++;
}
else if(notNuc <= m_htrimErrorRate * (seqLen - i)){
if(m_htrimMaxLength != 0 && i < seqLen - m_htrimMaxLength && (! m_htrimMaxFirstOnly || s == 0)) break;
cutPos = i;
}
}
if(cutPos < seqLen && cutPos <= seqLen - m_htrimMinLength){
erase(seqRead->seq, cutPos, length(seqRead->seq));
if(m_format == FASTQ){
erase(seqRead->qual, cutPos, length(seqRead->qual));
}
}
}
}
}
......@@ -115,6 +235,17 @@ public:
TPairedReadBundle *prBundle = static_cast<TPairedReadBundle* >(item);
if(m_umiTags){
for(unsigned int i = 0; i < prBundle->size(); ++i){
prBundle->at(i)->r1->umi = "";
if(prBundle->at(i)->r2 != NULL)
prBundle->at(i)->r2->umi = "";
}
}
AlignmentMode alMode = ALIGNALL;
// barcode detection
if(m_barType != BOFF){
......@@ -135,7 +266,7 @@ public:
}
for(unsigned int i = 0; i < prBundle->size(); ++i){
alignPairedReadBarcode(prBundle->at(i), alBundle, cycle, idxAl);
alignPairedReadToBarcodes(prBundle->at(i), alBundle, cycle, idxAl, alMode);
}
for(unsigned int i = 0; i < 3; ++i){
......@@ -144,7 +275,7 @@ public:
}
for(unsigned int i = 0; i < prBundle->size(); ++i){
alignPairedReadBarcode(prBundle->at(i), alBundle, cycle, idxAl);
alignPairedReadToBarcodes(prBundle->at(i), alBundle, cycle, idxAl, alMode);
}
}
......@@ -152,6 +283,24 @@ public:
if(m_adapRem != AOFF){
for(unsigned int c = 0; c < m_arTimes; ++c){
flexbar::TrimEnd trimEnd = m_aTrimEnd;
unsigned int rc = 1;
if(m_useRcTrimEnd){
alMode = ALIGNRCOFF;
rc = 2;
}
for(unsigned int r = 0; r < rc; ++r){
if(m_useRcTrimEnd && r == 1){
alMode = ALIGNRC;
trimEnd = m_arcTrimEnd;
}
TAlignBundle alBundle;
Alignments r1AlignmentsA, r2AlignmentsA;
......@@ -167,7 +316,7 @@ public:
}
for(unsigned int i = 0; i < prBundle->size(); ++i){
alignPairedReadAdapter(prBundle->at(i), alBundle, cycle, idxAl);
alignPairedReadToAdapters(prBundle->at(i), alBundle, cycle, idxAl, alMode, trimEnd);
}
for(unsigned int i = 0; i < 2; ++i){
......@@ -176,7 +325,44 @@ public:
}
for(unsigned int i = 0; i < prBundle->size(); ++i){
alignPairedReadAdapter(prBundle->at(i), alBundle, cycle, idxAl);
alignPairedReadToAdapters(prBundle->at(i), alBundle, cycle, idxAl, alMode, trimEnd);
}
}
}
}
if(m_umiTags){
for(unsigned int i = 0; i < prBundle->size(); ++i){
append(prBundle->at(i)->r1->id, prBundle->at(i)->r1->umi);
if(prBundle->at(i)->r2 != NULL){
append(prBundle->at(i)->r1->id, prBundle->at(i)->r2->umi);
append(prBundle->at(i)->r2->id, prBundle->at(i)->r1->umi);
append(prBundle->at(i)->r2->id, prBundle->at(i)->r2->umi);
}
}
}
if(m_htrim){
if(m_htrimLeft != ""){
for(unsigned int i = 0; i < prBundle->size(); ++i){
trimLeftHPS(prBundle->at(i)->r1);
if(prBundle->at(i)->r2 != NULL)
trimLeftHPS(prBundle->at(i)->r2);
}
}
if(m_htrimRight != ""){
for(unsigned int i = 0; i < prBundle->size(); ++i){
trimRightHPS(prBundle->at(i)->r1);
if(prBundle->at(i)->r2 != NULL)
trimRightHPS(prBundle->at(i)->r2);
}
}
}
......
......@@ -12,7 +12,7 @@ class PairedInput : public tbb::filter {
private:
const flexbar::FileFormat m_format;
const bool m_isPaired, m_useBarRead, m_useNumberTag;
const bool m_isPaired, m_useBarRead, m_useNumberTag, m_interleaved;
const unsigned int m_bundleSize;
tbb::atomic<unsigned long> m_uncalled, m_uncalledPairs, m_tagCounter;
......@@ -25,6 +25,7 @@ public:
filter(serial_in_order),
m_format(o.format),
m_useNumberTag(o.useNumberTag),
m_interleaved(o.interleavedInput),
m_isPaired(o.isPaired),
m_useBarRead(o.barDetect == flexbar::BARCODE_READ),
m_bundleSize(o.bundleSize),
......@@ -37,7 +38,7 @@ public:
m_f2 = NULL;
m_b = NULL;
if(m_isPaired)
if(m_isPaired && ! m_interleaved)
m_f2 = new SeqInput<TSeqStr, TString>(o, o.readsFile2, true, false);
if(m_useBarRead)
......@@ -61,9 +62,17 @@ public:
TStrings quals, quals2, qualsBR;
TBools uncalled, uncalled2, uncalledBR;
unsigned int nReads = m_f1->loadSeqReads(uncalled, ids, seqs, quals, m_bundleSize);
unsigned int bundleSize = m_bundleSize;
if(m_interleaved) bundleSize = m_bundleSize * 2;
if(m_isPaired){
unsigned int nReads = m_f1->loadSeqReads(uncalled, ids, seqs, quals, bundleSize);
if(m_interleaved && nReads % 2 == 1){
cerr << "\nERROR: Interleaved reads input does not contain even number of reads.\n" << endl;
exit(1);
}
if(m_isPaired && ! m_interleaved){
unsigned int nReads2 = m_f2->loadSeqReads(uncalled2, ids2, seqs2, quals2, m_bundleSize);
if(nReads != nReads2){
......@@ -75,11 +84,14 @@ public:
if(m_useBarRead){
unsigned int nBarReads = m_b->loadSeqReads(uncalledBR, idsBR, seqsBR, qualsBR, m_bundleSize);
if(nReads > nBarReads){
unsigned int multi = 1;
if(m_interleaved) multi = 2;
if(nReads > nBarReads * multi){
cerr << "\nERROR: Read without barcode read in input.\n" << endl;
exit(1);
}
else if(nReads < nBarReads){
else if(nReads < nBarReads * multi){
cerr << "\nERROR: Barcode read without read in input.\n" << endl;
exit(1);
}
......@@ -90,6 +102,8 @@ public:
TPairedReadBundle *prBundle = new TPairedReadBundle();
if(! m_interleaved){
for(unsigned int i = 0; i < length(ids); ++i){
if(uncalled[i] || (m_isPaired && uncalled2[i])){
......@@ -130,6 +144,56 @@ public:
prBundle->push_back(new TPairedRead(read1, read2, barRead));
}
}
}
else{ // interleaved paired input
unsigned int nEntries = (unsigned int) (length(ids) / 2);
for(unsigned int i = 0; i < nEntries; ++i){
unsigned int r = (i * 2);
unsigned int p = (i * 2) + 1;
if(uncalled[r] || uncalled[p]){
if(uncalled[r]) ++m_uncalled;
if(uncalled[p]) ++m_uncalled;
++m_uncalledPairs;
}
// else if(m_useBarRead && uncalledBR[i]){
//
// // to be handled
// }
else{
if(m_useNumberTag){
stringstream converter;
converter << ++m_tagCounter;
TString tagCount = converter.str();
ids[r] = tagCount;
ids[p] = tagCount;
if(m_useBarRead) idsBR[i] = tagCount;
}
TSeqRead *read1 = NULL, *read2 = NULL, *barRead = NULL;
if(m_format == FASTA){
read1 = new TSeqRead(seqs[r], ids[r]);
read2 = new TSeqRead(seqs[p], ids[p]);
if(m_useBarRead) barRead = new TSeqRead(seqsBR[i], idsBR[i]);
}
else{
read1 = new TSeqRead(seqs[r], ids[r], quals[r]);
read2 = new TSeqRead(seqs[p], ids[p], quals[p]);
if(m_useBarRead) barRead = new TSeqRead(seqsBR[i], idsBR[i], qualsBR[i]);
}
prBundle->push_back(new TPairedRead(read1, read2, barRead));
}
}
}
return prBundle;
}
......@@ -176,19 +240,19 @@ public:
unsigned long getNrProcessedReads() const{
if(m_isPaired) return m_f1->getNrProcessedReads() + m_f2->getNrProcessedReads();
if(m_isPaired && ! m_interleaved) return m_f1->getNrProcessedReads() + m_f2->getNrProcessedReads();
else return m_f1->getNrProcessedReads();
}
unsigned long getNrProcessedChars() const{
if(m_isPaired) return m_f1->getNrProcessedChars() + m_f2->getNrProcessedChars();
if(m_isPaired && ! m_interleaved) return m_f1->getNrProcessedChars() + m_f2->getNrProcessedChars();
else return m_f1->getNrProcessedChars();
}
unsigned long getNrLowPhredReads() const {
if(m_isPaired) return m_f1->getNrLowPhredReads() + m_f2->getNrLowPhredReads();
if(m_isPaired && ! m_interleaved) return m_f1->getNrLowPhredReads() + m_f2->getNrLowPhredReads();
else return m_f1->getNrLowPhredReads();
}
......
......@@ -16,15 +16,11 @@ private:
int m_mapsize;
const int m_minLength, m_qtrimThresh, m_qtrimWinSize;
const bool m_isPaired, m_writeUnassigned, m_writeSingleReads, m_writeSingleReadsP;
const bool m_twoBarcodes, m_qtrimPostRm, m_randTag;
const bool m_twoBarcodes, m_qtrimPostRm;
tbb::atomic<unsigned long> m_nSingleReads, m_nLowPhred;
const std::string m_target;
const std::string m_trimLeftNucs, m_trimRightNucs;
const int m_hpsMinLength;
const float m_errorRate;
const flexbar::FileFormat m_format;
const flexbar::RunType m_runType;
......@@ -50,16 +46,11 @@ public:
m_runType(o.runType),
m_barDetect(o.barDetect),
m_minLength(o.min_readLen),
m_trimLeftNucs(o.trimLeftNucs),
m_trimRightNucs(o.trimRightNucs),
m_hpsMinLength(o.hpsMinLength),
m_errorRate(o.a_errorRate),
m_qtrim(o.qTrim),
m_qtrimThresh(o.qtrimThresh),
m_qtrimWinSize(o.qtrimWinSize),
m_qtrimPostRm(o.qtrimPostRm),
m_isPaired(o.isPaired),
m_randTag(o.randTag),
m_writeUnassigned(o.writeUnassigned),
m_writeSingleReads(o.writeSingleReads),
m_writeSingleReadsP(o.writeSingleReadsP),
......@@ -235,72 +226,6 @@ public:
};
void trimLeftNucs(flexbar::TSeqRead* seqRead){
using namespace std;
using namespace flexbar;
for(unsigned int s = 0; s < m_trimLeftNucs.length(); ++s){
char nuc = m_trimLeftNucs[s];
unsigned int cutPos = 0;
unsigned int notNuc = 0;
for(unsigned int i = 0; i < length(seqRead->seq); ++i){
if(seqRead->seq[i] != nuc){
notNuc++;
}
else if(notNuc <= m_errorRate * (i + 1)){
cutPos = i+1;
}
}
if(cutPos > 0 && cutPos >= m_hpsMinLength){
erase(seqRead->seq, 0, cutPos);
if(m_format == FASTQ){
erase(seqRead->qual, 0, cutPos);
}
}
}
}
void trimRightNucs(flexbar::TSeqRead* seqRead){
using namespace std;
using namespace flexbar;
for(unsigned int s = 0; s < m_trimRightNucs.length(); ++s){
char nuc = m_trimRightNucs[s];
unsigned int cutPos = length(seqRead->seq);
unsigned int notNuc = 0;
for(int i = length(seqRead->seq) - 1; i >= 0; --i){
if(seqRead->seq[i] != nuc){
notNuc++;
}
else if(notNuc <= m_errorRate * (length(seqRead->seq) - i)){
cutPos = i;
}
}
if(cutPos < length(seqRead->seq) && cutPos <= length(seqRead->seq) - m_hpsMinLength){
erase(seqRead->seq, cutPos, length(seqRead->seq));
if(m_format == FASTQ){
erase(seqRead->qual, cutPos, length(seqRead->qual));
}
}
}
}
void writePairedRead(flexbar::TPairedRead* pRead){
using namespace flexbar;
......@@ -319,11 +244,6 @@ public:
if(qualTrim(pRead->r1, m_qtrim, m_qtrimThresh, m_qtrimWinSize)) ++m_nLowPhred;
}
if(m_trimLeftNucs != "") trimLeftNucs(pRead->r1);
if(m_trimRightNucs != "") trimRightNucs(pRead->r1);
if(m_randTag) append(pRead->r1->id, pRead->r1->umi);
if(length(pRead->r1->seq) >= m_minLength){
m_outMap[pRead->barID].f1->writeRead(pRead->r1);
......@@ -355,23 +275,6 @@ public:
if(qualTrim(pRead->r2, m_qtrim, m_qtrimThresh, m_qtrimWinSize)) ++m_nLowPhred;
}
if(m_trimLeftNucs != ""){
trimLeftNucs(pRead->r1);
trimLeftNucs(pRead->r2);
}
if(m_trimRightNucs != ""){
trimRightNucs(pRead->r1);
trimRightNucs(pRead->r2);
}
if(m_randTag){
append(pRead->r1->id, pRead->r1->umi);
append(pRead->r1->id, pRead->r2->umi);
append(pRead->r2->id, pRead->r1->umi);
append(pRead->r2->id, pRead->r2->umi);
}
if(length(pRead->r1->seq) >= m_minLength) l1ok = true;
if(length(pRead->r2->seq) >= m_minLength) l2ok = true;
......
......@@ -11,11 +11,10 @@ private:
typedef AlignResults<TSeqStr> TAlignResults;
const flexbar::TrimEnd m_trimEnd;
const flexbar::LogAlign m_log;
const flexbar::FileFormat m_format;
const bool m_isBarcoding, m_writeTag, m_randTag, m_strictRegion;
const bool m_isBarcoding, m_writeTag, m_umiTags, m_strictRegion;
const int m_minLength, m_minOverlap, m_tailLength;
const float m_errorRate;
const unsigned int m_bundleSize;
......@@ -29,14 +28,13 @@ private:
public:
SeqAlign(tbb::concurrent_vector<flexbar::TBar> *queries, const Options &o, int minOverlap, float errorRate, const int tailLength, const int match, const int mismatch, const int gapCost, const flexbar::TrimEnd end, const bool isBarcoding):
SeqAlign(tbb::concurrent_vector<flexbar::TBar> *queries, const Options &o, int minOverlap, float errorRate, const int tailLength, const int match, const int mismatch, const int gapCost, const bool isBarcoding):
m_minOverlap(minOverlap),
m_errorRate(errorRate),
m_tailLength(tailLength),
m_trimEnd(end),
m_isBarcoding(isBarcoding),
m_randTag(o.randTag),
m_umiTags(o.umiTags),
m_minLength(o.min_readLen),
m_log(o.logAlign),
m_format(o.format),
......@@ -46,14 +44,14 @@ public:
m_out(o.out),
m_nPreShortReads(0),
m_modified(0),
algo(TAlgorithm(o, match, mismatch, gapCost, end)){
algo(TAlgorithm(o, match, mismatch, gapCost)){
m_queries = queries;
m_rmOverlaps = tbb::concurrent_vector<unsigned long>(flexbar::MAX_READLENGTH + 1, 0);
};
int alignSeqRead(flexbar::TSeqRead* sr, const bool performRemoval, flexbar::Alignments &alignments, flexbar::ComputeCycle &cycle, unsigned int &idxAl){
int alignSeqRead(flexbar::TSeqRead* sr, const bool performRemoval, flexbar::Alignments &alignments, flexbar::ComputeCycle &cycle, unsigned int &idxAl, const flexbar::AlignmentMode &alMode, const flexbar::TrimEnd trimEnd){
using namespace std;
using namespace flexbar;
......@@ -78,15 +76,18 @@ public:
for(unsigned int i = 0; i < m_queries->size(); ++i){
if (alMode == ALIGNRCOFF && m_queries->at(i).rcAdapter) continue;
else if(alMode == ALIGNRC && ! m_queries->at(i).rcAdapter) continue;
TSeqStr &qseq = m_queries->at(i).seq;
TSeqStr *rseq = &seqRead.seq;
TSeqStr tmp;
if(m_trimEnd == LTAIL || m_trimEnd == RTAIL){
if(trimEnd == LTAIL || trimEnd == RTAIL){
int tailLength = (m_tailLength > 0) ? m_tailLength : length(qseq);
if(tailLength < readLength){
if(m_trimEnd == LTAIL) tmp = prefix(seqRead.seq, tailLength);
if(trimEnd == LTAIL) tmp = prefix(seqRead.seq, tailLength);
else tmp = suffix(seqRead.seq, readLength - tailLength);
rseq = &tmp;
}
......@@ -112,10 +113,13 @@ public:
// align each query sequence and store best one
for(unsigned int i = 0; i < m_queries->size(); ++i){
if (alMode == ALIGNRCOFF && m_queries->at(i).rcAdapter) continue;
else if(alMode == ALIGNRC && ! m_queries->at(i).rcAdapter) continue;
TAlignResults a;
// global sequence alignment
algo.alignGlobal(a, alignments, cycle, idxAl++);
algo.alignGlobal(a, alignments, cycle, idxAl++, trimEnd);
a.queryLength = length(m_queries->at(i).seq);
a.tailLength = (m_tailLength > 0) ? m_tailLength : a.queryLength;
......@@ -128,8 +132,8 @@ public:
bool validAl = true;
if(((m_trimEnd == RTAIL || m_trimEnd == RIGHT) && a.startPosA < a.startPosS && m_strictRegion) ||
((m_trimEnd == LTAIL || m_trimEnd == LEFT) && a.endPosA > a.endPosS && m_strictRegion) ||
if(((trimEnd == RTAIL || trimEnd == RIGHT) && a.startPosA < a.startPosS && m_strictRegion) ||
((trimEnd == LTAIL || trimEnd == LEFT) && a.endPosA > a.endPosS && m_strictRegion) ||
a.overlapLength < 1){
validAl = false;
......@@ -149,24 +153,24 @@ public:
// valid alignment
if(qIndex >= 0){
TrimEnd trimEnd = m_trimEnd;
TrimEnd trEnd = trimEnd;
// trim read based on alignment
if(performRemoval){
if(trimEnd == ANY){
if(trEnd == ANY){
if(am.startPosA <= am.startPosS && am.endPosS <= am.endPosA){
seqRead.seq = "";
if(m_format == FASTQ) seqRead.qual = "";
}
else if(am.startPosA - am.startPosS >= am.endPosS - am.endPosA){
trimEnd = RIGHT;
trEnd = RIGHT;
}
else trimEnd = LEFT;
else trEnd = LEFT;
}
switch(trimEnd){
switch(trEnd){
int rCutPos;
......@@ -211,6 +215,11 @@ public:
++m_modified;
if(! m_isBarcoding){
if(! m_queries->at(qIndex).rcAdapter) seqRead.rmAdapter = true;
else seqRead.rmAdapterRC = true;
}
// count number of removals for each query
m_queries->at(qIndex).rmOverlap++;
......@@ -233,9 +242,9 @@ public:
// valid alignment, not neccesarily removal
if(m_randTag && am.randTag != ""){
if(m_umiTags && am.umiTag != ""){
append(seqRead.umi, "_");
append(seqRead.umi, am.randTag);
append(seqRead.umi, am.umiTag);
}
// alignment stats
......@@ -244,8 +253,8 @@ public:
if(performRemoval){
s << "Sequence removal:";
if(trimEnd == LEFT || trimEnd == LTAIL) s << " left side\n";
else if(trimEnd == RIGHT || trimEnd == RTAIL) s << " right side\n";
if(trEnd == LEFT || trEnd == LTAIL) s << " left side\n";
else if(trEnd == RIGHT || trEnd == RTAIL) s << " right side\n";
else s << " any side\n";
}
else s << "Sequence detection, no removal:\n";
......
......@@ -21,16 +21,14 @@ private:
// TScoreSimple m_score;
TScoreMatrix m_scoreMatrix;
const bool m_randTag;
const bool m_umiTags;
const flexbar::LogAlign m_log;
const flexbar::TrimEnd m_trimEnd;
public:
SeqAlignAlgo(const Options &o, const int match, const int mismatch, const int gapCost, const flexbar::TrimEnd trimEnd):
m_randTag(o.randTag),
m_log(o.logAlign),
m_trimEnd(trimEnd){
SeqAlignAlgo(const Options &o, const int match, const int mismatch, const int gapCost):
m_umiTags(o.umiTags),
m_log(o.logAlign){
using namespace seqan;
......@@ -50,7 +48,7 @@ public:
};
void alignGlobal(TAlignResults &a, flexbar::Alignments &alignments, flexbar::ComputeCycle &cycle, const unsigned int idxAl){
void alignGlobal(TAlignResults &a, flexbar::Alignments &alignments, flexbar::ComputeCycle &cycle, const unsigned int idxAl, const flexbar::TrimEnd trimEnd){
using namespace std;
using namespace seqan;
......@@ -69,12 +67,12 @@ public:
cycle = RESULTS;
if(m_trimEnd == RIGHT || m_trimEnd == RTAIL){
if(trimEnd == RIGHT || trimEnd == RTAIL){
AlignConfig<true, false, true, true> ac;
alignments.ascores = globalAlignment(alignments.aset, m_scoreMatrix, ac);
}
else if(m_trimEnd == LEFT || m_trimEnd == LTAIL){
else if(trimEnd == LEFT || trimEnd == LTAIL){
AlignConfig<true, true, false, true> ac;
alignments.ascores = globalAlignment(alignments.aset, m_scoreMatrix, ac);
......@@ -113,7 +111,7 @@ public:
a.alString = s.str();
}
if(m_randTag) a.randTag = "";
if(m_umiTags) a.umiTag = "";
TRowIterator it1 = begin(row1);
......@@ -130,7 +128,7 @@ public:
if(isGap(it1)) ++a.gapsR;
else if(isGap(it2)) ++a.gapsA;
else if(*it1 != *it2 && *it2 != 'N') ++a.mismatches;
else if(m_randTag && *it2 == 'N') append(a.randTag, (TChar) *it1);
else if(m_umiTags && *it2 == 'N') append(a.umiTag, (TChar) *it1);
}
++alPos;
++it2;
......
......@@ -117,6 +117,7 @@ public:
if(m_format == FASTQ)
erase(quals[i], 0, idx);
}
if(m_preTrimEnd > 0 && length(seq) > 1){
int idx = m_preTrimEnd;
......@@ -127,6 +128,7 @@ public:
if(m_format == FASTQ)
quals[i] = prefix(quals[i], length(quals[i]) - idx);
}
if(m_qtrim != QOFF && ! m_qtrimPostRm){
if(qualTrim(seq, quals[i], m_qtrim, m_qtrimThresh, m_qtrimWinSize)) ++m_nLowPhred;
}
......