Skip to content
Commits on Source (7)
......@@ -2,7 +2,7 @@ cmake_minimum_required( VERSION 2.8.2 )
project( FLEXBAR )
set( SEQAN_APP_VERSION "3.0.3" )
set( SEQAN_APP_VERSION "3.1.0" )
include_directories( ${FLEXBAR_SOURCE_DIR}/include )
# link_directories( ${FLEXBAR_SOURCE_DIR}/lib )
......
BSD 3-Clause License
Copyright (c) 2017, SeqAn
Copyright (c) 2018, SeqAn
All rights reserved.
Redistribution and use in source and binary forms, with or without
......
## Flexbar flexible barcode and adapter removal
## Flexbar flexible barcode and adapter removal
The program Flexbar preprocesses high-throughput sequencing data efficiently. It demultiplexes barcoded runs and removes adapter sequences. Moreover, trimming and filtering features are provided. Flexbar increases read mapping rates and improves genome as well as transcriptome assemblies. It supports next-generation sequencing data in fasta and fastq format, e.g. from Roche 454 and the Illumina platform.
The program Flexbar preprocesses high-throughput sequencing data efficiently. It demultiplexes barcoded runs and removes adapter sequences. Moreover, trimming and filtering features are provided. Flexbar increases read mapping rates and improves genome as well as transcriptome assemblies. Unique molecular identifiers can be extracted in a flexible way. The program supports sequencing data in fasta and fastq format, e.g. from the Illumina platform.
Refer to the [manual](https://github.com/seqan/flexbar/wiki) or contact [jtroehr](https://github.com/jtroehr) for support with this application.
Refer to the [manual](https://github.com/seqan/flexbar/wiki) or contact [Johannes Roehr](https://github.com/jtroehr) for support with this application.
![Flexbar logo](https://github.com/seqan/flexbar/wiki/images/flexbar-logo.png)
### Reference
### References
Matthias Dodt, Johannes T. Roehr, Rina Ahmed, Christoph Dieterich: Flexbar — flexible barcode and adapter processing for next-generation sequencing platforms. Biology 2012, 1(3):895-905.
Johannes T. Roehr, Christoph Dieterich, Knut Reinert: Flexbar 3.0 – SIMD and multicore parallelization. Bioinformatics 2017.
See article on [PubMed](http://www.ncbi.nlm.nih.gov/pubmed/24832523).
See article on [PubMed](https://www.ncbi.nlm.nih.gov/pubmed/28541403)
Matthias Dodt, Johannes T. Roehr, Rina Ahmed, Christoph Dieterich: Flexbar – flexible barcode and adapter processing for next-generation sequencing platforms. Biology 2012.
See article on [PubMed](https://www.ncbi.nlm.nih.gov/pubmed/24832523)
### Download
......@@ -21,23 +25,23 @@ Flexbar source code as well as binaries for Linux and Mac OS can be downloaded o
### Building from source
Make sure that `cmake` is available, as well as development and runtime files of the TBB library 4.0 or later (Intel Threading Building Blocks). Using a package manager is a simple way to install them. Furthermore, the SeqAn library and a compiler that supports C++14 are required:
Make sure that `cmake` is available, as well as development and runtime files of the TBB library 4.0 or later (Intel Threading Building Blocks). For example on Debian systems, install the packages `libtbb-dev` and `libtbb2`. Furthermore, the SeqAn library and a compiler that supports C++14 are required:
* Get SeqAn library version 2.2.0 [here](https://github.com/seqan/seqan/releases/download/seqan-v2.2.0/seqan-library-2.2.0.tar.xz)
* Download Flexbar 3.0.3 source code [release](https://github.com/seqan/flexbar/releases)
* Download Flexbar 3.1 source code [release](https://github.com/seqan/flexbar/releases)
Decompress both files:
tar xzf flexbar-3.0.3.tar.gz
tar xzf flexbar-3.1.0.tar.gz
tar xJf seqan-library-2.2.0.tar.xz
Move SeqAn include folder to Flexbar:
mv seqan-library-2.2.0/include flexbar-3.0.3
mv seqan-library-2.2.0/include flexbar-3.1.0
Use these commands for building:
cd flexbar-3.0.3
cd flexbar-3.1.0
cmake .
make
......
flexbar (1:3.0.3-3) UNRELEASED; urgency=medium
flexbar (1:3.1.0-1) unstable; urgency=medium
[ Steffen Moeller ]
* d/u/metadata:
- added RRIDs
- yamllint cleanliness
-- Steffen Moeller <moeller@debian.org> Thu, 29 Mar 2018 20:38:12 +0200
[ Andreas Tille ]
* New upstream version
* Point Vcs fields to salsa.debian.org
* Standards-Version: 4.1.4
* debhelper 11
-- Andreas Tille <tille@debian.org> Thu, 26 Apr 2018 15:25:26 +0200
flexbar (1:3.0.3-2) unstable; urgency=medium
......
......@@ -4,15 +4,15 @@ Uploaders: Andreas Tille <tille@debian.org>,
Tony Travis <ajt@minke.ukfsn.org>
Section: science
Priority: optional
Build-Depends: debhelper (>= 10),
Build-Depends: debhelper (>= 11~),
libtbb-dev,
cmake,
libseqan2-dev,
zlib1g-dev,
libbz2-dev
Standards-Version: 4.1.0
Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/flexbar.git
Vcs-Git: https://anonscm.debian.org/git/debian-med/flexbar.git
Standards-Version: 4.1.4
Vcs-Browser: https://salsa.debian.org/med-team/flexbar
Vcs-Git: https://salsa.debian.org/med-team/flexbar.git
Homepage: https://github.com/seqan/flexbar
Package: flexbar
......
......@@ -59,12 +59,3 @@ endif()
# endif()
set( SEQAN_CTD_EXECUTABLES ${SEQAN_CTD_EXECUTABLES} flexbar CACHE INTERNAL "" )
# set( CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "/Users/jtr/Downloads/seqan-trunk/util/cmake" )
# set( SEQAN_FIND_DEPENDENCIES ZLIB BZip2 )
# find_package( SeqAn REQUIRED )
# add_definitions( ${SEQAN_DEFINITIONS} )
# include_directories( ${SEQAN_INCLUDE_DIRS} )
# target_link_libraries( flexbar ${SEQAN_LIBRARIES} )
# set( SEQAN_HAS_ZLIB FALSE )
......@@ -2,7 +2,7 @@
Flexbar - flexible barcode and adapter removal
Version 3.0.3
Version 3.1.0
uses SeqAn library release 2.2.0
and TBB library 4.0 or later
......@@ -27,8 +27,8 @@ int main(int argc, const char* argv[]){
using namespace std;
using namespace seqan;
const string version = "3.0.3";
const string date = "May 2017";
const string version = "3.1";
const string date = "April 2018";
ArgumentParser parser("flexbar");
......
......@@ -9,17 +9,19 @@ class SeqRead {
public:
TSeqStr seq;
TString id, qual;
TString id, qual, umi;
SeqRead(TSeqStr& sequence, TString& seqID) :
seq(sequence),
id(seqID){
id(seqID),
umi(""){
}
SeqRead(TSeqStr& sequence, TString& seqID, TString& quality) :
seq(sequence),
id(seqID),
qual(quality){
qual(quality),
umi(""){
}
};
......
......@@ -17,13 +17,14 @@ struct Options{
std::string readsFile, readsFile2, barReadsFile;
std::string barcodeFile, adapterFile, barcode2File, adapter2File;
std::string adapterSeq, targetName, logAlignStr, outCompression;
std::string trimLeftNucs, trimRightNucs;
bool isPaired, useAdapterFile, useNumberTag, useRemovalTag, randTag, logStdout;
bool switch2Fasta, writeUnassigned, writeSingleReads, writeSingleReadsP, writeLengthDist;
bool useStdin, useStdout, relaxRegion, revCompAdapter, qtrimPostRm, bNoMBV;
bool useStdin, useStdout, relaxRegion, revCompAdapter, qtrimPostRm;
int cutLen_begin, cutLen_end, cutLen_read, a_tail_len, b_tail_len;
int qtrimThresh, qtrimWinSize, a_overhang;
int qtrimThresh, qtrimWinSize, a_overhang, hpsMinLength;
int maxUncalled, min_readLen, a_min_overlap, b_min_overlap, nThreads, bundleSize;
int match, mismatch, gapCost, b_match, b_mismatch, b_gapCost;
......@@ -53,6 +54,8 @@ struct Options{
barcode2File = "";
adapter2File = "";
outCompression = "";
trimLeftNucs = "";
trimRightNucs = "";
isPaired = false;
useAdapterFile = false;
......@@ -70,7 +73,6 @@ struct Options{
relaxRegion = false;
revCompAdapter = false;
qtrimPostRm = false;
bNoMBV = false;
cutLen_begin = 0;
cutLen_end = 0;
......@@ -81,6 +83,8 @@ struct Options{
b_tail_len = 0;
b_min_overlap = 0;
a_errorRate = 0.1;
format = flexbar::FASTA;
qual = flexbar::SANGER;
qTrim = flexbar::QOFF;
......@@ -163,13 +167,9 @@ void defineOptions(seqan::ArgumentParser &parser, const std::string version, con
addOption(parser, ArgParseOption("bt", "barcode-error-rate", "Error rate threshold for mismatches and gaps.", ARG::DOUBLE));
addOption(parser, ArgParseOption("bk", "barcode-keep", "Keep barcodes within reads instead of removal."));
addOption(parser, ArgParseOption("bu", "barcode-unassigned", "Include unassigned reads in output generation."));
// addOption(parser, ArgParseOption("ba", "barcode-no-mbv", "Turn off bit-vector alignment with edit distance."));
addOption(parser, ArgParseOption("bm", "barcode-match", "Alignment match score.", ARG::INTEGER));
addOption(parser, ArgParseOption("bi", "barcode-mismatch", "Alignment mismatch score.", ARG::INTEGER));
addOption(parser, ArgParseOption("bg", "barcode-gap", "Alignment gap score.", ARG::INTEGER));
// addOption(parser, ArgParseOption("bm", "barcode-match", "Alignment match score in case of no-mbv.", ARG::INTEGER));
// addOption(parser, ArgParseOption("bi", "barcode-mismatch", "Standard alignment mismatch score.", ARG::INTEGER));
// addOption(parser, ArgParseOption("bg", "barcode-gap", "Standard alignment gap score.", ARG::INTEGER));
addSection(parser, "Adapter removal");
addOption(parser, ArgParseOption("a", "adapters", "Fasta file with adapters for removal that may contain N.", ARG::INPUT_FILE));
......@@ -191,6 +191,9 @@ void defineOptions(seqan::ArgumentParser &parser, const std::string version, con
addOption(parser, ArgParseOption("u", "max-uncalled", "Allowed uncalled bases N for each read.", ARG::INTEGER));
addOption(parser, ArgParseOption("x", "pre-trim-left", "Trim given number of bases on 5' read end before detection.", ARG::INTEGER));
addOption(parser, ArgParseOption("y", "pre-trim-right", "Trim specified number of bases on 3' end prior to detection.", ARG::INTEGER));
addOption(parser, ArgParseOption("X", "post-trim-left-hps", "Trim certain homopolymers on the left read end after removal.", ARG::STRING));
addOption(parser, ArgParseOption("Y", "post-trim-right-hps", "Trim certain homopolymers on the right read end after removal.", ARG::STRING));
addOption(parser, ArgParseOption("Z", "post-trim-hps-length", "Minimum length of homopolymers at read ends.", ARG::INTEGER));
addOption(parser, ArgParseOption("k", "post-trim-length", "Trim to specified read length from 3' end after removal.", ARG::INTEGER));
addOption(parser, ArgParseOption("m", "min-read-length", "Minimum read length to remain after removal.", ARG::INTEGER));
......@@ -215,7 +218,7 @@ void defineOptions(seqan::ArgumentParser &parser, const std::string version, con
addOption(parser, ArgParseOption("o", "stdout-log", "Write statistics to console instead of target log file."));
addOption(parser, ArgParseOption("g", "removal-tags", "Tag reads that are subject to adapter or barcode removal."));
addOption(parser, ArgParseOption("e", "number-tags", "Replace read tags by ascending number to save space."));
addOption(parser, ArgParseOption("d", "random-tags", "Capture read sequence at barcode or adapter N positions."));
addOption(parser, ArgParseOption("i", "umi-tags", "Capture UMIs in reads at barcode or adapter N positions."));
hideOption(parser, "version");
......@@ -224,12 +227,10 @@ void defineOptions(seqan::ArgumentParser &parser, const std::string version, con
setAdvanced(parser, "barcode-tail-length");
setAdvanced(parser, "barcode-keep");
setAdvanced(parser, "barcode-unassigned");
// setAdvanced(parser, "barcode-no-mbv");
setAdvanced(parser, "barcode-match");
setAdvanced(parser, "barcode-mismatch");
setAdvanced(parser, "barcode-gap");
setAdvanced(parser, "adapters2");
setAdvanced(parser, "adapter-revcomp");
setAdvanced(parser, "adapter-tail-length");
// setAdvanced(parser, "adapter-overhang");
......@@ -239,6 +240,9 @@ void defineOptions(seqan::ArgumentParser &parser, const std::string version, con
setAdvanced(parser, "adapter-mismatch");
setAdvanced(parser, "adapter-gap");
setAdvanced(parser, "post-trim-left-hps");
setAdvanced(parser, "post-trim-right-hps");
setAdvanced(parser, "post-trim-hps-length");
setAdvanced(parser, "post-trim-length");
setAdvanced(parser, "qtrim-win-size");
setAdvanced(parser, "qtrim-post-removal");
......@@ -249,7 +253,7 @@ void defineOptions(seqan::ArgumentParser &parser, const std::string version, con
setAdvanced(parser, "length-dist");
setAdvanced(parser, "single-reads-paired");
setAdvanced(parser, "number-tags");
setAdvanced(parser, "random-tags");
setAdvanced(parser, "umi-tags");
setCategory(parser, "Trimming");
......@@ -295,11 +299,13 @@ void defineOptions(seqan::ArgumentParser &parser, const std::string version, con
setDefaultValue(parser, "target", "flexbarOut");
setDefaultValue(parser, "threads", "1");
setDefaultValue(parser, "bundle", "256");
setDefaultValue(parser, "max-uncalled", "0");
setDefaultValue(parser, "min-read-length", "18");
setDefaultValue(parser, "post-trim-hps-length", "3");
setDefaultValue(parser, "barcode-trim-end", "LTAIL");
setDefaultValue(parser, "barcode-error-rate", "0.1");
setDefaultValue(parser, "barcode-error-rate", "0.0");
setDefaultValue(parser, "barcode-match", "1");
setDefaultValue(parser, "barcode-mismatch", "-1");
setDefaultValue(parser, "barcode-gap", "-9");
......@@ -564,6 +570,19 @@ void loadOptions(Options &o, seqan::ArgumentParser &parser){
*out << "pre-trim-right: " << o.cutLen_end << endl;
}
if(isSet(parser, "post-trim-left-hps")){
getOptionValue(o.trimLeftNucs, parser, "post-trim-left-hps");
*out << "post-trim-left-hps: " << o.trimLeftNucs << endl;
}
if(isSet(parser, "post-trim-right-hps")){
getOptionValue(o.trimRightNucs, parser, "post-trim-right-hps");
*out << "post-trim-right-hps: " << o.trimRightNucs << endl;
}
getOptionValue(o.hpsMinLength, parser, "post-trim-hps-length");
*out << "post-trim-hps-length: " << o.hpsMinLength << endl;
if(isSet(parser, "post-trim-length")){
getOptionValue(o.cutLen_read, parser, "post-trim-length");
*out << "post-trim-length: " << o.cutLen_read << endl;
......@@ -673,7 +692,7 @@ void loadOptions(Options &o, seqan::ArgumentParser &parser){
if(isSet(parser, "length-dist")) o.writeLengthDist = true;
if(isSet(parser, "number-tags")) o.useNumberTag = true;
if(isSet(parser, "removal-tags")) o.useRemovalTag = true;
if(isSet(parser, "random-tags")) o.randTag = true;
if(isSet(parser, "umi-tags")) o.randTag = true;
*out << endl;
......@@ -716,14 +735,11 @@ void loadOptions(Options &o, seqan::ArgumentParser &parser){
}
if(isSet(parser, "barcode-unassigned")) o.writeUnassigned = true;
// if(isSet(parser, "barcode-no-mbv")) o.bNoMBV = true;
getOptionValue(o.b_match, parser, "barcode-match");
getOptionValue(o.b_mismatch, parser, "barcode-mismatch");
getOptionValue(o.b_gapCost, parser, "barcode-gap");
// if(o.bNoMBV){
*out << "barcode-match: ";
if(o.b_match >= 0) *out << " ";
*out << o.b_match << endl;
......@@ -735,7 +751,7 @@ void loadOptions(Options &o, seqan::ArgumentParser &parser){
*out << "barcode-gap: ";
if(o.b_gapCost >= 0) *out << " ";
*out << o.b_gapCost << endl;
// }
*out << endl;
}
......@@ -797,7 +813,6 @@ void loadOptions(Options &o, seqan::ArgumentParser &parser){
// getOptionValue(o.a_overhang, parser, "adapter-overhang");
// *out << "adapter-overhang: " << o.a_overhang << endl;
getOptionValue(o.match, parser, "adapter-match");
getOptionValue(o.mismatch, parser, "adapter-mismatch");
getOptionValue(o.gapCost, parser, "adapter-gap");
......
......@@ -14,13 +14,17 @@ class PairedOutput : public tbb::filter {
private:
int m_mapsize;
const int m_minLength, m_cutLen_read, m_qtrimThresh, m_qtrimWinSize;
const int m_minLength, m_qtrimThresh, m_qtrimWinSize;
const bool m_isPaired, m_writeUnassigned, m_writeSingleReads, m_writeSingleReadsP;
const bool m_twoBarcodes, m_qtrimPostRm;
const bool m_twoBarcodes, m_qtrimPostRm, m_randTag;
tbb::atomic<unsigned long> m_nSingleReads, m_nLowPhred;
const std::string m_target;
const std::string m_trimLeftNucs, m_trimRightNucs;
const int m_hpsMinLength;
const float m_errorRate;
const flexbar::FileFormat m_format;
const flexbar::RunType m_runType;
......@@ -46,12 +50,16 @@ public:
m_runType(o.runType),
m_barDetect(o.barDetect),
m_minLength(o.min_readLen),
m_cutLen_read(o.cutLen_read),
m_trimLeftNucs(o.trimLeftNucs),
m_trimRightNucs(o.trimRightNucs),
m_hpsMinLength(o.hpsMinLength),
m_errorRate(o.a_errorRate),
m_qtrim(o.qTrim),
m_qtrimThresh(o.qtrimThresh),
m_qtrimWinSize(o.qtrimWinSize),
m_qtrimPostRm(o.qtrimPostRm),
m_isPaired(o.isPaired),
m_randTag(o.randTag),
m_writeUnassigned(o.writeUnassigned),
m_writeSingleReads(o.writeSingleReads),
m_writeSingleReadsP(o.writeSingleReadsP),
......@@ -227,6 +235,72 @@ public:
};
// Trims homopolymer stretches from the left end of a read, in place.
//
// For each character of m_trimLeftNucs, in the given order, the read is scanned
// from its first base. A prefix ending at position i is a trimming candidate when
// base i equals the nucleotide and the number of differing bases seen so far does
// not exceed m_errorRate times the prefix length. The longest such prefix is removed
// if it is non-empty and at least m_hpsMinLength bases long. For FASTQ input the
// same range is removed from the quality string.
//
// seqRead: read to modify; must not be null (it is dereferenced unconditionally).
void trimLeftNucs(flexbar::TSeqRead* seqRead){
	
	using namespace std;
	using namespace flexbar;
	
	for(unsigned int s = 0; s < m_trimLeftNucs.length(); ++s){
		
		const char nuc = m_trimLeftNucs[s];
		
		// Taken once per nucleotide: a previous pass may have shortened the read,
		// but the read is not modified while it is being scanned.
		const unsigned int seqLen = length(seqRead->seq);
		
		unsigned int cutPos = 0;  // number of leading bases to remove
		unsigned int notNuc = 0;  // bases differing from nuc seen so far
		
		for(unsigned int i = 0; i < seqLen; ++i){
			
			if(seqRead->seq[i] != nuc){
				notNuc++;
			}
			else if(notNuc <= m_errorRate * (i + 1)){
				cutPos = i + 1;
			}
		}
		
		// Compare in the signed domain: m_hpsMinLength is an int, and an implicit
		// conversion to unsigned would turn a negative value into a huge threshold
		// that disables trimming altogether.
		if(cutPos > 0 && static_cast<long long>(cutPos) >= m_hpsMinLength){
			
			erase(seqRead->seq, 0, cutPos);
			
			if(m_format == FASTQ){
				erase(seqRead->qual, 0, cutPos);
			}
		}
	}
}
// Trims homopolymer stretches from the right end of a read, in place.
//
// For each character of m_trimRightNucs, in the given order, the read is scanned
// from its last base towards the first. A suffix starting at position i is a
// trimming candidate when base i equals the nucleotide and the number of differing
// bases seen so far does not exceed m_errorRate times the suffix length. The longest
// such suffix is removed if it is non-empty and at least m_hpsMinLength bases long.
// For FASTQ input the quality string is cut at the same position.
//
// seqRead: read to modify; must not be null (it is dereferenced unconditionally).
void trimRightNucs(flexbar::TSeqRead* seqRead){
	
	using namespace std;
	using namespace flexbar;
	
	for(unsigned int s = 0; s < m_trimRightNucs.length(); ++s){
		
		const char nuc = m_trimRightNucs[s];
		
		// Taken once per nucleotide: a previous pass may have shortened the read,
		// but the read is not modified while it is being scanned.
		const unsigned int seqLen = length(seqRead->seq);
		
		unsigned int cutPos = seqLen;  // first position of the suffix to remove
		unsigned int notNuc = 0;       // bases differing from nuc seen so far
		
		// Reverse scan with an unsigned index: the test runs before the decrement
		// takes effect, so the body sees seqLen-1 down to 0 and never wraps.
		for(unsigned int i = seqLen; i-- > 0; ){
			
			if(seqRead->seq[i] != nuc){
				notNuc++;
			}
			else if(notNuc <= m_errorRate * (seqLen - i)){
				cutPos = i;
			}
		}
		
		// Length of the candidate suffix. It is compared against the minimum directly
		// instead of evaluating "seqLen - m_hpsMinLength", which underflows in unsigned
		// arithmetic when the read is shorter than the minimum and would let stretches
		// below the minimum length be trimmed.
		const unsigned int trimLen = seqLen - cutPos;
		
		if(trimLen > 0 && static_cast<long long>(trimLen) >= m_hpsMinLength){
			
			erase(seqRead->seq, cutPos, seqLen);
			
			if(m_format == FASTQ){
				erase(seqRead->qual, cutPos, length(seqRead->qual));
			}
		}
	}
}
void writePairedRead(flexbar::TPairedRead* pRead){
using namespace flexbar;
......@@ -245,6 +319,11 @@ public:
if(qualTrim(pRead->r1, m_qtrim, m_qtrimThresh, m_qtrimWinSize)) ++m_nLowPhred;
}
if(m_trimLeftNucs != "") trimLeftNucs(pRead->r1);
if(m_trimRightNucs != "") trimRightNucs(pRead->r1);
if(m_randTag) append(pRead->r1->id, pRead->r1->umi);
if(length(pRead->r1->seq) >= m_minLength){
m_outMap[pRead->barID].f1->writeRead(pRead->r1);
......@@ -276,6 +355,23 @@ public:
if(qualTrim(pRead->r2, m_qtrim, m_qtrimThresh, m_qtrimWinSize)) ++m_nLowPhred;
}
if(m_trimLeftNucs != ""){
trimLeftNucs(pRead->r1);
trimLeftNucs(pRead->r2);
}
if(m_trimRightNucs != ""){
trimRightNucs(pRead->r1);
trimRightNucs(pRead->r2);
}
if(m_randTag){
append(pRead->r1->id, pRead->r1->umi);
append(pRead->r1->id, pRead->r2->umi);
append(pRead->r2->id, pRead->r1->umi);
append(pRead->r2->id, pRead->r2->umi);
}
if(length(pRead->r1->seq) >= m_minLength) l1ok = true;
if(length(pRead->r2->seq) >= m_minLength) l2ok = true;
......
......@@ -234,8 +234,8 @@ public:
// valid alignment, not necessarily removal
if(m_randTag && am.randTag != ""){
append(seqRead.id, "_");
append(seqRead.id, am.randTag);
append(seqRead.umi, "_");
append(seqRead.umi, am.randTag);
}
// alignment stats
......