Skip to content
Commits on Source (7)
......@@ -3,8 +3,8 @@
url = https://github.com/jwalabroad/fermi-lite
[submodule "htslib"]
path = htslib
url = https://github.com/walaj/htslib
branch = develop
url = https://github.com/samtools/htslib
branch = master
[submodule "bwa"]
path = bwa
url = https://github.com/jwalabroad/bwa
......
......@@ -19,7 +19,7 @@ then
## download the test data
mkdir test_data
cd test_data
wget -r -nH -nd -np -R index.html* https://data.broadinstitute.org/snowman/SeqLibTest/
wget -r -nH -nd -np -R index.html* https://data.broadinstitute.org/snowman/SeqLib/
cd ..
export LD_LIBRARY_PATH=${BOOST_ROOT}/lib:${LD_LIBRARY_PATH}
......
......@@ -285,7 +285,6 @@ pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
runstatedir = @runstatedir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
......
......@@ -9,6 +9,13 @@ API Documentation
-----------------
[API Documentation][htmldoc]
Citation
--------
If you use SeqLib in your applications, please cite: http://bioinformatics.oxfordjournals.org/content/early/2016/12/21/bioinformatics.btw741.full.pdf+html
Note that the values for the SeqAn benchmarking in Table 2 should be corrected to 7.7 Gb memory and 33.92 seconds in CPU time, when compiling SeqAn with ``-O3 -DNDEBUG``. SeqAn also does full string decompression.
Wall times for SeqAn may be shorter than CPU time because it uses embedded multi-threading during BAM IO.
Table of contents
=================
......@@ -51,7 +58,7 @@ C_INCLUDE_PATH=$C_INCLUDE_PATH:$SEQ:$SEQ/htslib
And need to link the SeqLib static library and Fermi, BWA and HTSlib libraries
```bash
SEQ=<path_to_seqlib>
LDFLAGS="$LDFLAGS -L$SEQ/bin/libseqlib.a -L$SEQ/bin/libbwa.a -L$SEQ/bin/libfml.a -L$SEQ/bin/libhts.a"
LDFLAGS="$LDFLAGS $SEQ/bin/libseqlib.a $SEQ/bin/libbwa.a $SEQ/bin/libfml.a $SEQ/bin/libhts.a"
```
To add support for reading BAMs, etc with HTTPS, FTP, S3, Google cloud, etc, you must compile and link with libcurl.
......@@ -101,13 +108,14 @@ provide excellent and high quality APIs. SeqLib provides further performance enh
bioinformatics problems.
Some differences:
* SeqLib has ~2-4x faster read/write speed over BamTools and SeqAn, and lower memory footprint.
* SeqLib has ~2-4x faster read/write speed over BamTools and lower memory footprint.
* SeqLib has support for CRAM file
* SeqLib provides in memory access to BWA-MEM, BLAT, a chromosome aware interval tree and range operations, and to read correction and sequence assembly with Fermi. BamTools has more support currently for network access.
* SeqAn provide a substantial amount of additional capabilites not in SeqLib, including graph operations and a more expanded suite of multi-sequence alignments.
* SeqLib provides in memory access to BWA-MEM, BLAT, chromosome aware interval tree, read correction, and sequence assembly with Fermi.
* SeqAn provide a substantial amount of additional capabilites not in SeqLib, including graph operations and an expanded suite of multi-sequence alignments.
* SeqAn embeds multi-threading into some functionality like BAM IO to improve wall times.
For your particular application, our hope is that SeqLib will provide a comprehensive and powerful envrionment to develop
bioinformatics tools. Feature requests and comments are welcomed.
bioinformatics tools, or to be used in conjuction with the capablities in SeqAn and BamTools. Feature requests and comments are welcomed.
Command Line Usage
------------------
......@@ -152,7 +160,7 @@ bwa.ConstructIndex(usv);
std::string querySeq = "CAGCCTCACCCAGGAAAGCAGCTGGGGGTCCACTGGGCTCAGGGAAG";
BamRecordVector results;
// hardclip=false, secondary score cutoff=0.9, max secondary alignments=10
bwa.AlignSequence("my_seq", querySeq, results, false, 0.9, 10);
bwa.AlignSequence(querySeq, "my_seq", results, false, 0.9, 10);
// print results to stdout
for (auto& i : results)
......@@ -293,21 +301,22 @@ w.Close(); // Optional. Will close on destruction
using namespace SeqLib;
// brv is some set of reads to train the error corrector
b.TrainCorrection(brv);
for (BamRecordVector::const_iterator r = brv.begin(); r != brv.end(); ++r)
b.AddSequence(r->Sequence().c_str(), r->Qualities().c_str(), r->Qname().c_str());
b.Train();
b.clear(); // clear the training sequences. Training parameters saved in BFC object
// brv2 is some set to correct
b.ErrorCorrect(brv2);
for (BamRecordVector::const_iterator r = brv2.begin(); r != brv2.end(); ++r)
b.AddSequence(r->Sequence().c_str(), r->Qualities().c_str(), r->Qname().c_str());
b.ErrorCorrect();
// retrieve the sequences
UnalignedSequenceVector v;
b.GetSequences(v);
// alternatively, to train and correct the same set of reads
b.TrainAndCorrect(brv);
b.GetSequences(v);
std::string name, seq;
while (b.GetSequences(seq, name))
v.push_back({name, seq});
// alternatively, train and correct, and modify the sequence in-place
b.TrainCorrection(brv);
b.ErrorCorrectInPlace(brv);
```
Support
......
......@@ -24,6 +24,7 @@ namespace SeqLib {
public:
/** Construct a new BFC engine */
BFC() {
m_idx = 0;
bfc_opt_init(&bfc_opt);
ch = NULL;
kmer = 0;
......@@ -43,57 +44,31 @@ namespace SeqLib {
bfc_ch_destroy(ch);
}
/** Allocate a block of memory for the reads if the amount to enter is known
* @note This is not necessary, as reads will dynamically reallocate
*/
bool AllocateMemory(size_t n);
/** Peform BFC error correction on the sequences stored in this object */
bool ErrorCorrect();
/** Train the error corrector using the reads stored in this object */
bool Train();
/** Add a sequence for either training or correction */
bool AddSequence(const BamRecord& r);
/** Add a sequence for either training or correction */
/** Add a sequence for either training or correction
* @param seq A sequence to be copied into this object (A, T, C, G)
*/
bool AddSequence(const char* seq, const char* qual, const char* name);
/** Set the k-mer size */
void SetKmer(int k) { kmer = k; }
/** Train error correction using sequences from aligned reads */
void TrainCorrection(const BamRecordVector& brv);
/** Train error correction from raw character strings */
void TrainCorrection(const std::vector<char*>& v);
/** Train and error correction on same reads */
void TrainAndCorrect(const BamRecordVector& brv);
/** Error correct a collection of reads */
void ErrorCorrect(const BamRecordVector& brv);
/** Error correct in place, modify sequence, and the clear memory from this object */
void ErrorCorrectInPlace(BamRecordVector& brv);
/** Error correct and add tag with the corrected sequence data, and the clear memory from this object
* @param brv Aligned reads to error correct
* @param tag Tag to assign error corrected sequence to (eg KC)
* @exception Throws an invalid_argument if tag is not length 2
/** Set the k-mer size for training
* @note zero is auto
*/
void ErrorCorrectToTag(BamRecordVector& brv, const std::string& tag);
void SetKmer(int k) { kmer = k; }
/** Return the reads (error corrected if ran ErrorCorrect) */
void GetSequences(UnalignedSequenceVector& v) const;
/** Correct a single new sequence not stored in object
* @param str Sequence of string to correct (ACTG)
* @param q Quality score of sequence to correct
* @value Returns true if corrected */
bool CorrectSequence(std::string& str, const std::string& q);
/** Clear the stored reads */
void clear();
/** Filter reads with unique k-mers. Do after error correction */
void FilterUnique();
/** Return the calculated kcov */
float GetKCov() const { return kcov; }
......@@ -103,8 +78,21 @@ namespace SeqLib {
/** Return the number of sequences controlled by this */
int NumSequences() const { return n_seqs; }
/** Return the next sequence stored in object
* @param s Empty string to be filled.
* @param q Empty string name to be filled.
* @value True if string was filled with sequence. False if no more sequences.
*/
bool GetSequence(std::string& s, std::string& q);
/** Reset the sequence iterator inside GetSequence
*/
void ResetGetSequence() { m_idx = 0; };
private:
size_t m_idx;
// the amount of memory allocated
size_t m_seqs_size;
......@@ -146,6 +134,8 @@ namespace SeqLib {
// assign names, qualities and seq to m_seqs
void allocate_sequences_from_char(const std::vector<char*>& v);
void allocate_sequences_from_strings(const std::vector<std::string>& v);
// do the actual read correction
void correct_reads();
......
......@@ -42,7 +42,8 @@ namespace SeqLib {
private:
// do the read loading
bool load_read(BamRecord& r);
// the return value here is just passed along from sam_read1
int32_t load_read(BamRecord& r);
void reset() {
empty = true;
......
......@@ -108,6 +108,14 @@ class CigarField {
public:
/** Construct an empty CIGAR */
Cigar() {}
/** Construct from a CIGAR string
* @param cig CIGAR string, e.g. 54M46S
*/
Cigar(const std::string& cig);
typedef std::vector<CigarField>::iterator iterator; ///< Iterator for move between CigarField ops
typedef std::vector<CigarField>::const_iterator const_iterator; ///< Iterator (const) for move between CigarField ops
iterator begin() { return m_data.begin(); } ///< Iterator (aka std::vector<CigarField>.begin()
......@@ -185,11 +193,8 @@ class CigarField {
};
//typedef std::vector<CigarField> Cigar;
typedef SeqHashMap<std::string, size_t> CigarMap;
Cigar cigarFromString(const std::string& cig);
/** Class to store and interact with a SAM alignment record
*
* HTSLibrary reads are stored in the bam1_t struct. Memory allocation
......@@ -345,7 +350,10 @@ class BamRecord {
int32_t CountBWAChimericAlignments() const;
/** Get the end of the alignment */
inline int32_t PositionEnd() const { return b ? bam_endpos(b.get()) : -1; }
int32_t PositionEnd() const;
/** Get the end of the aligment mate pair */
int32_t PositionEndMate() const;
/** Get the chromosome ID of the read */
inline int32_t ChrID() const { return b ? b->core.tid : -1; }
......@@ -398,8 +406,8 @@ class BamRecord {
inline std::string ParseReadGroup() const {
// try to get from RG tag first
std::string RG = GetZTag("RG");
if (!RG.empty())
std::string RG;
if (GetZTag("RG", RG))
return RG;
// try to get the read group tag from qname second
......@@ -414,7 +422,7 @@ class BamRecord {
if (b->core.tid != b->core.mtid || !PairMappedFlag())
return 0;
return std::abs(b->core.pos - b->core.mpos) + Length();
return std::abs(b->core.pos - b->core.mpos) + GetCigar().NumQueryConsumed();
}
......@@ -559,8 +567,8 @@ class BamRecord {
uint8_t * p = bam_get_qual(b);
if (!p)
return std::string();
if (!p[0])
return std::string();
//if (!p[0])
// return std::string();
std::string out(b->core.l_qseq, ' ');
for (int32_t i = 0; i < b->core.l_qseq; ++i)
out[i] = (char)(p[i] + offset);
......@@ -659,9 +667,17 @@ class BamRecord {
/** Get a string (Z) tag
* @param tag Name of the tag. eg "XP"
* @return The value stored in the tag. Returns empty string if it does not exist.
* @param s The string to be filled in with the tag information
* @return Returns true if the tag is present, even if empty. Return false if no tag or not a Z tag.
*/
std::string GetZTag(const std::string& tag) const;
bool GetZTag(const std::string& tag, std::string& s) const;
/** Get a string of either Z, f or i type. Useful if tag type not known at compile time.
* @param tag Name of the tag. eg "XP"
* @param s The string to be filled in with the tag information
* @return Returns true if the tag is present and is either Z or i, even if empty. Return false if no tag or not Z or i.
*/
bool GetTag(const std::string& tag, std::string& s) const;
/** Get a vector of type int from a Z tag delimited by "^"
* Smart-tags allow one to store vectors of strings, ints or doubles in the alignment tags, and
......@@ -691,13 +707,28 @@ class BamRecord {
/** Get an int (i) tag
* @param tag Name of the tag. eg "XP"
* @return The value stored in the tag. Returns 0 if it does not exist.
* @param t Value to be filled in with the tag value.
* @return Return true if the tag exists.
*/
inline int32_t GetIntTag(const std::string& tag) const {
inline bool GetIntTag(const std::string& tag, int32_t& t) const {
uint8_t* p = bam_aux_get(b.get(),tag.c_str());
if (!p)
return 0;
return bam_aux2i(p);
return false;
t = bam_aux2i(p);
return true;
}
/** Get a float (f) tag
* @param tag Name of the tag. eg "AS"
* @param t Value to be filled in with the tag value.
* @return Return true if the tag exists.
*/
inline bool GetFloatTag(const std::string& tag, float& t) const {
uint8_t* p = bam_aux_get(b.get(),tag.c_str());
if (!p)
return false;
t = bam_aux2f(p);
return true;
}
/** Add a string (Z) tag
......@@ -825,7 +856,10 @@ class BamRecord {
*/
int OverlappingCoverage(const BamRecord& r) const;
private:
/** Return the shared pointer */
SeqPointer<bam1_t> shared_pointer() const { return b; }
protected:
SeqPointer<bam1_t> b; // bam1_t shared pointer
......
#ifndef SNOWUTILS_H
#define SNOWUTILS_H
#ifndef SEQLIB_UTILS_H
#define SEQLIB_UTILS_H
#include <string>
#include <time.h>
......
#ifndef SEQLIB_UNALIGNED_SEQ_H__
#define SEQLIB_UNALIGNED_SEQ_H__
#ifndef SEQLIB_UNALIGNED_SEQ_H
#define SEQLIB_UNALIGNED_SEQ_H
extern "C" {
#include "bwa/bwa.h"
......@@ -26,20 +26,20 @@ namespace SeqLib {
/** Construct an empty sequence */
UnalignedSequence() {}
/** Construct an unaliged sequence with name and sequence
/** Construct an unaligned sequence with name and sequence
* @param n Name of the sequence
* @param s Sequence, stored as ACTG or N characters
*/
UnalignedSequence(const std::string& n, const std::string& s) : Name(n), Seq(s), Qual(std::string()), Strand('*') {}
/** Construct an unaliged sequence with name, sequence and quality score
/** Construct an unaligned sequence with name, sequence and quality score
* @param n Name of the sequence
* @param s Sequence, stored as ACTG or N characters
* @param q Quality string
*/
UnalignedSequence(const std::string& n, const std::string& s, const std::string& q) : Name(n), Seq(s), Qual(q), Strand('*') {}
/** Construct an unaliged sequence with name, sequence and quality score
/** Construct an unaligned sequence with name, sequence, quality score and strand
* @param n Name of the sequence
* @param s Sequence, stored as ACTG or N characters
* @param q Quality string
......
##INCLUDES=-I/xchip/gistic/Jeremiah/software/seqan-library-2.0.2/include -I /xchip/gistic/Jeremiah/GIT/SeqLib/src -I/xchip/gistic/Jeremiah/GIT/SeqLib/htslib -I/xchip/gistic/Jeremiah/software/boost_1.61.0_gcc5.1 -I/xchip/gistic/Jeremiah/software/bamtools-2.4.0/include
INCLUDES=-I/xchip/gistic/Jeremiah/software/seqan-library-2.2.0/include -I /xchip/gistic/Jeremiah/GIT/SeqLib/src -I/xchip/gistic/Jeremiah/GIT/SeqLib/htslib -I/xchip/gistic/Jeremiah/software/boost_1.61.0_gcc5.1 -I/xchip/gistic/Jeremiah/software/bamtools-2.4.0/include -I/xchip/gistic/Jeremiah/software/bzip2-1.0.6
INCLUDES=-I/xchip/gistic/Jeremiah/software/seqan-library-2.2.0/include -I /xchip/gistic/Jeremiah/GIT/SeqLib -I/xchip/gistic/Jeremiah/GIT/SeqLib/htslib -I/xchip/gistic/Jeremiah/software/boost_1.61.0_gcc5.1 -I/xchip/gistic/Jeremiah/software/bamtools-2.4.0/include -I/xchip/gistic/Jeremiah/software/bzip2-1.0.6
LIBS=/xchip/gistic/Jeremiah/software/bamtools-2.4.0/lib/libbamtools.a /xchip/gistic/Jeremiah/GIT/SeqLib/src/libseqlib.a /xchip/gistic/Jeremiah/GIT/SeqLib/htslib/libhts.a /xchip/gistic/Jeremiah/software/boost_1.61.0_gcc5.1/stage/lib/libboost_timer.a /xchip/gistic/Jeremiah/software/boost_1.61.0_gcc5.1/stage/lib/libboost_chrono.a /xchip/gistic/Jeremiah/software/boost_1.61.0_gcc5.1/stage/lib/libboost_system.a /xchip/gistic/Jeremiah/software/bzip2-1.0.6/libbz2.a
CFLAGS=-W -Wall -pedantic -std=c++14 -DSEQAN_HAS_ZLIB=1 -DSEQAN_HAS_BZIP2=1
CXXFLAGS=-W -Wall -pedantic -std=c++14 -DSEQAN_HAS_ZLIB=1 -DSEQAN_HAS_BZIP2=1 -O3 -DNDEBUG -DSEQAN_ENABLE_DEBUG=0 -DSEQAN_ENABLE_TESTING=0
binaries=benchmark
all: benchmark.o
g++ benchmark.o -g -o benchmark $(LIBS) -lrt -lpthread -lz -lm
g++ benchmark.o -o benchmark $(LIBS) -lrt -lpthread -lz -lm
benchmark.o: benchmark.cpp
g++ -g -c benchmark.cpp $(INCLUDES) $(CFLAGS)
g++ -c benchmark.cpp $(INCLUDES) $(CXXFLAGS)
.PHONY: clean
......
#define USE_BOOST
#define JUMPING_TEST 1
//#define READ_TEST 1
//#define JUMPING_TEST 1
#define READ_TEST 1
#include "SeqLib/SeqLibUtils.h"
......@@ -14,6 +14,7 @@
//#define RUN_SEQAN 1
//#define RUN_BAMTOOLS 1
#define RUN_SEQLIB 1
//#define RUN_HTSLIB 1
#ifdef RUN_SEQAN
#include <seqan/bam_io.h>
......@@ -26,7 +27,18 @@ using namespace seqan;
#include "SeqLib/BamWriter.h"
#endif
#define BAMTOOLS_GET_CORE 1
#ifdef RUN_HTSLIB
#include <iostream>
extern "C" {
#include "htslib/htslib/hts.h"
#include "htslib/htslib/sam.h"
#include "htslib/htslib/bgzf.h"
#include "htslib/htslib/kstring.h"
#include "htslib/htslib/faidx.h"
}
#endif
//#define BAMTOOLS_GET_CORE 1
#ifdef RUN_BAMTOOLS
#include "api/BamReader.h"
......@@ -37,7 +49,9 @@ int main()
const size_t limit = 5000000;
const size_t print_limit = 1000000;
#ifdef JUMPING_TEST
const size_t jump_limit = 1000;
#endif
size_t count = 0;
//std::string bam = "/xchip/gistic/Jeremiah/GIT/SeqLib/seq_test/test_data/small.bam";
......@@ -99,13 +113,18 @@ int main()
SeqLib::BamRecord rec;
SeqLib::BamRecordVector bav;
std::string dummy;
std::stringstream ss;
#ifdef READ_TEST
std::vector<std::string> sq;
while(r.GetNextRecord(rec) && count++ < limit) {
if (count % print_limit == 0)
std::cerr << "...at read " << SeqLib::AddCommas(count) << std::endl;
bav.push_back(rec);
//dummy = rec.Sequence();
//sq.push_back(rec.Sequence());
//sq.push_back(rec.Qname());
//sq.push_back(rec.CigarString());
}
#endif
......@@ -162,6 +181,7 @@ int main()
}
bool hasAlignments = false;
long l = 0;
for (int i = 0; i < jump_limit; ++i) {
int chr = rand() % 22;
int pos = rand() % 1000000 + 1000000;
......@@ -171,7 +191,8 @@ int main()
}
if (hasAlignments) {
readRecord(record, bamFileIn);
bav.push_back(record);
l += getAlignmentLengthInRef(record);
//bav.push_back(record);
} else {
std::cerr << "no alignments here " << std::endl;
}
......@@ -200,6 +221,47 @@ int main()
}
#endif
#endif
#ifdef RUN_HTSLIB
std::cerr << " **** RUNNING HTSLIB **** " << std::endl;
bam1_t* b = bam_init1();
htsFile* in = hts_open(bam.c_str(), "r");
bam_hdr_t* header;
if (in == NULL)
return -1;
if (b == NULL)
return -1;
header = sam_hdr_read(in);
int i = 0;
std::vector<bam1_t*> bav;
while (sam_read1(in, header, b) >= 0 && count++ < limit) {
if (count % print_limit == 0)
std::cerr << "...at read " << SeqLib::AddCommas(count) << std::endl;
bav.push_back(b);
/*
int i;
const bam1_core_t* c = &b->core;
uint8_t *s = bam1_seq(b), *t = bam1_qual(b);
fwrite(bam1_qname(b), c->l_qname - 1, sizeof(char), stdout);
fputc('\t', stdout);
for (i = 0; i < c->l_qseq; ++i)
fputc(bam_nt16_rev_table[bam1_seqi(s, i)], stdout);
fputc('\t', stdout);
if (t[0] == 0xff) {
fputs("*", stdout);
}
else {
for (i = 0; i < c->l_qseq; ++i)
fputc(t[i] + 33, stdout);
}
fputc('\n', stdout);
*/
}
printf("%d", i);
bam_hdr_destroy(header);
hts_close(in);
#endif
std::cerr << " Copied " << bav.size() << " records " << std::endl;
......
......@@ -703,7 +703,6 @@ infodir
docdir
oldincludedir
includedir
runstatedir
localstatedir
sharedstatedir
sysconfdir
......@@ -781,7 +780,6 @@ datadir='${datarootdir}'
sysconfdir='${prefix}/etc'
sharedstatedir='${prefix}/com'
localstatedir='${prefix}/var'
runstatedir='${localstatedir}/run'
includedir='${prefix}/include'
oldincludedir='/usr/include'
docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
......@@ -1034,15 +1032,6 @@ do
| -silent | --silent | --silen | --sile | --sil)
silent=yes ;;
-runstatedir | --runstatedir | --runstatedi | --runstated \
| --runstate | --runstat | --runsta | --runst | --runs \
| --run | --ru | --r)
ac_prev=runstatedir ;;
-runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \
| --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \
| --run=* | --ru=* | --r=*)
runstatedir=$ac_optarg ;;
-sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
ac_prev=sbindir ;;
-sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
......@@ -1180,7 +1169,7 @@ fi
for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \
datadir sysconfdir sharedstatedir localstatedir includedir \
oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
libdir localedir mandir runstatedir
libdir localedir mandir
do
eval ac_val=\$$ac_var
# Remove trailing slashes.
......@@ -1333,7 +1322,6 @@ Fine tuning of the installation directories:
--sysconfdir=DIR read-only single-machine data [PREFIX/etc]
--sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
--localstatedir=DIR modifiable single-machine data [PREFIX/var]
--runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run]
--libdir=DIR object code libraries [EPREFIX/lib]
--includedir=DIR C header files [PREFIX/include]
--oldincludedir=DIR C header files for non-gcc [/usr/include]
......
libseqlib (1.1.1+dfsg-6) UNRELEASED; urgency=medium
libseqlib (1.1.2+dfsg-1) UNRELEASED; urgency=medium
* Fix watch file
* Add symbols file
* New upstream version
* Point Vcs fields to salsa.debian.org
* Standards-Version: 4.1.4
* Bump ABI version due to upstream changes (see
https://github.com/walaj/SeqLib/issues/24 )
-- Andreas Tille <tille@debian.org> Wed, 23 May 2018 10:09:08 +0200
-- Andreas Tille <tille@debian.org> Wed, 23 May 2018 10:21:45 +0200
libseqlib (1.1.1+dfsg-5) unstable; urgency=medium
......
......@@ -13,12 +13,12 @@ Build-Depends: debhelper (>= 11~),
libbwa-dev (>= 0.7.15-3),
libjsoncpp-dev,
libssw-dev
Standards-Version: 4.1.3
Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/libseqlib.git
Vcs-Git: https://anonscm.debian.org/git/debian-med/libseqlib.git
Standards-Version: 4.1.4
Vcs-Browser: https://salsa.debian.org/med-team/libseqlib
Vcs-Git: https://salsa.debian.org/med-team/libseqlib.git
Homepage: https://github.com/walaj/SeqLib
Package: libseqlib0
Package: libseqlib1
Architecture: any
Section: libs
Depends: ${shlibs:Depends},
......@@ -39,7 +39,7 @@ Description: C++ htslib/bwa-mem/fermi interface for interrogating sequence data
Package: libseqlib-dev
Architecture: any
Section: libdevel
Depends: libseqlib0 (= ${binary:Version}),
Depends: libseqlib1 (= ${binary:Version}),
${shlibs:Depends},
${misc:Depends},
libhts-dev (>= 1.3.2-3),
......
......@@ -57,7 +57,7 @@ libseqlib.so.0 libseqlib0 #MINVER#
_ZN6SeqLib14FermiAssemblerC2Ev@Base 1.1.1+dfsg
_ZN6SeqLib14FermiAssemblerD1Ev@Base 1.1.1+dfsg
_ZN6SeqLib14FermiAssemblerD2Ev@Base 1.1.1+dfsg
_ZN6SeqLib15cigarFromStringERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@Base 1.1.1+dfsg
#MISSING: 1.1.2+dfsg# _ZN6SeqLib15cigarFromStringERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@Base 1.1.1+dfsg
_ZN6SeqLib17UnalignedSequenceD1Ev@Base 1.1.1+dfsg
_ZN6SeqLib17UnalignedSequenceD2Ev@Base 1.1.1+dfsg
_ZN6SeqLib23GenomicRegionCollectionINS_13GenomicRegionEE12allocate_grcEv@Base 1.1.1+dfsg
......@@ -74,20 +74,22 @@ libseqlib.so.0 libseqlib0 #MINVER#
_ZN6SeqLib23GenomicRegionCollectionINS_13GenomicRegionEED1Ev@Base 1.1.1+dfsg
_ZN6SeqLib23GenomicRegionCollectionINS_13GenomicRegionEED2Ev@Base 1.1.1+dfsg
_ZN6SeqLib3BFC11AddSequenceEPKcS2_S2_@Base 1.1.1+dfsg
_ZN6SeqLib3BFC11AddSequenceERKNS_9BamRecordE@Base 1.1.1+dfsg
_ZN6SeqLib3BFC12ErrorCorrectERKSt6vectorINS_9BamRecordESaIS2_EE@Base 1.1.1+dfsg
#MISSING: 1.1.2+dfsg# _ZN6SeqLib3BFC11AddSequenceERKNS_9BamRecordE@Base 1.1.1+dfsg
_ZN6SeqLib3BFC11GetSequenceERNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES7_@Base 1.1.2+dfsg
#MISSING: 1.1.2+dfsg# _ZN6SeqLib3BFC12ErrorCorrectERKSt6vectorINS_9BamRecordESaIS2_EE@Base 1.1.1+dfsg
_ZN6SeqLib3BFC12ErrorCorrectEv@Base 1.1.1+dfsg
_ZN6SeqLib3BFC12FilterUniqueEv@Base 1.1.1+dfsg
#MISSING: 1.1.2+dfsg# _ZN6SeqLib3BFC12FilterUniqueEv@Base 1.1.1+dfsg
_ZN6SeqLib3BFC13correct_readsEv@Base 1.1.1+dfsg
_ZN6SeqLib3BFC13learn_correctEv@Base 1.1.1+dfsg
_ZN6SeqLib3BFC14AllocateMemoryEm@Base 1.1.1+dfsg
_ZN6SeqLib3BFC15TrainAndCorrectERKSt6vectorINS_9BamRecordESaIS2_EE@Base 1.1.1+dfsg
_ZN6SeqLib3BFC15TrainCorrectionERKSt6vectorINS_9BamRecordESaIS2_EE@Base 1.1.1+dfsg
_ZN6SeqLib3BFC15TrainCorrectionERKSt6vectorIPcSaIS2_EE@Base 1.1.1+dfsg
_ZN6SeqLib3BFC17ErrorCorrectToTagERSt6vectorINS_9BamRecordESaIS2_EERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@Base 1.1.1+dfsg
_ZN6SeqLib3BFC19ErrorCorrectInPlaceERSt6vectorINS_9BamRecordESaIS2_EE@Base 1.1.1+dfsg
_ZN6SeqLib3BFC28allocate_sequences_from_charERKSt6vectorIPcSaIS2_EE@Base 1.1.1+dfsg
_ZN6SeqLib3BFC29allocate_sequences_from_readsERKSt6vectorINS_9BamRecordESaIS2_EE@Base 1.1.1+dfsg
#MISSING: 1.1.2+dfsg# _ZN6SeqLib3BFC14AllocateMemoryEm@Base 1.1.1+dfsg
_ZN6SeqLib3BFC15CorrectSequenceERNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS6_@Base 1.1.2+dfsg
#MISSING: 1.1.2+dfsg# _ZN6SeqLib3BFC15TrainAndCorrectERKSt6vectorINS_9BamRecordESaIS2_EE@Base 1.1.1+dfsg
#MISSING: 1.1.2+dfsg# _ZN6SeqLib3BFC15TrainCorrectionERKSt6vectorINS_9BamRecordESaIS2_EE@Base 1.1.1+dfsg
#MISSING: 1.1.2+dfsg# _ZN6SeqLib3BFC15TrainCorrectionERKSt6vectorIPcSaIS2_EE@Base 1.1.1+dfsg
#MISSING: 1.1.2+dfsg# _ZN6SeqLib3BFC17ErrorCorrectToTagERSt6vectorINS_9BamRecordESaIS2_EERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@Base 1.1.1+dfsg
#MISSING: 1.1.2+dfsg# _ZN6SeqLib3BFC19ErrorCorrectInPlaceERSt6vectorINS_9BamRecordESaIS2_EE@Base 1.1.1+dfsg
#MISSING: 1.1.2+dfsg# _ZN6SeqLib3BFC28allocate_sequences_from_charERKSt6vectorIPcSaIS2_EE@Base 1.1.1+dfsg
#MISSING: 1.1.2+dfsg# _ZN6SeqLib3BFC29allocate_sequences_from_readsERKSt6vectorINS_9BamRecordESaIS2_EE@Base 1.1.1+dfsg
_ZN6SeqLib3BFC5TrainEv@Base 1.1.1+dfsg
_ZN6SeqLib3BFC5clearEv@Base 1.1.1+dfsg
_ZN6SeqLib4_Bam20open_BAM_for_readingEv@Base 1.1.1+dfsg
......@@ -95,6 +97,8 @@ libseqlib.so.0 libseqlib0 #MINVER#
_ZN6SeqLib4_Bam9load_readERNS_9BamRecordE@Base 1.1.1+dfsg
_ZN6SeqLib4_BamD1Ev@Base 1.1.1+dfsg
_ZN6SeqLib4_BamD2Ev@Base 1.1.1+dfsg
_ZN6SeqLib5CigarC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@Base 1.1.2+dfsg
_ZN6SeqLib5CigarC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@Base 1.1.2+dfsg
_ZN6SeqLib6Filter10ReadFilter10addRegionsERKNS_23GenomicRegionCollectionINS_13GenomicRegionEEE@Base 1.1.1+dfsg
_ZN6SeqLib6Filter10ReadFilter10setRegionsERKNS_23GenomicRegionCollectionINS_13GenomicRegionEEE@Base 1.1.1+dfsg
_ZN6SeqLib6Filter10ReadFilter7AddRuleERKNS0_12AbstractRuleE@Base 1.1.1+dfsg
......@@ -210,7 +214,7 @@ libseqlib.so.0 libseqlib0 #MINVER#
_ZNK6SeqLib14FermiAssembler10GetContigsB5cxx11Ev@Base 1.1.1+dfsg
_ZNK6SeqLib14FermiAssembler12GetSequencesEv@Base 1.1.1+dfsg
_ZNK6SeqLib23GenomicRegionCollectionINS_13GenomicRegionEE13CountOverlapsERKS1_@Base 1.1.1+dfsg
_ZNK6SeqLib3BFC12GetSequencesERSt6vectorINS_17UnalignedSequenceESaIS2_EE@Base 1.1.1+dfsg
#MISSING: 1.1.2+dfsg# _ZNK6SeqLib3BFC12GetSequencesERSt6vectorINS_17UnalignedSequenceESaIS2_EE@Base 1.1.1+dfsg
_ZNK6SeqLib5CigareqERKS0_@Base 1.1.1+dfsg
_ZNK6SeqLib6Filter10ReadFilter23isReadOverlappingRegionERKNS_9BamRecordE@Base 1.1.1+dfsg
_ZNK6SeqLib6Filter11AhoCorasick9QueryTextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@Base 1.1.1+dfsg
......@@ -229,10 +233,12 @@ libseqlib.so.0 libseqlib0 #MINVER#
_ZNK6SeqLib9BamReader12PrintRegionsB5cxx11Ev@Base 1.1.1+dfsg
_ZNK6SeqLib9BamReader6HeaderEv@Base 1.1.1+dfsg
_ZNK6SeqLib9BamRecord11CountNBasesEv@Base 1.1.1+dfsg
_ZNK6SeqLib9BamRecord11PositionEndEv@Base 1.1.2+dfsg
_ZNK6SeqLib9BamRecord14GetSmartIntTagERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@Base 1.1.1+dfsg
_ZNK6SeqLib9BamRecord14ParseReadGroupB5cxx11Ev@Base 1.1.1+dfsg
_ZNK6SeqLib9BamRecord15AsGenomicRegionEv@Base 1.1.1+dfsg
_ZNK6SeqLib9BamRecord15PairOrientationEv@Base 1.1.1+dfsg
_ZNK6SeqLib9BamRecord15PositionEndMateEv@Base 1.1.2+dfsg
_ZNK6SeqLib9BamRecord15QualitySequenceB5cxx11Ev@Base 1.1.1+dfsg
_ZNK6SeqLib9BamRecord17GetSmartDoubleTagERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@Base 1.1.1+dfsg
_ZNK6SeqLib9BamRecord17GetSmartStringTagERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@Base 1.1.1+dfsg
......@@ -241,7 +247,9 @@ libseqlib.so.0 libseqlib0 #MINVER#
_ZNK6SeqLib9BamRecord22QualityTrimmedSequenceEiRiS1_@Base 1.1.1+dfsg
_ZNK6SeqLib9BamRecord26CountBWAChimericAlignmentsEv@Base 1.1.1+dfsg
_ZNK6SeqLib9BamRecord27CountBWASecondaryAlignmentsEv@Base 1.1.1+dfsg
_ZNK6SeqLib9BamRecord7GetZTagERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@Base 1.1.1+dfsg
_ZNK6SeqLib9BamRecord6GetTagERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS6_@Base 1.1.2+dfsg
#MISSING: 1.1.2+dfsg# _ZNK6SeqLib9BamRecord7GetZTagERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@Base 1.1.1+dfsg
_ZNK6SeqLib9BamRecord7GetZTagERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS6_@Base 1.1.2+dfsg
_ZNK6SeqLib9BamRecord8SequenceB5cxx11Ev@Base 1.1.1+dfsg
_ZNK6SeqLib9BamRecord9MeanPhredEv@Base 1.1.1+dfsg
_ZNK6SeqLib9BamWriter10BuildIndexEv@Base 1.1.1+dfsg
......@@ -347,7 +355,7 @@ libseqlib.so.0 libseqlib0 #MINVER#
_ZNSt6vectorIN6SeqLib14HeaderSequenceESaIS1_EE17_M_realloc_insertIJS1_EEEvN9__gnu_cxx17__normal_iteratorIPS1_S3_EEDpOT_@Base 1.1.1+dfsg
_ZNSt6vectorIN6SeqLib15PlottedReadLineESaIS1_EE17_M_realloc_insertIJRKS1_EEEvN9__gnu_cxx17__normal_iteratorIPS1_S3_EEDpOT_@Base 1.1.1+dfsg
_ZNSt6vectorIN6SeqLib17UnalignedSequenceESaIS1_EE17_M_realloc_insertIJRKS1_EEEvN9__gnu_cxx17__normal_iteratorIPS1_S3_EEDpOT_@Base 1.1.1+dfsg
_ZNSt6vectorIN6SeqLib17UnalignedSequenceESaIS1_EE17_M_realloc_insertIJS1_EEEvN9__gnu_cxx17__normal_iteratorIPS1_S3_EEDpOT_@Base 1.1.1+dfsg
#MISSING: 1.1.2+dfsg# _ZNSt6vectorIN6SeqLib17UnalignedSequenceESaIS1_EE17_M_realloc_insertIJS1_EEEvN9__gnu_cxx17__normal_iteratorIPS1_S3_EEDpOT_@Base 1.1.1+dfsg
_ZNSt6vectorIN6SeqLib6Filter10ReadFilterESaIS2_EE17_M_realloc_insertIJRKS2_EEEvN9__gnu_cxx17__normal_iteratorIPS2_S4_EEDpOT_@Base 1.1.1+dfsg
_ZNSt6vectorIN6SeqLib6Filter12AbstractRuleESaIS2_EE17_M_realloc_insertIJRKS2_EEEvN9__gnu_cxx17__normal_iteratorIPS2_S4_EEDpOT_@Base 1.1.1+dfsg
_ZNSt6vectorIN6SeqLib9BamRecordESaIS1_EE17_M_realloc_insertIJRKS1_EEEvN9__gnu_cxx17__normal_iteratorIPS1_S3_EEDpOT_@Base 1.1.1+dfsg
......
......@@ -6,7 +6,7 @@ Description: In Debian's fermi-lite 0.1-3 a name space pollution with
--- a/SeqLib/BFC.h
+++ b/SeqLib/BFC.h
@@ -129,7 +129,7 @@ namespace SeqLib {
@@ -117,7 +117,7 @@ namespace SeqLib {
float kcov;
// reads to correct in place
......@@ -36,16 +36,7 @@ Description: In Debian's fermi-lite 0.1-3 a name space pollution with
#include <stdexcept>
#include <algorithm>
@@ -43,7 +44,7 @@ namespace SeqLib {
return false;
m_seqs_size = n;
- m_seqs = (fseq1_t*)realloc(m_seqs, n * sizeof(fseq1_t));
+ m_seqs = (fml_seq1_t*)realloc(m_seqs, n * sizeof(fml_seq1_t));
if (!m_seqs)
return false;
@@ -78,12 +79,12 @@ namespace SeqLib {
@@ -42,12 +43,12 @@ namespace SeqLib {
// do the intial allocation
if (n_seqs == 0 && !m_seqs) {
m_seqs_size = 32;
......@@ -60,7 +51,7 @@ Description: In Debian's fermi-lite 0.1-3 a name space pollution with
}
if (!m_seqs)
@@ -96,7 +97,7 @@ namespace SeqLib {
@@ -60,7 +61,7 @@ namespace SeqLib {
if (!strlen(seq))
return false;
......@@ -69,38 +60,22 @@ Description: In Debian's fermi-lite 0.1-3 a name space pollution with
s = &m_seqs[n_seqs];
@@ -245,12 +246,12 @@ namespace SeqLib {
void BFC::allocate_sequences_from_char(const std::vector<char*>& v) {
@@ -108,13 +109,13 @@ namespace SeqLib {
assert(n_seqs == 0);
assert(m_names.size() == 0);
m_seqs_size = v.size();
- m_seqs = (fseq1_t*)malloc(v.size() * sizeof(fseq1_t));
+ m_seqs = (fml_seq1_t*)malloc(v.size() * sizeof(fml_seq1_t));
- m_seqs = (fseq1_t*)malloc(1 * sizeof(fseq1_t));
+ m_seqs = (fml_seq1_t*)malloc(1 * sizeof(fml_seq1_t));
n_seqs = 1;
uint64_t size = 0;
for (std::vector<char*>::const_iterator r = v.begin(); r != v.end(); ++r) {
//uint64_t size = 0;
//for (std::vector<char*>::const_iterator r = v.begin(); r != v.end(); ++r) {
// for (auto& r : v) {
- fseq1_t *s;
+ fml_seq1_t *s;
s = &m_seqs[n_seqs];
@@ -268,14 +269,14 @@ namespace SeqLib {
// alloc the memory
m_seqs_size = brv.size();
- m_seqs = (fseq1_t*)malloc(brv.size() * sizeof(fseq1_t));
+ m_seqs = (fml_seq1_t*)malloc(brv.size() * sizeof(fml_seq1_t));
uint64_t size = 0;
for (BamRecordVector::const_iterator r = brv.begin(); r != brv.end(); ++r) {
// for (auto& r : brv) {
m_names.push_back(strdup(r->Qname().c_str()));
- fseq1_t *s;
+ fml_seq1_t *s;
s = &m_seqs[n_seqs];
s = &m_seqs[0];
s->seq = strdup(str.c_str());
s->qual = q.empty() || q.length() != str.length() ? NULL : strdup(q.c_str());
--- a/src/FermiAssembler.cpp
+++ b/src/FermiAssembler.cpp
@@ -41,13 +41,13 @@ namespace SeqLib {
......@@ -133,14 +108,13 @@ Description: In Debian's fermi-lite 0.1-3 a name space pollution with
s = &m_seqs[n_seqs];
@@ -81,13 +81,13 @@ namespace SeqLib {
@@ -81,12 +81,12 @@ namespace SeqLib {
void FermiAssembler::AddReads(const BamRecordVector& brv) {
// alloc the memory
- m_seqs = (fseq1_t*)realloc(m_seqs, (n_seqs + brv.size()) * sizeof(fseq1_t));
+ m_seqs = (fml_seq1_t*)realloc(m_seqs, (n_seqs + brv.size()) * sizeof(fml_seq1_t));
int m = 0;
uint64_t size = 0;
for (BamRecordVector::const_iterator r = brv.begin(); r != brv.end(); ++r) {
m_names.push_back(r->Qname());
......@@ -149,7 +123,7 @@ Description: In Debian's fermi-lite 0.1-3 a name space pollution with
s = &m_seqs[n_seqs];
@@ -111,7 +111,7 @@ namespace SeqLib {
@@ -110,7 +110,7 @@ namespace SeqLib {
return; //already cleared
for (size_t i = 0; i < n_seqs; ++i) {
......@@ -158,7 +132,7 @@ Description: In Debian's fermi-lite 0.1-3 a name space pollution with
if (s->qual)
free(s->qual);
s->qual = NULL;
@@ -164,7 +164,7 @@ namespace SeqLib {
@@ -163,7 +163,7 @@ namespace SeqLib {
UnalignedSequenceVector r;
for (size_t i = 0; i < n_seqs; ++i) {
......
......@@ -22,7 +22,7 @@ Description: Create dynamic library
AM_MAINTAINER_MODE([disable])
##m4_include([m4/m4_ax_openmp.m4])
+LIB_VERSION=0:0
+LIB_VERSION=1:0
+
+AC_SUBST([VERSION])
+AC_SUBST([LIB_VERSION])
......
......@@ -20,7 +20,7 @@ Description: Make sure header files will be found in /usr/include/fml
- ../htslib/libhts.a \
-lboost_unit_test_framework -lboost_system -lboost_timer -lboost_chrono
seq_test_LDFLAGS = --coverage
seq_test_LDFLAGS = --coverage -pthread
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1,6 +1,6 @@
......
......@@ -12,7 +12,7 @@ seq_test_LDADD = \
../htslib/libhts.a \
-lboost_unit_test_framework -lboost_system -lboost_timer -lboost_chrono
seq_test_LDFLAGS = --coverage
seq_test_LDFLAGS = --coverage -pthread
seq_test_SOURCES = seq_test.cpp \
../src/BFC.cpp ../src/GenomicRegion.cpp \
......
This diff is collapsed.