Skip to content
Commits on Source (12)
# Ion Torrent Variant Caller
Ion Torrent Variant Caller (TVC) is a genetic variant caller for Ion Torrent sequencing platforms,
and is specially optimized to exploit the underlying flow signal information in the statistical model
to evaluate variants. Torrent Variant Caller is designed to call single-nucleotide polymorphisms (SNPs),
multi-nucleotide polymorphisms (MNPs), insertions, deletions, and block substitutions.
### Quick Start
```
git clone https://github.com/domibel/IonTorrent-VariantCaller.git
BASE=`pwd`
TVC_SOURCE_DIR=$BASE/IonTorrent-VariantCaller
*** adjust build and install directory ***
TVC_BUILD_DIR=$BASE/IonTorrent-VariantCaller-build
TVC_INSTALL_DIR=$BASE/IonTorrent-VariantCaller-install/usr/local
mkdir -p $TVC_BUILD_DIR
mkdir -p $TVC_INSTALL_DIR
cd $TVC_BUILD_DIR
wget --no-clobber updates.iontorrent.com/updates/software/external/armadillo-4.600.1.tar.gz
tar xvzf armadillo-4.600.1.tar.gz
cd armadillo-4.600.1/
sed -i 's:^// #define ARMA_USE_LAPACK$:#define ARMA_USE_LAPACK:g' include/armadillo_bits/config.hpp
sed -i 's:^// #define ARMA_USE_BLAS$:#define ARMA_USE_BLAS:g' include/armadillo_bits/config.hpp
cmake .
make -j4
cd $TVC_BUILD_DIR
wget --no-clobber updates.iontorrent.com/updates/software/external/bamtools-2.4.0.20150702+git15eadb925f.tar.gz
tar xvzf bamtools-2.4.0.20150702+git15eadb925f.tar.gz
mkdir bamtools-2.4.0.20150702+git15eadb925f-build
cd bamtools-2.4.0.20150702+git15eadb925f-build
cmake ../bamtools-2.4.0.20150702+git15eadb925f -DCMAKE_BUILD_TYPE:STRING=RelWithDebInfo
make -j4
cd $TVC_BUILD_DIR
wget --no-clobber --no-check-certificate https://github.com/samtools/htslib/archive/1.2.1.tar.gz -O htslib-1.2.1.tar.gz
tar xvzf htslib-1.2.1.tar.gz
ln -s htslib-1.2.1 htslib # for samtools
cd htslib-1.2.1
make -j4
cd $TVC_BUILD_DIR
wget --no-clobber --no-check-certificate https://github.com/samtools/samtools/archive/1.2.tar.gz -O samtools-1.2.tar.gz
tar xvzf samtools-1.2.tar.gz
cd samtools-1.2
make -j4
mkdir $TVC_INSTALL_DIR/bin
cp samtools $TVC_INSTALL_DIR/bin/
cd $TVC_BUILD_DIR
mkdir TVC-build
cd TVC-build
cmake $TVC_SOURCE_DIR -DCMAKE_INSTALL_PREFIX:PATH=$TVC_INSTALL_DIR -DCMAKE_BUILD_TYPE:STRING=RelWithDebInfo
make -j4 install
*** quick test ***
TVC_ROOT_DIR=$TVC_INSTALL_DIR
$TVC_ROOT_DIR/bin/variant_caller_pipeline.py \
--input-bam $TVC_ROOT_DIR/share/TVC/examples/example1/test.bam \
--reference-fasta $TVC_ROOT_DIR/share/TVC/examples/example1/reference.fasta \
--region-bed $TVC_ROOT_DIR/share/TVC/examples/example1/test_merged_plain.bed \
--primer-trim-bed $TVC_ROOT_DIR/share/TVC/examples/example1/test_unmerged_detail.bed
*** export PATH, following tools are required: samtools zip tvc ***
export PATH=$PATH:$TVC_ROOT_DIR/bin
```
### Get reference genome hg19 from Ion Torrent, Size: 868170684 (828M)
##### Download zip archive
```
$ wget http://ionupdates.com/reference/hg19.zip
```
##### Unpack zip archive
```
$ unzip hg19.zip
```
##### Create hg19.fasta.fai
```
$ samtools faidx hg19.fasta
```
##### Content
```
$ md5sum hg19.zip hg19.fasta hg19.fasta.fai
21afdf14b6b3734e0d25157e60bdfb24 hg19.zip
d6851f9f4537ff4e9beb5b7a08b89230 hg19.fasta
cdcd61262b6f4af8e8ac6c2be88a161a hg19.fasta.fai
$ ls -l
-rw-r--r-- 1 ionadmin ionadmin 3147289017 Mar 22 2011 hg19.fasta
-rw-rw-r-- 1 ionadmin ionadmin 788 Nov 18 16:18 hg19.fasta.fai
-rw-rw-r-- 1 ionadmin ionadmin 868170684 Jul 8 2014 hg19.zip
```
### Examples
#### 316 - H. sapiens - AmpliSeq BRCA1/BRCA2 Community Panel
##### https://ioncommunity.thermofisher.com/docs/DOC-7515
##### CN:TorrentServer/BRCA PL:IONTORRENT PU:PGM/316D/IonXpress_009
```
wget http://ion-torrent.s3.amazonaws.com/pgm/BRCArun94/BRCArun94IonXpress_009_rawlib.bam # 163535738 156M
wget http://ion-torrent.s3.amazonaws.com/pgm/BRCArun94/BRCArun94IonXpress_009_rawlib.bam.bai # 1675968 1.6M
wget http://ion-torrent.s3.amazonaws.com/pgm/BRCArun94/BRCArun94TSVC_variants_IonXpress_009.vcf # 85850 84K
variant_caller_pipeline.py \
--input-bam BRCArun94IonXpress_009_rawlib.bam \
--reference-fasta hg19.fasta \
--parameters-file=/usr/share/TVC/pluginMedia/parameter_sets/ampliseq_somatic_lowstringency_pgm_parameters.json \
--generate-gvcf=on \
--num-threads=4
```
#### Alternative: Get reference genome hg19 with TMAP index files from Ion Torrent, Size: 4613331358 (4.3G)
```
$ wget http://ionupdates.com/reference_downloads/hg19.zip
$ unzip hg19.zip
```
##### Content
```
$ md5sum *
d2dacce998d72687ca1f2dd28ce2280e CreateSequenceDictionary.log
d264f416c20f7d144d74924ab7c356a0 hg19.dict
d6851f9f4537ff4e9beb5b7a08b89230 hg19.fasta
cdcd61262b6f4af8e8ac6c2be88a161a hg19.fasta.fai
6903f05eef92e5e6f5a6bf890a01d6e6 hg19.fasta.tmap.anno
f9d7c7dedf322022a0393d99a629229b hg19.fasta.tmap.bwt
19c79b65021812df67904f7448367d5f hg19.fasta.tmap.pac
ebd201bec6945900ce9f26b62cf89915 hg19.fasta.tmap.sa
09b3113801b9c6ae69134f8712ef6a8c hg19.info.txt
3bc8b68ce604c77f2fecd14ab20a13bc hg19.md5sum.txt
3b725f35b395cf763bb7ac2110ab8d1a hg19.zip
d41d8cd98f00b204e9800998ecf8427e samtools.log
b9ba9df7fc3cd18883e7922bc6dd20d6 tmap.log
```
```
$ ls -l
total 13218824
-rwxr-xr-x 1 ionadmin ionadmin 624 May 6 2013 CreateSequenceDictionary.log
-rwxr-xr-x 1 ionadmin ionadmin 3174 May 6 2013 hg19.dict
-rwxr-xr-x 1 ionadmin ionadmin 3147289017 May 6 2013 hg19.fasta
-rwxr-xr-x 1 ionadmin ionadmin 788 May 6 2013 hg19.fasta.fai
-rwxr-xr-x 1 ionadmin ionadmin 3985 May 6 2013 hg19.fasta.tmap.anno
-rwxr-xr-x 1 ionadmin ionadmin 3453608024 May 6 2013 hg19.fasta.tmap.bwt
-rwxr-xr-x 1 ionadmin ionadmin 773923497 May 6 2013 hg19.fasta.tmap.pac
-rwxr-xr-x 1 ionadmin ionadmin 1547847008 May 6 2013 hg19.fasta.tmap.sa
-rwxr-xr-x 1 ionadmin ionadmin 92 May 6 2013 hg19.info.txt
-rwxr-xr-x 1 ionadmin ionadmin 642 May 6 2013 hg19.md5sum.txt
-rw-rw-r-- 1 ionadmin ionadmin 4613331358 Jul 8 2014 hg19.zip
-rwxr-xr-x 1 ionadmin ionadmin 0 May 6 2013 samtools.log
-rwxr-xr-x 1 ionadmin ionadmin 15170 May 6 2013 tmap.log
```
See also https://ioncommunity.thermofisher.com/community/products/software/torrent-variant-caller
Compilation instructions for TVC
# Steps 1 through 11 explain how to compile TVC.
# Step 12 onward explain how to deploy and run the compiled version on a single computer or cluster.
For more information see:
http://ioncommunity.lifetechnologies.com/community/products/torrent-variant-caller
# 1. Download source file
wget updates.iontorrent.com/tvc_standalone/tvc-5.0.3.tar.gz
# 2. Copy source file into build root directory
TVC_VERSION=tvc-5.0.3
BUILD_ROOT_DIR=`mktemp -d`
cp $TVC_VERSION.tar.gz $BUILD_ROOT_DIR
DISTRIBUTION_CODENAME=`lsb_release -is`_`lsb_release -rs`_`uname -m`
TVC_INSTALL_DIR=$BUILD_ROOT_DIR/$TVC_VERSION-$DISTRIBUTION_CODENAME-binary
mkdir -p $TVC_INSTALL_DIR/bin/
# 3. Install dependencies
# 3.1 RedHat/CentOS
yum -y install gcc-c++ cmake zlib-devel bzip2-devel bzip2 \
ncurses-devel python-simplejson atlas-devel blas-devel lapack-devel redhat-lsb-core boost-devel \
# perl perl-Data-Dumper # with version 7
# 3.2 Debian/Ubuntu
sudo aptitude install g++ cmake zlib1g-dev libbz2-dev libncurses-dev libboost-math-dev \
libatlas-dev liblapack-dev
# 3.3 cmake
# Required is cmake (>=2.8.0), you can check the installed version with e.g.:
# $ cmake -version
# cmake version 2.8.0
# Installation of a newer cmake version is only required if the installed version is older than 2.8.0
# To delete the old cmake package:
# Redhat/CentOS:
yum -y erase cmake
# Debian/Ubuntu:
aptitude purge cmake
cd ~
wget http://www.cmake.org/files/v2.8/cmake-2.8.11.2.tar.gz
tar xvzf cmake-2.8.11.2.tar.gz
cd cmake-2.8.11.2
./configure
make -j5
make install
# 4. build armadillo
cd $BUILD_ROOT_DIR
wget http://updates.iontorrent.com/updates/software/external/armadillo-4.600.1.tar.gz
tar xvzf armadillo-4.600.1.tar.gz
cd armadillo-4.600.1/
sed -i 's:^// #define ARMA_USE_LAPACK$:#define ARMA_USE_LAPACK:g' include/armadillo_bits/config.hpp
sed -i 's:^// #define ARMA_USE_BLAS$:#define ARMA_USE_BLAS:g' include/armadillo_bits/config.hpp
cmake .
make -j4
# 5. build bamtools
cd $BUILD_ROOT_DIR
wget updates.iontorrent.com/updates/software/external/bamtools-2.4.0.20150702+git15eadb925f.tar.gz
tar xvzf bamtools-2.4.0.20150702+git15eadb925f.tar.gz
mkdir bamtools-2.4.0.20150702+git15eadb925f-build
cd bamtools-2.4.0.20150702+git15eadb925f-build
cmake ../bamtools-2.4.0.20150702+git15eadb925f -DCMAKE_BUILD_TYPE:STRING=RelWithDebInfo
make -j4
# 7. build htslib
cd $BUILD_ROOT_DIR
wget --no-check-certificate https://github.com/samtools/htslib/archive/1.2.1.tar.gz -O htslib-1.2.1.tar.gz
tar xvzf htslib-1.2.1.tar.gz
ln -s htslib-1.2.1 htslib # for samtools
cd htslib-1.2.1
make -j4
# 8. build samtools
cd $BUILD_ROOT_DIR
wget --no-check-certificate https://github.com/samtools/samtools/archive/1.2.tar.gz -O samtools-1.2.tar.gz
tar xvzf samtools-1.2.tar.gz
cd samtools-1.2
make -j4
cp samtools $TVC_INSTALL_DIR/bin/
# 10. build TVC
cd $BUILD_ROOT_DIR
tar xvzf $TVC_VERSION.tar.gz
TVC_SOURCE_DIR=$BUILD_ROOT_DIR/$TVC_VERSION
mkdir $TVC_VERSION-build
cd $TVC_VERSION-build
cmake $TVC_SOURCE_DIR -DCMAKE_INSTALL_PREFIX:PATH=$TVC_INSTALL_DIR -DCMAKE_BUILD_TYPE:STRING=RelWithDebInfo
make -j4 install
# 11. create binary package
tar cvzf $TVC_VERSION-$DISTRIBUTION_CODENAME-binary.tar.gz -C $BUILD_ROOT_DIR $TVC_VERSION-$DISTRIBUTION_CODENAME-binary
######################################################################################
# 12.1 Either use the TVC version from the (temporary) TVC_INSTALL_DIR directory
TVC_ROOT_DIR=$TVC_INSTALL_DIR
# 12.2 or use the TVC binary package
tar xvzf $TVC_VERSION-$DISTRIBUTION_CODENAME-binary.tar.gz
TVC_ROOT_DIR=`pwd`/$TVC_VERSION-$DISTRIBUTION_CODENAME-binary
# 13. export PATH, following tools are required: samtools zip tvc
export PATH=$PATH:$TVC_ROOT_DIR/bin
# 14. adjust some file paths and invoke TVC
# Required are 1 reference, 2 bed files, 1 aligned bam file, and 1 tvc parameter file
# Example 1:
$TVC_ROOT_DIR/bin/variant_caller_pipeline.py \
--input-bam $TVC_ROOT_DIR/share/TVC/examples/example1/test.bam \
--reference-fasta $TVC_ROOT_DIR/share/TVC/examples/example1/reference.fasta \
--region-bed $TVC_ROOT_DIR/share/TVC/examples/example1/test_merged_plain.bed \
--primer-trim-bed $TVC_ROOT_DIR/share/TVC/examples/example1/test_unmerged_detail.bed
# Example 1 with specified parameter file and output directory:
$TVC_ROOT_DIR/bin/variant_caller_pipeline.py \
--input-bam $TVC_ROOT_DIR/share/TVC/examples/example1/test.bam \
--reference-fasta $TVC_ROOT_DIR/share/TVC/examples/example1/reference.fasta \
--region-bed $TVC_ROOT_DIR/share/TVC/examples/example1/test_merged_plain.bed \
--primer-trim-bed $TVC_ROOT_DIR/share/TVC/examples/example1/test_unmerged_detail.bed \
--parameters-file $TVC_ROOT_DIR/share/TVC/pluginMedia/parameter_sets/ampliseq_somatic_lowstringency_pgm_parameters.json \
--output-dir /tmp/tvc_example1
# Example 2 (not included yet) :
# Required are 1 reference, 2 bed files, 1 aligned bam file, and 1 tvc parameter file
REF=/mnt/TS/source/hg19/hg19.fasta
BED_UNMERGED_DETAIL=/mnt/TS/source/tvc_test_GB1-118-CSI/unmerged/detail/Exome_draft_Designed_20130531.bed
BED_MERGED_PLAIN=/mnt/TS/source/tvc_test_GB1-118-CSI/merged/plain/Exome_draft_Designed_20130531.bed
BAM=/mnt/TS/source/tvc_test_GB1-118-CSI/IonXpress_045_rawlib.bam
TVC_PARAM=$TVC_ROOT_DIR/share/TVC/pluginMedia/configs/germline_low_stringency_proton.json
TMP_DIR=`mktemp -d`
$TVC_ROOT_DIR/bin/variant_caller_pipeline.py \
--parameters-file $TVC_PARAM \
--input-bam $BAM \
--reference-fasta $REF \
--region-bed $BED_MERGED_PLAIN \
--primer-trim-bed $BED_UNMERGED_DETAIL \
--postprocessed-bam $TMP_DIR/trimmed.bam \
--output-dir $TMP_DIR
# Example 3 (hotspot) (not included yet):
REF=/mnt/TS/source/hg19/hg19.fasta
BED_UNMERGED_DETAIL=/mnt/TS/source/tvc_test_Z06-506-CCP/unmerged_detail_CCP.20131001.designed.bed
BED_MERGED_PLAIN=/mnt/TS/source/tvc_test_Z06-506-CCP/merged_plain_CCP.20131001.designed.bed
BAM=/mnt/TS/source/tvc_test_Z06-506-CCP/IonXpress_019_rawlib.bam
TVC_PARAM=/mnt/TS/source/tvc_test_Z06-506-CCP/local_parameters.json
HOTSPOT=/mnt/TS/source/tvc_test_Z06-506-CCP/hotspot.vcf
TMP_DIR=`mktemp -d`
time $TVC_ROOT_DIR/bin/variant_caller_pipeline.py \
--parameters-file $TVC_PARAM \
--input-bam $BAM \
--reference-fasta $REF \
--region-bed $BED_MERGED_PLAIN \
--primer-trim-bed $BED_UNMERGED_DETAIL \
--hotspot-vcf $HOTSPOT \
--postprocessed-bam $TMP_DIR/trimmed.bam \
--output-dir $TMP_DIR
tvc (5.0.3+dfsg-3) UNRELEASED; urgency=medium
tvc (5.0.3+git20151221.80e144e+dfsg-1) unstable; urgency=medium
[ Steffen Moeller ]
* Added notion of lacking entries in research software
registries in debian/upstream/metadata
-- Steffen Moeller <moeller@debian.org> Thu, 14 Sep 2017 09:39:02 +0200
[ Andreas Tille ]
* Add watch file using Git mode
* Remove code copy of libsmithwaterman, mention all other files that
were previously removed in d/copyright
* Use Debian packaged libsmithwaterman
* hardening=+all
* debhelper 11
* Point Vcs fields to salsa.debian.org
* Standards-Version: 4.2.1
* Ignore script-with-language-extension as per consensus on Debian Med
list
-- Andreas Tille <tille@debian.org> Thu, 27 Sep 2018 10:09:12 +0200
tvc (5.0.3+dfsg-2) experimental; urgency=medium
......
......@@ -4,15 +4,16 @@ Uploaders: Dominique Belhachemi <domibel@debian.org>,
Andreas Tille <tille@debian.org>
Section: science
Priority: optional
Build-Depends: debhelper (>= 10),
Build-Depends: debhelper (>= 11~),
cmake,
libbamtools-dev,
libjsoncpp-dev,
libarmadillo-dev,
libboost-math-dev
Standards-Version: 3.9.8
Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/tvc.git
Vcs-Git: https://anonscm.debian.org/git/debian-med/tvc.git
libboost-math-dev,
libsmithwaterman-dev
Standards-Version: 4.2.1
Vcs-Browser: https://salsa.debian.org/med-team/tvc
Vcs-Git: https://salsa.debian.org/med-team/tvc.git
Homepage: http://ioncommunity.thermofisher.com
Package: tvc
......
Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: tvc
Source: https://github.com/iontorrent
http://updates.iontorrent.com/tvc_standalone/ (Dominique Belhachemi <domibel@debian.org>, Fri, 13 Jan 2017 10:11:18 -0500)
Source: https://github.com/domibel/IonTorrent-VariantCaller
Files-Excluded: */smithwaterman
*/jsoncpp-src*
*/.gitignore
*/tabixpp/perl
*/tabixpp/python
*/tabixpp/Makefile
*/tabixpp/*.java
*/tabixpp/example*
*/tabixpp/*.py
*/tabixpp/*.tex
*/pluginMedia/configs
*/pluginMedia/hotspots
*/parameter_sets/4_*
*/parameter_sets/builtin_parameter_sets.json
*/parameter_sets/parameter_sets.json
*/parameter_sets/*.py
Files: *
Copyright: 2010-2014 Ion Torrent Systems, Inc.
License: GPL-2+
Files: external/freebayes/*
Copyright: 2010 Erik Garrison, Gabor Marth
License: MIT
Files: external/vcflib/*
Copyright: 2008-2009 Genome Research Ltd (GRL)
2008-2010 Broad Institute / Massachusetts Institute of Technology
2010-2012 Erik Garrison
License: MIT
Files: debian/*
Copyright: 2015 Dominique Belhachemi <domibel@debian.org>
2017-2018 Andreas Tille <tille@debian.org>
License: GPL-2+
License: GPL-2+
This package is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
......@@ -16,39 +48,9 @@ License: GPL-2+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
.
On Debian systems, the complete text of the GNU General
Public License version 2 can be found in "/usr/share/common-licenses/GPL-2".
Files: external/freebayes/*
Copyright: 2010 Erik Garrison, Gabor Marth
License: MIT
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
.
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
Files: external/vcflib/*
Copyright: 2008-2009 Genome Research Ltd (GRL)
2008-2010 Broad Institute / Massachusetts Institute of Technology
2010-2012 Erik Garrison
License: MIT
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
......@@ -67,23 +69,3 @@ License: MIT
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
Files: debian/*
Copyright: 2015 Dominique Belhachemi <domibel@debian.org>
License: GPL-2+
This package is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
.
This package is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
.
On Debian systems, the complete text of the GNU General
Public License version 2 can be found in "/usr/share/common-licenses/GPL-2".
# see https://lists.debian.org/debian-med/2018/06/msg00043.html
tvc: script-with-language-extension usr/bin/variant_caller_pipeline.py
......@@ -3,3 +3,5 @@ bamtools.patch
help.patch
hurd.patch
sse.patch
use_debian_packaged_smithwaterman.patch
# use_debian_packaged_vcflib.patch
Description: Use Debian packaged libsmithwaterman
Author: Andreas Tille <tille@debian.org>
Last-Update: Thu, 27 Sep 2018 08:59:37 +0200
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -85,10 +85,6 @@ set(tvcSRCS
${ION_VCFLIB_DIR}/tabixpp/tabix.cpp
${ION_VCFLIB_DIR}/tabixpp/index.c
${ION_VCFLIB_DIR}/tabixpp/bgzf.c
- ${ION_VCFLIB_DIR}/smithwaterman/LeftAlign.cpp
- ${ION_VCFLIB_DIR}/smithwaterman/Repeats.cpp
- ${ION_VCFLIB_DIR}/smithwaterman/IndelAllele.cpp
- ${ION_VCFLIB_DIR}/smithwaterman/SmithWatermanGotoh.cpp
${ION_FREEBAYES_DIR}/src/AlleleParser.cpp
BaseCaller/PIDloop.cpp
@@ -108,7 +104,7 @@ if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL
endif()
add_executable(tvc ${tvcSRCS})
-target_link_libraries(tvc bamtools z pthread blas lapack armadillo jsoncpp)
+target_link_libraries(tvc bamtools z pthread blas lapack armadillo jsoncpp disorder smithwaterman)
add_executable(tvcassembly
@@ -135,14 +131,10 @@ add_executable(tvcutils
${ION_VCFLIB_DIR}/tabixpp/tabix.cpp
${ION_VCFLIB_DIR}/tabixpp/index.c
${ION_VCFLIB_DIR}/tabixpp/bgzf.c
- ${ION_VCFLIB_DIR}/smithwaterman/LeftAlign.cpp
- ${ION_VCFLIB_DIR}/smithwaterman/Repeats.cpp
- ${ION_VCFLIB_DIR}/smithwaterman/IndelAllele.cpp
- ${ION_VCFLIB_DIR}/smithwaterman/SmithWatermanGotoh.cpp
${PROJECT_BINARY_DIR}/IonVersion.cpp
)
-target_link_libraries(tvcutils bamtools z jsoncpp)
+target_link_libraries(tvcutils bamtools z jsoncpp disorder smithwaterman)
install(TARGETS tvc DESTINATION bin)
install(TARGETS tvcassembly DESTINATION bin)
......@@ -6,7 +6,7 @@ DPKG_EXPORT_BUILDFLAGS = 1
include /usr/share/dpkg/default.mk
# see FEATURE AREAS in dpkg-buildflags(1)
#export DEB_BUILD_MAINT_OPTIONS = hardening=+all
export DEB_BUILD_MAINT_OPTIONS = hardening=+all
# see ENVIRONMENT in dpkg-buildflags(1)
# package maintainers to append CFLAGS
......
version=4
opts="mode=git,pretty=5.0.3+git%cd.%h,repacksuffix=+dfsg,dversionmangle=s/\+dfsg//g,repack,compression=xz" \
https://github.com/domibel/IonTorrent-VariantCaller.git HEAD
# This is not really maintained legacy code but useful anyway
# It is not to be expected that upstream will set any tags
#pragma once
#include <iostream>
#include <algorithm>
#include <memory>
//#include "Alignment.h"
#include "Mosaik.h"
//#include "HashRegion.h"
#include <string.h>
#include <stdio.h>
#include <sstream>
#include <string>
using namespace std;
#define MOSAIK_NUM_NUCLEOTIDES 26
#define GAP '-'
typedef unsigned char DirectionType;
typedef unsigned char PositionType;
struct ElementInfo {
unsigned int Direction : 2;
unsigned int mSizeOfVerticalGaps : 15;
unsigned int mSizeOfHorizontalGaps : 15;
};
class CBandedSmithWaterman {
public:
// constructor
CBandedSmithWaterman(float matchScore, float mismatchScore, float gapOpenPenalty, float gapExtendPenalty, unsigned int bandWidth);
// destructor
~CBandedSmithWaterman(void);
// aligns the query sequence to the anchor using the Smith Waterman Gotoh algorithm
void Align(unsigned int& referenceAl, string& stringAl, const string& s1, const string& s2, pair< pair<unsigned int, unsigned int>, pair<unsigned int, unsigned int> >& hr);
// enables homo-polymer scoring
void EnableHomoPolymerGapPenalty(float hpGapOpenPenalty);
private:
// calculates the score during the forward algorithm
float CalculateScore(const string& s1, const string& s2, const unsigned int rowNum, const unsigned int columnNum, float& currentQueryGapScore, const unsigned int rowOffset, const unsigned int columnOffset);
// creates a simple scoring matrix to align the nucleotides and the ambiguity code N
void CreateScoringMatrix(void);
// corrects the homopolymer gap order for forward alignments
void CorrectHomopolymerGapOrder(const unsigned int numBases, const unsigned int numMismatches);
// returns the maximum floating point number
static inline float MaxFloats(const float& a, const float& b, const float& c);
// reinitializes the matrices
void ReinitializeMatrices(const PositionType& positionType, const unsigned int& s1Length, const unsigned int& s2Length, const pair< pair<unsigned int, unsigned int>, pair<unsigned int, unsigned int> > hr);
// performs the backtrace algorithm
void Traceback(unsigned int& referenceAl, string& stringAl, const string& s1, const string& s2, unsigned int bestRow, unsigned int bestColumn, const unsigned int rowOffset, const unsigned int columnOffset);
// updates the best score during the forward algorithm
inline void UpdateBestScore(unsigned int& bestRow, unsigned int& bestColumn, float& bestScore, const unsigned int rowNum, const unsigned int columnNum, const float score);
// our simple scoring matrix
float mScoringMatrix[MOSAIK_NUM_NUCLEOTIDES][MOSAIK_NUM_NUCLEOTIDES];
// keep track of maximum initialized sizes
unsigned int mCurrentMatrixSize;
unsigned int mCurrentAnchorSize;
unsigned int mCurrentAQSumSize;
unsigned int mBandwidth;
// define our backtrace directions
const static DirectionType Directions_STOP;
const static DirectionType Directions_LEFT;
const static DirectionType Directions_DIAGONAL;
const static DirectionType Directions_UP;
// store the backtrace pointers
ElementInfo* mPointers;
// define our position types
const static PositionType Position_REF_AND_QUERY_ZERO;
const static PositionType Position_REF_ZERO;
const static PositionType Position_QUERY_ZERO;
const static PositionType Position_REF_AND_QUERO_NONZERO;
// define scoring constants
const float mMatchScore;
const float mMismatchScore;
const float mGapOpenPenalty;
const float mGapExtendPenalty;
// score if xi aligns to a gap after yi
float* mAnchorGapScores;
// best score of alignment x1...xi to y1...yi
float* mBestScores;
// our reversed alignment
char* mReversedAnchor;
char* mReversedQuery;
// define static constants
static const float FLOAT_NEGATIVE_INFINITY;
// toggles the use of the homo-polymer gap open penalty
bool mUseHomoPolymerGapOpenPenalty;
float mHomoPolymerGapOpenPenalty;
};
// returns the maximum floating point number
inline float CBandedSmithWaterman::MaxFloats(const float& a, const float& b, const float& c) {
float max = 0.0f;
if(a > max) max = a;
if(b > max) max = b;
if(c > max) max = c;
return max;
}
// updates the best score during the forward algorithm
inline void CBandedSmithWaterman::UpdateBestScore(unsigned int& bestRow, unsigned int& bestColumn, float& bestScore, const unsigned int rowNum, const unsigned int columnNum, const float score) {
//const unsigned int row = rowNum + rowOffset;
//const unsigned int column = columnOffset - rowNum + columnNum;
if(score > bestScore) {
bestRow = rowNum;
bestColumn = columnNum;
bestScore = score;
}
}
#include "IndelAllele.h"
using namespace std;
bool IndelAllele::homopolymer(void) {
string::iterator s = sequence.begin();
char c = *s++;
while (s != sequence.end()) {
if (c != *s++) return false;
}
return true;
}
int IndelAllele::readLength(void) {
if (insertion) {
return length;
} else {
return 0;
}
}
int IndelAllele::referenceLength(void) {
if (insertion) {
return 0;
} else {
return length;
}
}
bool homopolymer(string sequence) {
string::iterator s = sequence.begin();
char c = *s++;
while (s != sequence.end()) {
if (c != *s++) return false;
}
return true;
}
ostream& operator<<(ostream& out, const IndelAllele& indel) {
string t = indel.insertion ? "i" : "d";
out << t << ":" << indel.position << ":" << indel.readPosition << ":" << indel.length << ":" << indel.sequence;
return out;
}
bool operator==(const IndelAllele& a, const IndelAllele& b) {
return (a.insertion == b.insertion
&& a.length == b.length
&& a.position == b.position
&& a.sequence == b.sequence);
}
bool operator!=(const IndelAllele& a, const IndelAllele& b) {
return !(a==b);
}
bool operator<(const IndelAllele& a, const IndelAllele& b) {
ostringstream as, bs;
as << a;
bs << b;
return as.str() < bs.str();
}
#ifndef __INDEL_ALLELE_H
#define __INDEL_ALLELE_H
#include <string>
#include <iostream>
#include <sstream>
using namespace std;
class IndelAllele {
friend ostream& operator<<(ostream&, const IndelAllele&);
friend bool operator==(const IndelAllele&, const IndelAllele&);
friend bool operator!=(const IndelAllele&, const IndelAllele&);
friend bool operator<(const IndelAllele&, const IndelAllele&);
public:
bool insertion;
int length;
int referenceLength(void);
int readLength(void);
int position;
int readPosition;
string sequence;
bool homopolymer(void);
IndelAllele(bool i, int l, int p, int rp, string s)
: insertion(i), length(l), position(p), readPosition(rp), sequence(s)
{ }
};
bool homopolymer(string sequence);
ostream& operator<<(ostream& out, const IndelAllele& indel);
bool operator==(const IndelAllele& a, const IndelAllele& b);
bool operator!=(const IndelAllele& a, const IndelAllele& b);
bool operator<(const IndelAllele& a, const IndelAllele& b);
#endif
This diff is collapsed.
#ifndef __LEFTALIGN_H
#define __LEFTALIGN_H
#include <algorithm>
#include <map>
#include <vector>
#include <list>
#include <utility>
#include <sstream>
#include "IndelAllele.h"
#include "convert.h"
#ifdef VERBOSE_DEBUG
#define LEFTALIGN_DEBUG(msg) \
if (debug) { cerr << msg; }
#else
#define LEFTALIGN_DEBUG(msg)
#endif
using namespace std;
bool leftAlign(string& alternateQuery, string& cigar, string& referenceSequence, int& offset, bool debug = false);
bool stablyLeftAlign(string alternateQuery, string& cigar, string referenceSequence, int& offset, int maxiterations = 20, bool debug = false);
int countMismatches(string& alternateQuery, string& cigar, string& referenceSequence);
string mergeCIGAR(const string& c1, const string& c2);
vector<pair<int, string> > splitCIGAR(const string& cigarStr);
string joinCIGAR(const vector<pair<int, string> >& cigar);
#endif
# =========================================
# MOSAIK Banded Smith-Waterman Makefile
# (c) 2009 Michael Stromberg & Wan-Ping Lee
# =========================================
# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= smithwaterman.cpp BandedSmithWaterman.cpp SmithWatermanGotoh.cpp Repeats.cpp LeftAlign.cpp IndelAllele.cpp
OBJECTS= $(SOURCES:.cpp=.o)
# ----------------
# compiler options
# ----------------
CFLAGS=-Wall -O3
LDFLAGS=-Wl,-s
#CFLAGS=-g
PROGRAM=smithwaterman
LIBS=
all: $(PROGRAM)
.PHONY: all
$(PROGRAM): $(OBJECTS)
@echo " * linking $(PROGRAM)"
@$(CXX) $(LDFLAGS) $(CFLAGS) -o $@ $^ $(LIBS)
.PHONY: clean
clean:
@echo "Cleaning up."
@rm -f *.o $(PROGRAM) *~
#pragma once
#ifndef WIN32
//#include "SafeFunctions.h"
#endif
// ==============
// MOSAIK version
// ==============
#define MOSAIK_VERSION_DATE "2009-02-11"
// adopt a major.minor.build version number [1].[1].[3]
const unsigned char MOSAIK_MAJOR_VERSION = 0;
const unsigned char MOSAIK_MINOR_VERSION = 9;
const unsigned short MOSAIK_BUILD_VERSION = 899;
// ================================
// Platform specific variable sizes
// ================================
// Windows Vista 32-bit
// Fedora Core 7 32-bit
// Fedora Core 6 64-bit
// Itanium2 64-bit
#define SIZEOF_CHAR 1
#define SIZEOF_WCHAR 2
#define SIZEOF_SHORT 2
#define SIZEOF_INT 4
#define SIZEOF_FLOAT 4
#define SIZEOF_DOUBLE 8
#define SIZEOF_UINT64 8
#define MOSAIK_LITTLE_ENDIAN 1
#ifdef WIN32
typedef signed long long int64_t;
typedef unsigned long long uint64_t;
#endif
#define NEGATIVE_ONE_INT 0xffffffff
#define NEGATIVE_TWO_INT 0xfffffffe
#define NEGATIVE_THREE_INT 0xfffffffd
#define NEGATIVE_FOUR_INT 0xfffffffc
#define MAX_SHORT 0xffff
// ==========================
// Platform specific file I/O
// ==========================
#ifdef WIN32
const char OS_DIRECTORY_SEPARATOR = '\\';
#else
const char OS_DIRECTORY_SEPARATOR = '/';
#endif
#define DIRECTORY_NAME_LENGTH 255
// ====================================
// Enable unit test diagnostic messages
// ====================================
#ifdef UNITTEST
#define SILENTMODE if(0)
#else
#define SILENTMODE
#endif
// =================
// Aligner constants
// =================
const double HASH_REGION_EXTENSION_PERCENT = 0.025;
const unsigned char REFERENCE_SEQUENCE_QUALITY = 40;
#include "Repeats.h"
map<string, int> repeatCounts(long int position, const string& sequence, int maxsize) {
map<string, int> counts;
for (int i = 1; i <= maxsize; ++i) {
// subseq here i bases
string seq = sequence.substr(position, i);
// go left.
int j = position - i;
int leftsteps = 0;
while (j >= 0 && seq == sequence.substr(j, i)) {
j -= i;
++leftsteps;
}
// go right.
j = position;
int rightsteps = 0;
while (j + i <= (int)sequence.size() && seq == sequence.substr(j, i)) {
j += i;
++rightsteps;
}
// if we went left and right a non-zero number of times,
if (leftsteps + rightsteps > 1) {
counts[seq] = leftsteps + rightsteps;
}
}
// filter out redundant repeat information
if (counts.size() > 1) {
map<string, int> filteredcounts;
map<string, int>::iterator c = counts.begin();
string prev = c->first;
filteredcounts[prev] = c->second; // shortest sequence
++c;
for (; c != counts.end(); ++c) {
int i = 0;
string seq = c->first;
while (i + prev.length() <= seq.length() && seq.substr(i, prev.length()) == prev) {
i += prev.length();
}
if (i < (int)seq.length()) {
filteredcounts[seq] = c->second;
prev = seq;
}
}
return filteredcounts;
} else {
return counts;
}
}
bool isRepeatUnit(const string& seq, const string& unit) {
if (seq.size() % unit.size() != 0) {
return false;
} else {
int maxrepeats = seq.size() / unit.size();
for (int i = 0; i < maxrepeats; ++i) {
if (seq.substr(i * unit.size(), unit.size()) != unit) {
return false;
}
}
return true;
}
}