Skip to content
Commits on Source (4)
[submodule "htslib"]
path = htslib
url = https://github.com/lomereiter/htslib.git
[submodule "undeaD"]
path = undeaD
url = https://github.com/dlang/undeaD
[submodule "BioD"]
path = BioD
url = https://github.com/biod/BioD.git
[submodule "lz4"]
path = lz4
url = https://github.com/lz4/lz4.git
[submodule "BioD"]
path = BioD
url = https://github.com/biod/BioD.git
[submodule "undeaD"]
path = undeaD
url = https://github.com/biod/undeaD.git
......@@ -8,15 +8,8 @@ matrix:
include:
- os: linux
compiler: gcc
addons:
apt:
packages:
# Our dev environment is a more recent GNU C++
# note that Debian liblz4-dev no longer supports LZ4 frames
# - liblz4-dev
- shunit2
- os: osx
compiler: clang
script:
- make
# - make test - disable tests because shunit2 we use is older
\ No newline at end of file
- make check
......@@ -2,10 +2,27 @@
## Sambamba dependencies
* D compiler
* gcc tool chain
* BioD
* htslib
* undeaD
* D compiler (ldc)
* BioD (git submodule)
* gcc tool chain (for htslib and lz4)
* htslib (git submodule)
* undeaD (git submodule)
* libz
* liblz4
## Install Sambamba from source
After checking out the source from GitHub with its git submodules, it is
possible to install the build tools with GNU Guix
guix package -i gcc gdb bash ld-wrapper ldc which python2 git
Even better, with Guix, you can create a light-weight container in the source tree
and run our development setup
guix environment -C guix --ad-hoc gcc gdb bash ld-wrapper ldc which python git
make clean
make -j 4
make check
this way all dependencies are isolated.
# This is a minimalistic make file to build sambamba with ldc2 as per instructions on
# https://github.com/biod/sambamba#compiling-sambamba
#
# Targets (64-bit):
#
# Linux
# OSX
#
# Typical usage:
#
# make LIBRARY_PATH=~/opt/ldc2-1.7.0-linux-x86_64/lib debug|profile|release|static
# make LIBRARY_PATH=~/opt/ldc2-$ver-linux-x86_64/lib debug|profile|release|static
#
# Static release with optimization (for releases):
#
# make LIBRARY_PATH=~/opt/ldc2-1.7.0-linux-x86_64/lib pgo-static
# make LIBRARY_PATH=~/opt/ldc2-$ver-linux-x86_64/lib pgo-static
#
# Debug version
#
# make LIBRARY_PATH=~/opt/ldc2-1.7.0-linux-x86_64/lib debug
# make LIBRARY_PATH=~/opt/ldc2-$ver-linux-x86_64/lib debug
#
D_COMPILER=ldc2
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Darwin)
SYS = OSX
else
SYS = LINUX
LINK_OBJ = utils/ldc_version_info_.o
endif
DFLAGS = -wi -I. -IBioD -IundeaD/src -g
DLIBS = $(LIBRARY_PATH)/libphobos2-ldc.a $(LIBRARY_PATH)/libdruntime-ldc.a
DLIBS_DEBUG = $(LIBRARY_PATH)/libphobos2-ldc-debug.a $(LIBRARY_PATH)/libdruntime-ldc-debug.a
LIBS = htslib/libhts.a lz4/lib/liblz4.a -L-L$(LIBRARY_PATH) -L-lrt -L-lpthread -L-lm
LIBS = htslib/libhts.a lz4/lib/liblz4.a -L-L$(LIBRARY_PATH) -L-lpthread -L-lm
LIBS_STATIC = $(LIBRARY_PATH)/libc.a $(DLIBS) htslib/libhts.a $(LIBRARY_PATH)/liblz4.a
SRC = $(wildcard main.d utils/*.d thirdparty/*.d cram/*.d) $(wildcard undeaD/src/undead/*.d) $(wildcard BioD/bio/*/*.d BioD/bio/*/*/*.d BioD/bio2/*.d BioD/bio2/*/*.d) $(wildcard sambamba/*.d sambamba/*/*.d sambamba/*/*/*.d)
OBJ = $(SRC:.d=.o) utils/ldc_version_info_.o
......@@ -49,7 +64,7 @@ htslib-static:
cd htslib && $(MAKE)
ldc-version-info:
./gen_ldc_version_info.py $(shell which ldmd2) > utils/ldc_version_info_.d
python3 ./gen_ldc_version_info.py $(shell which ldmd2) > utils/ldc_version_info_.d
cat utils/ldc_version_info_.d
utils/ldc_version_info_.o: ldc-version-info
......@@ -78,7 +93,7 @@ singleobj:
# ---- Link step
$(OUT): build-setup singleobj utils/ldc_version_info_.o
$(info linking...)
$(D_COMPILER) $(DFLAGS) -of=bin/sambamba bin/sambamba.o utils/ldc_version_info_.o $(LIBS)
$(D_COMPILER) $(DFLAGS) -of=bin/sambamba bin/sambamba.o $(LINK_OBJ) $(LIBS)
test:
./run_tests.sh
......@@ -97,7 +112,7 @@ install:
install -m 0755 bin/sambamba $(prefix)/bin
clean: clean-d
cd htslib ; make clean
cd htslib ; $(MAKE) clean
rm -f profile.data
rm -f profile.raw
......
# GNU Guix makefile. GNU Guix is the package manager for GNU and we use it for
# sambamba development and deployment. Normally use the standard Makefile
# instead.
# GNU Guix makefile. GNU Guix is the package manager for GNU and we
# use it for sambamba development and deployment. Normally use the
# standard Makefile instead because it compiles into a singleobj.
#
# To build sambamba on GNU Guix:
#
# make -f Makefile.guix
# make -f Makefile.guix GUIX=~/opt/sambamba-dev-env
#
# run with
#
......@@ -18,7 +18,10 @@
#
# The following two are modified by the Guix package:
D_COMPILER=ldc2
LDC_LIB_PATH=$(HOME)/.guix-profile/lib
ifndef GUIX
GUIX=$(HOME)/.guix-profile
endif
LDC_LIB_PATH=$(GUIX)/lib
UNDEAD_PATH=../undeaD/src
BIOD_PATH=../BioD
......@@ -31,7 +34,7 @@ RPATH = -L--rpath=$(dir $(realpath $(LDC_LIB_PATH)/libz.so)):$(dir $(realpath $
LIBS = htslib/libhts.a -L-L$(LDC_LIB_PATH) -L-lrt -L-lpthread -L-lm -L-lz -L-llz4
# LIBS = htslib/libhts.a -L-lrt -L-lpthread -L-lm -L-lz -L-llz4
LIBS_STATIC = $(DLIBS) htslib/libhts.a $(LDC_LIB_PATH)/liblz4.a
SRC = $(wildcard utils/*.d thirdparty/*.d cram/*.d) $(wildcard $(UNDEAD_PATH)/undead/**/*.d) $(wildcard $(UNDEAD_PATH)/undead/*.d) $(wildcard $(BIOD_PATH)/bio/*/*.d $(BIOD_PATH)/bio/*/*/*.d) $(wildcard sambamba/*.d sambamba/*/*.d sambamba/*/*/*.d) utils/ldc_version_info_.d
SRC = $(wildcard main.d utils/*.d thirdparty/*.d cram/*.d) $(wildcard undeaD/src/undead/*.d undeaD/src/undead/*/*.d) $(wildcard BioD/bio/*/*.d BioD/bio/*/*/*.d BioD/bio2/*.d BioD/bio2/*/*.d) $(wildcard sambamba/*.d sambamba/*/*.d sambamba/*/*/*.d)
OBJ = $(SRC:.d=.o)
OUT = bin/sambamba
......
......@@ -2,15 +2,36 @@
# sambamba
## Table of Contents
Table of Contents
=================
* [sambamba](#sambamba)
* [Table of Contents](#table-of-contents)
* [Introduction](#introduction)
* [Binary installation](#binary-installation)
* [Install stable release](#install-stable-release)
* [Bioconda install](#bioconda-install)
* [GNU Guix install](#gnu-guix-install)
* [Debian GNU/Linux install](#debian-gnulinux-install)
* [Homebrew install](#homebrew-install)
* [Getting help](#getting-help)
* [Reporting a sambamba bug or issue](#reporting-a-sambamba-bug-or-issue)
* [Check list:](#check-list)
* [Code of conduct](#code-of-conduct)
* [Compiling Sambamba](#compiling-sambamba)
* [Compilation dependencies](#compilation-dependencies)
* [Compiling for Linux](#compiling-for-linux)
* [GNU Guix](#gnu-guix)
* [Compiling for Mac OS X](#compiling-for-mac-os-x)
* [Development](#development)
* [Debugging and troubleshooting](#debugging-and-troubleshooting)
* [Segfaults on certain Intel Xeons](#segfaults-on-certain-intel-xeons)
* [Dump core](#dump-core)
* [Use catchsegv](#use-catchsegv)
* [Using gdb](#using-gdb)
* [License](#license)
* [Credit](#credit)
- [Introduction](#intro)
- [Binary installation](#install)
- [Getting help](#help)
- [Compiling](#compile)
- [Debugging and troubleshooting](#debug)
- [License](#license)
- [Credits](#credits)
<a name="intro"></a>
# Introduction
......@@ -33,8 +54,8 @@ configurations. Here are some comparison
[metrics](https://public-docs.crg.es/rguigo/Data/epalumbo/sambamba_ws_report.html). For
example for flagstat sambamba is 1.4x faster than samtools. For index
they are similar. For Markdup almost 6x faster and for view 4x
faster. For sort sambamba has been beaten generally, though sambamba
is up to 2x faster on large RAM machines.
faster. For sort sambamba has been beaten, though sambamba is up to 2x
faster than samtools on large RAM machines (120GB+).
In addition sambamba has a few interesting features to offer, in particular
......@@ -58,7 +79,7 @@ Sambamba is free and open source software, licensed under GPLv2+.
See manual pages [online](https://lomereiter.github.io/sambamba/docs/sambamba-view.html)
to know more about what is available and how to use it.
For more information on Sambamba contact the mailing list (see below).
For more information on Sambamba contact the mailing list (see [Getting help](#getting-help)).
<a name="install"></a>
# Binary installation
......@@ -69,7 +90,7 @@ are Github source and binary
[releases](https://github.com/biod/sambamba/releases). Simply download
the tarball, unpack it and run it. For example
```sh
```bash
wget https://github.com/biod/sambamba/releases/download/v0.6.8/sambamba_v0.6.8_linux.tar.bz2
tar xvjf sambamba_v0.6.8_linux.tar.bz2
./sambamba_v0.6.8
......@@ -95,13 +116,12 @@ A [GNU Guix package](https://www.gnu.org/software/guix/packages/s.html) for samb
## Debian GNU/Linux install
Debian: see Debian packages.
Debian: see [Debian packages](https://tracker.debian.org/pkg/sambamba).
## Homebrew install
Users of Homebrew can also use the formula from `homebrew-science`.
<a name="help"></a>
# Getting help
......@@ -155,20 +175,11 @@ Note: in general there is no need to compile sambamba. You can use a
recent binary install as listed above.
The preferred method for compiling Sambamba is with the LDC compiler
which targets LLVM.
which targets LLVM. LLVM version 6 is faster than earlier versions.
## Compilation dependencies
- git (to check out the repo)
- gcc compiler 4.9 or later (for htslib)
- D compiler 1.7.0 or later (ldc2, see below)
- python2 (parses D-compiler header for version info)
- zlib (library)
- lz4 (library)
- htslib (submodule)
- BioD (source)
- undeaD (source)
- python2
See [INSTALL.md](./INSTALL.md).
## Compiling for Linux
......@@ -190,7 +201,7 @@ cd sambamba
make
```
To build a debug release run
To build a development/debug version run
```sh
make clean && make debug
......@@ -203,6 +214,8 @@ you can run
make check
```
See also [INSTALL.md](./INSTALL.md).
### GNU Guix
To build sambamba the LDC compiler is also available in GNU Guix:
......@@ -213,15 +226,14 @@ guix package -i ldc
## Compiling for Mac OS X
Note: the Makefile does not work. Someone want to fix that using the
Makefile.old version? See also https://github.com/biod/sambamba/issues/338.
Sambamba builds on MacOS. We have a Travis [integration test](https://travis-ci.org/pjotrp/sambamba) as
an example. It can be something like
```sh
brew install ldc
git clone --recursive https://github.com/biod/sambamba.git
cd sambamba
git clone https://github.com/dlang/undeaD
make sambamba-ldmd2-64
make
```
## Development
......@@ -232,6 +244,7 @@ documentation can be found in the source code and the [development
documentation](https://github.com/biod/sambamba-dev-docs).
<a name="debug"></a>
# Debugging and troubleshooting
## Segfaults on certain Intel Xeons
......@@ -305,6 +318,8 @@ work on Sambamba, please cite the following publication:
A. Tarasov, A. J. Vilella, E. Cuppen, I. J. Nijman, and P. Prins. [Sambamba: fast processing of NGS alignment formats](https://doi.org/10.1093/bioinformatics/btv098). Bioinformatics, 2015.
## Bibtex reference
```bibtex
@article{doi:10.1093/bioinformatics/btv098,
......
## ChangeLog v0.6.8-pre1 (20180207)
## ChangeLog v0.6.8 (20181004)
Minor release with a much faster binary. 10-20% faster than v0.6.6,
due to ldc and LLVM improvements. Fixes speed regression of v0.6.7 for
large files. See also [performance](https://github.com/biod/sambamba/blob/master/test/benchmark/stats.org)
Pre-release with a much faster statically compiled binary. 10-20%
faster than v0.6.6, due to ldc and LLVM improvements. Fixes speed
regression of v0.6.7 for large files due to singleobj compilation. See
also #345 and
[performance](https://github.com/biod/sambamba/blob/master/test/benchmark/stats.org)
64-bit compilation should be fine on ldc 1.10+. i386 target is still a problem.
+ Fix mark duplicates in files with many contigs, see #361 (thanks Devon Ryan @dpryan79)
+ Fix missing PM tag in #356 (thanks Kurt Hetrick @Kurt-Hetrick)
+ Fix Bcftools version checking #352 (thanks Nathan S. Watson-Haigh @nathanhaigh)
+ Fixate version info in BAM output headers for reproducibility. See #357
+ Fixed Makefile for general use, see #332
+ Started benchmarking, see #283 and https://github.com/biod/sambamba/blob/master/test/benchmark/stats.org
+ Readded [Travis-ci support](https://travis-ci.org/biod/sambamba) for Linux (MacOS is disabled #338)
+ Fixed MacOS build in Travis with ae269cfbdf2e78750ce7f8dc70ad32f80a6682df
+ Updated BioD to latest https://github.com/biod/BioD/commit/5e56b2bb45324af2194b3339d298fd827c8003ae
+ Bug fixes:
* #328 Debug version: SAM output of CRAM file is populated with debug on pipe
* #331 Segmentation fault attempting to view header in json format
* #335 Intel Xeon bug may segfault Sambamba - this was tracked down to an Intel Xeon bug
* #345 sambamba index 0.6.7 takes 4x longer than 0.6.6 on the same files
+ Documentation updates
+ Updated lz4 to latest (still source in tree because Debian dropped frame support in liblz4-dev)
+ Added support for GNU Guix and build containers
+ Added shunit2 to the source tree for testing
+ Update python build dependencies to use python3.x
+ Fixed a number of D compiler messages on deprecated features (ldc 1.11)
To install the image, download and
```sh
md5sum sambamba-0.6.8.gz
25efb5604ae5fe7c750e8020326787c5 sambamba-0.6.8.gz
gzip -d sambamba-0.6.8.gz
chmod a+x sambamba-0.6.8
./sambamba-0.6.8
sambamba 0.6.8 by Artem Tarasov and Pjotr Prins (C) 2012-2018
LDC 1.10.0 / DMD v2.080.1 / LLVM6.0.1 / bootstrap LDC - the LLVM D compiler (0.17.4)
```
The binary images were reproducibly built on x86_64 with
```sh
~/.config/guix/current/bin/guix pull -l
Generation 3 Sep 25 2018 09:39:08
guix 932839f
repository URL: https://git.savannah.gnu.org/git/guix.git
branch: origin/master
commit: 932839ff124ff3b0dd3070914fb1c5beec69bf32
guix environment -C guix --ad-hoc gcc gdb bash ld-wrapper ldc which python git
make clean && make -j 16 && make check
for x in `ldd bin/sambamba|cut -d ' ' -f 3` ; do realpath $x ; done
/gnu/store/l4lr0f5cjd0nbsaaf8b5dmcw1a1yypr3-glibc-2.27/lib/libpthread-2.27.so
/gnu/store/l4lr0f5cjd0nbsaaf8b5dmcw1a1yypr3-glibc-2.27/lib/libm-2.27.so
/gnu/store/l4lr0f5cjd0nbsaaf8b5dmcw1a1yypr3-glibc-2.27/lib/librt-2.27.so
/gnu/store/l4lr0f5cjd0nbsaaf8b5dmcw1a1yypr3-glibc-2.27/lib/libdl-2.27.so
/gnu/store/bmaxmigwnlbdpls20px2ipq1fll36ncd-gcc-8.2.0-lib/lib/libgcc_s.so.1
/gnu/store/l4lr0f5cjd0nbsaaf8b5dmcw1a1yypr3-glibc-2.27/lib/libc-2.27.so
# build static image
make clean && make release -j 16 && make check
```
Git submodule versions were
```
git submodule status
2f0634b187e0f454809432093238cf31e9fbfee6 BioD (v0.2.0-5-g2f0634b)
2f3c3ea7b301f9b45737a793c0b2dcf0240e5ee5 htslib (0.2.0-rc10-271-g2f3c3ea)
b3692db46d2b23a7c0af2d5e69988c94f126e10a lz4 (v1.8.2)
9be93876982b5f14fcca60832563b3cd767dd84d undeaD (v1.0.1-49-g9be9387)
```
CHANGES WITH 2.0.4
Unset additional variables that were missed.
Fixed off-by-one in exit value for scripts caught by the trap handler.
The library did not fail gracefully when the 'od' command was not installed.
CHANGES WITH 2.0.3
Back ported the Makefile from 2.1.1pre that included changes to the
docs-docbook-prep target.
Changed the test in assertFalse() so that any non-zero value registers as
false. (Credits to Bryan Larsen)
Updated the testPartyLikeItIs1999() function in the Quick Start documentation.
The 'expected' and 'actual' values were swapped. (Credits to Richard Jensen)
It was pointed out that the simple 'failed' message for a failed assert was not
only insufficient, it was nonstandard (when compared to JUnit) and didn't
provide the user with an expected vs actual result. The code was revised
somewhat to bring closer into alignment with JUnit (v4.3.1 specifically) so
that it feels more "normal". (Credits to Richard Jensen)
As part of the JUnit realignment, it was noticed that fail*() functions in
JUnit don't actually do any comparisons themselves. They only generate a
failure message. Updated the code to match.
Added self-testing unit tests. Kinda horkey, but they did find bugs during the
JUnit realignment.
Fixed the code for returning from asserts as the return was being called before
the unsetting of variables occurred. (Credits to Mathias Goldau)
The assert(True|False)() functions now accept an integer value for a
conditional test. A value of '0' is considered 'true', while any non-zero value
is considered 'false'.
All public functions now use default values to work properly with the '-x'
shell debugging flag.
Fixed the method of percent calculation for the report to achieve better
accuracy.
CHANGES WITH 2.0.2
Fixed problem with fail(). The failure message was not properly printed.
Reworked the Makefile so that the DocBook XML and XSLT files are properly
downloaded before parsing can continue.
CHANGES WITH 2.0.1
Fixed some really stupid mistakes with the fail* functions. They were doing the
exact opposite of what they were supposed to be doing.
CHANGES WITH 2.0.0
Made the first stand-alone release!
$Revision$
vim:spell
This diff is collapsed.
#------------------------------------------------------------------------------
# SourceForge
#
This project is stored on SourceForge as http://sf.net/projects/shunit2. The
source code can be accessed using the following information.
* Subversion
$ svn co https://shunit2.svn.sourceforge.net/svnroot/shunit2/trunk/source shunit2
Subversion may also be browsed via a web browser at
http://svn.sourceforge.net/shunit2
#------------------------------------------------------------------------------
# Making a release
#
For these steps, it is assumed we are working with release 2.0.0.
Steps:
* write release notes
* update version
* finish changelog
* check all the code in
* tag the release
* export the release
* create tarball
* md5sum the tarball and sign with gpg
* update website
* post to SourceForge and Freshmeat
WRITE RELEASE NOTES
This should be pretty self-explanatory. Use one of the release notes from a
previous release as an example.
To get the versions of the various shells, do the following:
Cygwin
bash: $ bash --version
ksh: actually pdksh
pdksh: look in the downloaded Cygwin directory
Linux
bash: $ bash --version
dash: look at installed version
ksh: $ ksh --version
pdksh: $ strings /bin/pdksh |grep 'PD KSH'
zsh: $ zsh --version
Solaris 10
sh: not possible
bash: $ bash --version
ksh: $ strings /usr/bin/ksh |grep 'Version'
UPDATE VERSION
Edit the shunit2 source code, and change the version number in the comment, as
well as in the __SHUNIT_VERSION variable. Next, edit the
src/docbook/shunit2.xml file, edit the version in the <title> element, and make
sure there is a revision section for this release.
FINISH DOCUMENTATION
Make sure that any remaining changes get put into the CHANGES-X.X.txt file.
Finish writing the RELEASE_NOTES-X.X.X.txt. Once it is finished, run it through
the 'fmt' command to make it pretty.
$ fmt -w 80 RELEASE_NOTES-2.0.0.txt >RELEASE_NOTES-2.0.0.txt.new
$ mv RELEASE_NOTES-2.0.0.txt.new RELEASE_NOTES-2.0.0.txt
We want to have an up-to-date version of the documentation in the release, so
we'd better build it.
$ pwd
.../shunit2/source/2.0
$ make docs
...
$ cp -p build/shunit2.html doc
$ svn ci -m "" doc/shunit2.html
CHECK IN ALL THE CODE
This step is pretty self-explanatory.
TAG THE RELEASE
$ pwd
.../shunit2/source
$ ls
2.0 2.1
$ svn cp -m "Release 2.0.0" \
2.0 https://shunit2.svn.sourceforge.net/svnroot/shunit2/tags/source/2.0.0
EXPORT THE RELEASE
$ pwd
.../shunit2/builds
$ svn export \
https://shunit2.svn.sourceforge.net/svnroot/shunit2/tags/source/2.0.0 \
shunit2-2.0.0
CREATE TARBALL
$ tar cfz ../releases/shunit2-2.0.0.tgz shunit2-2.0.0
MD5SUM THE TARBALL AND SIGN WITH GPG
$ cd ../releases
$ md5sum shunit2-2.0.0.tgz >shunit2-2.0.0.tgz.md5
$ gpg --default-key kate.ward@forestent.com --detach-sign shunit2-2.0.0.tgz
UPDATE WEBSITE
Again, pretty self-explanatory. Make sure to copy the MD5 and GPG signature
files. Once that is done, make sure to tag the website so we can go back in
time if needed.
$ pwd
.../shunit2
$ ls
source website
$ svn cp -m "Release 2.0.0" \
website https://shunit2.svn.sourceforge.net/svnroot/shunit2/tags/website/20060916
Now, update the website. It too is held in Subversion, so ssh into SourceForge
and use 'svn up' to grab the latest version.
POST TO SOURCEFORGE AND FRESHMEAT
http://sourceforge.net/projects/shunit2/
http://freshmeat.net/
#------------------------------------------------------------------------------
# Related documentation
#
Docbook XML
docbook-xml-4.4.zip
http://www.docbook.org/xml/4.4/docbook-xml-4.4.zip
http://www.oasis-open.org/docbook/xml/4.4/docbook-xml-4.4.zip
docbook-xml-4.5.zip
http://www.docbook.org/xml/4.5/docbook-xml-4.5.zip
Docbook XSL
docbook-xsl-1.71.0.tar.bz2
http://prdownloads.sourceforge.net/docbook/docbook-xsl-1.71.0.tar.bz2?download
docbook-xsl-1.71.1.tar.bz2
http://downloads.sourceforge.net/docbook/docbook-xsl-1.71.1.tar.bz2?use_mirror=puzzle
JUnit
http://www.junit.org
$Revision$
The original author of shunit2 is Kate Ward. The following people have
contributed in some way or another to shunit2.
Bryan Larsen
Kevin Van Horn
Mathias Goldau
Richard Jensen
Rob Holland
$Revision$
This diff is collapsed.
......@@ -756,6 +756,7 @@ struct _Anonymous_22
}
cram_block_method method;
cram_block_method orig_method;
/*
enum cram_content_type
{
CT_ERROR = -1,
......@@ -766,6 +767,7 @@ struct _Anonymous_22
EXTERNAL = 4,
CORE = 5
}
*/
cram_content_type content_type;
int content_id;
int comp_size;
......@@ -833,6 +835,7 @@ struct cram_map
struct _Anonymous_24
{
/*
enum cram_content_type
{
CT_ERROR = -1,
......@@ -843,6 +846,7 @@ struct _Anonymous_24
EXTERNAL = 4,
CORE = 5
}
*/
cram_content_type content_type;
int ref_seq_id;
int ref_seq_start;
......
sambamba (0.6.8-pre3-1) UNRELEASED; urgency=medium
sambamba (0.6.8-1) UNRELEASED; urgency=medium
* New upstream pre-release
* New upstream release
Closes: #907489
* Point Vcs fields to salsa.debian.org
* Standards-Version: 4.2.1
* Add build time test
* Allow pre-releases in watch file
-- Andreas Tille <tille@debian.org> Sun, 23 Sep 2018 07:42:58 +0200
-- Andreas Tille <tille@debian.org> Thu, 04 Oct 2018 10:06:08 +0200
sambamba (0.6.7-2) unstable; urgency=medium
......
version=4
opts="uversionmangle=s/-/~/" \
https://github.com/lomereiter/sambamba/releases .*/archive/v(\d[\d.pre-]+)@ARCHIVE_EXT@
#!/usr/bin/env python2
#!/usr/bin/env python
from __future__ import print_function
import re, sys, subprocess
......@@ -10,7 +10,7 @@ if len(sys.argv) < 2:
ldc = sys.argv[1].replace("ldmd2", "ldc2")
ldc_output = subprocess.Popen([ldc, '-version'], stdout=subprocess.PIPE).communicate()[0]
version_re = r"""^.+\((?P<LDC>[^\)]+)\):\n\s*based on DMD (?P<DMD>\S+) and LLVM (?P<LLVM>\S+)\n(?:\s*built with (?P<BOOTSTRAP>.*)\n)?"""
match = re.match(version_re, ldc_output, re.MULTILINE)
match = re.match(version_re, ldc_output.decode("utf-8") , re.MULTILINE)
if not match:
sys.exit("ERROR: failed to generated LDC version information")
......
#!/usr/bin/env bash
#!/bin/sh
# download shunit2 in order to run tests:
# curl -L "https://dl.dropboxusercontent.com/u/7916095/shunit2-2.0.3.tgz" | tar zx --overwrite
./test/test_suite.sh | tee /dev/stderr | grep -q 'success rate: 100%'
echo "Running tests..."
./test/test_suite.sh 2>&1 | tr '\r' '\n' > test.log
cat test.log
cat test.log | grep -q 'success rate: 100%'
......@@ -19,7 +19,9 @@
*/
import std.algorithm;
import std.experimental.logger;
import std.range.primitives;
import sambamba.depth;
import sambamba.index;
......@@ -73,6 +75,10 @@ Usage: sambamba [command] [args...]
To get help on a particular command, call it without args.
Global options
-q quiet mode (do not show banner)
For bug reports and feature requests see
https://github.com/biod/
......@@ -81,7 +87,7 @@ For bug reports and feature requests see
void printVersion() {
stderr.writeln();
stderr.writeln("sambamba " ~ VERSION ~ " by Artem Tarasov and Pjotr Prins (C) 2012-2017");
stderr.writeln("sambamba " ~ VERSION ~ " by Artem Tarasov and Pjotr Prins (C) 2012-2018");
stderr.writeln(" LDC " ~ LDC_VERSION_STRING ~ " / DMD " ~ DMD_VERSION_STRING ~
" / LLVM" ~ LLVM_VERSION_STRING ~ " / bootstrap " ~ BOOTSTRAP_VERSION_STRING);
stderr.writeln();
......@@ -89,14 +95,16 @@ void printVersion() {
int main(string[] args) {
globalLogLevel(LogLevel.info);
if (args.find("-q").empty)
printVersion();
if (args.length == 1) {
auto args2 = args.remove!(a => a == "-q");
if (args2.length == 1) {
printUsage();
return 1;
}
auto _args = args[0] ~ args[2 .. $];
auto _args = args2[0] ~ args2[2 .. $];
switch (args[1]) {
case "view": return view_main(_args);
......
......@@ -37,6 +37,9 @@ import std.traits, std.typecons, std.range, std.algorithm, std.parallelism,
std.exception, std.file, std.typetuple, std.conv, std.array, std.bitmanip,
core.stdc.stdlib, std.datetime, undead.stream : BufferedFile, FileMode;
import std.datetime;
import std.datetime.stopwatch : benchmark, StopWatch;
/// Read + its index (0-based)
struct IndexedBamRead {
ulong index;
......@@ -270,7 +273,7 @@ struct CollateReadPairRange(R, bool keepFragments, alias charsHashFunc)
version(profile) {
~this() {
stderr.writeln("duped during compaction: ", _duped_during_compaction);
stderr.writeln("time spent on compaction: ", _compact_sw.peek().msecs, " ms");
stderr.writeln("time spent on compaction: ", _compact_sw.peek().total!"msecs", " ms");
}
}
......@@ -715,12 +718,12 @@ auto collectSingleEndInfo(IndexedBamRead read, ReadGroupIndex read_group_index)
result.coord = computeFivePrimeCoord(read);
result.idx = read.index;
result.score = computeScore(read);
result.ref_id = cast(ushort)read.ref_id;
result.ref_id = read.ref_id;
result.reversed = read.is_reverse_strand ? 1 : 0;
result.paired = (read.is_paired && !read.mate_is_unmapped) ? 1 : 0;
auto rg = read_group_index.getId(getRG(read));
result.library_id = cast(short)read_group_index.getLibraryId(rg);
result.library_id = read_group_index.getLibraryId(rg);
return result;
}
......@@ -1071,7 +1074,9 @@ auto getDuplicateOffsets(R)(R reads, ReadGroupIndex rg_index,
stderr.write(" collecting indices of duplicate reads... ");
sw.start();
auto duplicates = collectDuplicates(paired_ends, single_ends, second_ends, cfg, pool);
sw.stop(); stderr.writeln(" done in ", sw.peek().msecs, " ms"); sw.reset();
sw.stop();
immutable t = sw.peek();
stderr.writeln(" done in ", t.total!"msecs", " ms"); sw.reset();
stderr.writeln(" found ", duplicates.length, " duplicates");
paired_ends.removeTemporaryFiles();
......@@ -1256,9 +1261,7 @@ int markdup_main(string[] args) {
auto dup_idx_storage = getDuplicateOffsets(reads, rg_index, taskPool, cfg);
auto elapsed = sw.peek();
stderr.writeln("collected list of positions in ",
elapsed.seconds / 60, " min ",
elapsed.seconds % 60, " sec");
stderr.writeln("collected list of positions in ",elapsed.total!"minutes"," min ",elapsed.total!"seconds" % 60," sec");
// marking or removing duplicates
bam = new MultiBamReader(args[1 .. $-1]); // FIXME: initialized twice
......@@ -1298,9 +1301,8 @@ int markdup_main(string[] args) {
}
sw.stop();
stderr.writeln("total time elapsed: ",
sw.peek().seconds / 60, " min ",
sw.peek().seconds % 60, " sec");
auto elapsed2 = sw.peek();
stderr.writeln("collected list of positions in ",elapsed2.total!"minutes"," min ",elapsed2.total!"seconds" % 60," sec");
} catch (Throwable e) {
stderr.writeln("sambamba-markdup: ", e.msg);
......
* Performance metrics
** View
*** 4x Intel(R) Core(TM) i5-2520M CPU @ 2.50GHz (hyperthreaded)
**** sambamba 0.6.8-pre3
*** 8x Intel(R) Core(TM) i7-6770HQ CPU @ 2.60GHz, 16Gb, SSD
#+BEGIN_SRC
monza:~/tmp$ time ./sambamba view /gnu/data/HG00100.chrom20.ILLUMINA.bwa.GBR.low_coverage.20130415.bam.orig > /dev/null
sambamba 0.6.8-pre3 by Artem Tarasov and Pjotr Prins (C) 2012-2018
LDC 1.11.0 / DMD v2.081.2 / LLVM6.0.1 / bootstrap LDC - the LLVM D compiler (0.17.6)
real 0m6.930s
user 0m26.940s
sys 0m0.516s
sambamba 0.6.8-pre2 by Artem Tarasov and Pjotr Prins (C) 2012-2018
LDC 1.10.0 / DMD v2.080.1 / LLVM6.0.1 / bootstrap LDC - the LLVM D compiler (0.17.4)
real 0m6.854s
user 0m26.456s
sys 0m0.584s
linux-vdso.so.1 (0x00007ffd227fc000)
librt.so.1 => /gnu/store/n6nvxlk2j8ysffjh3jphn1k5silnakh6-glibc-2.25/lib/librt.so.1 (0x00007f5d31082000)
libpthread.so.0 => /gnu/store/n6nvxlk2j8ysffjh3jphn1k5silnakh6-glibc-2.25/lib/libpthread.so.0 (0x00007f5d30e64000)
libm.so.6 => /gnu/store/n6nvxlk2j8ysffjh3jphn1k5silnakh6-glibc-2.25/lib/libm.so.6 (0x00007f5d30b52000)
libdl.so.2 => /gnu/store/n6nvxlk2j8ysffjh3jphn1k5silnakh6-glibc-2.25/lib/libdl.so.2 (0x00007f5d3094e000)
libgcc_s.so.1 => /gnu/store/h3z6nshhdlc8zgh4mi13x1br03xipi9r-gcc-7.2.0-lib/lib/libgcc_s.so.1 (0x00007f5d30737000)
libc.so.6 => /gnu/store/n6nvxlk2j8ysffjh3jphn1k5silnakh6-glibc-2.25/lib/libc.so.6 (0x00007f5d30398000)
/gnu/store/n6nvxlk2j8ysffjh3jphn1k5silnakh6-glibc-2.25/lib/ld-linux-x86-64.so.2 (0x00007f5d3128a000)
#+END_SRC
**** sambamba 0.6.7
*** 8x Intel(R) Core(TM) i7-6770HQ CPU @ 2.60GHz, 16Gb, SSD (hyperthreaded)
#+BEGIN_SRC sh
time ./build/sambamba view /gnu/data/HG00100.chrom20.ILLUMINA.bwa.GBR.low_coverage.20130415.bam.orig > /dev/null
#+END_SRC
**** sambamba 0.6.8
#+BEGIN_SRC
sambamba 0.6.8 by Artem Tarasov and Pjotr Prins (C) 2012-2018
LDC 1.10.0 / DMD v2.080.1 / LLVM6.0.1 / bootstrap LDC - the LLVM D compiler (0.17.4)
real 0m2.869s
user 0m21.972s
sys 0m0.356s
#+END_SRC
**** sambamba 0.6.7
#+BEGIN_SRC
This version was built with:
LDC 1.1.1
......@@ -33,38 +74,80 @@ sys 0m0.344s
#+END_SRC
** Sort
*** 56x Intel(R) Xeon(R) CPU E5-2683 v3 @ 2.00GHz, 256Gb
**** sambamba 0.6.6
#+BEGIN_SRC sh
time ./sambamba_v0.6.6 sort -m 20615843020 -N -o /dev/null ENCFF696RLQ.bam -p
#+END_SRC
**** sambamba 0.6.8
#+BEGIN_SRC
This version was built with:
LDC 0.17.1
using DMD v2.068.2
using LLVM 3.8.0
bootstrapped with version not available
sambamba 0.6.8 by Artem Tarasov and Pjotr Prins (C) 2012-2018
LDC 1.10.0 / DMD v2.080.1 / LLVM6.0.1 / bootstrap LDC - the LLVM D compiler (0.17.4)
real 10m0.932s
user 151m39.172s
sys 3m7.596s
real 7m50.558s
user 89m10.808s
sys 2m57.188s
#+END_SRC
and with 120GB RAM
#+BEGIN_SRC
sambamba 0.6.8 by Artem Tarasov and Pjotr Prins (C) 2012-2018
LDC 1.10.0 / DMD v2.080.1 / LLVM6.0.1 / bootstrap LDC - the LLVM D compiler (0.17.4)
real 3m49.953s
user 81m16.956s
sys 1m58.332s
#+END_SRC
**** sambamba 0.6.8-pre1
#+BEGIN_SRC
Wed Feb 7 03:43:14 CST 2018
sambamba 0.6.8-pre1
This version was built with:
LDC 1.1.1
using DMD v2.071.2
using LLVM 3.9.1
bootstrapped with LDC - the LLVM D compiler (1.1.1)
LDC 1.7.0
using DMD v2.077.1
using LLVM 5.0.1
bootstrapped with LDC - the LLVM D compiler (1.7.0)
real 8m0.528s
user 88m44.084s
sys 2m45.888s
real 9m22.501s
user 98m24.748s
sys 2m51.996s
#+END_SRC
Note, updating compiler shows a speed gain for 0.6.6.
When sambamba is given enough RAM to hold everything in memory sambamba is twice
as fast (apparently half the time goes to intermediate IO)
#+BEGIN_SRC sh
time ./sambamba sort -N -o /dev/null ENCFF696RLQ.bam -p -m 120G
#+END_SRC
#+BEGIN_SRC
real 3m46.856s
user 81m44.524s
sys 1m56.388s
#+END_SRC
with 64GB it is
#+BEGIN_SRC
real 5m36.062s
user 88m43.176s
sys 3m0.536s
#+END_SRC
and with 32GB it is
#+BEGIN_SRC
real 7m22.125s
user 89m6.188s
sys 2m51.228s
#+END_SRC
**** sambamba 0.6.7
......@@ -143,48 +226,109 @@ sys 2m56.244s
So, the release was reverted and, after a version bump:
**** sambamba 0.6.8
**** sambamba 0.6.6
#+BEGIN_SRC
Wed Feb 7 03:43:14 CST 2018
sambamba 0.6.8-pre1
This version was built with:
LDC 1.7.0
using DMD v2.077.1
using LLVM 5.0.1
bootstrapped with LDC - the LLVM D compiler (1.7.0)
LDC 0.17.1
using DMD v2.068.2
using LLVM 3.8.0
bootstrapped with version not available
real 8m0.528s
user 88m44.084s
sys 2m45.888s
#+END_SRC
real 10m0.932s
user 151m39.172s
sys 3m7.596s
When sambamba is given enough RAM to hold everything in memory sambamba is twice
as fast (apparently half the time goes to intermediate IO)
This version was built with:
LDC 1.1.1
using DMD v2.071.2
using LLVM 3.9.1
bootstrapped with LDC - the LLVM D compiler (1.1.1)
#+BEGIN_SRC sh
time ./sambamba sort -N -o /dev/null ENCFF696RLQ.bam -p -m 120G
real 9m22.501s
user 98m24.748s
sys 2m51.996s
#+END_SRC
#+BEGIN_SRC
real 3m46.856s
user 81m44.524s
sys 1m56.388s
#+END_SRC
Note, updating compiler shows a speed gain for 0.6.6.
with 64GB it is
** Markdup
*** 8x Intel(R) Core(TM) i7-6770HQ CPU @ 2.60GHz, 16Gb, SSD (hyperthreaded)
**** sambamba 0.6.8
#+BEGIN_SRC
real 5m36.062s
user 88m43.176s
sys 3m0.536s
sambamba 0.6.8 by Artem Tarasov and Pjotr Prins (C) 2012-2018
LDC 1.10.0 / DMD v2.080.1 / LLVM6.0.1 / bootstrap LDC - the LLVM D compiler (0.17.4)
finding positions of the duplicate reads in the file...
sorted 11286293 end pairs
and 156042 single ends (among them 0 unmatched pairs)
collecting indices of duplicate reads... done in 1325 ms
found 6603388 duplicates
collected list of positions in 0 min 16 sec
marking duplicates...
collected list of positions in 1 min 2 sec
Command being timed: "./bin/sambamba markdup /gnu/data/in_raw.sorted.bam /gnu/data/in_raw.sorted.bam t2.bam"
User time (seconds): 406.49
System time (seconds): 3.86
Percent of CPU this job got: 649%
Elapsed (wall clock) time (h:mm:ss or m:ss): 1:03.13
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 1709720
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 1140382
Voluntary context switches: 393213
Involuntary context switches: 8993
Swaps: 0
File system inputs: 0
File system outputs: 2663824
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
#+END_SRC
and with 32GB it is
Uses slightly more memory but is faster than
**** sambamba 0.6.7-pre1
#+BEGIN_SRC
real 7m22.125s
user 89m6.188s
sys 2m51.228s
/usr/bin/time --verbose sambamba markdup /gnu/data/in_raw.sorted.bam /gnu/data/in_raw.sorted.bam t2.bam
finding positions of the duplicate reads in the file...
sorted 11286293 end pairs
and 156042 single ends (among them 0 unmatched pairs)
collecting indices of duplicate reads... done in 1521 ms
found 6603388 duplicates
collected list of positions in 0 min 16 sec
marking duplicates...
total time elapsed: 1 min 4 sec
Command being timed: "sambamba markdup /gnu/data/in_raw.sorted.bam /gnu/data/in_raw.sorted.bam t2.bam"
User time (seconds): 423.78
System time (seconds): 4.47
Percent of CPU this job got: 666%
Elapsed (wall clock) time (h:mm:ss or m:ss): 1:04.24
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 1542764
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 1839470
Voluntary context switches: 368082
Involuntary context switches: 8537
Swaps: 0
File system inputs: 0
File system outputs: 2643840
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
#+END_SRC