Skip to content
Commits on Source (6)
# version format.
# you can use {branch} name in version format too
# version: 1.0.{build}-{branch}
version: 'vers.{build}'
# branches to build
branches:
# Whitelist
only:
- develop
# Blacklist
except:
- gh-pages
# Do not build on tags (GitHub and BitBucket)
skip_tags: true
# Skipping commits affecting specific files (GitHub only). More details here: /docs/appveyor-yml
#skip_commits:
# files:
# - docs/*
# - '**/*.html'
# We use Mingw/Msys, so use pacman for installs
install:
- set HOME=.
- set MSYSTEM=MINGW64
- set PATH=C:/msys64/usr/bin;C:/msys64/mingw64/bin;%PATH%
- set MINGWPREFIX=x86_64-w64-mingw32
- "sh -lc \"pacman -S --noconfirm --needed base-devel mingw-w64-x86_64-toolchain mingw-w64-x86_64-zlib mingw-w64-x86_64-bzip2 mingw-w64-x86_64-xz mingw-w64-x86_64-curl\""
# The user may have e.g. jkbonfield/bcftools branch FOO and an associated
# jkbonfield/htslib branch FOO. If so use that related htslib, obtained by
# munging $APPVEYOR_REPO_NAME. Otherwise we assume this is a PR only to
# bcftools and should be linked against samtools(org)/htslib develop branch.
clone_script:
- "sh -lc \"if test x$APPVEYOR_PULL_REQUEST_HEAD_REPO_NAME != x ; then git clone --branch=$APPVEYOR_PULL_REQUEST_HEAD_REPO_BRANCH https://github.com/$APPVEYOR_PULL_REQUEST_HEAD_REPO_NAME $APPVEYOR_BUILD_FOLDER ; else false ; fi || git clone --branch=$APPVEYOR_REPO_BRANCH https://github.com/$APPVEYOR_REPO_NAME $APPVEYOR_BUILD_FOLDER\""
- "sh -lc \"git show-branch --sha1-name HEAD"
- "sh -lc \"git clone --branch=$APPVEYOR_REPO_BRANCH https://github.com/`echo $APPVEYOR_REPO_NAME|sed 's#/bcftools#/htslib#'`.git $APPVEYOR_BUILD_FOLDER/htslib || git clone https://github.com/samtools/htslib.git $APPVEYOR_BUILD_FOLDER/htslib \""
- "sh -lc \"cd $APPVEYOR_BUILD_FOLDER/htslib && git show-branch --sha1-name HEAD\""
build_script:
- set HOME=.
- set MSYSTEM=MINGW64
- set PATH=C:/msys64/usr/bin;C:/msys64/mingw64/bin;%PATH%
- "sh -lc \"(cd htslib; aclocal && autoheader && autoconf)\""
- "sh -lc \"aclocal && autoheader && autoconf && ./configure && make -j2\""
test_script:
- set HOME=.
- set MSYSTEM=MINGW64
- set PATH=C:/msys64/usr/bin;C:/msys64/mingw64/bin;%APPVEYOR_BUILD_FOLDER%/htslib;%PATH%
- "sh -lc \"MSYS2_ARG_CONV_EXCL=* make test-plugins\""
......@@ -723,3 +723,26 @@ Public License instead of this License. But first, please read
-----------------------------------------------------------------------------
LICENSE FOR VariantKey (https://github.com/Genomicsplc/variantkey)
The MIT License
Copyright (c) 2017-2018 GENOMICS plc
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
......@@ -23,8 +23,10 @@
# DEALINGS IN THE SOFTWARE.
CC = gcc
AR = ar
RANLIB = ranlib
CPPFLAGS =
CFLAGS = -g -Wall -Wc++-compat -O2
CFLAGS = -g -Wall -O2
LDFLAGS =
LIBS =
......@@ -36,11 +38,12 @@ OBJS = main.o vcfindex.o tabix.o \
vcfstats.o vcfisec.o vcfmerge.o vcfquery.o vcffilter.o filter.o vcfsom.o \
vcfnorm.o vcfgtcheck.o vcfview.o vcfannotate.o vcfroh.o vcfconcat.o \
vcfcall.o mcall.o vcmp.o gvcf.o reheader.o convert.o vcfconvert.o tsv2vcf.o \
vcfcnv.o HMM.o vcfplugin.o consensus.o ploidy.o bin.o hclust.o version.o \
vcfcnv.o HMM.o consensus.o ploidy.o bin.o hclust.o version.o \
regidx.o smpl_ilist.o csq.o vcfbuf.o \
mpileup.o bam2bcf.o bam2bcf_indel.o bam_sample.o \
vcfsort.o \
vcfsort.o cols.o \
ccall.o em.o prob1.o kmin.o # the original samtools calling
PLUGIN_OBJS = vcfplugin.o
prefix = /usr/local
exec_prefix = $(prefix)
......@@ -73,11 +76,18 @@ MISC_SCRIPTS = \
misc/vcfutils.pl
TEST_PROGRAMS = test/test-rbuf test/test-regidx
all: $(PROGRAMS) $(TEST_PROGRAMS) plugins
ALL_CPPFLAGS = -I. $(HTSLIB_CPPFLAGS) $(CPPFLAGS)
ALL_LDFLAGS = $(HTSLIB_LDFLAGS) $(LDFLAGS)
ALL_LIBS = -lz -ldl $(LIBS)
ALL_LIBS = -lz $(DL_LIBS) $(LIBS)
all: $(PROGRAMS) $(TEST_PROGRAMS) plugins
EXTRA_CPPFLAGS =
GSL_LIBS =
# On windows, plugins need to be fully linked. This adds the extra libraries
# needed. Defined here so config.mk can override it.
W32_PLUGIN_LIBS = libbcftools.a $(HTSLIB_DLL) $(ALL_LIBS)
# Usually config.mk and config.h are generated by running configure
# or config.status, but if those aren't used create defaults here.
......@@ -88,12 +98,12 @@ config.h:
echo '/* Basic config.h generated by Makefile */' > $@
ifneq "$(PLUGINS_ENABLED)" "no"
echo '#define ENABLE_BCF_PLUGINS 1' >> $@
echo '#define PLUGIN_EXT ".so"' >> $@
echo '#define PLUGIN_EXT "$(PLUGIN_EXT)"' >> $@
endif
include config.mk
PACKAGE_VERSION = 1.9
PACKAGE_VERSION = 1.10
# If building from a Git repository, replace $(PACKAGE_VERSION) with the Git
# description of the working tree: either a release tag with the same value
......@@ -134,14 +144,15 @@ ifdef USE_GPL
GSL_LIBS ?= -lgsl -lcblas
endif
bcftools: $(OBJS) $(HTSLIB)
$(CC) $(DYNAMIC_FLAGS) -pthread $(ALL_LDFLAGS) -o $@ $(OBJS) $(HTSLIB_LIB) -lm $(ALL_LIBS) $(GSL_LIBS) $(PERL_LIBS)
print-%:
@echo '$*=$($*)'
# Plugin rules
ifneq "$(PLUGINS_ENABLED)" "no"
PLUGINC = $(foreach dir, plugins, $(wildcard $(dir)/*.c))
PLUGINS = $(PLUGINC:.c=$(PLUGIN_EXT))
PLUGINM = $(PLUGINC:.c=.mk)
OBJS += $(PLUGIN_OBJS)
ifneq "$(origin PLATFORM)" "file"
PLATFORM := $(shell uname -s)
......@@ -149,12 +160,35 @@ endif
ifeq "$(PLATFORM)" "Darwin"
$(PLUGINS): | bcftools
PLUGIN_FLAGS = -bundle -bundle_loader bcftools -Wl,-undefined,dynamic_lookup
DL_LIBS =
else ifeq "$(PLATFORM)" "CYGWIN"
$(PLUGINS): | bcftools
PLUGIN_FLAGS = -fPIC -shared
PLUGIN_LIBS = $(W32_PLUGIN_LIBS)
DL_LIBS = -ldl
else ifneq "$(filter MINGW% MSYS%,$(PLATFORM))" ""
DYNAMIC_FLAGS =
$(PLUGINS): | bcftools
PLUGIN_FLAGS = -fPIC -shared -Wl,-export-all-symbols
PLUGIN_LIBS = $(W32_PLUGIN_LIBS)
# On windows, plugins need to be fully linked, including bcftools_version() symbol
# from the application they will be loaded into.
DL_LIBS =
else
PLUGIN_FLAGS = -fPIC -shared
DL_LIBS = -ldl
endif
libbcftools.a: $(OBJS)
@-rm -f $@
$(AR) -rc $@ $(OBJS)
-$(RANLIB) $@
vcfplugin.o: EXTRA_CPPFLAGS += -DPLUGINPATH='"$(pluginpath)"'
%.dll %.cygdll: %.c version.h version.c libbcftools.a $(HTSLIB_DLL)
$(CC) $(PLUGIN_FLAGS) $(CFLAGS) $(ALL_CPPFLAGS) $(EXTRA_CPPFLAGS) $(LDFLAGS) -o $@ version.c $< $(PLUGIN_LIBS)
%.so: %.c version.h version.c
$(CC) $(PLUGIN_FLAGS) $(CFLAGS) $(ALL_CPPFLAGS) $(EXTRA_CPPFLAGS) $(LDFLAGS) -o $@ version.c $< $(LIBS)
......@@ -172,10 +206,14 @@ test check: test-no-plugins
endif # PLUGINS_ENABLED
bcftools: $(OBJS) $(HTSLIB)
$(CC) $(DYNAMIC_FLAGS) -pthread $(ALL_LDFLAGS) -o $@ $(OBJS) $(HTSLIB_LIB) -lm $(ALL_LIBS) $(GSL_LIBS) $(PERL_LIBS)
plugins: $(PLUGINS)
bcftools_h = bcftools.h $(htslib_hts_defs_h) $(htslib_vcf_h)
call_h = call.h $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) vcmp.h
variantkey_h = variantkey.h hex.h
convert_h = convert.h $(htslib_vcf_h)
tsv2vcf_h = tsv2vcf.h $(htslib_vcf_h)
filter_h = filter.h $(htslib_vcf_h)
......@@ -189,41 +227,42 @@ bam2bcf_h = bam2bcf.h $(htslib_hts_h) $(htslib_vcf_h)
bam_sample_h = bam_sample.h $(htslib_sam_h)
main.o: main.c $(htslib_hts_h) config.h version.h $(bcftools_h)
vcfannotate.o: vcfannotate.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(bcftools_h) vcmp.h $(filter_h) $(convert_h) $(smpl_ilist_h) $(htslib_khash_h)
vcfannotate.o: vcfannotate.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(bcftools_h) vcmp.h $(filter_h) $(convert_h) $(smpl_ilist_h) regidx.h $(htslib_khash_h)
vcfplugin.o: vcfplugin.c config.h $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(bcftools_h) vcmp.h $(filter_h)
vcfcall.o: vcfcall.c $(htslib_vcf_h) $(htslib_kfunc_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_str2int_h) $(bcftools_h) $(call_h) $(prob1_h) $(ploidy_h) $(gvcf_h)
vcfconcat.o: vcfconcat.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(bcftools_h)
vcfcall.o: vcfcall.c $(htslib_vcf_h) $(htslib_kfunc_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_str2int_h) $(bcftools_h) $(call_h) $(prob1_h) $(ploidy_h) $(gvcf_h) regidx.h $(vcfbuf_h)
vcfconcat.o: vcfconcat.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_thread_pool_h) $(bcftools_h)
vcfconvert.o: vcfconvert.c $(htslib_faidx_h) $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_kseq_h) $(bcftools_h) $(filter_h) $(convert_h) $(tsv2vcf_h)
vcffilter.o: vcffilter.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) rbuf.h
vcfgtcheck.o: vcfgtcheck.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) hclust.h
vcfindex.o: vcfindex.c $(htslib_vcf_h) $(htslib_tbx_h) $(htslib_kstring_h) $(htslib_bgzf_h) $(bcftools_h)
vcfisec.o: vcfisec.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h)
vcfisec.o: vcfisec.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_hts_os_h) $(bcftools_h) $(filter_h)
vcfmerge.o: vcfmerge.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_faidx_h) regidx.h $(bcftools_h) vcmp.h $(htslib_khash_h)
vcfnorm.o: vcfnorm.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_faidx_h) $(htslib_khash_str2int_h) $(bcftools_h) rbuf.h
vcfquery.o: vcfquery.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_str2int_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(convert_h)
vcfroh.o: vcfroh.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kstring_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(bcftools_h) HMM.h $(smpl_ilist_h) $(filter_h)
vcfcnv.o: vcfcnv.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kstring_h) $(htslib_kfunc_h) $(htslib_khash_str2int_h) $(bcftools_h) HMM.h rbuf.h
vcfsom.o: vcfsom.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h)
vcfsort.o: vcfsort.c $(htslib_vcf_h) $(htslib_kstring_h) kheap.h $(bcftools_h)
vcfsom.o: vcfsom.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_hts_os_h) $(bcftools_h)
vcfsort.o: vcfsort.c $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_hts_os_h) kheap.h $(bcftools_h)
vcfstats.o: vcfstats.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_faidx_h) $(bcftools_h) $(filter_h) bin.h
vcfview.o: vcfview.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(htslib_khash_str2int_h)
reheader.o: reheader.c $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_kseq_h) $(htslib_thread_pool_h) $(bcftools_h) $(khash_str2str_h)
reheader.o: reheader.c $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_kseq_h) $(htslib_thread_pool_h) $(htslib_faidx_h) $(htslib_khash_str2int_h) $(bcftools_h) $(khash_str2str_h)
tabix.o: tabix.c $(htslib_bgzf_h) $(htslib_tbx_h)
ccall.o: ccall.c $(htslib_kfunc_h) $(call_h) kmin.h $(prob1_h)
convert.o: convert.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(convert_h)
convert.o: convert.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_kfunc_h) $(bcftools_h) $(variantkey_h) $(convert_h)
tsv2vcf.o: tsv2vcf.c $(tsv2vcf_h)
em.o: em.c $(htslib_vcf_h) kmin.h $(call_h)
filter.o: filter.c $(htslib_khash_str2int_h) $(htslib_hts_defs_h) $(htslib_vcfutils_h) $(htslib_kfunc_h) config.h $(filter_h) $(bcftools_h)
$(CC) $(CFLAGS) $(ALL_CPPFLAGS) $(EXTRA_CPPFLAGS) $(PERL_CFLAGS) -c -o $@ $<
gvcf.o: gvcf.c $(gvcf_h) $(bcftools_h)
kmin.o: kmin.c kmin.h
mcall.o: mcall.c $(htslib_kfunc_h) $(call_h)
mcall.o: mcall.c $(htslib_kfunc_h) $(htslib_khash_str2int_h) $(call_h)
prob1.o: prob1.c $(prob1_h)
vcmp.o: vcmp.c $(htslib_hts_h) $(htslib_vcf_h) vcmp.h
ploidy.o: ploidy.c $(htslib_khash_str2int_h) $(htslib_kseq_h) $(htslib_hts_h) $(bcftools_h) $(ploidy_h)
polysomy.o: polysomy.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(bcftools_h) peakfit.h
peakfit.o: peakfit.c peakfit.h $(htslib_hts_h) $(htslib_kstring_h)
bin.o: bin.c $(bcftools_h) bin.h
cols.o: cols.c cols.h
regidx.o: regidx.c $(htslib_hts_h) $(htslib_kstring_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) regidx.h
consensus.o: consensus.c $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_bgzf_h) regidx.h $(bcftools_h) rbuf.h $(filter_h)
mpileup.o: mpileup.c $(htslib_sam_h) $(htslib_faidx_h) $(htslib_kstring_h) $(htslib_khash_str2int_h) regidx.h $(bcftools_h) $(bam2bcf_h) $(bam_sample_h) $(gvcf_h)
......@@ -242,25 +281,28 @@ csq.o: csq.c $(htslib_hts_h) $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(hts
# For tests that might use it, set $REF_PATH explicitly to use only reference
# areas within the test suite (or set it to ':' to use no reference areas).
# (regression.sh sets $REF_PATH to a subdirectory itself.)
test-no-plugins: $(PROGRAMS) $(TEST_PROGRAMS) $(BGZIP) $(TABIX)
#
# If using MSYS, avoid poor shell expansion via:
# MSYS2_ARG_CONV_EXCL="*" make check
check test-no-plugins: $(PROGRAMS) $(TEST_PROGRAMS) $(BGZIP) $(TABIX)
./test/test-rbuf
./test/test-regidx
REF_PATH=: ./test/test.pl --exec bgzip=$(BGZIP) --exec tabix=$(TABIX)
REF_PATH=: ./test/test.pl --exec bgzip=$(BGZIP) --exec tabix=$(TABIX) --htsdir=$(HTSDIR) $${TEST_OPTS:-}
test-plugins: $(PROGRAMS) $(TEST_PROGRAMS) $(BGZIP) $(TABIX) plugins
check-plugins test-plugins: $(PROGRAMS) $(TEST_PROGRAMS) $(BGZIP) $(TABIX) plugins
./test/test-rbuf
./test/test-regidx
REF_PATH=: ./test/test.pl --plugins --exec bgzip=$(BGZIP) --exec tabix=$(TABIX)
REF_PATH=: ./test/test.pl --plugins --exec bgzip=$(BGZIP) --exec tabix=$(TABIX) --htsdir=$(HTSDIR) $${TEST_OPTS:-}
test/test-rbuf.o: test/test-rbuf.c rbuf.h
test/test-rbuf: test/test-rbuf.o
$(CC) $(LDFLAGS) -o $@ $^ $(ALL_LIBS)
test/test-regidx.o: test/test-regidx.c $(htslib_kstring_h) regidx.h
test/test-regidx.o: test/test-regidx.c $(htslib_kstring_h) $(htslib_hts_os_h) regidx.h
test/test-regidx: test/test-regidx.o regidx.o $(HTSLIB)
$(CC) $(ALL_LDFLAGS) -o $@ $^ $(HTSLIB) -lpthread $(HTSLIB_LIB) $(ALL_LIBS)
test/test-regidx: test/test-regidx.o regidx.o | $(HTSLIB)
$(CC) $(ALL_LDFLAGS) -o $@ $^ $(HTSLIB_LIB) -lpthread $(ALL_LIBS)
# make docs target depends the a2x asciidoc program
......@@ -282,18 +324,19 @@ install: $(PROGRAMS) $(PLUGINS)
$(INSTALL_PROGRAM) $(PROGRAMS) $(DESTDIR)$(bindir)
$(INSTALL_SCRIPT) $(MISC_SCRIPTS) $(DESTDIR)$(misc_bindir)
$(INSTALL_MAN) doc/bcftools.1 $(DESTDIR)$(man1dir)
$(INSTALL_PROGRAM) plugins/*.so $(DESTDIR)$(plugindir)
$(INSTALL_PROGRAM) plugins/*$(PLUGIN_EXT) $(DESTDIR)$(plugindir)
clean: testclean clean-plugins
-rm -f gmon.out *.o *~ $(PROGRAMS) version.h plugins/*.so plugins/*.P
-rm -rf *.dSYM plugins/*.dSYM test/*.dSYM
-rm -f gmon.out *.o *.a *~ $(PROGRAMS) version.h
-rm -rf *.dSYM test/*.dSYM
clean-plugins:
-rm -f plugins/*.so plugins/*.P
-rm -f plugins/*$(PLUGIN_EXT) plugins/*.P
-rm -rf plugins/*.dSYM
testclean:
-rm -f test/*.o test/*~ $(TEST_PROGRAMS)
-rm -f test/*.hex
distclean: clean
-rm -f config.cache config.h config.log config.mk config.status
......
## Release 1.10 (6th December 2019)
* Numerous bug fixes, usability improvements and sanity checks were added
to prevent common user errors.
* The -r, --regions (and -R, --regions-file) option should never create
unsorted VCFs or duplicates records again. This also fixes rare cases where
a spanning deletion makes a subsequent record invisible to `bcftools isec`
and other commands.
* Additions to filtering and formatting expressions
- support for the spanning deletion alternate allele (ALT=*)
- new ILEN filtering expression to be able to filter by indel length
- new MEAN, MEDIAN, MODE, STDEV, phred filtering functions
- new formatting expression %PBINOM (phred-scaled binomial probability),
%INFO (the whole INFO column), %FORMAT (the whole FORMAT column),
%END (end position of the REF allele), %END0 (0-based end position
of the REF allele), %MASK (with multiple files indicates the presence
of the site in other files)
* New plugins
- `+gvcfz`: compress gVCF file by resizing gVCF blocks according to
specified criteria
- `+indel-stats`: collect various indel-specific statistics
- `+parental-origin`: determine parental origin of a CNV region
- `+remove-overlaps`: remove overlapping variants.
- `+split-vep`: query structured annotations such INFO/CSQ created by
bcftools/csq or VEP
- `+trio-dnm`: screen variants for possible de-novo mutations in trios
* `annotate`
- new -l, --merge-logic option for combining multiple overlapping regions
* `call`
- new `bcftools call -G, --group-samples` option which allows grouping
samples into populations and applying the HWE assumption within but
not across the groups.
* `csq`
- significant reduction of memory usage in the local -l mode for VCFs
with thousands of samples and 20% reduction in the non-local
haplotype-aware mode.
- fixes a small memory leak and formatting issue in FORMAT/BCSQ at
sites with many consequences
- do not print protein sequence of start_lost events
- support for "start_retained" consequence
- support for symbolic insertions (ALT="<INS...>"), "feature_elongation"
consequence
- new -b, --brief-predictions option to output abbreviated protein
predictions.
* `concat`
- the `--naive` command now checks header compatibility when concatenating
multiple files.
* `consensus`
- add a new `-H, --haplotype 1pIu/2pIu` feature to output first/second
allele for phased genotypes and the IUPAC code for unphased genotypes
- new -p, --prefix option to add a prefix to sequence names on output
* `+contrast`
- added support for Fisher's test probability and other annotations
* `+fill-from-fasta`
- new -N, --replace-non-ACGTN option
* `+dosage`
- fix some serious bugs in dosage calculation
* `+fill-tags`
- extended to perform simple on-the-fly calculations such as calculating
INFO/DP from FORMAT/DP.
* `merge`
- add support for merging FORMAT strings
- bug fixed in gVCF merging
* `mpileup`
- a new optional SCR annotation for the number of soft-clipped reads
* `reheader`
- new -f, --fai option for updating contig lines in the VCF header
* `+trio-stats`
- extend output to include DNM homs and recurrent DNMs
* VariantKey support
## Release 1.9 (18th July 2018)
* `annotate`
......@@ -18,21 +139,22 @@
* `convert`
- the --tsv2vcf option now makes the missing genotypes diploid, "./." instead of "."
- the --tsv2vcf option now makes the missing genotypes diploid, "./."
instead of "."
- the behavior of -i/-e with --gvcf2vcf changed. Previously only sites with
FILTER set to "PASS" or "." were expanded and the -i/-e options dropped
sites completely. The new behavior is to let the -i/-e options control which
records will be expanded. In order to drop records completely, one can stream
through "bcftools view" first.
sites completely. The new behavior is to let the -i/-e options control
which records will be expanded. In order to drop records completely,
one can stream through "bcftools view" first.
* `csq`
- since the real consequence of start/splice events are not known, the aminoacid
positions at subsequent variants should stay unchanged
- since the real consequence of start/splice events are not known,
the amino acid positions at subsequent variants should stay unchanged
- add `--force` option to skip malformatted transcripts in GFFs with out-of-phase
CDS exons.
- add `--force` option to skip malformatted transcripts in GFFs with
out-of-phase CDS exons.
* `+dosage`: output all alleles and all their dosages at multiallelic sites
......@@ -52,25 +174,28 @@
- fix a parsing problem, '@' was not removed from '@filename' expressions
* `mpileup`: fixed bug where, if samples were renamed using the `-G` (`--read-groups`)
option, some samples could be omitted from the output file.
* `mpileup`: fixed bug where, if samples were renamed using the `-G`
(`--read-groups`) option, some samples could be omitted from the output file.
* `norm`: update INFO/END when normalizing indels
* `+split`: new -S option to subset samples and to use custom file names instead of the defaults
* `+split`: new -S option to subset samples and to use custom file names
instead of the defaults
* `+smpl-stats`: new plugin
* `+trio-stats`: new plugin
* Fixed build problems with non-functional configure script produced on some platforms
* Fixed build problems with non-functional configure script produced on
some platforms
## Release 1.8 (April 2018)
* `-i, -e` filtering: Support for custom perl scripts
* `+contrast`: New plugin to annotate genotype differences between groups of samples
* `+contrast`: New plugin to annotate genotype differences between groups
of samples
* `+fixploidy`: New options for simpler ploidy usage
......
......@@ -125,6 +125,7 @@ void bcf_callaux_clean(bcf_callaux_t *bca, bcf_call_t *call)
memset(bca->rev_mqs,0,sizeof(int)*bca->nqual);
if ( call->ADF ) memset(call->ADF,0,sizeof(int32_t)*(call->n+1)*B2B_MAX_ALLELES);
if ( call->ADR ) memset(call->ADR,0,sizeof(int32_t)*(call->n+1)*B2B_MAX_ALLELES);
if ( call->SCR ) memset(call->SCR,0,sizeof(*call->SCR)*(call->n+1));
}
/*
......@@ -152,6 +153,7 @@ int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base, bcf_callaux_t
memset(r->qsum,0,sizeof(float)*4);
memset(r->anno,0,sizeof(double)*16);
memset(r->p,0,sizeof(float)*25);
r->SCR = 0;
if (ref_base >= 0) {
ref4 = seq_nt16_int[ref_base];
......@@ -199,6 +201,7 @@ int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base, bcf_callaux_t
if (q > 63) q = 63;
if (q < 4) q = 4; // MQ=0 reads count as BQ=4
bca->bases[n++] = q<<5 | (int)bam_is_rev(p->b)<<4 | b;
if ( bca->fmt_flag&(B2B_INFO_SCR|B2B_FMT_SCR) && PLP_HAS_SOFT_CLIP(p->cd.i) ) r->SCR++;
// collect annotations
if (b < 4)
{
......@@ -225,8 +228,12 @@ int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base, bcf_callaux_t
// collect for bias tests
if ( baseQ > 59 ) baseQ = 59;
if ( mapQ > 59 ) mapQ = 59;
int len, pos = get_position(p, &len);
int epos = (double)pos/(len+1) * bca->npos;
int len, epos = 0;
if ( bca->fmt_flag & (B2B_INFO_RPB|B2B_INFO_VDB) )
{
int pos = get_position(p, &len);
epos = (double)pos/(len+1) * bca->npos;
}
int ibq = baseQ/60. * bca->nqual;
int imq = mapQ/60. * bca->nqual;
if ( bam_is_rev(p->b) ) bca->rev_mqs[imq]++;
......@@ -650,6 +657,14 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int
call->DP4[4*i+3] = calls[i].anno[3];
}
}
if ( call->SCR )
{
for (i=0; i<n; i++)
{
call->SCR[0] += calls[i].SCR;
call->SCR[1+i] = calls[i].SCR;
}
}
if ( call->ADF )
{
assert( call->n_alleles<=B2B_MAX_ALLELES ); // this is always true for SNPs and so far for indels as well
......@@ -702,19 +717,23 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int
// calc_chisq_bias("XMQ", call->bcf_hdr->id[BCF_DT_CTG][call->tid].key, call->pos, bca->ref_mq, bca->alt_mq, bca->nqual);
// calc_chisq_bias("XBQ", call->bcf_hdr->id[BCF_DT_CTG][call->tid].key, call->pos, bca->ref_bq, bca->alt_bq, bca->nqual);
call->mwu_pos = calc_mwu_bias(bca->ref_pos, bca->alt_pos, bca->npos);
if ( bca->fmt_flag & B2B_INFO_RPB )
call->mwu_pos = calc_mwu_bias(bca->ref_pos, bca->alt_pos, bca->npos);
call->mwu_mq = calc_mwu_bias(bca->ref_mq, bca->alt_mq, bca->nqual);
call->mwu_bq = calc_mwu_bias(bca->ref_bq, bca->alt_bq, bca->nqual);
call->mwu_mqs = calc_mwu_bias(bca->fwd_mqs, bca->rev_mqs, bca->nqual);
#if CDF_MWU_TESTS
call->mwu_pos_cdf = calc_mwu_bias_cdf(bca->ref_pos, bca->alt_pos, bca->npos);
// CDF version of MWU tests is not calculated by default
if ( bca->fmt_flag & B2B_INFO_RPB )
call->mwu_pos_cdf = calc_mwu_bias_cdf(bca->ref_pos, bca->alt_pos, bca->npos);
call->mwu_mq_cdf = calc_mwu_bias_cdf(bca->ref_mq, bca->alt_mq, bca->nqual);
call->mwu_bq_cdf = calc_mwu_bias_cdf(bca->ref_bq, bca->alt_bq, bca->nqual);
call->mwu_mqs_cdf = calc_mwu_bias_cdf(bca->fwd_mqs, bca->rev_mqs, bca->nqual);
#endif
call->vdb = calc_vdb(bca->alt_pos, bca->npos);
if ( bca->fmt_flag & B2B_INFO_VDB )
call->vdb = calc_vdb(bca->alt_pos, bca->npos);
return 0;
}
......@@ -790,6 +809,8 @@ int bcf_call2bcf(bcf_call_t *bc, bcf1_t *rec, bcf_callret1_t *bcr, int fmt_flag,
if ( fmt_flag&B2B_INFO_DPR )
bcf_update_info_int32(hdr, rec, "DPR", bc->ADF, rec->n_allele);
}
if ( fmt_flag&B2B_INFO_SCR )
bcf_update_info_int32(hdr, rec, "SCR", bc->SCR, 1);
float tmpf[16];
for (i=0; i<16; i++) tmpf[i] = bc->anno[i];
......@@ -861,6 +882,8 @@ int bcf_call2bcf(bcf_call_t *bc, bcf1_t *rec, bcf_callret1_t *bcr, int fmt_flag,
if ( fmt_flag&B2B_FMT_DPR )
bcf_update_format_int32(hdr, rec, "DPR", bc->ADF+B2B_MAX_ALLELES, rec->n_sample*rec->n_allele);
}
if ( fmt_flag&B2B_FMT_SCR )
bcf_update_format_int32(hdr, rec, "SCR", bc->SCR+1, rec->n_sample);
return 0;
}
......@@ -55,10 +55,18 @@ DEALINGS IN THE SOFTWARE. */
#define B2B_INFO_AD (1<<9)
#define B2B_INFO_ADF (1<<10)
#define B2B_INFO_ADR (1<<11)
#define B2B_INFO_SCR (1<<12)
#define B2B_FMT_SCR (1<<13)
#define B2B_INFO_VDB (1<<14)
#define B2B_INFO_RPB (1<<15)
#define B2B_MAX_ALLELES 5
#define PLP_HAS_SOFT_CLIP(i) ((i)&1)
#define PLP_SAMPLE_ID(i) ((i)>>1)
typedef struct __bcf_callaux_t {
int fmt_flag;
int capQ, min_baseQ;
int openQ, extQ, tandemQ; // for indels
uint32_t min_support, max_support; // for collecting indel candidates
......@@ -77,10 +85,11 @@ typedef struct __bcf_callaux_t {
void *rghash;
} bcf_callaux_t;
// per-sample values
typedef struct {
uint32_t ori_depth;
unsigned int mq0;
int32_t *ADF, *ADR;
int32_t *ADF, *ADR, SCR;
float qsum[4];
// The fields are:
// depth fwd .. ref (0) and non-ref (2)
......@@ -98,6 +107,7 @@ typedef struct {
float p[25]; // phred-scaled likelihood of each genotype
} bcf_callret1_t;
// values for all samples
typedef struct {
int tid, pos;
bcf_hdr_t *bcf_hdr;
......@@ -107,7 +117,7 @@ typedef struct {
int n_supp; // number of supporting non-reference reads
double anno[16];
unsigned int depth, ori_depth, mq0;
int32_t *PL, *DP4, *ADR, *ADF;
int32_t *PL, *DP4, *ADR, *ADF, *SCR;
uint8_t *fmt_arr;
float vdb; // variant distance bias
float mwu_pos, mwu_mq, mwu_bq, mwu_mqs;
......
......@@ -39,7 +39,15 @@ THE SOFTWARE. */
#define FT_STDIN (1<<3)
char *bcftools_version(void);
/// Report an error and exit -1
void error(const char *format, ...) HTS_NORETURN HTS_FORMAT(HTS_PRINTF_FMT, 1, 2);
/// Report an error and exit -1. If errno != 0, appends strerror(errno).
// Note: unlike error() above, the message should not end with "\n" as a
// newline will be added by the function.
void error_errno(const char *format, ...) HTS_NORETURN HTS_FORMAT(HTS_PRINTF_FMT, 1, 2);
void bcf_hdr_append_version(bcf_hdr_t *hdr, int argc, char **argv, const char *cmd);
const char *hts_bcf_wmode(int file_type);
......
......@@ -49,12 +49,35 @@ typedef struct
}
family_t;
// For the single-sample and grouped -G calling
typedef struct
{
float *qsum; // QS(quality sum) values
int nqsum, dp;
double fa,fb,fc,fa2,fb2,fc2,fab,fac,fbc;
}
grp1_t;
typedef struct
{
grp1_t *grp;
int ngrp;
int *smpl2grp;
}
grp_t;
// For the `-C alleles -i` constrained calling
typedef struct
{
uint32_t n:31, used:1;
char **allele;
}
tgt_als_t;
typedef struct _ccall_t ccall_t;
typedef struct
{
// mcall only
float *qsum; // QS(sum) values
int nqsum, npdg;
int npdg;
int *als_map, nals_map; // mapping from full set of alleles to trimmed set of alleles (old -> new)
int *pl_map, npl_map; // same as above for PLs, but reverse (new -> old)
char **als; // array to hold the trimmed set of alleles to appear on output
......@@ -65,14 +88,19 @@ typedef struct
uint16_t *trio[5][5]; // family type, second index: allele count (2-4, first two are unused)
double *GLs;
float *GPs; // FORMAT/GP: posterior probabilities
int32_t *GQs; // FORMAT/GQ: genotype qualities
int32_t *GQs, *ADs; // FORMAT/GQ: genotype qualities; AD: allelic depth for -G
int32_t *itmp; // temporary int array, used for new PLs with CALL_CONSTR_ALLELES
int n_itmp, nGPs;
int n_itmp, nGPs, nADs;
vcmp_t *vcmp;
double trio_Pm_SNPs, trio_Pm_del, trio_Pm_ins; // P(mendelian) for trio calling, see mcall_call_trio_genotypes()
int32_t *ugts, *cgts; // unconstraind and constrained GTs
uint32_t output_tags;
char *prior_AN, *prior_AC; // reference panel AF tags (AF=AC/AN)
tgt_als_t *tgt_als; // for CALL_CONSTR_ALLELES
char *sample_groups; // for single-sample or grouped calling with -G
grp_t smpl_grp;
float *qsum;
int nqsum;
// ccall only
double indel_frac, min_perm_p, min_lrt;
......
/*
Copyright (C) 2019 Genome Research Ltd.
Author: Petr Danecek <pd3@sanger.ac.uk>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#include <string.h>
#include "cols.h"
cols_t *cols_split(const char *line, cols_t *cols, char delim)
{
if ( !cols ) cols = (cols_t*) calloc(1,sizeof(cols_t));
if ( cols->rmme ) free(cols->rmme);
cols->n = 0;
cols->rmme = strdup(line);
char *ss = cols->rmme;
while (1)
{
char *se = ss;
while ( *se && *se!=delim ) se++;
char tmp = *se;
*se = 0;
cols->n++;
if ( cols->n > cols->m )
{
cols->m += 10;
cols->off = (char**) realloc(cols->off, sizeof(*cols->off)*cols->m);
}
cols->off[ cols->n - 1 ] = ss;
if ( !tmp ) break;
ss = se + 1;
}
return cols;
}
void cols_append(cols_t *cols, char *str)
{
if ( cols->rmme )
{
size_t str_len = strlen(str);
size_t lst_len = strlen(cols->off[ cols->n - 1 ]);
size_t tot_len = 2 + str_len + lst_len + (cols->off[ cols->n - 1 ] - cols->rmme);
cols_t *tmp_cols = (cols_t*)calloc(1,sizeof(cols_t));
tmp_cols->rmme = (char*) calloc(tot_len,1);
tmp_cols->off = (char**) calloc(cols->n+1,sizeof(*tmp_cols->off));
char *ptr = tmp_cols->rmme;
int i;
for (i=0; i<cols->n; i++)
{
size_t len = strlen(cols->off[i]);
memcpy(ptr, cols->off[i], len);
tmp_cols->off[i] = ptr;
ptr += len + 1;
}
memcpy(ptr, str, str_len);
tmp_cols->off[i] = ptr;
free(cols->off);
free(cols->rmme);
cols->rmme = tmp_cols->rmme;
cols->off = tmp_cols->off;
cols->n = cols->n+1;
cols->m = cols->n;
free(tmp_cols);
return;
}
cols->n++;
if ( cols->n > cols->m )
{
cols->m++;
cols->off = (char**) realloc(cols->off,sizeof(*cols->off)*cols->m);
}
cols->off[cols->n-1] = str;
}
void cols_clear(cols_t *cols)
{
if ( !cols ) return;
free(cols->rmme);
free(cols->off);
cols->rmme = NULL;
cols->off = NULL;
}
void cols_destroy(cols_t *cols)
{
if ( !cols ) return;
cols_clear(cols);
free(cols);
}
/*
Copyright (C) 2019 Genome Research Ltd.
Author: Petr Danecek <pd3@sanger.ac.uk>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#ifndef __COLS_H__
#define __COLS_H__
#include <stdlib.h>
typedef struct
{
int n,m;
char **off, *rmme;
}
cols_t;
/*
cols_split() can be called repeatedly to split new strings, memory is allocated
and deallocated automatically
*/
cols_t *cols_split(const char *line, cols_t *cols, char delim);
/*
Although cols_append() can be combined with cols_split(), it is much slower and
the string must exist throughout the life of cols unless initialized with cols_split().
*/
void cols_append(cols_t *cols, char *str);
void cols_clear(cols_t *cols);
void cols_destroy(cols_t *cols);
#endif
......@@ -50,7 +50,8 @@ PERL_LIBS = @PERL_LIBS@
PLATFORM = @PLATFORM@
PLUGINS_ENABLED = @enable_bcftools_plugins@
plugindir = @bcf_plugindir@
plugindir = @bcf_plugindir@
pluginpath = @bcf_pluginpath@
PLUGIN_EXT = @PLUGIN_EXT@
@Hsource@HTSDIR = @HTSDIR@
......@@ -58,9 +59,12 @@ PLUGIN_EXT = @PLUGIN_EXT@
@Hsource@include $(HTSDIR)/htslib_static.mk
@Hsource@HTSLIB = $(HTSDIR)/libhts.a
@Hsource@HTSLIB_LIB = $(HTSLIB) $(HTSLIB_static_LIBS)
@Hsource@HTSLIB_DLL = $(HTSDIR)/@HTSLIB_DLL@
@Hsource@HTSLIB_LDFLAGS = $(HTSLIB_static_LDFLAGS)
@Hsource@W32_PLUGIN_LIBS = libbcftools.a $(HTSLIB_DLL) $(ALL_LIBS)
@Hsource@BGZIP = $(HTSDIR)/bgzip
@Hsource@TABIX = $(HTSDIR)/tabix
HTSLIB_CPPFLAGS = @HTSLIB_CPPFLAGS@
@Hinstall@HTSLIB_LDFLAGS = @HTSLIB_LDFLAGS@
@Hinstall@HTSLIB_LIB = -lhts
@Hinstall@W32_PLUGIN_LIBS = libbcftools.a $(HTSLIB_LDFLAGS) $(HTSLIB_LIB) $(ALL_LIBS)
......@@ -71,11 +71,11 @@ AC_ARG_WITH([bcf-plugin-path],
no) with_bcf_plugin_path= ;;
esac],
[with_bcf_plugin_path=$with_bcf_plugin_dir])
AC_SUBST([PLUGINPATH], $with_bcf_plugin_path)
AC_SUBST([bcf_pluginpath], $with_bcf_plugin_path)
AC_ARG_ENABLE([perl-filters],
[AS_HELP_STRING([--enable-perl-filters],
[do not build support for PERL scripts in -i/-e filtering expressions])],
[build in support for PERL scripts in -i/-e filtering expressions])],
[], [enable_perl_filters=no])
AC_SUBST(enable_perl_filters)
......@@ -120,8 +120,20 @@ AS_IF([test "$enable_bcftools_plugins" != "no"], [dnl
[*-cygwin* | *-CYGWIN*],[dnl
host_result="Cygwin DLL"
PLATFORM=CYGWIN
PLUGIN_EXT=.cygdll],
PLUGIN_EXT=.cygdll
HTSLIB_DLL=libhts.dll.a
],
[*-msys* | *-MSYS* | *-mingw* | *-MINGW*],[dnl
host_result="MSYS dll"
PLATFORM=MSYS
PLUGIN_EXT=.dll
HTSLIB_DLL=hts.dll.a
# This also sets __USE_MINGW_ANSI_STDIO which in turn makes PRId64,
# %lld and %z printf formats work. It also enforces the snprintf to
# be C99 compliant so it returns the correct values (in kstring.c).
CPPFLAGS="$CPPCFLAGS -D_XOPEN_SOURCE=600"],
[*-darwin* | *-Darwin*],[dnl
host_result="Darwin dylib"
PLATFORM=Darwin
......@@ -136,15 +148,24 @@ AS_IF([test "$enable_bcftools_plugins" != "no"], [dnl
AC_SUBST([PLUGIN_EXT])
AC_DEFINE_UNQUOTED([PLUGIN_EXT], ["$PLUGIN_EXT"],
[Platform-dependent plugin filename extension.])
AC_SUBST([HTSLIB_DLL])
AC_DEFINE([ENABLE_BCF_PLUGINS], 1,
[Define if BCFtools should enable plugins.])
AC_SEARCH_LIBS([dlopen], [dl], [],
[AC_MSG_ERROR([dlopen() not found
AS_CASE([$PLUGIN_EXT],
[*cygdll | *so],
[AC_SEARCH_LIBS([dlopen], [dl], [],
[AC_MSG_ERROR([dlopen() not found
Plugin support requires dynamic linking facilities from the operating system.
Either configure with --disable-bcftools-plugins or resolve this error to
build bcftools.])])],
[*dll],
[AC_SEARCH_LIBS([LoadLibraryA], [], [],
[AC_MSG_ERROR([LoadLibraryA() not found
Plugin support requires dynamic linking facilities from the operating system.
Either configure with --disable-bcftools-plugins or resolve this error to
build bcftools.])])
build bcftools.])])],
)
AC_MSG_CHECKING([if the compiler accepts -rdynamic])
save_CFLAGS=$CFLAGS
......@@ -156,7 +177,6 @@ build bcftools.])])
[CC_RDYNAMIC_OPT=-rdynamic],[CC_RDYNAMIC_OPT=])
AC_SUBST([CC_RDYNAMIC_OPT])
])
save_LIBS=$LIBS
zlib_devel=ok
dnl Set a trivial non-empty INCLUDES to avoid excess default includes tests
......@@ -226,6 +246,23 @@ AS_IF([test "$enable_perl_filters" != "no" ], [dnl
dnl Apply value from HTS_PROG_CC_WERROR (if set)
AS_IF([test "x$hts_late_cflags" != x],[CFLAGS="$CFLAGS $hts_late_cflags"])
dnl Look for regcomp in various libraries (needed on windows/mingw).
AC_SEARCH_LIBS(regcomp, regex, [libregex=needed], [])
dnl Force POSIX mode on Windows/Mingw
test -n "$host_alias" || host_alias=unknown-`uname -s`
case $host_alias in
*-msys* | *-MSYS* | *-mingw* | *-MINGW*)
host_result="MSYS dll"
PLATFORM=MSYS
PLUGIN_EXT=.dll
# This also sets __USE_MINGW_ANSI_STDIO which in turn makes PRId64,
# %lld and %z printf formats work. It also enforces the snprintf to
# be C99 compliant so it returns the correct values (in kstring.c).
CPPFLAGS="$CPPCFLAGS -D_XOPEN_SOURCE=600"
;;
esac
AC_CONFIG_FILES([config.mk])
AC_OUTPUT
......
......@@ -50,6 +50,7 @@
#define PICK_ALT 2
#define PICK_LONG 4
#define PICK_SHORT 8
#define PICK_IUPAC 16
typedef struct
{
......@@ -76,11 +77,12 @@ typedef struct
int fa_src_pos; // last genomic coordinate read from the input fasta (0-based)
char prev_base; // this is only to validate the REF allele in the VCF - the modified fa_buf cannot be used for inserts following deletions, see 600#issuecomment-383186778
int prev_base_pos; // the position of prev_base
int prev_is_insert;
rbuf_t vcf_rbuf;
bcf1_t **vcf_buf;
int nvcf_buf, rid;
char *chr;
char *chr, *chr_prefix;
regidx_t *mask;
regitr_t *itr;
......@@ -98,7 +100,7 @@ typedef struct
FILE *fp_out;
FILE *fp_chain;
char **argv;
int argc, output_iupac, haplotype, allele, isample;
int argc, output_iupac, haplotype, allele, isample, napplied;
char *fname, *ref_fname, *sample, *output_fname, *mask_fname, *chain_fname, missing_allele;
}
args_t;
......@@ -207,7 +209,7 @@ static void init_data(args_t *args)
{
args->files = bcf_sr_init();
args->files->require_index = 1;
if ( !bcf_sr_add_reader(args->files,args->fname) ) error("Failed to open %s: %s\n", args->fname, bcf_sr_strerror(args->files->errnum));
if ( !bcf_sr_add_reader(args->files,args->fname) ) error("Failed to read from %s: %s\n", !strcmp("-",args->fname)?"standard input":args->fname, bcf_sr_strerror(args->files->errnum));
args->hdr = args->files->readers[0].header;
args->isample = -1;
if ( args->sample )
......@@ -299,7 +301,7 @@ static void init_region(args_t *args, char *line)
args->vcf_rbuf.n = 0;
bcf_sr_seek(args->files,line,args->fa_ori_pos);
if ( tmp_ptr ) *tmp_ptr = tmp;
fprintf(args->fp_out,">%s\n",line);
fprintf(args->fp_out,">%s%s\n",args->chr_prefix?args->chr_prefix:"",line);
if (args->chain_fname )
{
args->chain = init_chain(args->chain, args->fa_ori_pos);
......@@ -331,7 +333,7 @@ static void unread_vcf_line(args_t *args, bcf1_t **rec_ptr)
{
bcf1_t *rec = *rec_ptr;
if ( args->vcf_rbuf.n >= args->vcf_rbuf.m )
error("FIXME: too many overlapping records near %s:%d\n", bcf_seqname(args->hdr,rec),rec->pos+1);
error("FIXME: too many overlapping records near %s:%"PRId64"\n", bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
// Insert the new record in the buffer. The line would be overwritten in
// the next bcf_sr_next_line call, therefore we need to swap it with an
......@@ -395,9 +397,18 @@ static void apply_variant(args_t *args, bcf1_t *rec)
if ( !fmt ) return;
if ( fmt->type!=BCF_BT_INT8 )
error("Todo: GT field represented with BCF_BT_INT8, too many alleles at %s:%d?\n",bcf_seqname(args->hdr,rec),rec->pos+1);
error("Todo: GT field represented with BCF_BT_INT8, too many alleles at %s:%"PRId64"?\n",bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
uint8_t *ptr = fmt->p + fmt->size*args->isample;
if ( args->haplotype )
enum { use_hap, use_iupac, pick_one } action = use_hap;
if ( args->allele==PICK_IUPAC )
{
if ( !bcf_gt_is_phased(ptr[0]) && !bcf_gt_is_phased(ptr[fmt->n-1]) ) action = use_iupac;
}
else if ( args->output_iupac ) action = use_iupac;
else if ( !args->haplotype ) action = pick_one;
if ( action==use_hap )
{
if ( args->haplotype > fmt->n )
{
......@@ -410,7 +421,7 @@ static void apply_variant(args_t *args, bcf1_t *rec)
{
if ( !warned_haplotype )
{
fprintf(stderr, "Can't apply %d-th haplotype at %s:%d. (This warning is printed only once.)\n", args->haplotype,bcf_seqname(args->hdr,rec),rec->pos+1);
fprintf(stderr, "Can't apply %d-th haplotype at %s:%"PRId64". (This warning is printed only once.)\n", args->haplotype,bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
warned_haplotype = 1;
}
return;
......@@ -428,7 +439,7 @@ static void apply_variant(args_t *args, bcf1_t *rec)
ialt = bcf_gt_allele(ialt);
}
}
else if ( args->output_iupac )
else if ( action==use_iupac )
{
ialt = ptr[0];
if ( bcf_gt_is_missing(ialt) || ialt==bcf_int32_vector_end )
......@@ -456,7 +467,7 @@ static void apply_variant(args_t *args, bcf1_t *rec)
if ( ialt>=0 )
{
if ( rec->n_allele <= ialt || rec->n_allele <= jalt ) error("Invalid VCF, too few ALT alleles at %s:%d\n", bcf_seqname(args->hdr,rec),rec->pos+1);
if ( rec->n_allele <= ialt || rec->n_allele <= jalt ) error("Invalid VCF, too few ALT alleles at %s:%"PRId64"\n", bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
if ( ialt!=jalt && !rec->d.allele[ialt][1] && !rec->d.allele[jalt][1] ) // is this a het snp?
{
char ial = rec->d.allele[ialt][0];
......@@ -488,7 +499,7 @@ static void apply_variant(args_t *args, bcf1_t *rec)
{
if ( ptr[i]==(uint8_t)bcf_int8_vector_end ) break;
jalt = bcf_gt_allele(ptr[i]);
if ( rec->n_allele <= jalt ) error("Broken VCF, too few alts at %s:%d\n", bcf_seqname(args->hdr,rec),rec->pos+1);
if ( rec->n_allele <= jalt ) error("Broken VCF, too few alts at %s:%"PRId64"\n", bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
if ( args->allele & (PICK_LONG|PICK_SHORT) )
{
int len = jalt==0 ? rec->rlen : strlen(rec->d.allele[jalt]);
......@@ -510,7 +521,7 @@ static void apply_variant(args_t *args, bcf1_t *rec)
}
}
if ( !ialt ) return; // ref allele
if ( rec->n_allele <= ialt ) error("Broken VCF, too few alts at %s:%d\n", bcf_seqname(args->hdr,rec),rec->pos+1);
if ( rec->n_allele <= ialt ) error("Broken VCF, too few alts at %s:%"PRId64"\n", bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
}
else if ( args->output_iupac && !rec->d.allele[0][1] && !rec->d.allele[1][1] )
{
......@@ -531,18 +542,29 @@ static void apply_variant(args_t *args, bcf1_t *rec)
ialt = 1;
}
// Overlapping variant? Can be still OK iff this is an insertion
if ( rec->pos <= args->fa_frz_pos && (rec->pos!=args->fa_frz_pos || rec->d.allele[0][0]!=rec->d.allele[ialt][0]) )
// Overlapping variant?
if ( rec->pos <= args->fa_frz_pos )
{
fprintf(stderr,"The site %s:%d overlaps with another variant, skipping...\n", bcf_seqname(args->hdr,rec),rec->pos+1);
return;
// Can be still OK iff this is an insertion (and which does not follow another insertion, see #888).
// This still may not be enough for more complicated cases with multiple duplicate positions
// and other types in between. In such case let the user normalize the VCF and remove duplicates.
int overlap = 0;
if ( rec->pos < args->fa_frz_pos || !(bcf_get_variant_type(rec,ialt) & VCF_INDEL) ) overlap = 1;
else if ( rec->d.var[ialt].n <= 0 || args->prev_is_insert ) overlap = 1;
if ( overlap )
{
fprintf(stderr,"The site %s:%"PRId64" overlaps with another variant, skipping...\n", bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
return;
}
}
int len_diff = 0, alen = 0;
int idx = rec->pos - args->fa_ori_pos + args->fa_mod_off;
if ( idx<0 )
{
fprintf(stderr,"Warning: ignoring overlapping variant starting at %s:%d\n", bcf_seqname(args->hdr,rec),rec->pos+1);
fprintf(stderr,"Warning: ignoring overlapping variant starting at %s:%"PRId64"\n", bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
return;
}
if ( rec->rlen > args->fa_buf.l - idx )
......@@ -552,17 +574,17 @@ static void apply_variant(args_t *args, bcf1_t *rec)
if ( alen > rec->rlen )
{
rec->d.allele[ialt][rec->rlen] = 0;
fprintf(stderr,"Warning: trimming variant starting at %s:%d\n", bcf_seqname(args->hdr,rec),rec->pos+1);
fprintf(stderr,"Warning: trimming variant starting at %s:%"PRId64"\n", bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
}
}
if ( idx>=args->fa_buf.l )
error("FIXME: %s:%d .. idx=%d, ori_pos=%d, len=%"PRIu64", off=%d\n",bcf_seqname(args->hdr,rec),rec->pos+1,idx,args->fa_ori_pos,(uint64_t)args->fa_buf.l,args->fa_mod_off);
error("FIXME: %s:%"PRId64" .. idx=%d, ori_pos=%d, len=%"PRIu64", off=%d\n",bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1,idx,args->fa_ori_pos,(uint64_t)args->fa_buf.l,args->fa_mod_off);
// sanity check the reference base
if ( rec->d.allele[ialt][0]=='<' )
{
if ( strcasecmp(rec->d.allele[ialt], "<DEL>") )
error("Symbolic alleles other than <DEL> are currently not supported: %s at %s:%d\n",rec->d.allele[ialt],bcf_seqname(args->hdr,rec),rec->pos+1);
error("Symbolic alleles other than <DEL> are currently not supported: %s at %s:%"PRId64"\n",rec->d.allele[ialt],bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
assert( rec->d.allele[0][1]==0 ); // todo: for now expecting strlen(REF) = 1
len_diff = 1-rec->rlen;
rec->d.allele[ialt] = rec->d.allele[0]; // according to VCF spec, REF must precede the event
......@@ -570,7 +592,7 @@ static void apply_variant(args_t *args, bcf1_t *rec)
}
else if ( strncasecmp(rec->d.allele[0],args->fa_buf.s+idx,rec->rlen) )
{
// This is hacky, handle a special case: if insert follows a deletion (AAC>A, C>CAA),
// This is hacky, handle a special case: if SNP or an insert follows a deletion (AAC>A, C>CAA),
// the reference base in fa_buf is lost and the check fails. We do not keep a buffer
// with the original sequence as it should not be necessary, we should encounter max
// one base overlap
......@@ -591,11 +613,11 @@ static void apply_variant(args_t *args, bcf1_t *rec)
args->fa_buf.s[idx+rec->rlen] = 0;
}
error(
"The fasta sequence does not match the REF allele at %s:%d:\n"
" .vcf: [%s]\n"
"The fasta sequence does not match the REF allele at %s:%"PRId64":\n"
" .vcf: [%s] <- (REF)\n"
" .vcf: [%s] <- (ALT)\n"
" .fa: [%s]%c%s\n",
bcf_seqname(args->hdr,rec),rec->pos+1, rec->d.allele[0], rec->d.allele[ialt], args->fa_buf.s+idx,
bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1, rec->d.allele[0], rec->d.allele[ialt], args->fa_buf.s+idx,
tmp?tmp:' ',tmp?args->fa_buf.s+idx+rec->rlen+1:""
);
}
......@@ -618,19 +640,31 @@ static void apply_variant(args_t *args, bcf1_t *rec)
// deletion or same size event
for (i=0; i<alen; i++)
args->fa_buf.s[idx+i] = rec->d.allele[ialt][i];
if ( len_diff )
{
args->prev_base = rec->d.allele[0][rec->rlen - 1];
args->prev_base_pos = rec->pos + rec->rlen - 1;
memmove(args->fa_buf.s+idx+alen,args->fa_buf.s+idx+rec->rlen,args->fa_buf.l-idx-rec->rlen);
}
args->prev_base = rec->d.allele[0][rec->rlen - 1];
args->prev_base_pos = rec->pos + rec->rlen - 1;
args->prev_is_insert = 0;
}
else
{
args->prev_is_insert = 1;
args->prev_base_pos = rec->pos;
// insertion
ks_resize(&args->fa_buf, args->fa_buf.l + len_diff);
memmove(args->fa_buf.s + idx + rec->rlen + len_diff, args->fa_buf.s + idx + rec->rlen, args->fa_buf.l - idx - rec->rlen);
for (i=0; i<alen; i++)
// This can get tricky, make sure the bases unchanged by the insertion do not overwrite preceeding variants.
// For example, here we want to get TAA:
// POS REF ALT
// 1 C T
// 1 C CAA
int ibeg = 0;
while ( ibeg<alen && rec->d.allele[0][ibeg]==rec->d.allele[ialt][ibeg] && rec->pos + ibeg <= args->prev_base_pos ) ibeg++;
for (i=ibeg; i<alen; i++)
args->fa_buf.s[idx+i] = rec->d.allele[ialt][i];
}
if (args->chain && len_diff != 0)
......@@ -650,6 +684,7 @@ static void apply_variant(args_t *args, bcf1_t *rec)
args->fa_buf.l += len_diff;
args->fa_mod_off += len_diff;
args->fa_frz_pos = rec->pos + rec->rlen - 1;
args->napplied++;
}
......@@ -755,6 +790,7 @@ static void consensus(args_t *args)
flush_fa_buffer(args, 0);
bgzf_close(fasta);
free(str.s);
fprintf(stderr,"Applied %d variants\n", args->napplied);
}
static void usage(args_t *args)
......@@ -772,17 +808,19 @@ static void usage(args_t *args)
fprintf(stderr, " -f, --fasta-ref <file> reference sequence in fasta format\n");
fprintf(stderr, " -H, --haplotype <which> choose which allele to use from the FORMAT/GT field, note\n");
fprintf(stderr, " the codes are case-insensitive:\n");
fprintf(stderr, " 1: first allele from GT\n");
fprintf(stderr, " 2: second allele\n");
fprintf(stderr, " 1: first allele from GT, regardless of phasing\n");
fprintf(stderr, " 2: second allele from GT, regardless of phasing\n");
fprintf(stderr, " R: REF allele in het genotypes\n");
fprintf(stderr, " A: ALT allele\n");
fprintf(stderr, " LR,LA: longer allele and REF/ALT if equal length\n");
fprintf(stderr, " SR,SA: shorter allele and REF/ALT if equal length\n");
fprintf(stderr, " 1pIu,2pIu: first/second allele for phased and IUPAC code for unphased GTs\n");
fprintf(stderr, " -i, --include <expr> select sites for which the expression is true (see man page for details)\n");
fprintf(stderr, " -I, --iupac-codes output variants in the form of IUPAC ambiguity codes\n");
fprintf(stderr, " -m, --mask <file> replace regions with N\n");
fprintf(stderr, " -M, --missing <char> output <char> instead of skipping the missing genotypes\n");
fprintf(stderr, " -o, --output <file> write output to a file [standard output]\n");
fprintf(stderr, " -p, --prefix <string> prefix to add to output sequence names\n");
fprintf(stderr, " -s, --sample <name> apply variants of the given sample\n");
fprintf(stderr, "Examples:\n");
fprintf(stderr, " # Get the consensus for one region. The fasta header lines are then expected\n");
......@@ -809,13 +847,15 @@ int main_consensus(int argc, char *argv[])
{"mask",1,0,'m'},
{"missing",1,0,'M'},
{"chain",1,0,'c'},
{"prefix",required_argument,0,'p'},
{0,0,0,0}
};
int c;
while ((c = getopt_long(argc, argv, "h?s:1Ii:e:H:f:o:m:c:M:",loptions,NULL)) >= 0)
while ((c = getopt_long(argc, argv, "h?s:1Ii:e:H:f:o:m:c:M:p:",loptions,NULL)) >= 0)
{
switch (c)
{
case 'p': args->chr_prefix = optarg; break;
case 's': args->sample = optarg; break;
case 'o': args->output_fname = optarg; break;
case 'I': args->output_iupac = 1; break;
......@@ -837,10 +877,14 @@ int main_consensus(int argc, char *argv[])
else if ( !strcasecmp(optarg,"LA") ) args->allele |= PICK_LONG|PICK_ALT;
else if ( !strcasecmp(optarg,"SR") ) args->allele |= PICK_SHORT|PICK_REF;
else if ( !strcasecmp(optarg,"SA") ) args->allele |= PICK_SHORT|PICK_ALT;
else if ( !strcasecmp(optarg,"1pIu") ) args->allele |= PICK_IUPAC, args->haplotype = 1;
else if ( !strcasecmp(optarg,"2pIu") ) args->allele |= PICK_IUPAC, args->haplotype = 2;
else
{
args->haplotype = optarg[0] - '0';
if ( args->haplotype <=0 ) error("Expected positive integer with --haplotype\n");
char *tmp;
args->haplotype = strtol(optarg, &tmp, 10);
if ( tmp==optarg || *tmp ) error("Error: Could not parse --haplotype %s, expected numeric argument\n", optarg);
if ( args->haplotype <=0 ) error("Error: Expected positive integer with --haplotype\n");
}
break;
default: usage(args); break;
......
This diff is collapsed.
This diff is collapsed.
bcftools (1.10-1) unstable; urgency=medium
* New upstream release.
* Restore i386 as libhts/tabix has restored it.
* Add myself as an uploader
-- Michael R. Crusoe <michael.crusoe@gmail.com> Mon, 16 Dec 2019 16:17:57 +0100
bcftools (1.9-2) unstable; urgency=medium
* Team upload.
......
Source: bcftools
Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.org>
Uploaders: Michael R. Crusoe <michael.crusoe@gmail.com>
Section: science
Priority: optional
Build-Depends: debhelper (>= 11~),
zlib1g-dev,
libbz2-dev,
liblzma-dev,
libhts-dev (>= 1.5-3),
libhts-dev (>= 1.10),
libgsl-dev,
tabix <!nocheck>,
libio-pty-perl <!nocheck>
......@@ -16,7 +17,7 @@ Vcs-Git: https://salsa.debian.org/med-team/bcftools.git
Homepage: http://samtools.github.io/bcftools/
Package: bcftools
Architecture: any-amd64 arm64 armel armhf mips mips64el mipsel ppc64el s390x alpha hppa ia64 m68k powerpc powerpcspe ppc64 riscv64 sh4 sparc64 x32
Architecture: any
Depends: ${shlibs:Depends},
${misc:Depends},
${perl:Depends}
......
......@@ -9,9 +9,9 @@ Description: Skip single test causing new failures
.
FIXME: This workaround needs to be reconsidered in future versions
--- a/test/test.pl
+++ b/test/test.pl
@@ -132,7 +132,7 @@ test_vcf_query($opts,in=>'view.filter',o
--- bcftools.orig/test/test.pl
+++ bcftools/test/test.pl
@@ -143,7 +143,7 @@
test_vcf_query($opts,in=>'query.filter.3',out=>'query.51.out',args=>q[-f'[\\t%GT\\n]\\n' -i'GT~"1" && GT~"2"']);
test_vcf_query($opts,in=>'query.filter.3',out=>'query.52.out',args=>q[-f'[\\t%GT\\n]\\n' -i'GT~"1" & GT~"2"']);
test_vcf_query($opts,in=>'query.filter.3',out=>'query.53.out',args=>q[-f'%POS[\\t%GT]\\n' -i'COUNT(GT="het")=1']);
......
......@@ -8,14 +8,14 @@ Description: Set the bcftools plugin path to search system directories
Author: Afif Elghraoui <afif@ghraoui.name>
Forwarded: not-needed
Last-Update: 2015-11-09
--- a/test/test.pl
+++ b/test/test.pl
@@ -932,7 +932,7 @@ sub test_vcf_plugin
--- bcftools.orig/test/test.pl
+++ bcftools/test/test.pl
@@ -1152,7 +1152,7 @@
{
my ($opts,%args) = @_;
if ( !$$opts{test_plugins} ) { return; }
- $ENV{BCFTOOLS_PLUGINS} = "$$opts{bin}/plugins";
+ $ENV{BCFTOOLS_PLUGINS} = "$$opts{bin}/plugins:";
if ( !exists($args{args}) ) { $args{args} = ''; }
$args{args} =~ s/{PATH}/$$opts{path}/g;
$args{cmd} =~ s/{PATH}/$$opts{path}/g;
my $wpath = $$opts{path};
if ($^O =~ /^msys/) {
This diff is collapsed.