Skip to content
Commits on Source (4)
......@@ -5,6 +5,9 @@ Requirements
============
Building HTSlib requires a few programs and libraries to be present.
See the "System Specific Details" below for guidance on how to install
these.
At least the following are required:
GNU make
......@@ -29,7 +32,7 @@ library itself, and include files needed to compile code that uses functions
from the library. Note that some Linux distributions put include files in
a development ('-dev' or '-devel') package separate from the main library.
libz (required)
zlib (required)
libbz2 (required, unless configured with --disable-bz2)
liblzma (required, unless configured with --disable-lzma)
libcurl (optional, but strongly recommended)
......@@ -200,3 +203,42 @@ For example,
make DESTDIR=/tmp/staging prefix=/opt
would install into bin, lib, etc subdirectories under /tmp/staging/opt.
System Specific Details
=======================
Installing the prerequisites is system dependent, and there is more
than one correct way of satisfying them, including downloading their
source code and compiling and installing them yourself.
For people with super-user access, we provide an example set of commands
below for installing the dependencies on a variety of operating system
distributions. Note these are not specific recommendations on distribution,
compiler or SSL implementation. It is assumed you already have the core set
of packages for the given distribution - the lists may be incomplete if
this is not the case.
Debian / Ubuntu
---------------
sudo apt-get update # Ensure the package list is up to date
sudo apt-get install autoconf automake make gcc perl zlib1g-dev libbz2-dev liblzma-dev libcurl4-gnutls-dev libssl-dev
Note: libcurl4-openssl-dev can be used as an alternative to libcurl4-gnutls-dev.
RedHat / CentOS
---------------
sudo yum install autoconf automake make gcc perl-Data-Dumper zlib-devel bzip2 bzip2-devel xz-devel curl-devel openssl-devel
Alpine Linux
------------
sudo apk update # Ensure the package list is up to date
sudo apk add autoconf automake make gcc musl-dev perl bash zlib-dev bzip2-dev xz-dev curl-dev libressl-dev
OpenSUSE
--------
sudo zypper install autoconf automake make gcc perl zlib-devel libbz2-devel xz-devel libcurl-devel libopenssl-devel
......@@ -3,7 +3,7 @@ according to the terms of the following MIT/Expat license.]
The MIT/Expat License
Copyright (C) 2012-2014 Genome Research Ltd.
Copyright (C) 2012-2018 Genome Research Ltd.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
......@@ -29,7 +29,7 @@ according to the terms of the following Modified 3-Clause BSD license.]
The Modified-BSD License
Copyright (C) 2012-2014 Genome Research Ltd.
Copyright (C) 2012-2018 Genome Research Ltd.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
......
......@@ -174,8 +174,9 @@ cram_misc_h = cram/misc.h $(cram_os_h)
cram_os_h = cram/os.h $(htslib_hts_endian_h)
cram_sam_header_h = cram/sam_header.h cram/string_alloc.h cram/pooled_alloc.h $(htslib_khash_h) $(htslib_kstring_h)
cram_samtools_h = cram/cram_samtools.h $(htslib_sam_h) $(cram_sam_header_h)
cram_structs_h = cram/cram_structs.h $(htslib_thread_pool_h) cram/string_alloc.h $(htslib_khash_h)
cram_structs_h = cram/cram_structs.h $(htslib_thread_pool_h) cram/string_alloc.h cram/mFILE.h $(htslib_khash_h)
cram_open_trace_file_h = cram/open_trace_file.h cram/mFILE.h
bcf_sr_sort_h = bcf_sr_sort.h $(htslib_synced_bcf_reader_h) $(htslib_kbitset_h)
hfile_internal_h = hfile_internal.h $(htslib_hfile_h) $(textutils_internal_h)
hts_internal_h = hts_internal.h $(htslib_hts_h) $(textutils_internal_h)
textutils_internal_h = textutils_internal.h $(htslib_kstring_h)
......@@ -288,23 +289,23 @@ hts-$(LIBHTS_SOVERSION).dll: $(LIBHTS_OBJS)
# Link every prerequisite object ($^), not just the first one ($<), into the DLL.
$(CC) -shared $(LDFLAGS) -o $@ $^ hts.dll.a $(LIBS)
bgzf.o bgzf.pico: bgzf.c config.h $(htslib_hts_h) $(htslib_bgzf_h) $(htslib_hfile_h) $(htslib_thread_pool_h) cram/pooled_alloc.h $(htslib_khash_h)
errmod.o errmod.pico: errmod.c config.h $(htslib_hts_h) $(htslib_ksort_h)
bgzf.o bgzf.pico: bgzf.c config.h $(htslib_hts_h) $(htslib_bgzf_h) $(htslib_hfile_h) $(htslib_thread_pool_h) $(htslib_hts_endian_h) cram/pooled_alloc.h $(htslib_khash_h)
errmod.o errmod.pico: errmod.c config.h $(htslib_hts_h) $(htslib_ksort_h) $(htslib_hts_os_h)
kstring.o kstring.pico: kstring.c config.h $(htslib_kstring_h)
knetfile.o knetfile.pico: knetfile.c config.h $(htslib_hts_log_h) $(htslib_knetfile_h)
hfile.o hfile.pico: hfile.c config.h $(htslib_hfile_h) $(hfile_internal_h) $(hts_internal_h) $(htslib_khash_h)
hfile_gcs.o hfile_gcs.pico: hfile_gcs.c config.h $(htslib_hts_h) $(htslib_kstring_h) $(hfile_internal_h)
hfile_libcurl.o hfile_libcurl.pico: hfile_libcurl.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h)
hfile_libcurl.o hfile_libcurl.pico: hfile_libcurl.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_h)
hfile_net.o hfile_net.pico: hfile_net.c config.h $(hfile_internal_h) $(htslib_knetfile_h)
hfile_s3.o hfile_s3.pico: hfile_s3.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h)
hts.o hts.pico: hts.c config.h $(htslib_hts_h) $(htslib_bgzf_h) $(cram_h) $(hfile_internal_h) $(htslib_hfile_h) version.h $(hts_internal_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_ksort_h)
hts.o hts.pico: hts.c config.h $(htslib_hts_h) $(htslib_bgzf_h) $(cram_h) $(htslib_hfile_h) $(htslib_hts_endian_h) version.h $(hts_internal_h) $(hfile_internal_h) $(htslib_hts_os_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_ksort_h)
hts_os.o hts_os.pico: hts_os.c config.h os/rand.c
vcf.o vcf.pico: vcf.c config.h $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_hfile_h) $(hts_internal_h) $(htslib_khash_str2int_h) $(htslib_kstring_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_hts_endian_h)
sam.o sam.pico: sam.c config.h $(htslib_sam_h) $(htslib_bgzf_h) $(cram_h) $(hts_internal_h) $(htslib_hfile_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_kstring_h) $(htslib_hts_endian_h)
tbx.o tbx.pico: tbx.c config.h $(htslib_tbx_h) $(htslib_bgzf_h) $(hts_internal_h) $(htslib_khash_h)
tbx.o tbx.pico: tbx.c config.h $(htslib_tbx_h) $(htslib_bgzf_h) $(htslib_hts_endian_h) $(hts_internal_h) $(htslib_khash_h)
faidx.o faidx.pico: faidx.c config.h $(htslib_bgzf_h) $(htslib_faidx_h) $(htslib_hfile_h) $(htslib_khash_h) $(htslib_kstring_h) $(hts_internal_h)
bcf_sr_sort.o bcf_sr_sort.pico: bcf_sr_sort.c config.h bcf_sr_sort.h $(htslib_kseq_h) $(htslib_khash_str2int_h)
synced_bcf_reader.o synced_bcf_reader.pico: synced_bcf_reader.c config.h bcf_sr_sort.h $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(htslib_bgzf_h) $(htslib_thread_pool_h)
bcf_sr_sort.o bcf_sr_sort.pico: bcf_sr_sort.c config.h $(bcf_sr_sort_h) $(htslib_khash_str2int_h)
synced_bcf_reader.o synced_bcf_reader.pico: synced_bcf_reader.c config.h $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(htslib_bgzf_h) $(htslib_thread_pool_h) $(bcf_sr_sort_h)
vcf_sweep.o vcf_sweep.pico: vcf_sweep.c config.h $(htslib_vcf_sweep_h) $(htslib_bgzf_h)
vcfutils.o vcfutils.pico: vcfutils.c config.h $(htslib_vcfutils_h) $(htslib_kbitset_h)
kfunc.o kfunc.pico: kfunc.c config.h $(htslib_kfunc_h)
......@@ -325,11 +326,11 @@ cram/cram_io.o cram/cram_io.pico: cram/cram_io.c config.h os/lzma_stub.h $(cram_
cram/cram_samtools.o cram/cram_samtools.pico: cram/cram_samtools.c config.h $(cram_h) $(htslib_sam_h)
cram/cram_stats.o cram/cram_stats.pico: cram/cram_stats.c config.h $(cram_h) $(cram_os_h)
cram/files.o cram/files.pico: cram/files.c config.h $(cram_misc_h)
cram/mFILE.o cram/mFILE.pico: cram/mFILE.c config.h $(cram_os_h) cram/mFILE.h
cram/open_trace_file.o cram/open_trace_file.pico: cram/open_trace_file.c config.h $(cram_os_h) $(cram_open_trace_file_h) $(cram_misc_h) $(htslib_hfile_h)
cram/mFILE.o cram/mFILE.pico: cram/mFILE.c config.h $(htslib_hts_log_h) $(cram_os_h) cram/mFILE.h
cram/open_trace_file.o cram/open_trace_file.pico: cram/open_trace_file.c config.h $(cram_os_h) $(cram_open_trace_file_h) $(cram_misc_h) $(htslib_hfile_h) $(htslib_hts_log_h)
cram/pooled_alloc.o cram/pooled_alloc.pico: cram/pooled_alloc.c config.h cram/pooled_alloc.h $(cram_misc_h)
cram/rANS_static.o cram/rANS_static.pico: cram/rANS_static.c config.h cram/rANS_static.h cram/rANS_byte.h
cram/sam_header.o cram/sam_header.pico: cram/sam_header.c config.h $(cram_sam_header_h) cram/string_alloc.h
cram/sam_header.o cram/sam_header.pico: cram/sam_header.c config.h $(htslib_hts_log_h) $(cram_sam_header_h) cram/string_alloc.h
cram/string_alloc.o cram/string_alloc.pico: cram/string_alloc.c config.h cram/string_alloc.h
thread_pool.o thread_pool.pico: thread_pool.c config.h $(thread_pool_internal_h)
......@@ -359,7 +360,7 @@ check test: $(BUILT_PROGRAMS) $(BUILT_TEST_PROGRAMS)
test/hfile
test/test_bgzf test/bgziptest.txt
cd test/tabix && ./test-tabix.sh tabix.tst
REF_PATH=: test/sam test/ce.fa test/faidx.fa
REF_PATH=: test/sam test/ce.fa test/faidx.fa test/fastqs.fq
test/test-regidx
cd test && REF_PATH=: ./test.pl $${TEST_OPTS:-}
......@@ -399,18 +400,18 @@ test/test-bcf-sr: test/test-bcf-sr.o libhts.a
test/test-bcf-translate: test/test-bcf-translate.o libhts.a
$(CC) $(LDFLAGS) -o $@ test/test-bcf-translate.o libhts.a -lz $(LIBS) -lpthread
test/hts_endian.o: test/hts_endian.c $(htslib_hts_endian_h)
test/hts_endian.o: test/hts_endian.c config.h $(htslib_hts_endian_h)
test/fieldarith.o: test/fieldarith.c config.h $(htslib_sam_h)
test/hfile.o: test/hfile.c config.h $(htslib_hfile_h) $(htslib_hts_defs_h)
test/sam.o: test/sam.c config.h $(htslib_hts_defs_h) $(htslib_sam_h) $(htslib_faidx_h) $(htslib_kstring_h)
test/test_bgzf.o: test/test_bgzf.c $(htslib_bgzf_h) $(htslib_hfile_h)
test/test-realn.o: test/test_realn.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_faidx_h)
test/test_bgzf.o: test/test_bgzf.c config.h $(htslib_bgzf_h) $(htslib_hfile_h) $(hfile_internal_h)
test/test-realn.o: test/test_realn.c config.h $(htslib_hts_h) $(htslib_sam_h) $(htslib_faidx_h)
test/test-regidx.o: test/test-regidx.c config.h $(htslib_regidx_h) $(hts_internal_h)
test/test_view.o: test/test_view.c config.h $(cram_h) $(htslib_sam_h)
test/test-vcf-api.o: test/test-vcf-api.c config.h $(htslib_hts_h) $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_kseq_h)
test/test-vcf-sweep.o: test/test-vcf-sweep.c config.h $(htslib_vcf_sweep_h)
test/test-bcf-sr.o: test/test-bcf-sr.c config.h $(htslib_vcf_sweep_h) bcf_sr_sort.h
test/test-bcf-translate.o: test/test-bcf-translate.c config.h
test/test-bcf-sr.o: test/test-bcf-sr.c config.h $(htslib_synced_bcf_reader_h)
test/test-bcf-translate.o: test/test-bcf-translate.c config.h $(htslib_vcf_h)
test/thrash_threads1: test/thrash_threads1.o libhts.a
......
Noteworthy changes in release 1.9 (18th July 2018)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* If `./configure` fails, `make` will stop working until either configure
is re-run successfully, or `make distclean` is used. This makes
configuration failures more obvious. (#711, thanks to John Marshall)
* The default SAM version has been changed to 1.6. This is in line with the
latest version specification and indicates that HTSlib supports the
CG tag used to store long CIGAR data in BAM format.
* bgzip integrity check option '--test' (#682, thanks to @sd4B75bJ, @jrayner)
* Faidx can now index fastq files as well as fasta. The fastq index adds
an extra column to the `.fai` index which gives the offset to the quality
values. New interfaces have been added to `htslib/faidx.h` to read the
fastq index and retrieve the quality values. It is possible to open
a fastq index as if fasta (only sequences will be returned), but not
the other way round. (#701)
* New API interfaces to add or update integer, float and array aux tags. (#694)
* Add `level=<number>` option to `hts_set_opt()` to allow the compression
level to be set. Setting `level=0` enables uncompressed output. (#715)
* Improved bgzip error reporting.
* Better error reporting when CRAM reference files can't be opened. (#706)
* Fixes to make tests work properly on Windows/MinGW - mainly to handle
line ending differences. (#716)
* Efficiency improvements:
- Small speed-up for CRAM indexing.
- Reduce the number of unnecessary wake-ups in the thread pool. (#703)
- Avoid some memory copies when writing data, notably for uncompressed
BGZF output. (#703)
* Bug fixes:
- Fix multi-region iterator bugs on CRAM files. (#684)
- Fixed multi-region iterator bug that caused some reads to be skipped
incorrectly when reading BAM files. (#687)
- Fixed synced_bcf_reader() bug when reading contigs multiple times. (#691,
reported by @freeseek)
- Fixed bug where bcf_hdr_set_samples() did not update the sample dictionary
when removing samples. (#692, reported by @freeseek)
- Fixed bug where the VCF record ref length was calculated incorrectly
if an INFO END tag was present. (71b00a)
- Fixed warnings found when compiling with gcc 8.1.0. (#700)
- sam_hdr_read() and sam_hdr_write() will now return an error code
if passed a NULL file pointer, instead of crashing.
- Fixed possible negative array look-up in sam_parse1() that somehow
escaped previous fuzz testing. (#731, reported by @fCorleone)
- Fixed bug where cram range queries could incorrectly report an error
when using multiple threads. (#734, reported by Brent Pedersen)
- Fixed very rare rANS normalisation bug that could cause an assertion
failure when writing CRAM files. (#739, reported by @carsonhh)
Noteworthy changes in release 1.8 (3rd April 2018)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
......
......@@ -22,6 +22,8 @@
THE SOFTWARE.
*/
#include <config.h>
#include <strings.h>
#include "bcf_sr_sort.h"
......@@ -645,6 +647,10 @@ sr_sort_t *bcf_sr_sort_init(sr_sort_t *srt)
memset(srt,0,sizeof(sr_sort_t));
return srt;
}
/* Reset the sorter by clearing its remembered chromosome pointer (srt->chr).
 * No other field is modified and nothing is freed here.
 * NOTE(review): presumably this makes the next bcf_sr_sort_next() call treat
 * its chromosome as a new one (e.g. after a seek) -- confirm against callers. */
void bcf_sr_sort_reset(sr_sort_t *srt)
{
srt->chr = NULL;
}
void bcf_sr_sort_destroy(sr_sort_t *srt)
{
free(srt->active);
......
......@@ -97,6 +97,7 @@ typedef struct
sr_sort_t;
sr_sort_t *bcf_sr_sort_init(sr_sort_t *srt);
void bcf_sr_sort_reset(sr_sort_t *srt);
int bcf_sr_sort_next(bcf_srs_t *readers, sr_sort_t *srt, const char *chr, int pos);
int bcf_sr_sort_set_active(sr_sort_t *srt, int i);
int bcf_sr_sort_add_active(sr_sort_t *srt, int i);
......
......@@ -427,6 +427,15 @@ int bgzf_compress(void *_dst, size_t *dlen, const void *src, size_t slen, int le
z_stream zs;
uint8_t *dst = (uint8_t*)_dst;
if (level == 0) {
// Uncompressed data
if (*dlen < slen+5 + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH) return -1;
dst[BLOCK_HEADER_LENGTH] = 1; // BFINAL=1, BTYPE=00; see RFC1951
u16_to_le(slen, &dst[BLOCK_HEADER_LENGTH+1]); // length
u16_to_le(~slen, &dst[BLOCK_HEADER_LENGTH+3]); // ones-complement length
memcpy(dst + BLOCK_HEADER_LENGTH+5, src, slen);
*dlen = slen+5 + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;
} else {
// compress the body
zs.zalloc = NULL; zs.zfree = NULL;
zs.msg = NULL;
......@@ -448,6 +457,8 @@ int bgzf_compress(void *_dst, size_t *dlen, const void *src, size_t slen, int le
return -1;
}
*dlen = zs.total_out + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;
}
// write the header
memcpy(dst, g_magic, BLOCK_HEADER_LENGTH); // the last two bytes are a place holder for the length of the block
packInt16(&dst[16], *dlen - 1); // write the compressed length; -1 to fit 2 bytes
......@@ -975,7 +986,7 @@ ssize_t bgzf_read(BGZF *fp, void *data, size_t length)
if (available <= 0) {
int ret = bgzf_read_block(fp);
if (ret != 0) {
hts_log_error("Read block operation failed with error %d after %zd of %zu bytes", ret, bytes_read, length);
hts_log_error("Read block operation failed with error %d after %zd of %zu bytes", fp->errcode, bytes_read, length);
fp->errcode |= BGZF_ERR_ZLIB;
return -1;
}
......@@ -1023,6 +1034,39 @@ void *bgzf_encode_func(void *arg) {
return arg;
}
// Fast path for compression level 0: frame payload bytes that are already in
// place as a BGZF block holding one stored (uncompressed) deflate block, so
// the data never has to be copied from an uncompressed to a compressed buffer.
//
// arg is a bgzf_job whose payload has already been written to
// comp_data + BLOCK_HEADER_LENGTH + 5 and whose size is in uncomp_len.
// Sets comp_len to the total block size and returns arg unchanged.
void *bgzf_encode_level0_func(void *arg) {
    bgzf_job *job = (bgzf_job *)arg;
    uint8_t *block = (uint8_t *)job->comp_data;
    uint8_t *stored = block + BLOCK_HEADER_LENGTH;  // deflate stored-block header
    uint8_t *payload = stored + 5;                  // data copied in by the caller
    uint32_t checksum;

    // 5 = stored-block header: one flag byte plus two 16-bit length fields
    job->comp_len = job->uncomp_len + 5 + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;

    // Stored deflate block: BFINAL=1, BTYPE=00 (see RFC1951), then the
    // length and its ones-complement
    stored[0] = 1;
    u16_to_le(job->uncomp_len, stored + 1);
    u16_to_le(~job->uncomp_len, stored + 3);

    // Block preamble; the total block size minus one goes at byte offset 16
    memcpy(block, g_magic, BLOCK_HEADER_LENGTH);
    u16_to_le(job->comp_len - 1, block + 16);

    // CRC32 over the payload only
#ifdef HAVE_LIBDEFLATE
    checksum = libdeflate_crc32(0, payload, job->uncomp_len);
#else
    checksum = crc32(crc32(0L, NULL, 0L), (Bytef *)payload, job->uncomp_len);
#endif

    // Trailer occupies the last 8 bytes: CRC32, then the uncompressed length
    u32_to_le(checksum, block + job->comp_len - 8);
    u32_to_le(job->uncomp_len, block + job->comp_len - 4);

    return arg;
}
// Our input block has already been decoded by bgzf_mt_read_block().
// We need to split that into a fetch block (compressed) and make this
// do the actual decompression step.
......@@ -1414,10 +1458,16 @@ static int mt_queue(BGZF *fp)
j->fp = fp;
j->errcode = 0;
j->uncomp_len = fp->block_offset;
if (fp->compress_level == 0) {
memcpy(j->comp_data + BLOCK_HEADER_LENGTH + 5, fp->uncompressed_block,
j->uncomp_len);
hts_tpool_dispatch(mt->pool, mt->out_queue, bgzf_encode_level0_func, j);
} else {
memcpy(j->uncomp_data, fp->uncompressed_block, j->uncomp_len);
// Need non-block vers & job_pending?
hts_tpool_dispatch(mt->pool, mt->out_queue, bgzf_encode_func, j);
}
fp->block_offset = 0;
return 0;
......
.TH bgzip 1 "3 April 2018" "htslib-1.8" "Bioinformatics tools"
.TH bgzip 1 "18 July 2018" "htslib-1.9" "Bioinformatics tools"
.SH NAME
.PP
bgzip \- Block compression/decompression utility
......@@ -174,4 +174,5 @@ by Heng Li for remote file access and in-memory caching.
.SH SEE ALSO
.PP
.BR gzip (1), tabix (1)
.BR gzip (1),
.BR tabix (1)
......@@ -32,6 +32,7 @@
#include <errno.h>
#include <stdarg.h>
#include <getopt.h>
#include <inttypes.h>
#include <sys/stat.h>
#include "htslib/bgzf.h"
#include "htslib/hts.h"
......@@ -85,13 +86,14 @@ static int bgzip_main_usage(void)
fprintf(stderr, " -g, --rebgzip use an index file to bgzip a file\n");
fprintf(stderr, " -s, --size INT decompress INT bytes (uncompressed size)\n");
fprintf(stderr, " -@, --threads INT number of compression threads to use [1]\n");
fprintf(stderr, " -t, --test test integrity of compressed file");
fprintf(stderr, "\n");
return 1;
}
int main(int argc, char **argv)
{
int c, compress, compress_level = -1, pstdout, is_forced, index = 0, rebgzip = 0, reindex = 0;
int c, compress, compress_level = -1, pstdout, is_forced, test, index = 0, rebgzip = 0, reindex = 0;
BGZF *fp;
void *buffer;
long start, end, size;
......@@ -112,12 +114,13 @@ int main(int argc, char **argv)
{"rebgzip",no_argument,NULL,'g'},
{"size", required_argument, NULL, 's'},
{"threads", required_argument, NULL, '@'},
{"test", no_argument, NULL, 't'},
{"version", no_argument, NULL, 1},
{NULL, 0, NULL, 0}
};
compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0;
while((c = getopt_long(argc, argv, "cdh?fb:@:s:iI:l:gr",loptions,NULL)) >= 0){
compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0; test = 0;
while((c = getopt_long(argc, argv, "cdh?fb:@:s:iI:l:grt",loptions,NULL)) >= 0){
switch(c){
case 'd': compress = 0; break;
case 'c': pstdout = 1; break;
......@@ -130,6 +133,7 @@ int main(int argc, char **argv)
case 'g': rebgzip = 1; break;
case 'r': reindex = 1; compress = 0; break;
case '@': threads = atoi(optarg); break;
case 't': test = 1; compress = 0; reindex = 0; break;
case 1:
printf(
"bgzip (htslib) %s\n"
......@@ -292,7 +296,7 @@ int main(int argc, char **argv)
}
char *name;
int len = strlen(argv[optind]);
if ( strcmp(argv[optind]+len-3,".gz") )
if ( strcmp(argv[optind]+len-3,".gz") && !test)
{
fprintf(stderr, "[bgzip] %s: unknown suffix -- ignored\n", argv[optind]);
return 1;
......@@ -303,7 +307,7 @@ int main(int argc, char **argv)
return 1;
}
if (pstdout) {
if (pstdout || test) {
f_dst = fileno(stdout);
}
else {
......@@ -332,6 +336,12 @@ int main(int argc, char **argv)
return 1;
}
}
if (!fp->is_compressed) {
fprintf(stderr, "[bgzip] Expected compressed file -- ignored\n");
return 1;
}
if (threads > 1)
bgzf_mt(fp, threads, 256);
......@@ -348,9 +358,9 @@ int main(int argc, char **argv)
if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE);
else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start));
if (c == 0) break;
if (c < 0) error("Could not read %d bytes: Error %d\n", (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start), fp->errcode);
if (c < 0) error("Error %d in block starting at offset %" PRId64 "(%" PRIX64 ")\n", fp->errcode, fp->block_address, fp->block_address);
start += c;
if ( write(f_dst, buffer, c) != c ) {
if ( !test && write(f_dst, buffer, c) != c ) {
#ifdef _WIN32
if (GetLastError() != ERROR_NO_DATA)
#endif
......@@ -360,7 +370,7 @@ int main(int argc, char **argv)
}
free(buffer);
if (bgzf_close(fp) < 0) error("Close failed: Error %d\n",fp->errcode);
if (!pstdout) unlink(argv[optind]);
if (!pstdout && !test) unlink(argv[optind]);
return 0;
}
}
# Configure script for htslib, a C library for high-throughput sequencing data.
#
# Copyright (C) 2015-2017 Genome Research Ltd.
# Copyright (C) 2015-2018 Genome Research Ltd.
#
# Author: John Marshall <jm18@sanger.ac.uk>
#
......@@ -32,7 +32,7 @@ AC_CONFIG_HEADERS(config.h)
m4_include([m4/hts_prog_cc_warnings.m4])
dnl Copyright notice to be copied into the generated configure script
AC_COPYRIGHT([Portions copyright (C) 2016 Genome Research Ltd.
AC_COPYRIGHT([Portions copyright (C) 2018 Genome Research Ltd.
This configure script is free software: you are free to change and
redistribute it. There is NO WARRANTY, to the extent permitted by law.])
......@@ -48,6 +48,16 @@ AH_TOP([/* If you use configure, this file provides @%:@defines reflecting your
the PACKAGE_* defines are unused and are overridden by the more
accurate PACKAGE_VERSION as computed by the Makefile. */])
dnl Variant of AC_MSG_ERROR that ensures subsequent make(1) invocations fail
dnl until the configuration error is resolved and configure is run again.
dnl
dnl Arguments: the first is the error message and the second is the exit
dnl status; both are forwarded unchanged to AC_MSG_ERROR.
dnl Before reporting the error, the macro overwrites config.mk with a stub
dnl that stops make with "Resolve configure error first" unless the goals
dnl given on the make command line are exactly "distclean".
dnl The here-document delimiter is quoted so that the shell writes the make
dnl syntax to config.mk literally, without expanding it.
AC_DEFUN([MSG_ERROR],
[cat > config.mk <<'EOF'
ifneq ($(MAKECMDGOALS),distclean)
$(error Resolve configure error first)
endif
EOF
AC_MSG_ERROR([$1], [$2])])
AC_PROG_CC
AC_PROG_RANLIB
......@@ -106,7 +116,7 @@ AC_ARG_WITH([plugin-dir],
[AS_HELP_STRING([--with-plugin-dir=DIR],
[plugin installation location [LIBEXECDIR/htslib]])],
[case $withval in
yes|no) AC_MSG_ERROR([no directory specified for --with-plugin-dir]) ;;
yes|no) MSG_ERROR([no directory specified for --with-plugin-dir]) ;;
esac],
[with_plugin_dir='$(libexecdir)/htslib'])
AC_SUBST([plugindir], $with_plugin_dir)
......@@ -115,7 +125,7 @@ AC_ARG_WITH([plugin-path],
[AS_HELP_STRING([--with-plugin-path=PATH],
[default HTS_PATH plugin search path [PLUGINDIR]])],
[case $withval in
yes) AC_MSG_ERROR([no path specified for --with-plugin-path]) ;;
yes) MSG_ERROR([no path specified for --with-plugin-path]) ;;
no) with_plugin_path= ;;
esac],
[with_plugin_path=$with_plugin_dir])
......@@ -166,7 +176,7 @@ AC_CHECK_DECL([fdatasync(int)], [AC_CHECK_FUNCS(fdatasync)])
if test $enable_plugins != no; then
AC_SEARCH_LIBS([dlopen], [dl], [],
[AC_MSG_ERROR([dlopen() not found
[MSG_ERROR([dlopen() not found
Plugin support requires dynamic linking facilities from the operating system.
Either configure with --disable-plugins or resolve this error to build HTSlib.])])
......@@ -183,7 +193,7 @@ Either configure with --disable-plugins or resolve this error to build HTSlib.])
fi
AC_SEARCH_LIBS([log], [m], [],
[AC_MSG_ERROR([log() not found
[MSG_ERROR([log() not found
HTSLIB requires a working floating-point math library.
FAILED. This error must be resolved in order to build HTSlib successfully.])])
......@@ -194,7 +204,7 @@ AC_CHECK_HEADER([zlib.h], [], [zlib_devel=missing], [;])
AC_CHECK_LIB(z, inflate, [], [zlib_devel=missing])
if test $zlib_devel != ok; then
AC_MSG_ERROR([zlib development files not found
MSG_ERROR([zlib development files not found
HTSlib uses compression routines from the zlib library <http://zlib.net>.
Building HTSlib requires zlib development files to be installed on the build
......@@ -210,14 +220,14 @@ AC_SEARCH_LIBS([recv], [socket ws2_32], [
if test "$ac_cv_search_recv" != "none required"
then
static_LIBS="$static_LIBS $ac_cv_search_recv"
fi], [AC_MSG_ERROR([unable to find the recv() function])])
fi], [MSG_ERROR([unable to find the recv() function])])
if test "$enable_bz2" != no; then
bz2_devel=ok
AC_CHECK_HEADER([bzlib.h], [], [bz2_devel=missing], [;])
AC_CHECK_LIB([bz2], [BZ2_bzBuffToBuffCompress], [], [bz2_devel=missing])
if test $bz2_devel != ok; then
AC_MSG_ERROR([libbzip2 development files not found
MSG_ERROR([libbzip2 development files not found
The CRAM format may use bzip2 compression, which is implemented in HTSlib
by using compression routines from libbzip2 <http://www.bzip.org/>.
......@@ -245,7 +255,7 @@ if test "$enable_lzma" != no; then
AC_CHECK_HEADERS([lzma.h], [], [lzma_devel=header-missing], [;])
AC_CHECK_LIB([lzma], [lzma_easy_buffer_encode], [], [lzma_devel=missing])
if test $lzma_devel = missing; then
AC_MSG_ERROR([liblzma development files not found
MSG_ERROR([liblzma development files not found
The CRAM format may use LZMA2 compression, which is implemented in HTSlib
by using compression routines from liblzma <http://tukaani.org/xz/>.
......@@ -271,7 +281,7 @@ AS_IF([test "x$with_libdeflate" != "xno"],
private_LIBS="$private_LIBS -ldeflate"
static_LIBS="$static_LIBS -ldeflate"],
[AS_IF([test "x$with_libdeflate" != "xcheck"],
[AC_MSG_ERROR([libdeflate development files not found: $libdeflate
[MSG_ERROR([libdeflate development files not found: $libdeflate
You requested libdeflate, but do not have the required header / library
files. The source for libdeflate is available from
......@@ -292,7 +302,7 @@ if test "$enable_libcurl" != no; then
[message="library not found"])
case "$enable_libcurl" in
check) AC_MSG_WARN([libcurl not enabled: $message]) ;;
*) AC_MSG_ERROR([libcurl $message
*) MSG_ERROR([libcurl $message
Support for HTTPS and other SSL-based URLs requires routines from the libcurl
library <http://curl.haxx.se/libcurl/>. Building HTSlib with libcurl enabled
......@@ -321,7 +331,7 @@ if test "$enable_gcs" != no; then
else
case "$enable_gcs" in
check) AC_MSG_WARN([GCS support not enabled: requires libcurl support]) ;;
*) AC_MSG_ERROR([GCS support not enabled
*) MSG_ERROR([GCS support not enabled
Support for Google Cloud Storage URLs requires libcurl support to be enabled
in HTSlib. Configure with --enable-libcurl in order to use GCS URLs.])
......@@ -339,7 +349,7 @@ if test "$enable_s3" != no; then
else
case "$enable_s3" in
check) AC_MSG_WARN([S3 support not enabled: requires libcurl support]) ;;
*) AC_MSG_ERROR([S3 support not enabled
*) MSG_ERROR([S3 support not enabled
Support for Amazon AWS S3 URLs requires libcurl support to be enabled
in HTSlib. Configure with --enable-libcurl in order to use S3 URLs.])
......@@ -362,7 +372,7 @@ if test $need_crypto != no; then
[case "$need_crypto" in
check) AC_MSG_WARN([S3 support not enabled: requires SSL development files])
s3=disabled ;;
*) AC_MSG_ERROR([SSL development files not found
*) MSG_ERROR([SSL development files not found
Support for AWS S3 URLs requires routines from an SSL library. Building
HTSlib with libcurl enabled requires SSL development files to be installed
......
......@@ -3212,8 +3212,10 @@ static cram_slice *cram_next_slice(cram_fd *fd, cram_container **cp) {
hts_tpool_result *res;
cram_decode_job *j;
if (fd->ooc && hts_tpool_process_empty(fd->rqueue))
if (fd->ooc && hts_tpool_process_empty(fd->rqueue)) {
fd->eof = 1;
return NULL;
}
res = hts_tpool_next_result_wait(fd->rqueue);
......
......@@ -2886,6 +2886,7 @@ static int process_one_read(cram_fd *fd, cram_container *c,
kh_val(s->pair[sec], k) = rnum;
} else {
new = 1;
k = 0; // Prevents false-positive warning from gcc -Og
}
if (new == 0) {
......
......@@ -588,6 +588,9 @@ int cram_index_build(cram_fd *fd, const char *fn_base, const char *fn_idx) {
BGZF *fp;
kstring_t fn_idx_str = {0};
// Useful for cram_index_build_multiref
cram_set_option(fd, CRAM_OPT_REQUIRED_FIELDS, SAM_RNAME | SAM_POS | SAM_CIGAR);
if (! fn_idx) {
kputs(fn_base, &fn_idx_str);
kputs(".crai", &fn_idx_str);
......