Skip to content
Commits on Source (5)
......@@ -30,6 +30,8 @@ RANLIB = ranlib
htslib_default_libs = -lz -lm -lbz2 -llzma -lcurl
CPPFLAGS =
# TODO: make the 64-bit support for VCF optional via configure, for now add -DVCF_ALLOW_INT64
# to CFLAGS manually, here or in config.mk if the latter exists.
# TODO: probably update cram code to make it compile cleanly with -Wc++-compat
# For testing strict C99 support add -std=c99 -D_XOPEN_SOURCE=600
#CFLAGS = -g -Wall -O2 -pedantic -std=c99 -D_XOPEN_SOURCE=600
......
Noteworthy changes in release a.b
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Noteworthy changes in release 1.10.1 (17th December 2019)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Support for 64-bit coordinates in VCF caused problems for files
not conforming to the VCF/BCF specification. Whereas previous versions would
let out-of-range values silently overflow, creating nonsense values
in a still-parseable file, version 1.10 would silently create an invalid BCF.
Noteworthy changes in release 1.10 (6th December 2019)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
......
......@@ -9,7 +9,9 @@ which have, or are expected to have, chromosomes longer than two gigabases.
Currently 64 bit positions can only be stored in SAM and VCF format files.
Binary BAM, CRAM and BCF cannot be used due to limitations in the formats
themselves. As SAM and VCF are text formats, they have no limit on the
size of numeric values.
size of numeric values. Note that while 64-bit positions are supported by
default for SAM, for VCF they must be enabled explicitly at compile time
by editing the Makefile and adding -DVCF_ALLOW_INT64=1 to CFLAGS.
# Compatibility issues to check
......
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.68 for HTSlib 1.10.
# Generated by GNU Autoconf 2.68 for HTSlib 1.10.1.
#
# Report bugs to <samtools-help@lists.sourceforge.net>.
#
......@@ -565,8 +565,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='HTSlib'
PACKAGE_TARNAME='htslib'
PACKAGE_VERSION='1.10'
PACKAGE_STRING='HTSlib 1.10'
PACKAGE_VERSION='1.10.1'
PACKAGE_STRING='HTSlib 1.10.1'
PACKAGE_BUGREPORT='samtools-help@lists.sourceforge.net'
PACKAGE_URL='http://www.htslib.org/'
......@@ -1245,7 +1245,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures HTSlib 1.10 to adapt to many kinds of systems.
\`configure' configures HTSlib 1.10.1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
......@@ -1306,7 +1306,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of HTSlib 1.10:";;
short | recursive ) echo "Configuration of HTSlib 1.10.1:";;
esac
cat <<\_ACEOF
......@@ -1413,7 +1413,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
HTSlib configure 1.10
HTSlib configure 1.10.1
generated by GNU Autoconf 2.68
Copyright (C) 2010 Free Software Foundation, Inc.
......@@ -1742,7 +1742,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by HTSlib $as_me 1.10, which was
It was created by HTSlib $as_me 1.10.1, which was
generated by GNU Autoconf 2.68. Invocation command line was
$ $0 $@
......@@ -5804,7 +5804,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by HTSlib $as_me 1.10, which was
This file was extended by HTSlib $as_me 1.10.1, which was
generated by GNU Autoconf 2.68. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
......@@ -5867,7 +5867,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
HTSlib config.status 1.10
HTSlib config.status 1.10.1
configured by $0, generated by GNU Autoconf 2.68,
with options \\"\$ac_cs_config\\"
......
htslib (1.10-3) UNRELEASED; urgency=medium
htslib (1.10.1-1) unstable; urgency=medium
* New upstream version
* For some very strange reason, /usr/include/htslib/cram/cram.h uses the
private interface header.h. This is now provided in the
libhts-private-dev package rather than htslib-test
......@@ -10,7 +11,7 @@ htslib (1.10-3) UNRELEASED; urgency=medium
* Fix Perl interpreter path
* Fix permissions of script
-- Andreas Tille <tille@debian.org> Tue, 17 Dec 2019 11:31:46 +0100
-- Andreas Tille <tille@debian.org> Tue, 17 Dec 2019 14:38:13 +0100
htslib (1.10-2) unstable; urgency=medium
......
......@@ -3,9 +3,9 @@ Description: Build with -fPIC instead of -fpic
linking the library on s390x and sparc64.
Author: Adrian Bunk <bunk@debian.org>
--- htslib.orig/Makefile
+++ htslib/Makefile
@@ -34,7 +34,7 @@
--- a/Makefile
+++ b/Makefile
@@ -36,7 +36,7 @@ CPPFLAGS =
# For testing strict C99 support add -std=c99 -D_XOPEN_SOURCE=600
#CFLAGS = -g -Wall -O2 -pedantic -std=c99 -D_XOPEN_SOURCE=600
CFLAGS = -g -Wall -O2 -fvisibility=hidden
......
Author: Michael R. Crusoe <michael.crusoe@gmail.com>
Description: Don't check git tags for versioning
--- htslib.orig/version.sh
+++ htslib/version.sh
--- a/version.sh
+++ b/version.sh
@@ -27,11 +27,11 @@
VERSION=1.10
VERSION=1.10.1
# If we have a git clone, then check against the current tag
-if [ -e .git ]
......
......@@ -85,7 +85,7 @@ override_dh_link:
find debian/htslib-test -name header.h
dh_link -phtslib-test usr/include/htslib/cram/header.h usr/share/htslib-test/header.h
override_dh_fixperms
override_dh_fixperms:
dh_fixperms
find debian -name simple_test_driver.sh -exec chmod +x \{\} \;
......
......@@ -421,7 +421,7 @@ const char *hts_version(void);
// Immediately after release, bump ZZ to 90 to distinguish in-development
// Git repository builds from the release; you may wish to increment this
// further when significant features are merged.
#define HTS_VERSION 101000
#define HTS_VERSION 101001
/*!
@abstract Determine format by peeking at the start of a file
......
This diff is collapsed.
......@@ -1390,10 +1390,10 @@ static inline int bcf_enc_int1(kstring_t *s, int32_t x)
@param[out] q Location to store an updated value for p
@return The integer value, or zero if @p type is not valid.
If @p type is not one of BCF_BT_INT8, BCF_BT_INT16 or BCF_BT_INT32, zero
will be returned and @p *q will not be updated. Otherwise, the integer
value will be returned and @p *q will be set to the memory location
immediately following the integer value.
If @p type is not one of BCF_BT_INT8, BCF_BT_INT16, BCF_BT_INT32 or
BCF_BT_INT64, zero will be returned and @p *q will not be updated.
Otherwise, the integer value will be returned and @p *q will be set
to the memory location immediately following the integer value.
Cautious callers can detect invalid type codes by checking that *q has
actually been updated.
......@@ -1411,7 +1411,7 @@ static inline int64_t bcf_dec_int1(const uint8_t *p, int type, uint8_t **q)
*q = (uint8_t*)p + 4;
return le_to_i32(p);
} else if (type == BCF_BT_INT64) {
*q = (uint8_t*)p + 4;
*q = (uint8_t*)p + 8;
return le_to_i64(p);
} else { // Invalid type.
return 0;
......
1 10010000110 . G <DEL> 0 . SVTYPE=DEL;SVLEN=-890;END=10010001000 PL 0,1,45
1 10010000110 . G <DEL> 0 . SVTYPE=DEL;SVLEN=-890;END=10010001000;QS=1,0 PL 0,1,45
......@@ -213,4 +213,4 @@
1 10010000107 . G <*> 0 . DP=1;I16=0,1,0,0,33,1089,0,0,29,841,0,0,2,4,0,0;QS=1,0;MQ0F=0 PL 0,3,29
1 10010000108 . C <*> 0 . DP=1;I16=0,1,0,0,32,1024,0,0,29,841,0,0,1,1,0,0;QS=1,0;MQ0F=0 PL 0,3,29
1 10010000109 . A <*> 0 . DP=1;I16=0,1,0,0,35,1225,0,0,29,841,0,0,0,0,0,0;QS=1,0;MQ0F=0 PL 0,3,29
1 10010000110 . G <DEL> 0 . SVTYPE=DEL;SVLEN=-890;END=10010001000 PL 0,1,45
1 10010000110 . G <DEL> 0 . SVTYPE=DEL;SVLEN=-890;END=10010001000;QS=1,0 PL 0,1,45
......@@ -660,18 +660,19 @@ sub test_view
testv $opts, "./test_view $tv_args -M -p longrefs/longref_multi.tmp.sam longrefs/longref.tmp.sam.gz CHROMOSOME_I:10000000000-10000000003 CHROMOSOME_I:10000000100-10000000110";
testv $opts, "./compare_sam.pl longrefs/longref_multi.expected.sam longrefs/longref_multi.tmp.sam";
# VCF round trip
unlink("longrefs/index.tmp.vcf.gz.csi"); # To stop vcf_hdr_read from reading a stale index
testv $opts, "./test_view $tv_args -z -p longrefs/index.tmp.vcf.gz -x longrefs/index.tmp.vcf.gz.csi.otf -m 14 longrefs/index.vcf";
testv $opts, "./test_view $tv_args -p longrefs/index.tmp.vcf_ longrefs/index.tmp.vcf.gz";
testv $opts, "cmp longrefs/index.vcf longrefs/index.tmp.vcf_";
# Build index and compare with on-the-fly one made earlier.
test_compare $opts, "$$opts{path}/test_index -c longrefs/index.tmp.vcf.gz", "longrefs/index.tmp.vcf.gz.csi.otf", "longrefs/index.tmp.vcf.gz.csi", gz=>1;
# test_view can't do indexed look-ups on vcf, but we can use tabix
test_compare $opts, "$$opts{bin}/tabix longrefs/index.tmp.vcf.gz 1:10010000100-10010000105 > longrefs/index.tmp.tabix1.vcf", "longrefs/index.expected1.vcf", "longrefs/index.tmp.tabix1.vcf", fix_newlines => 1;
test_compare $opts, "$$opts{bin}/tabix longrefs/index.tmp.vcf.gz 1:10010000120-10010000130 > longrefs/index.tmp.tabix2.vcf", "longrefs/index.expected2.vcf", "longrefs/index.tmp.tabix2.vcf", fix_newlines => 1;
# 64-bit positions are currently not compiled in by default for VCF
# # VCF round trip
# unlink("longrefs/index.tmp.vcf.gz.csi"); # To stop vcf_hdr_read from reading a stale index
# testv $opts, "./test_view $tv_args -z -p longrefs/index.tmp.vcf.gz -x longrefs/index.tmp.vcf.gz.csi.otf -m 14 longrefs/index.vcf";
# testv $opts, "./test_view $tv_args -p longrefs/index.tmp.vcf_ longrefs/index.tmp.vcf.gz";
# testv $opts, "cmp longrefs/index.vcf longrefs/index.tmp.vcf_";
#
# # Build index and compare with on-the-fly one made earlier.
# test_compare $opts, "$$opts{path}/test_index -c longrefs/index.tmp.vcf.gz", "longrefs/index.tmp.vcf.gz.csi.otf", "longrefs/index.tmp.vcf.gz.csi", gz=>1;
#
# # test_view can't do indexed look-ups on vcf, but we can use tabix
# test_compare $opts, "$$opts{bin}/tabix longrefs/index.tmp.vcf.gz 1:10010000100-10010000105 > longrefs/index.tmp.tabix1.vcf", "longrefs/index.expected1.vcf", "longrefs/index.tmp.tabix1.vcf", fix_newlines => 1;
# test_compare $opts, "$$opts{bin}/tabix longrefs/index.tmp.vcf.gz 1:10010000120-10010000130 > longrefs/index.tmp.tabix2.vcf", "longrefs/index.expected2.vcf", "longrefs/index.tmp.tabix2.vcf", fix_newlines => 1;
if ($test_view_failures == 0) {
passed($opts, "large position tests");
......
......@@ -59,10 +59,26 @@ HTSLIB_EXPORT
uint32_t bcf_float_vector_end = 0x7F800002;
HTSLIB_EXPORT
uint8_t bcf_type_shift[] = { 0, 0, 1, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
uint8_t bcf_type_shift[] = { 0, 0, 1, 2, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
static bcf_idinfo_t bcf_idinfo_def = { .info = { 15, 15, 15 }, .hrec = { NULL, NULL, NULL}, .id = -1 };
/*
Partial support for 64-bit POS and Number=1 INFO tags.
Notes:
- the support for 64-bit values is motivated by POS and INFO/END for large genomes
- the use of 64-bit values does not conform to the specification
- 64-bit BCF output is not supported; if such a file were produced, it would not be compatible with anything
- experimental, use at your own risk
*/
#ifdef VCF_ALLOW_INT64
#define BCF_MAX_BT_INT64 (0x7fffffffffffffff) /* INT64_MAX, for internal use only */
#define BCF_MIN_BT_INT64 -9223372036854775800LL /* INT64_MIN + 8, for internal use only */
#endif
#define BCF_IS_64BIT (1<<30)
static const char *dump_char(char *buffer, char c)
{
switch (c) {
......@@ -1251,6 +1267,14 @@ static int bcf_dec_typed_int1_safe(uint8_t *p, uint8_t *end, uint8_t **q,
if (end - p < 4) return -1;
*q = p + 4;
*val = le_to_i32(p);
#ifdef VCF_ALLOW_INT64
} else if (t == BCF_BT_INT64) {
// This case should never happen because there should be no 64-bit BCFs
// at all, definitely not coming from htslib
if (end - p < 8) return -1;
*q = p + 8;
*val = le_to_i64(p);
#endif
} else {
return -1;
}
......@@ -1290,6 +1314,9 @@ static int bcf_record_check(const bcf_hdr_t *hdr, bcf1_t *rec) {
uint32_t i, reports;
const uint32_t is_integer = ((1 << BCF_BT_INT8) |
(1 << BCF_BT_INT16) |
#ifdef VCF_ALLOW_INT64
(1 << BCF_BT_INT64) |
#endif
(1 << BCF_BT_INT32));
const uint32_t is_valid_type = (is_integer |
(1 << BCF_BT_NULL) |
......@@ -1728,6 +1755,12 @@ int bcf_write(htsFile *hfp, bcf_hdr_t *h, bcf1_t *v)
}
bcf1_sync(v); // check if the BCF record was modified
if ( v->unpacked & BCF_IS_64BIT )
{
hts_log_error("Data contains 64-bit values not representable in BCF. Please use VCF instead");
return -1;
}
BGZF *fp = hfp->fp.bgzf;
union {
uint32_t i;
......@@ -2040,6 +2073,7 @@ int bcf_enc_vint(kstring_t *s, int n, int32_t *a, int wsize)
return 0; // FIXME: check for errs in this function
}
#ifdef VCF_ALLOW_INT64
static int bcf_enc_long1(kstring_t *s, int64_t x) {
uint32_t e = 0;
if (x <= BCF_MAX_BT_INT32 && x >= BCF_MIN_BT_INT32)
......@@ -2057,6 +2091,7 @@ static int bcf_enc_long1(kstring_t *s, int64_t x) {
}
return e == 0 ? 0 : -1;
}
#endif
static inline int serialize_float_array(kstring_t *s, size_t n, const float *a) {
uint8_t *p;
......@@ -2169,6 +2204,7 @@ static int vcf_parse_format(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v, char *p
{
if ( !bcf_hdr_nsamples(h) ) return 0;
static int extreme_int_warned = 0;
char *r, *t;
int j, l, m, g;
khint_t k;
......@@ -2362,7 +2398,23 @@ static int vcf_parse_format(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v, char *p
int32_t *x = (int32_t*)(z->buf + z->size * m);
for (l = 0;; ++t) {
if (*t == '.') x[l++] = bcf_int32_missing, ++t; // ++t to skip "."
else x[l++] = strtol(t, &t, 10);
else
{
errno = 0;
char *te;
long int tmp_val = strtol(t, &te, 10);
if ( te==t || errno!=0 || tmp_val<BCF_MIN_BT_INT32 || tmp_val>BCF_MAX_BT_INT32 )
{
if ( !extreme_int_warned )
{
hts_log_warning("Extreme FORMAT/%s value encountered and set to missing at %s:%"PRIhts_pos,h->id[BCF_DT_ID][fmt[j-1].key].key,bcf_seqname(h,v), v->pos+1);
extreme_int_warned = 1;
}
tmp_val = bcf_int32_missing;
}
x[l++] = tmp_val;
t = te;
}
if (*t != ',') break;
}
if ( !l ) x[l++] = bcf_int32_missing;
......@@ -2469,6 +2521,7 @@ static int vcf_parse_format(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v, char *p
int vcf_parse(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v)
{
static int extreme_int_warned = 0, negative_rlen_warned = 0;
int i = 0;
char *p, *q, *r, *t;
kstring_t *str;
......@@ -2526,6 +2579,8 @@ int vcf_parse(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v)
} else {
v->pos -= 1;
}
if (v->pos >= INT32_MAX)
v->unpacked |= BCF_IS_64BIT;
} else if (i == 2) { // ID
if (strcmp(p, ".")) bcf_enc_vchar(str, q - p, p);
else bcf_enc_size(str, 0, BCF_BT_CHAR);
......@@ -2672,31 +2727,77 @@ int vcf_parse(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v)
val_a = z;
}
if ((y>>4&0xf) == BCF_HT_INT) {
// Allow first value only to be 64 bit
// (for large END value)
int64_t v64 = strtoll(val, &te, 10);
if ( te==val ) { // conversion failed
val_a[0] = bcf_int32_missing;
v64 = bcf_int64_missing;
} else {
val_a[0] = v64 >= BCF_MIN_BT_INT32 && v64 <= BCF_MAX_BT_INT32 ? v64 : bcf_int32_missing;
i = 0, t = val;
int64_t val1;
#ifdef VCF_ALLOW_INT64
int is_int64 = 0;
if ( n_val==1 )
{
errno = 0;
long long int tmp_val = strtoll(val, &te, 10);
if ( te==val ) tmp_val = bcf_int32_missing;
else if ( te==val || errno!=0 || tmp_val<BCF_MIN_BT_INT64 || tmp_val>BCF_MAX_BT_INT64 )
{
if ( !extreme_int_warned )
{
hts_log_warning("Extreme INFO/%s value encountered and set to missing at %s:%"PRIhts_pos,key,bcf_seqname(h,v), v->pos+1);
extreme_int_warned = 1;
}
for (t = te; *t && *t != ','; t++);
if (*t == ',') ++t;
for (i = 1; i < n_val; ++i, ++t)
tmp_val = bcf_int32_missing;
}
else
is_int64 = 1;
val1 = tmp_val;
t = te;
i = 1; // this is just to avoid adding another nested block...
}
#endif
for (; i < n_val; ++i, ++t)
{
val_a[i] = strtol(t, &te, 10);
if ( te==t ) // conversion failed
val_a[i] = bcf_int32_missing;
errno = 0;
long int tmp_val = strtol(t, &te, 10);
if ( te==t ) tmp_val = bcf_int32_missing;
else if ( errno!=0 || tmp_val<BCF_MIN_BT_INT32 || tmp_val>BCF_MAX_BT_INT32 )
{
if ( !extreme_int_warned )
{
hts_log_warning("Extreme INFO/%s value encountered and set to missing at %s:%"PRIhts_pos,key,bcf_seqname(h,v), v->pos+1);
extreme_int_warned = 1;
}
tmp_val = bcf_int32_missing;
}
val_a[i] = tmp_val;
for (t = te; *t && *t != ','; t++);
}
if (n_val == 1) {
bcf_enc_long1(str, v64);
#ifdef VCF_ALLOW_INT64
if ( is_int64 )
{
v->unpacked |= BCF_IS_64BIT;
bcf_enc_long1(str, val1);
}
else
bcf_enc_int1(str, (int32_t)val1);
#else
val1 = val_a[0];
bcf_enc_int1(str, (int32_t)val1);
#endif
} else {
bcf_enc_vint(str, n_val, val_a, -1);
}
if (strcmp(key, "END") == 0)
v->rlen = v64 - v->pos;
if (n_val==1 && strcmp(key, "END") == 0)
{
if ( val1 <= v->pos )
{
if ( !negative_rlen_warned )
{
hts_log_warning("INFO/END=%"PRIhts_pos" is smaller than POS at %s:%"PRIhts_pos,val1,bcf_seqname(h,v),v->pos+1);
negative_rlen_warned = 1;
}
}
else
v->rlen = val1 - v->pos;
}
} else if ((y>>4&0xf) == BCF_HT_REAL) {
float *val_f = (float *)val_a;
for (i = 0, t = val; i < n_val; ++i, ++t)
......@@ -3835,6 +3936,7 @@ int bcf_update_info(const bcf_hdr_t *hdr, bcf1_t *line, const char *key, const v
else
bcf_enc_vchar(&str, strlen((char*)values), (char*)values);
}
#ifdef VCF_ALLOW_INT64
else if ( type==BCF_HT_LONG )
{
if (n != 1) {
......@@ -3843,6 +3945,7 @@ int bcf_update_info(const bcf_hdr_t *hdr, bcf1_t *line, const char *key, const v
}
bcf_enc_long1(&str, *(int64_t *) values);
}
#endif
else
{
hts_log_error("The type %d not implemented yet", type);
......
......@@ -24,7 +24,7 @@
# DEALINGS IN THE SOFTWARE.
# Master version, for use in tarballs or non-git source copies
VERSION=1.10
VERSION=1.10.1
# If we have a git clone, then check against the current tag
if [ -e .git ]
......