# Omit these files from release tarballs.
.git* export-ignore
/.travis.yml export-ignore
README.md export-ignore
*.o
*.pico
*-uninstalled.pc
/version.h
lib*.a
lib*.dylib
lib*.so
lib*.so.*
/bgzip
/tabix
/test/fieldarith
/test/hfile
/test/sam
/test/test-vcf-api
/test/test-vcf-sweep
/test/test_view
/test/*.tmp
/test/*.tmp.*
/TAGS
# Control file for continuous integration testing at http://travis-ci.org/
language: c
compiler:
- clang
- gcc
script: make -e && make test
Building and Installing HTSlib
==============================
Requirements
============
Building HTSlib requires a few programs and libraries to be present.
See the "System Specific Details" below for guidance on how to install
these.
At least the following are required:
GNU make
C compiler (e.g. gcc or clang)
In addition, building the configure script requires:
autoheader
autoconf
Running the configure script uses awk, along with a number of
standard UNIX tools (cat, cp, grep, mv, rm, sed, among others). Almost
all installations will have these already.
Running the test harness (make test) uses:
bash
perl
HTSlib uses the following external libraries. Building requires both the
library itself and the include files needed to compile code that uses functions
from the library. Note that some Linux distributions put include files in
a development ('-dev' or '-devel') package separate from the main library.
zlib (required)
libbz2 (required, unless configured with --disable-bz2)
liblzma (required, unless configured with --disable-lzma)
libcurl (optional, but strongly recommended)
libcrypto (optional for Amazon S3 support; not needed on macOS)
Disabling libbz2 and liblzma will make some CRAM files unreadable, so
this is not recommended.
Using libcurl provides HTSlib with better network protocol support, for
example it enables the use of https:// URLs. It is also required if
direct access to Amazon S3 or Google Cloud Storage is enabled.
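For example, once HTSlib has been built with libcurl enabled, a program
using the library can open a remote alignment file directly from an
https:// URL. A minimal sketch (the URL is a placeholder):

    #include <stdio.h>
    #include "htslib/hts.h"
    #include "htslib/sam.h"

    int main(void) {
        /* Any https:// BAM/CRAM location works when libcurl is enabled. */
        htsFile *fp = hts_open("https://example.com/sample.bam", "r");
        if (!fp) { perror("hts_open"); return 1; }

        bam_hdr_t *hdr = sam_hdr_read(fp);  /* header fetched over HTTPS */
        if (hdr) printf("%d targets\n", hdr->n_targets);

        bam_hdr_destroy(hdr);
        hts_close(fp);
        return 0;
    }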
Amazon S3 support requires an HMAC function to calculate a message
authentication code. On macOS, the CCHmac function from the standard
library is used. Systems that do not have CCHmac will get this from
libcrypto. libcrypto is part of OpenSSL or one of its derivatives (LibreSSL
or BoringSSL).
On Microsoft Windows we recommend use of MinGW-w64/MSYS2. Note that
currently for the test harness to work you will need to override the
test temporary directory with e.g.: make check TEST_OPTS="-t C:/msys64/tmp/_"
Whilst the code may work on Windows with other environments, these have
not been verified.
Building Configure
==================
This step is only needed if configure.ac has been changed, or if configure
does not exist (for example, when building from a git clone). The
configure script and config.h.in can be built by running:
autoheader
autoconf
If you have a full GNU autotools install, you can alternatively run:
autoreconf
Basic Installation
==================
To build and install HTSlib, 'cd' to the htslib-1.x directory containing
the package's source and type the following commands:
./configure
make
make install
The './configure' command checks your build environment and allows various
optional functionality to be enabled (see Configuration below). If you
don't want to select any optional functionality, you may wish to omit
configure and just type 'make; make install' as for previous versions
of HTSlib. However if the build fails you should run './configure' as
it can diagnose the common reasons for build failures.
The 'make' command builds the HTSlib library and various useful
utilities: bgzip, htsfile, and tabix. If compilation fails you should
run './configure' as it can diagnose problems with your build environment
that cause build failures.
The 'make install' command installs the libraries, library header files,
utilities, several manual pages, and a pkgconfig file to /usr/local.
The installation location can be changed by configuring with --prefix=DIR
or via 'make prefix=DIR install' (see Installation Locations below).
Configuration
=============
By default, './configure' examines your build environment, checking for
requirements such as the zlib development files, and arranges for a plain
HTSlib build. The following configure options can be used to enable
various features and specify further optional external requirements:
--enable-plugins
Use plugins to implement exotic file access protocols and other
specialised facilities. This enables such facilities to be developed
and packaged outwith HTSlib, and somewhat isolates HTSlib-using programs
from their library dependencies. By default (or with --disable-plugins),
any enabled pluggable facilities (such as libcurl file access) are built
directly within HTSlib.
The <https://github.com/samtools/htslib-plugins> repository contains
several additional plugins, including the iRODS (<http://irods.org/>)
file access plugin previously distributed with HTSlib.
--with-plugin-dir=DIR
Specifies the directory into which plugins built while building HTSlib
should be installed; by default, LIBEXECDIR/htslib.
--with-plugin-path=DIR:DIR:DIR...
Specifies the list of directories that HTSlib will search for plugins.
By default, only the directory specified via --with-plugin-dir will be
searched; you can use --with-plugin-path='DIR:$(plugindir):DIR' and so
on to cause additional directories to be searched.
--enable-libcurl
Use libcurl (<http://curl.haxx.se/>) to implement network access to
remote files via FTP, HTTP, HTTPS, etc. By default, HTSlib uses its
own simple networking code to provide access via FTP and HTTP only.
--enable-gcs
Implement network access to Google Cloud Storage. By default or with
--enable-gcs=check, this is enabled when libcurl is enabled.
--enable-s3
Implement network access to Amazon AWS S3. By default or with
--enable-s3=check, this is enabled when libcurl is enabled.
--disable-bz2
Bzip2 is an optional compression codec format for CRAM, included
in HTSlib by default. It can be disabled with --disable-bz2, but
be aware that not all CRAM files may be possible to decode.
--disable-lzma
LZMA is an optional compression codec for CRAM, included in HTSlib
by default. It can be disabled with --disable-lzma, but be aware
that not all CRAM files may be possible to decode.
--with-libdeflate
Libdeflate is a heavily optimized library for DEFLATE-based compression
and decompression. It also includes a fast crc32 implementation.
By default, ./configure will probe for libdeflate and use it if
available. To prevent this, use --without-libdeflate.
The configure script also accepts the usual options and environment variables
for tuning installation locations and compilers: type './configure --help'
for details. For example,
./configure CC=icc --prefix=/opt/icc-compiled
would specify that HTSlib is to be built with icc and installed into bin,
lib, etc subdirectories under /opt/icc-compiled.
If dependencies have been installed in non-standard locations (i.e. not on
the normal include and library search paths) then the CPPFLAGS and LDFLAGS
environment variables can be used to set the options needed to find them.
For example, NetBSD users may use:
./configure CPPFLAGS=-I/usr/pkg/include \
LDFLAGS='-L/usr/pkg/lib -Wl,-R/usr/pkg/lib'
to allow compiling and linking against dependencies installed via the ports
collection.
Installation Locations
======================
By default, 'make install' installs HTSlib libraries under /usr/local/lib,
HTSlib header files under /usr/local/include, utility programs under
/usr/local/bin, etc. (To be precise, the header files are installed within
a fixed 'htslib' subdirectory under the specified .../include location.)
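Because of that fixed subdirectory, programs compiled against an
installed HTSlib refer to the headers with an htslib/ prefix, e.g.:

    #include <htslib/sam.h>    /* found under .../include/htslib */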
You can specify a different location to install HTSlib by configuring
with --prefix=DIR or specify locations for particular parts of HTSlib by
configuring with --libdir=DIR and so on. Type './configure --help' for
the full list of such install directory options.
Alternatively you can specify different locations at install time by
typing 'make prefix=DIR install' or 'make libdir=DIR install' and so on.
Consult the list of prefix/exec_prefix/etc variables near the top of the
Makefile for the full list of such variables that can be overridden.
You can also specify a staging area by typing 'make DESTDIR=DIR install',
possibly in conjunction with other --prefix or prefix=DIR settings.
For example,
make DESTDIR=/tmp/staging prefix=/opt
would install into bin, lib, etc subdirectories under /tmp/staging/opt.
System Specific Details
=======================
Installing the prerequisites is system dependent and there is more
than one correct way of satisfying these, including downloading them
from source, compiling and installing them yourself.
For people with super-user access, we provide an example set of commands
below for installing the dependencies on a variety of operating system
distributions. Note these are not specific recommendations on distribution,
compiler or SSL implementation. It is assumed you already have the core set
of packages for the given distribution - the lists may be incomplete if
this is not the case.
Debian / Ubuntu
---------------
sudo apt-get update # Ensure the package list is up to date
sudo apt-get install autoconf automake make gcc perl zlib1g-dev libbz2-dev liblzma-dev libcurl4-gnutls-dev libssl-dev
Note: libcurl4-openssl-dev can be used as an alternative to libcurl4-gnutls-dev.
RedHat / CentOS
---------------
sudo yum install autoconf automake make gcc perl-Data-Dumper zlib-devel bzip2 bzip2-devel xz-devel curl-devel openssl-devel
Alpine Linux
------------
sudo apk update # Ensure the package list is up to date
sudo apk add autoconf automake make gcc musl-dev perl bash zlib-dev bzip2-dev xz-dev curl-dev libressl-dev
OpenSUSE
--------
sudo zypper install autoconf automake make gcc perl zlib-devel libbz2-devel xz-devel libcurl-devel libopenssl-devel
The MIT/Expat License
Copyright (C) 2012-2018 Genome Research Ltd.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
The Modified-BSD License
Copyright (C) 2012-2018 Genome Research Ltd.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Noteworthy changes in release 1.9 (18th July 2018)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* If `./configure` fails, `make` will stop working until either configure
is re-run successfully, or `make distclean` is used. This makes
configuration failures more obvious. (#711, thanks to John Marshall)
* The default SAM version has been changed to 1.6. This is in line with the
latest version specification and indicates that HTSlib supports the
CG tag used to store long CIGAR data in BAM format.
* bgzip integrity check option '--test' (#682, thanks to @sd4B75bJ, @jrayner)
* Faidx can now index fastq files as well as fasta. The fastq index adds
an extra column to the `.fai` index which gives the offset to the quality
values. New interfaces have been added to `htslib/faidx.h` to read the
fastq index and retrieve the quality values. It is possible to open
a fastq index as if fasta (only sequences will be returned), but not
the other way round. (#701)
* New API interfaces to add or update integer, float and array aux tags. (#694)
* Add `level=<number>` option to `hts_set_opt()` to allow the compression
level to be set. Setting `level=0` enables uncompressed output. (#715)
* Improved bgzip error reporting.
* Better error reporting when CRAM reference files can't be opened. (#706)
* Fixes to make tests work properly on Windows/MinGW - mainly to handle
line ending differences. (#716)
* Efficiency improvements:
- Small speed-up for CRAM indexing.
- Reduce the number of unnecessary wake-ups in the thread pool. (#703)
- Avoid some memory copies when writing data, notably for uncompressed
BGZF output. (#703)
* Bug fixes:
- Fix multi-region iterator bugs on CRAM files. (#684)
- Fixed multi-region iterator bug that caused some reads to be skipped
incorrectly when reading BAM files. (#687)
- Fixed synced_bcf_reader() bug when reading contigs multiple times. (#691,
reported by @freeseek)
- Fixed bug where bcf_hdr_set_samples() did not update the sample dictionary
when removing samples. (#692, reported by @freeseek)
- Fixed bug where the VCF record ref length was calculated incorrectly
if an INFO END tag was present. (71b00a)
- Fixed warnings found when compiling with gcc 8.1.0. (#700)
- sam_hdr_read() and sam_hdr_write() will now return an error code
if passed a NULL file pointer, instead of crashing.
- Fixed possible negative array look-up in sam_parse1() that somehow
escaped previous fuzz testing. (#731, reported by @fCorleone)
- Fixed bug where cram range queries could incorrectly report an error
when using multiple threads. (#734, reported by Brent Pedersen)
- Fixed very rare rANS normalisation bug that could cause an assertion
failure when writing CRAM files. (#739, reported by @carsonhh)
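As an illustration of two of the items above: the fastq index can be
queried for quality values through the new faidx interfaces (a sketch,
assuming the fai_load_format()/fai_fetchqual() declarations in
`htslib/faidx.h`; the file and record names are placeholders):

    #include <stdio.h>
    #include <stdlib.h>
    #include "htslib/faidx.h"

    int main(void) {
        /* Builds reads.fq.fai (with the extra quality column) if absent. */
        faidx_t *fai = fai_load_format("reads.fq", FAI_FASTQ);
        if (!fai) return 1;

        int len = 0;
        char *qual = fai_fetchqual(fai, "read1", &len); /* quality string */
        if (qual) { printf("%.*s\n", len, qual); free(qual); }

        fai_destroy(fai);
        return 0;
    }

and the `level=<number>` option corresponds to a sketch like the
following (assuming the HTS_OPT_COMPRESSION_LEVEL option in
`htslib/hts.h`; the output name is a placeholder):

    #include <stdio.h>
    #include "htslib/hts.h"

    int main(void) {
        htsFile *fp = hts_open("out.bam", "wb");
        if (!fp) return 1;
        /* level=0 requests uncompressed BGZF output; 1-9 are zlib-style. */
        if (hts_set_opt(fp, HTS_OPT_COMPRESSION_LEVEL, 0) != 0)
            fprintf(stderr, "failed to set compression level\n");
        hts_close(fp);
        return 0;
    }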
Noteworthy changes in release 1.8 (3rd April 2018)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* The URL to get sequences from the EBI reference server has been changed
to https://. This is because the EBI no longer serve sequences via
plain HTTP - requests to the http:// endpoint just get redirected.
HTSlib needs to be linked against libcurl to download https:// URLs,
so CRAM users who want to get references from the EBI will need to
run configure and ensure libcurl support is enabled using the
--enable-libcurl option.
* Added libdeflate as a build option for alternative faster compression and
decompression. Results vary by CPU but compression should be twice as fast
and decompression faster.
* It is now possible to set the compression level in bgzip. (#675; thanks
to Nathan Weeks).
* bgzip now gets its own manual page.
* CRAM encoding now stores MD and NM tags verbatim where the reference
contains 'N' characters, to work around ambiguities in the SAM
specification (samtools #717/762).
Also added "store_md" and "store_nm" cram-options for forcing these
tags to be stored at all locations. This is best when combined with
a subsequent decode_md=0 option while reading CRAM.
* Multiple CRAM bug fixes, including a fix to free and the subsequent reuse of
references with `-T ref.fa`. (#654; reported by Chris Saunders)
* CRAM multi-threading bugs fixed: don't try to call flush on reading;
processing of multiple range queries; problems with multi-slice containers.
* Fixed crashes when decoding some cramtools-produced CRAM files.
* Fixed a couple of minor rANS issues with handling invalid data.
* Fixed bug where probaln_glocal() tried to allocate far more memory than
needed when the query sequence was much longer than the reference. This
caused crashes in samtools and bcftools mpileup when used on data with very
long reads. (#572, problem reported by Felix Bemm via minimap2).
* sam_prob_realn() now returns -1 (the same value as for unmapped reads)
on reads that do not include at least one 'M', 'X' or '=' CIGAR operator,
and no longer adds BQ or ZQ tags. BAQ adjustments are only made to bases
covered by these operators so there is no point in trying to align
reads that do not have them. (#572)
Noteworthy changes in release 1.7 (26th January 2018)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* BAM: HTSlib now supports BAMs which include CIGARs with more than
65535 operations, as per the hts-specs change of 18th November 2017
(dab57f4 and 2f915a8).
* BCF/VCF:
- Removed the need for long double in pileup calculations.
- Sped up the synced reader in some situations.
- Bug fixing: removed memory leak in bcf_copy.
* CRAM:
- Added support for HTS_IDX_START in cram iterators.
- Easier to build when lzma header files are absent.
- Bug fixing: a region query with REQUIRED_FIELDS option to
disable sequence retrieval now gives correct results.
- Bug fixing: stop queries to regions starting after the last
read on a chromosome from incorrectly reporting errors
(#651, #653; reported by Imran Haque and @egafni via pysam).
* Multi-region iterator: The new structure takes a list of regions and
iterates over all, deduplicating reads in the process, and producing a
full list of file offset intervals. This is usually much faster than
repeatedly using the old single-region iterator on a series of regions.
* Curl improvements:
- Add Bearer token support via HTS_AUTH_LOCATION env (#600).
- Use CURL_CA_BUNDLE environment variable to override the CA (#622;
thanks to Garret Kelly & David Alexander).
- Speed up (removal of excessive waiting) for both http(s) and ftp.
- Avoid repeatedly reconnecting by removal of unnecessary seeks.
- Bug fixing: double free when libcurl_open fails.
* BGZF block caching, if enabled, now performs far better (#629; reported
by Ram Yalamanchili).
* Added an hFILE layer for in-memory I/O buffers (#590; thanks to Thomas
Hickman).
* Tidied up the drand48 support (intended for systems that do not
provide this function).
Noteworthy changes in release 1.6 (28th September 2017)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Fixed bug where iterators on CRAM files did not propagate error return
values to the caller correctly. Thanks go to Chris Saunders.
* Overhauled Windows builds. Building with msys2/mingw64 now works
correctly and passes all tests.
* More improvements to logging output (thanks again to Anders Kaplan).
* Return codes from sam_read1() when reading cram have been made
consistent with those returned when reading sam/bam. Thanks to
Chris Saunders (#575).
* BGZF CRC32 checksums are now always verified.
* It's now possible to set nthreads = 1 for cram files.
* hfile_libcurl has been modified to make it thread-safe. It's also
better at handling web servers that do not honour byte range requests
when attempting to seek - it now sets errno to ESPIPE and keeps
the existing connection open so callers can revert to streaming mode
if they want to.
* hfile_s3 now recalculates access tokens if they have become stale. This
fixes a reported problem where authentication failed after a file
had been in use for more than 15 minutes.
* Fixed bug where remote index fetches would fail to notice errors when
writing files.
* bam_read1() now checks that the query sequence length derived from the
CIGAR alignment matches the sequence length in the BAM record.
Noteworthy changes in release 1.5 (21st June 2017)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Added a new logging API: hts_log(), along with hts_log_error(),
hts_log_warn() etc. convenience macros. Thanks go to Anders Kaplan
for the implementation. (#499, #543, #551)
* Added a new file I/O option "block_size" (HTS_OPT_BLOCK_SIZE) to
alter the hFILE buffer size.
* Fixed various bugs, including compilation issues samtools/bcftools#610,
samtools/bcftools#611 and robustness to corrupted data #537, #538,
#541, #546, #548, #549, #554.
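The logging API introduced above can be exercised with a short sketch
(the messages are illustrative; hts_set_log_level() and the HTS_LOG_*
levels are declared in `htslib/hts_log.h`):

    #include "htslib/hts_log.h"

    int main(void) {
        hts_set_log_level(HTS_LOG_WARNING);  /* hide info/debug output */
        hts_log_warn("falling back to %s", "defaults");
        hts_log_error("could not open %s", "example.bam");
        return 0;
    }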
Noteworthy changes in release 1.4.1 (8th May 2017)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This is primarily a security bug fix update.
* Fixed SECURITY issue (CVE-2017-1000206): buffer overruns with malicious data. (#514)
* S3 support for non Amazon AWS endpoints. (#506)
* Support for variant breakpoints in bcftools. (#516)
* Improved handling of BCF NaNs. (#485)
* Compilation / portability improvements. (#255, #423, #498, #488)
* Miscellaneous bug fixes (#482, #521, #522, #523, #524).
* Sanitise headers (#509)
Release 1.4 (13 March 2017)
* Incompatible changes: several functions and data types have been changed
in this release, and the shared library soversion has been bumped to 2.
- bam_pileup1_t has an additional field (which holds user data)
- bam1_core_t has been modified to allow for >64K CIGAR operations
and (along with bam1_t) so that CIGAR entries are aligned in memory
- hopen() has vararg arguments for setting URL scheme-dependent options
- the various tbx_conf_* presets are now const
- auxiliary fields in bam1_t are now always stored in little-endian byte
order (previously this depended on if you read a bam, sam or cram file)
- index metadata (accessible via hts_idx_get_meta()) is now always
stored in little-endian byte order (previously this depended on if
the index was in tbi or csi format)
- bam_aux2i() now returns an int64_t value
- fai_load() will no longer save local copies of remote fasta indexes
- hts_idx_get_meta() now takes a uint32_t * for l_meta (was int32_t *)
* HTSlib now links against libbz2 and liblzma by default. To remove these
dependencies, run configure with options --disable-bz2 and --disable-lzma,
but note that this may make some CRAM files produced elsewhere unreadable.
* Added a thread pool interface and replaced the bgzf multi-threading
code to use this pool. BAM and CRAM decoding is now multi-threaded
too, using the pool to automatically balance the number of threads
between decode, encode and any data processing jobs.
* New errmod_cal(), probaln_glocal(), sam_cap_mapq(), and sam_prob_realn()
functions, previously internal to SAMtools, have been added to HTSlib.
* Files can now be accessed via Google Cloud Storage using gs: URLs, when
HTSlib is configured to use libcurl for network file access rather than
the included basic knetfile networking.
* S3 file access now also supports the "host_base" setting in the
$HOME/.s3cfg configuration file.
* Data URLs ("data:,text") now follow the standard RFC 2397 format and may
be base64-encoded (when written as "data:;base64,text") or may include
percent-encoded characters. HTSlib's previous over-simplified "data:text"
format is no longer supported -- you will need to add an initial comma.
* When plugins are enabled, S3 support is now provided by a separate
hfile_s3 plugin rather than by hfile_libcurl itself as previously.
When --enable-libcurl is used, by default both GCS and S3 support
and plugins will also be built; they can be individually disabled
via --disable-gcs and --disable-s3.
* The iRODS file access plugin has been moved to a separate repository.
Configure no longer has a --with-irods option; instead build the plugin
found at <https://github.com/samtools/htslib-plugins>.
* APIs to portably read and write (possibly unaligned) data in little-endian
byte order have been added.
* New functions bam_auxB_len(), bam_auxB2i() and bam_auxB2f() have been
added to make accessing array-type auxiliary data easier. bam_aux2i()
can now return the full range of values that can be stored in an integer
tag (including unsigned 32 bit tags). bam_aux2f() will return the value
of integer tags (as a double) as well as floating-point ones. All of
the bam_aux2 and bam_auxB2 functions will set errno if the requested
conversion is not valid.
* New functions fai_load3() and fai_build3() allow fasta indexes to be
stored in a different location to the indexed fasta file.
* New functions bgzf_index_dump_hfile() and bgzf_index_load_hfile()
allow bgzf index files (.gzi) to be written to / read from an existing
hFILE handle.
* hts_idx_push() will report when trying to add a range to an index that
is beyond the limits that the given index can handle. This means trying
to index chromosomes longer than 2^29 bases with a .bai or .tbi index
will report an error instead of apparently working but creating an invalid
index entry.
* VCF formatting is now approximately 4x faster. (Whether this is
noticeable depends on what was creating the VCF.)
* CRAM lossy_names mode now works with TLEN of 0 or TLEN within +/- 1
of the computed value. Note in these situations TLEN will be
generated / fixed during CRAM decode.
* CRAM now supports bzip2 and lzma codecs. Within htslib these are
disabled by default, but can be enabled by specifying "use_bzip2" or
"use_lzma" in an hts_opt_add() call or via the mode string of the
hts_open_format() function.
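A few of the 1.4 items above lend themselves to short sketches. The
little-endian helpers (assuming the u32_to_le()/le_to_u32() style names
in `htslib/hts_endian.h`):

    #include <stdio.h>
    #include <stdint.h>
    #include "htslib/hts_endian.h"

    int main(void) {
        uint8_t buf[4];
        u32_to_le(0x12345678U, buf);    /* store little-endian, portably */
        printf("%08x\n", le_to_u32(buf));
        return 0;
    }

Array-type aux tags can be walked with the new bam_auxB functions (a
sketch; "ZA" is a placeholder tag name, and b is assumed to be a record
obtained from sam_read1()):

    #include <stdio.h>
    #include <errno.h>
    #include "htslib/sam.h"

    static void print_array_tag(const bam1_t *b) {
        uint8_t *aux = bam_aux_get(b, "ZA");
        if (!aux) return;
        uint32_t i, n = bam_auxB_len(aux);   /* number of elements */
        for (i = 0; i < n; i++) {
            errno = 0;
            int64_t v = bam_auxB2i(aux, i);  /* sets errno if invalid */
            if (errno == 0) printf("%lld ", (long long) v);
        }
        printf("\n");
    }

And the optional CRAM codecs can be requested via hts_opt_add() and
hts_opt_apply() (a sketch; the output name is a placeholder):

    #include <stdio.h>
    #include "htslib/hts.h"

    int main(void) {
        hts_opt *opts = NULL;
        if (hts_opt_add(&opts, "use_bzip2") < 0) return 1;

        htsFile *out = hts_open("out.cram", "wc");
        if (!out) return 1;
        if (hts_opt_apply(out, opts) != 0)
            fprintf(stderr, "failed to apply options\n");

        hts_opt_free(opts);
        hts_close(out);
        return 0;
    }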
Noteworthy changes in release 1.3.2 (13 September 2016)
* Corrected bin calculation when converting directly from CRAM to BAM.
Previously a small fraction of converted reads would fail Picard's
validation with "bin field of BAM record does not equal value computed"
(SAMtools issue #574).
* Plugins can now signal to HTSlib which of RTLD_LOCAL and RTLD_GLOBAL
they wish to be opened with -- previously they were always RTLD_LOCAL.
Noteworthy changes in release 1.3.1 (22 April 2016)
* Improved error checking and reporting, especially of I/O errors when
writing output files (#17, #315, PR #271, PR #317).
* Build fixes for 32-bit systems; be sure to run configure to enable
large file support and access to 2GiB+ files.
* Numerous VCF parsing fixes (#321, #322, #323, #324, #325; PR #370).
Particular thanks to Kostya Kortchinsky of the Google Security Team
for testing and numerous input parsing bug reports.
* HTSlib now prints an informational message when initially creating a
CRAM reference cache in the default location under your $HOME directory.
(No message is printed if you are using $REF_CACHE to specify a location.)
* Avoided rare race condition when caching downloaded CRAM reference sequence
files, by using distinctive names for temporary files (in addition to O_EXCL,
which has always been used). Occasional corruption would previously occur
when multiple tools were simultaneously caching the same reference sequences
on an NFS filesystem that did not support O_EXCL (PR #320).
* Prevented race condition in file access plugin loading (PR #341).
* Fixed mpileup memory leak, so no more "[bam_plp_destroy] memory leak [...]
Continue anyway" warning messages (#299).
* Various minor CRAM fixes.
* Fixed documentation problems #348 and #358.
Noteworthy changes in release 1.3 (15 December 2015)
* Files can now be accessed via HTTPS and Amazon S3 in addition to HTTP
and FTP, when HTSlib is configured to use libcurl for network file access
rather than the included basic knetfile networking.
* HTSlib can be built to use remote access hFILE backends (such as iRODS
and libcurl) via a plugin mechanism. This allows other backends to be
easily added and facilitates building tools that use HTSlib, as they
don't need to be linked with the backends' various required libraries.
* When writing CRAM output, sam_open() etc now default to writing CRAM v3.0
rather than v2.1.
* fai_build() and samtools faidx now accept initial whitespace in ">"
headers (e.g., "> chr1 description" is taken to refer to "chr1").
* tabix --only-header works again (was broken in 1.2.x; #249).
* HTSlib's configure script and Makefile now fully support the standard
convention of allowing CC/CPPFLAGS/CFLAGS/LDFLAGS/LIBS to be overridden
as needed. Previously the Makefile listened to $(LDLIBS) instead; if you
were overriding that, you should now override LIBS rather than LDLIBS.
* Fixed bugs #168, #172, #176, #197, #206, #225, #245, #265, #295, and #296.
Noteworthy changes in release 1.2.1 (3 February 2015)
* Reinstated hts_file_type() and FT_* macros, which were available until 1.1
but briefly removed in 1.2. This function is deprecated and will be removed
in a future release -- you should use hts_detect_format() etc instead.
Noteworthy changes in release 1.2 (2 February 2015)
* HTSlib now has a configure script which checks your build environment
and allows for selection of optional extras. See INSTALL for details.
* By default, reference sequences are fetched from the EBI CRAM Reference
Registry and cached in your $HOME cache directory. This behaviour can
be controlled by setting REF_PATH and REF_CACHE environment variables
(see the samtools(1) man page for details)
* Numerous CRAM improvements:
- Support for CRAM v3.0, an upcoming revision to CRAM supporting
better compression and per-container checksums
- EOF checking for v2.1 and v3.0 (similar to checking BAM EOF blocks)
- Non-standard values for PNEXT and TLEN fields are now preserved
- hts_set_fai_filename() now provides a reference file when encoding
- Generated read names are now numbered from 1, rather than being
labelled 'slice:record-in-slice'
- Multi-threading and speed improvements
* New htsfile command for identifying file formats, and corresponding
file format detection APIs
* New tabix --regions FILE, --targets FILE options for filtering via BED files
* Optional iRODS file access, disabled by default. Configure with --with-irods
to enable accessing iRODS data objects directly via 'irods:DATAOBJ'
* All occurrences of 2^29 in the source have been eliminated, so indexing
and querying against reference sequences larger than 512Mbp works (when
using CSI indices)
* Support for plain GZIP compression in various places
* VCF header editing speed improvements
* Added seq_nt16_int[] (equivalent to the samtools API's bam_nt16_nt4_table)
* Reinstated faidx_fetch_nseq(), which was accidentally removed from 1.1.
Now faidx_fetch_nseq() and faidx_nseq() are equivalent; eventually
faidx_fetch_nseq() will be deprecated and removed [#156]
* Fixed bugs #141, #152, #155, #158, #159, and various memory leaks
HTSlib is an implementation of a unified C library for accessing common file
formats, such as [SAM, CRAM and VCF][1], used for high-throughput sequencing
data, and is the core library used by [samtools][2] and [bcftools][3].
HTSlib depends on [zlib][4]; by default it also uses libbz2 and liblzma
for full CRAM support (see the build requirements above).
It is known to be compatible with gcc, g++ and clang.
HTSlib implements a generalized BAM index, with file extension `.csi`
(coordinate-sorted index). The HTSlib file reader first looks for the new index
and then for the old if the new index is absent.
This project also includes the popular tabix indexer, which indexes both `.tbi`
and `.csi` formats, and the bgzip compression utility.
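A minimal example of reading records through the unified API (a sketch;
the input name is a placeholder):

```c
#include <stdio.h>
#include "htslib/sam.h"

int main(int argc, char **argv) {
    htsFile *in = hts_open(argc > 1 ? argv[1] : "test.bam", "r"); /* SAM/BAM/CRAM */
    if (!in) return 1;

    bam_hdr_t *hdr = sam_hdr_read(in);
    bam1_t *b = bam_init1();
    long n = 0;
    while (sam_read1(in, hdr, b) >= 0) n++;   /* count all records */
    printf("%ld records\n", n);

    bam_destroy1(b);
    bam_hdr_destroy(hdr);
    hts_close(in);
    return 0;
}
```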
[1]: http://samtools.github.io/hts-specs/
[2]: http://github.com/samtools/samtools
[3]: http://samtools.github.io/bcftools/
[4]: http://zlib.net/
/*
Copyright (C) 2017 Genome Research Ltd.
Author: Petr Danecek <pd3@sanger.ac.uk>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
Reorder duplicate lines so that compatible variant types are
returned together by bcf_sr_next_line()
- readers are grouped by variants; even with many readers there will
typically be only a few groups
*/
#ifndef __BCF_SR_SORT_H__
#define __BCF_SR_SORT_H__
#include "htslib/synced_bcf_reader.h"
#include "htslib/kbitset.h"
typedef struct
{
int nrec, mrec;
bcf1_t **rec;
}
vcf_buf_t;
typedef struct
{
char *str; // "A>C" for biallelic records or "A>C,A>CC" for multiallelic records
int type; // VCF_SNP, VCF_REF, etc.
int nalt; // number of alternate alleles in this record
int nvcf, mvcf, *vcf; // the list of readers with the same variants
bcf1_t **rec; // list of VCF records in the readers
kbitset_t *mask; // which groups contain the variant
}
var_t;
typedef struct
{
char *key; // only for debugging
int nvar, mvar, *var; // the variants and their type
int nvcf; // number of readers with the same variants
}
grp_t;
typedef struct
{
int nvar, mvar, *var; // list of compatible variants that can be output together
int cnt; // number of readers in this group
kbitset_t *mask; // which groups are populated in this set (replace with expandable bitmask)
}
varset_t;
typedef struct
{
uint8_t score[256];
int nvar, mvar;
var_t *var; // list of all variants from all readers
int nvset, mvset;
int mpmat, *pmat; // pairing matrix, i-th vset and j-th group accessible as i*ngrp+j
int ngrp, mgrp;
int mcnt, *cnt; // number of VCF covered by a varset
grp_t *grp; // list of VCF representatives, each with a unique combination of duplicate lines
varset_t *vset; // list of variant sets - combinations of compatible variants across multiple groups ready for output
vcf_buf_t *vcf_buf; // records sorted in output order, for each VCF
bcf_srs_t *sr;
void *grp_str2int;
void *var_str2int;
kstring_t str;
int moff, noff, *off, mcharp;
char **charp;
const char *chr;
int pos, nsr, msr;
int pair;
int nactive, mactive, *active; // list of readers with lines at the current pos
}
sr_sort_t;
sr_sort_t *bcf_sr_sort_init(sr_sort_t *srt);
void bcf_sr_sort_reset(sr_sort_t *srt);
int bcf_sr_sort_next(bcf_srs_t *readers, sr_sort_t *srt, const char *chr, int pos);
int bcf_sr_sort_set_active(sr_sort_t *srt, int i);
int bcf_sr_sort_add_active(sr_sort_t *srt, int i);
void bcf_sr_sort_destroy(sr_sort_t *srt);
void bcf_sr_sort_remove_reader(bcf_srs_t *readers, sr_sort_t *srt, int i);
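/* Sketch of the expected call sequence (an assumption inferred from the
 * declarations above, not a documented contract): the synced reader owns
 * an sr_sort_t; bcf_sr_sort_init() prepares it, bcf_sr_sort_next() is
 * called per chromosome/position to emit compatible records in order,
 * and bcf_sr_sort_destroy() releases it. */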
#endif
.TH bgzip 1 "18 July 2018" "htslib-1.9" "Bioinformatics tools"
.SH NAME
.PP
bgzip \- Block compression/decompression utility
.\"
.\" Copyright (C) 2009-2011 Broad Institute.
.\" Copyright (C) 2018 Genome Research Limited.
.\"
.\" Author: Heng Li <lh3@sanger.ac.uk>
.\"
.\" Permission is hereby granted, free of charge, to any person obtaining a
.\" copy of this software and associated documentation files (the "Software"),
.\" to deal in the Software without restriction, including without limitation
.\" the rights to use, copy, modify, merge, publish, distribute, sublicense,
.\" and/or sell copies of the Software, and to permit persons to whom the
.\" Software is furnished to do so, subject to the following conditions:
.\"
.\" The above copyright notice and this permission notice shall be included in
.\" all copies or substantial portions of the Software.
.\"
.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
.\" IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
.\" FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
.\" THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
.\" LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
.\" FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
.\" DEALINGS IN THE SOFTWARE.
.\"
.
.\" For code blocks and examples (cf groff's Ultrix-specific man macros)
.de EX
. in +\\$1
. nf
. ft CR
..
.de EE
. ft
. fi
. in
..
.SH SYNOPSIS
.PP
.B bgzip
.RB [ -cdfhir ]
.RB [ -b
.IR virtualOffset ]
.RB [ -I
.IR index_name ]
.RB [ -l
.IR compression_level ]
.RB [ -s
.IR size ]
.RB [ -@
.IR threads ]
.RI [ file ]
.PP
.SH DESCRIPTION
.PP
Bgzip compresses files in a similar manner to, and compatible with, gzip(1).
The file is compressed into a series of small (less than 64K) 'BGZF' blocks.
This allows indexes to be built against the compressed file and used to
retrieve portions of the data without having to decompress the entire file.
If no files are specified on the command line, bgzip will compress (or
decompress if the -d option is used) standard input to standard output.
If a file is specified, it will be compressed (or decompressed with -d).
If the -c option is used, the result will be written to standard output,
otherwise when compressing bgzip will write to a new file with a .gz
suffix and remove the original. When decompressing the input file must
have a .gz suffix, which will be removed to make the output name. Again
after decompression completes the input file will be removed.
.SH OPTIONS
.TP 10
.BI "-b, --offset " INT
Decompress to standard output from virtual file position (0-based uncompressed
offset).
Implies -c and -d.
.TP
.B "-c, --stdout"
Write to standard output, keep original files unchanged.
.TP
.B "-d, --decompress"
Decompress.
.TP
.B "-f, --force"
Overwrite files without asking.
.TP
.B "-h, --help"
Displays a help message.
.TP
.B "-i, --index"
Create a BGZF index while compressing.
Unless the -I option is used, this will have the name of the compressed
file with .gzi appended to it.
.TP
.BI "-I, --index-name " FILE
Index file name.
.TP
.BI "-l, --compress-level " INT
Compression level to use when compressing.
From 0 to 9, or -1 for the default level set by the compression library. [-1]
.TP
.B "-r, --reindex"
Rebuild the index on an existing compressed file.
.TP
.B "-g, --rebgzip"
Try to use an existing index to create a compressed file with matching
block offsets.
Note that this assumes that the same compression library and level are in use
as when making the original file.
Don't use it unless you know what you're doing.
.TP
.BI "-s, --size " INT
Decompress INT bytes (uncompressed size) to standard output.
Implies -c.
.TP
.BI "-@, --threads " INT
Number of threads to use [1].
.PP
.SH BGZF FORMAT
The BGZF format written by bgzip is described in the SAM format specification
available from http://samtools.github.io/hts-specs/SAMv1.pdf.
It makes use of a gzip feature which allows compressed files to be
concatenated.
The input data is divided into blocks which are no larger than 64 kilobytes
both before and after compression (including compression headers).
Each block is compressed into a gzip file.
The gzip header includes an extra sub-field with identifier 'BC' and the length
of the compressed block, including all headers.
.SH GZI FORMAT
The index format is a binary file listing pairs of compressed and
uncompressed offsets in a BGZF file.
Each compressed offset points to the start of a BGZF block.
The uncompressed offset is the corresponding location in the uncompressed
data stream.
All values are stored as little-endian 64-bit unsigned integers.
The file contents are:
.EX 4
uint64_t number_entries
.EE
followed by number_entries pairs of:
.EX 4
uint64_t compressed_offset
uint64_t uncompressed_offset
.EE
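.PP
For example, an index can be dumped with a short C routine (a sketch; it
assumes a little-endian host, so portable code should convert the stored
values explicitly):
.EX 4
#include <stdio.h>
#include <stdint.h>

int main(int argc, char **argv) {
    FILE *f = argc > 1 ? fopen(argv[1], "rb") : NULL; /* e.g. words.gz.gzi */
    uint64_t n, i, pair[2];
    if (!f || fread(&n, 8, 1, f) != 1) return 1;
    for (i = 0; i < n && fread(pair, 8, 2, f) == 2; i++)
        printf("%llu %llu\en", (unsigned long long) pair[0],
               (unsigned long long) pair[1]);
    fclose(f);
    return 0;
}
.EE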
.SH EXAMPLES
.EX 4
# Compress stdin to stdout
bgzip < /usr/share/dict/words > /tmp/words.gz
# Make a .gzi index
bgzip -r /tmp/words.gz
# Extract part of the data using the index
bgzip -b 367635 -s 4 /tmp/words.gz
# Uncompress the whole file, removing the compressed copy
bgzip -d /tmp/words.gz
.EE
.SH AUTHOR
.PP
The BGZF library was originally implemented by Bob Handsaker and modified
by Heng Li for remote file access and in-memory caching.
.SH SEE ALSO
.PP
.BR gzip (1),
.BR tabix (1)
/* bgzip.c -- Block compression/decompression utility.
Copyright (C) 2008, 2009 Broad Institute / Massachusetts Institute of Technology
Copyright (C) 2010, 2013-2018 Genome Research Ltd.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
...
THE SOFTWARE.
*/
#include <config.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
...
#include <errno.h>
#include <stdarg.h>
#include <getopt.h>
#include <sys/select.h>
#include <inttypes.h>
#include <sys/stat.h>
#include "htslib/bgzf.h"
#include "htslib/hts.h"
#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
#endif
static const int WINDOW_SIZE = 64 * 1024;
static void error(const char *format, ...)
{
    va_list ap;
    va_start(ap, format);
    vfprintf(stderr, format, ap);
    va_end(ap);
    exit(EXIT_FAILURE);
}
static int confirm_overwrite(const char *fn)
{
    int save_errno = errno;
    int ret = 0;

    if (isatty(STDIN_FILENO)) {
        char c;
        fprintf(stderr, "[bgzip] %s already exists; do you wish to overwrite (y or n)? ", fn);
        if (scanf("%c", &c) == 1 && (c == 'Y' || c == 'y')) ret = 1;
    }

    errno = save_errno;
    return ret;
}
static int bgzip_main_usage(void)
{
    ...
fprintf(stderr, " -h, --help give this help\n");
fprintf(stderr, " -i, --index compress and create BGZF index\n");
fprintf(stderr, " -I, --index-name FILE name of BGZF index file [file.gz.gzi]\n");
fprintf(stderr, " -l, --compress-level INT Compression level to use when compressing; 0 to 9, or -1 for default [-1]\n");
fprintf(stderr, " -r, --reindex (re)index compressed file\n");
fprintf(stderr, " -g, --rebgzip use an index file to bgzip a file\n");
fprintf(stderr, " -s, --size INT decompress INT bytes (uncompressed size)\n");
fprintf(stderr, " -@, --threads INT number of compression threads to use [1]\n");
fprintf(stderr, " -t, --test test integrity of compressed file");
fprintf(stderr, "\n");
return 1;
}
int main(int argc, char **argv)
{
int c, compress, compress_level = -1, pstdout, is_forced, test, index = 0, rebgzip = 0, reindex = 0;
BGZF *fp;
void *buffer;
long start, end, size;
char *index_fname = NULL;
int threads = 1;
static const struct option loptions[] =
{
{"help",0,0,'h'},
{"offset",1,0,'b'},
{"stdout",0,0,'c'},
{"decompress",0,0,'d'},
{"force",0,0,'f'},
{"index",0,0,'i'},
{"index-name",1,0,'I'},
{"reindex",0,0,'r'},
{"size",1,0,'s'},
{0,0,0,0}
{"help", no_argument, NULL, 'h'},
{"offset", required_argument, NULL, 'b'},
{"stdout", no_argument, NULL, 'c'},
{"decompress", no_argument, NULL, 'd'},
{"force", no_argument, NULL, 'f'},
{"index", no_argument, NULL, 'i'},
{"index-name", required_argument, NULL, 'I'},
{"compress-level", required_argument, NULL, 'l'},
{"reindex", no_argument, NULL, 'r'},
{"rebgzip",no_argument,NULL,'g'},
{"size", required_argument, NULL, 's'},
{"threads", required_argument, NULL, '@'},
{"test", no_argument, NULL, 't'},
{"version", no_argument, NULL, 1},
{NULL, 0, NULL, 0}
};
compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0; test = 0;
while((c = getopt_long(argc, argv, "cdh?fb:@:s:iI:l:grt",loptions,NULL)) >= 0){
switch(c){
case 'd': compress = 0; break;
case 'c': pstdout = 1; break;
...
case 'f': is_forced = 1; break;
case 'i': index = 1; break;
case 'I': index_fname = optarg; break;
case 'l': compress_level = atol(optarg); break;
case 'g': rebgzip = 1; break;
case 'r': reindex = 1; compress = 0; break;
case '@': threads = atoi(optarg); break;
case 't': test = 1; compress = 0; reindex = 0; break;
case 1:
printf(
"bgzip (htslib) %s\n"
"Copyright (C) 2018 Genome Research Ltd.\n", hts_version());
return EXIT_SUCCESS;
case 'h':
case '?': return bgzip_main_usage();
}
...
if (compress == 1) {
struct stat sbuf;
int f_src = fileno(stdin);
char out_mode[3] = "w\0";
char out_mode_exclusive[4] = "wx\0";
if (compress_level < -1 || compress_level > 9) {
fprintf(stderr, "[bgzip] Invalid compress-level: %d\n", compress_level);
return 1;
}
if (compress_level >= 0) {
out_mode[1] = compress_level + '0';
out_mode_exclusive[2] = compress_level + '0';
}
if ( argc>optind )
{
...
}
if (pstdout)
fp = bgzf_open("-", out_mode);
else
{
char *name = malloc(strlen(argv[optind]) + 5);
strcpy(name, argv[optind]);
strcat(name, ".gz");
fp = bgzf_open(name, is_forced? out_mode : out_mode_exclusive);
if (fp == NULL && errno == EEXIST && confirm_overwrite(name))
fp = bgzf_open(name, out_mode);
if (fp == NULL) {
fprintf(stderr, "[bgzip] can't create %s: %s\n", name, strerror(errno));
free(name);
return 1;
}
free(name);
}
}
...
fprintf(stderr, "[bgzip] Index file name expected when writing to stdout\n");
return 1;
}
else
fp = bgzf_open("-", out_mode);
if ( index && rebgzip )
{
fprintf(stderr, "[bgzip] Can't produce a index and rebgzip simultaneously\n");
return 1;
}
if ( rebgzip && !index_fname )
{
fprintf(stderr, "[bgzip] Index file name expected when writing to stdout\n");
return 1;
}
if (threads > 1)
bgzf_mt(fp, threads, 256);
if ( index ) bgzf_index_build_init(fp);
buffer = malloc(WINDOW_SIZE);
#ifdef _WIN32
_setmode(f_src, O_BINARY);
#endif
if (rebgzip){
if ( bgzf_index_load(fp, index_fname, NULL) < 0 ) error("Could not load index: %s.gzi\n", argv[optind]);
while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0)
if (bgzf_block_write(fp, buffer, c) < 0) error("Could not write %d bytes: Error %d\n", c, fp->errcode);
}
else {
while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0)
if (bgzf_write(fp, buffer, c) < 0) error("Could not write %d bytes: Error %d\n", c, fp->errcode);
// f_dst will be closed here
}
if ( index )
{
if (index_fname) {
if (bgzf_index_dump(fp, index_fname, NULL) < 0)
error("Could not write index to '%s'\n", index_fname);
} else {
if (bgzf_index_dump(fp, argv[optind], ".gz.gzi") < 0)
error("Could not write index to '%s.gz.gzi'", argv[optind]);
}
}
if (bgzf_close(fp) < 0) error("Close failed: Error %d", fp->errcode);
if (argc > optind && !pstdout) unlink(argv[optind]);
...
else
{
if ( !index_fname ) error("[bgzip] Index file name expected when reading from stdin\n");
fp = bgzf_open("-", "r");
if ( !fp ) error("[bgzip] Could not read from stdin: %s\n", strerror(errno));
}
...
free(buffer);
if ( ret<0 ) error("Is the file gzipped or bgzipped? The latter is required for indexing.\n");
if ( index_fname ) {
if (bgzf_index_dump(fp, index_fname, NULL) < 0)
error("Could not write index to '%s'\n", index_fname);
} else {
if (bgzf_index_dump(fp, argv[optind], ".gzi") < 0)
error("Could not write index to '%s.gzi'\n", argv[optind]);
}
if ( bgzf_close(fp)<0 ) error("Close failed: Error %d\n",fp->errcode);
return 0;
...
}
char *name;
int len = strlen(argv[optind]);
if ( strcmp(argv[optind]+len-3,".gz") && !test)
{
fprintf(stderr, "[bgzip] %s: unknown suffix -- ignored\n", argv[optind]);
return 1;
...
return 1;
}
if (pstdout || test) {
f_dst = fileno(stdout);
}
else {
const int wrflags = O_WRONLY | O_CREAT | O_TRUNC;
name = strdup(argv[optind]);
name[strlen(name) - 3] = '\0';
f_dst = open(name, is_forced? wrflags : wrflags|O_EXCL, 0666);
if (f_dst < 0 && errno == EEXIST && confirm_overwrite(name))
f_dst = open(name, wrflags, 0666);
if (f_dst < 0) {
fprintf(stderr, "[bgzip] can't create %s: %s\n", name, strerror(errno));
free(name);
return 1;
}
free(name);
}
}
...
else
{
f_dst = fileno(stdout);
fp = bgzf_open("-", "r");
if (fp == NULL) {
fprintf(stderr, "[bgzip] Could not read from stdin: %s\n", strerror(errno));
return 1;
}
}
if (!fp->is_compressed) {
fprintf(stderr, "[bgzip] Expected compressed file -- ignored\n");
return 1;
}
if (threads > 1)
bgzf_mt(fp, threads, 256);
buffer = malloc(WINDOW_SIZE);
if ( start>0 )
{
if ( bgzf_index_load(fp, argv[optind], ".gzi") < 0 ) error("Could not load index: %s.gzi\n", argv[optind]);
if ( bgzf_useek(fp, start, SEEK_SET) < 0 ) error("Could not seek to %ld-th (uncompressed) byte\n", start);
}
#ifdef _WIN32
_setmode(f_dst, O_BINARY);
#endif
while (1) {
if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE);
else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start));
if (c == 0) break;
if (c < 0) error("Could not read %d bytes: Error %d\n", (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start), fp->errcode);
if (c < 0) error("Error %d in block starting at offset %" PRId64 "(%" PRIX64 ")\n", fp->errcode, fp->block_address, fp->block_address);
start += c;
if ( !test && write(f_dst, buffer, c) != c ) {
#ifdef _WIN32
if (GetLastError() != ERROR_NO_DATA)
#endif
error("Could not write %d bytes\n", c);
}
if (end >= 0 && start >= end) break;
}
free(buffer);
if (bgzf_close(fp) < 0) error("Close failed: Error %d\n",fp->errcode);
if (!pstdout && !test) unlink(argv[optind]);
return 0;
}
return 0;
}
#define _USE_KNETFILE
#define BGZF_CACHE
#define BGZF_MT
# Optional configure Makefile overrides for htslib.
#
# Copyright (C) 2015-2017 Genome Research Ltd.
#
# Author: John Marshall <jm18@sanger.ac.uk>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
# This is @configure_input@
#
# If you use configure, this file overrides variables and augments rules
# in the Makefile to reflect your configuration choices. If you don't run
# configure, the main Makefile contains suitable conservative defaults.
prefix = @prefix@
exec_prefix = @exec_prefix@
bindir = @bindir@
includedir = @includedir@
libdir = @libdir@
libexecdir = @libexecdir@
datarootdir = @datarootdir@
mandir = @mandir@
CC = @CC@
RANLIB = @RANLIB@
CPPFLAGS = @CPPFLAGS@
CFLAGS = @CFLAGS@
LDFLAGS = @LDFLAGS@
LIBS = @LIBS@
PLATFORM = @PLATFORM@
PLUGIN_EXT = @PLUGIN_EXT@
# Lowercase here indicates these are "local" to config.mk
plugin_OBJS =
noplugin_LDFLAGS =
noplugin_LIBS =
# ifeq/.../endif, +=, and target-specific variables are GNU Make-specific.
# If you don't have GNU Make, comment out this conditional and note that
# to enable libcurl you will need to implement the following elsewhere.
ifeq "libcurl-@libcurl@" "libcurl-enabled"
LIBCURL_LIBS = -lcurl
plugin_OBJS += hfile_libcurl.o
hfile_libcurl$(PLUGIN_EXT): LIBS += $(LIBCURL_LIBS)
noplugin_LIBS += $(LIBCURL_LIBS)
endif
ifeq "gcs-@gcs@" "gcs-enabled"
plugin_OBJS += hfile_gcs.o
endif
ifeq "s3-@s3@" "s3-enabled"
plugin_OBJS += hfile_s3.o
CRYPTO_LIBS = @CRYPTO_LIBS@
noplugin_LIBS += $(CRYPTO_LIBS)
hfile_s3$(PLUGIN_EXT): LIBS += $(CRYPTO_LIBS)
endif
ifeq "plugins-@enable_plugins@" "plugins-yes"
plugindir = @plugindir@
pluginpath = @pluginpath@
LIBHTS_OBJS += plugin.o
PLUGIN_OBJS += $(plugin_OBJS)
plugin.o plugin.pico: CPPFLAGS += -DPLUGINPATH=\"$(pluginpath)\"
# When built as separate plugins, these record their version themselves.
hfile_gcs.o hfile_gcs.pico: version.h
hfile_libcurl.o hfile_libcurl.pico: version.h
hfile_s3.o hfile_s3.pico: version.h
# Windows DLL plugins depend on the import library, built as a byproduct.
$(plugin_OBJS:.o=.cygdll): cyghts-$(LIBHTS_SOVERSION).dll
else
LIBHTS_OBJS += $(plugin_OBJS)
LDFLAGS += $(noplugin_LDFLAGS)
LIBS += $(noplugin_LIBS)
endif
...
/*! \file
* CRAM interface.
*
* Consider using the higher level hts_*() API for programs that wish to
* be file format agnostic (see htslib/hts.h).
*
* This API should be used for CRAM specific code. The specifics of the
* public API are implemented in cram_io.h, cram_encode.h and cram_decode.h
* although these should not be included directly (use this file instead).
*/
#ifdef __cplusplus
extern "C" {
#endif
#ifndef _CRAM_H_
#define _CRAM_H_
#include "cram/cram_samtools.h"
#include "cram/sam_header.h"
#include "cram_structs.h"
#include "cram_io.h"
...
#include "cram_codecs.h"
#include "cram_index.h"
#endif
// Validate against the external cram.h,
//
// This contains duplicated portions from cram_io.h and cram_structs.h,
// so we want to ensure that the prototypes match.
#include "htslib/cram.h"
#ifdef __cplusplus
}
#endif