Skip to content
Commits on Source (3)
###########################################################################
# Note: This Makefile is intended for development. For production
# builds, use build_dynamic/Makefile.
###########################################################################
# Does not currently support -DCPU_CHECK_...
BASEFLAGS=-g -mavx2 -mbmi -mbmi2 -mlzcnt -DZSTD_MULTITHREAD
# BASEFLAGS=-g -msse4.2 -DZSTD_MULTITHREAD
# BASEFLAGS=-g -DZSTD_MULTITHREAD
include Makefile.src
# Respect the environment:
# Use defaults below only if not set in env or make arguments
CC ?= gcc
CXX ?= g++
CFLAGS ?= -O3
CXXFLAGS ?= -O2
ZLIB ?= ../zlib-1.2.11/libz-64.a
BLASFLAGS64 ?= -llapack -lf77blas -latlas
ARCH32 ?=
# Mandatory flags added to defaults or env settings
CFLAGS += -std=gnu99 $(BASEFLAGS) $(CWARN) $(CINCLUDE)
CXXFLAGS += -std=c++14 $(BASEFLAGS) $(CXXWARN)
LDFLAGS += -lm -lpthread -L. $(ZLIB) -lzstd
# Installation defaults
MKDIR ?= mkdir
INSTALL ?= install
STRIP_CMD ?= strip
PREFIX ?= /usr/local
DESTDIR ?= .
UNAME := $(shell uname)
ifeq ($(UNAME), Darwin)
BLASFLAGS=-framework Accelerate
BLASFLAGS64=-framework Accelerate
LDFLAGS=-ldl -lpthread -lz -lzstd
endif
%.o: %.c
$(CC) -c $(CFLAGS) $(ARCH32) -o $@ $<
%.o: %.cc
$(CXX) -c $(CXXFLAGS) $(ARCH32) -o $@ $<
all: plink2 pgen_compress
# for clean build, "make clean" first
# Run mkdir for both plink2 and pgen_compress as we don't know which
# target will run first
plink2: $(OBJ_NO_ZSTD)
$(MKDIR) -p bin
$(CXX) $(ARCH32) $(OBJ_NO_ZSTD) -o bin/plink2 $(BLASFLAGS64) $(LDFLAGS)
# basic pgenlib_internal.h usage example; also needed for tests
pgen_compress: plink2_base.o pgenlib_internal.o pgen_compress.o
$(MKDIR) -p bin
$(CXX) plink2_base.o pgenlib_internal.o pgen_compress.o \
-o bin/pgen_compress
.PHONY: install-strip install clean
install-strip: install
$(STRIP_CMD) $(DESTDIR)$(PREFIX)/bin/*
install: all
$(MKDIR) -p $(DESTDIR)$(PREFIX)/bin
$(INSTALL) -c bin/* $(DESTDIR)$(PREFIX)/bin
clean:
rm -f $(CLEAN)
CWARN = -Wall -Wextra -Wshadow -Wformat-security -Wdouble-promotion -Wfloat-conversion
CXXWARN = ${CWARN} -Wold-style-cast
# Necessary for older gcc versions.
CWARN2 = -Wall -Wextra -Wshadow -Wformat-security
CXXWARN2 = ${CWARN2} -Wold-style-cast
CSRC = SFMT.c libdeflate/lib/adler32.c libdeflate/lib/aligned_malloc.c libdeflate/lib/crc32.c libdeflate/lib/deflate_compress.c libdeflate/lib/deflate_decompress.c libdeflate/lib/gzip_compress.c libdeflate/lib/gzip_decompress.c libdeflate/lib/zlib_compress.c libdeflate/lib/zlib_decompress.c libdeflate/lib/x86/cpu_features.c
ZCSRC = zstd/lib/common/debug.c zstd/lib/common/entropy_common.c zstd/lib/common/zstd_common.c zstd/lib/common/error_private.c zstd/lib/common/xxhash.c zstd/lib/common/fse_decompress.c zstd/lib/common/pool.c zstd/lib/common/threading.c zstd/lib/compress/fse_compress.c zstd/lib/compress/hist.c zstd/lib/compress/huf_compress.c zstd/lib/compress/zstd_double_fast.c zstd/lib/compress/zstd_fast.c zstd/lib/compress/zstd_lazy.c zstd/lib/compress/zstd_ldm.c zstd/lib/compress/zstd_opt.c zstd/lib/compress/zstd_compress.c zstd/lib/compress/zstdmt_compress.c zstd/lib/decompress/huf_decompress.c zstd/lib/decompress/zstd_decompress.c zstd/lib/decompress/zstd_ddict.c zstd/lib/decompress/zstd_decompress_block.c
CCSRC = plink2_base.cc pgenlib_internal.cc plink2.cc plink2_adjust.cc plink2_bgzf.cc plink2_cmdline.cc plink2_common.cc plink2_compress_stream.cc plink2_data.cc plink2_decompress.cc plink2_export.cc plink2_fasta.cc plink2_filter.cc plink2_glm.cc plink2_help.cc plink2_import.cc plink2_ld.cc plink2_matrix.cc plink2_matrix_calc.cc plink2_misc.cc plink2_psam.cc plink2_pvar.cc plink2_random.cc plink2_set.cc plink2_stats.cc plink2_string.cc plink2_text.cc plink2_thread.cc plink2_zstfile.cc
OBJ_NO_ZSTD = $(CSRC:.c=.o) $(CCSRC:.cc=.o)
OBJ = $(CSRC:.c=.o) $(ZCSRC:.c=.o) $(CCSRC:.cc=.o)
CSRC2 = $(foreach fname,$(CSRC),../$(fname))
ZCSRC2 = $(foreach fname,$(ZCSRC),../$(fname))
CCSRC2 = $(foreach fname,$(CCSRC),../$(fname))
OBJ2 = $(notdir $(OBJ))
OBJ3 = $(CSRC2:.c=.o) $(ZCSRC2:.c=.o) $(CCSRC2:.cc=.o)
CINCLUDE = -Ilibdeflate -Ilibdeflate/common
CINCLUDE2 = -I../libdeflate -I../libdeflate/common
ZSTD_INCLUDE = -Izstd/lib -Izstd/lib/common
ZSTD_INCLUDE2 = -I../zstd/lib -I../zstd/lib/common
CLEAN = *.o libdeflate/lib/*.o libdeflate/lib/x86/*.o zstd/lib/common/*.o zstd/lib/compress/*.o zstd/lib/decompress/*.o bin/plink2 bin/pgen_compress
CLEAN3 = $(foreach expr,$(CLEAN),../$(expr))
......@@ -2,5 +2,5 @@ This provides a basic Python API for pgenlib; see python_api.txt for details.
Cython and NumPy must be installed.
Build this with e.g.
python setup.py build_ext
[sudo] python setup.py install
python3 setup.py build_ext
[sudo] python3 setup.py install
This diff is collapsed.
#!/usr/bin/python
#!/usr/bin/env python3
from distutils.core import setup
from distutils.extension import Extension
from setuptools import setup
from setuptools.extension import Extension
from Cython.Build import cythonize
import numpy as np
ext_modules = [
Extension('pgenlib',
sources = ['pgenlib.pyx', '../pgenlib_python_support.cpp', '../pgenlib_internal.cpp'],
sources = ['pgenlib.pyx', '../pgenlib_ffi_support.cc', '../pgenlib_internal.cc', '../plink2_base.cc'],
language = "c++",
# do not compile as c++11, since cython doesn't yet support
# overload of uint32_t operator
# extra_compile_args = ["-std=c++11", "-Wno-unused-function"],
# extra_link_args = ["-std=c++11"],
extra_compile_args = ["-std=c++98", "-Wno-unused-function"],
extra_compile_args = ["-std=c++98", "-Wno-unused-function", "-Wno-macro-redefined"],
extra_link_args = ["-std=c++98"],
include_dirs = [np.get_include()]
)
......
......@@ -260,7 +260,11 @@ inline static double sfmt_genrand_real2(sfmt_t * sfmt)
*/
inline static double sfmt_to_real3(uint32_t v)
{
#ifdef __cplusplus
return (static_cast<double>(v) + 0.5)*(1.0/4294967296.0);
#else
return (((double)v) + 0.5)*(1.0/4294967296.0);
#endif
/* divided by 2^32 */
}
......@@ -282,7 +286,7 @@ inline static double sfmt_genrand_real3(sfmt_t * sfmt)
*/
inline static double sfmt_to_res53(uint64_t v)
{
return v * (1.0/18446744073709551616.0L);
return v * 0.0000000000000000000542101086242752217003726400434970855712890625;
}
/**
......@@ -305,7 +309,11 @@ inline static double sfmt_genrand_res53(sfmt_t * sfmt)
*/
inline static double sfmt_to_res53_mix(uint32_t x, uint32_t y)
{
#ifdef __cplusplus
return sfmt_to_res53(x | (static_cast<uint64_t>(y) << 32));
#else
return sfmt_to_res53(x | ((uint64_t)y << 32));
#endif
}
/**
......
This diff is collapsed.
/* The MIT License
Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
2011, 2012 Attractive Chaos <attractor@live.co.uk>
Copyright (C) 2009, 2013, 2014 Genome Research Ltd
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* The BGZF library was originally written by Bob Handsaker from the Broad
* Institute. It was later improved by the SAMtools developers. */
#ifndef HTSLIB_BGZF_H
#define HTSLIB_BGZF_H
#include <stdint.h>
#include <stdio.h>
// do not use zstd wrapper here
#ifdef STATIC_ZLIB
#include "../zlib-1.2.11/zlib.h"
#else
#include <zlib.h>
#endif
#include <sys/types.h>
#define BGZF_BLOCK_SIZE 0xff00 // make sure compressBound(BGZF_BLOCK_SIZE) < BGZF_MAX_BLOCK_SIZE
#define BGZF_MAX_BLOCK_SIZE 0x10000
#define BGZF_ERR_ZLIB 1
#define BGZF_ERR_HEADER 2
#define BGZF_ERR_IO 4
#define BGZF_ERR_MISUSE 8
struct hFILE;
struct bgzf_mtaux_t;
typedef struct __bgzidx_t bgzidx_t;
struct BGZF {
int errcode:16, is_write:2, is_be:2, compress_level:9, is_compressed:2, is_gzip:1;
int cache_size;
int block_length, block_offset;
int64_t block_address, uncompressed_address;
void *uncompressed_block, *compressed_block;
void *cache; // a pointer to a hash table
struct hFILE *fp; // actual file handle
struct bgzf_mtaux_t *mt; // only used for multi-threading
bgzidx_t *idx; // BGZF index
int idx_build_otf; // build index on the fly, set by bgzf_index_build_init()
z_stream *gz_stream;// for gzip-compressed files
};
#ifndef HTS_BGZF_TYPEDEF
typedef struct BGZF BGZF;
#define HTS_BGZF_TYPEDEF
#endif
#ifndef KSTRING_T
#define KSTRING_T kstring_t
typedef struct __kstring_t {
size_t l, m;
char *s;
} kstring_t;
#endif
#ifdef __cplusplus
extern "C" {
#endif
/******************
* Basic routines *
******************/
/**
* Open an existing file descriptor for reading or writing.
*
* @param fd file descriptor
* @param mode mode matching /[rwag][u0-9]+/: 'r' for reading, 'w' for
* writing, 'a' for appending, 'g' for gzip rather than BGZF
* compression (with 'w' only), and digit specifies the zlib
* compression level.
* Note that there is a distinction between 'u' and '0': the
* first yields plain uncompressed output whereas the latter
* outputs uncompressed data wrapped in the zlib format.
* @return BGZF file handler; 0 on error
*/
BGZF* bgzf_dopen(int fd, const char *mode);
#define bgzf_fdopen(fd, mode) bgzf_dopen((fd), (mode)) // for backward compatibility
/**
* Open the specified file for reading or writing.
*/
BGZF* bgzf_open(const char* path, const char *mode);
/**
* Open an existing hFILE stream for reading or writing.
*/
BGZF* bgzf_hopen(struct hFILE *fp, const char *mode);
/**
* Close the BGZF and free all associated resources.
*
* @param fp BGZF file handler
* @return 0 on success and -1 on error
*/
int bgzf_close(BGZF *fp);
/**
* Read up to _length_ bytes from the file storing into _data_.
*
* @param fp BGZF file handler
* @param data data array to read into
* @param length size of data to read
* @return number of bytes actually read; 0 on end-of-file and -1 on error
*/
ssize_t bgzf_read(BGZF *fp, void *data, size_t length);
/**
* Write _length_ bytes from _data_ to the file. If no I/O errors occur,
* the complete _length_ bytes will be written (or queued for writing).
*
* @param fp BGZF file handler
* @param data data array to write
* @param length size of data to write
* @return number of bytes written (i.e., _length_); negative on error
*/
ssize_t bgzf_write(BGZF *fp, const void *data, size_t length);
/**
* Read up to _length_ bytes directly from the underlying stream without
* decompressing. Bypasses BGZF blocking, so must be used with care in
* specialised circumstances only.
*
* @param fp BGZF file handler
* @param data data array to read into
* @param length number of raw bytes to read
* @return number of bytes actually read; 0 on end-of-file and -1 on error
*/
ssize_t bgzf_raw_read(BGZF *fp, void *data, size_t length);
/**
* Write _length_ bytes directly to the underlying stream without
* compressing. Bypasses BGZF blocking, so must be used with care
* in specialised circumstances only.
*
* @param fp BGZF file handler
* @param data data array to write
* @param length number of raw bytes to write
* @return number of bytes actually written; -1 on error
*/
ssize_t bgzf_raw_write(BGZF *fp, const void *data, size_t length);
/**
* Write the data in the buffer to the file.
*/
int bgzf_flush(BGZF *fp);
/**
* Return a virtual file pointer to the current location in the file.
* No interpetation of the value should be made, other than a subsequent
* call to bgzf_seek can be used to position the file at the same point.
* Return value is non-negative on success.
*/
#define bgzf_tell(fp) (((fp)->block_address << 16) | ((fp)->block_offset & 0xFFFF))
/**
* Set the file to read from the location specified by _pos_.
*
* @param fp BGZF file handler
* @param pos virtual file offset returned by bgzf_tell()
* @param whence must be SEEK_SET
* @return 0 on success and -1 on error
*/
int64_t bgzf_seek(BGZF *fp, int64_t pos, int whence);
/**
* Check if the BGZF end-of-file (EOF) marker is present
*
* @param fp BGZF file handler opened for reading
* @return 1 if the EOF marker is present and correct;
* 2 if it can't be checked, e.g., because fp isn't seekable;
* 0 if the EOF marker is absent;
* -1 (with errno set) on error
*/
int bgzf_check_EOF(BGZF *fp);
/**
* Check if a file is in the BGZF format
*
* @param fn file name
* @return 1 if _fn_ is BGZF; 0 if not or on I/O error
*/
int bgzf_is_bgzf(const char *fn);
/*********************
* Advanced routines *
*********************/
/**
* Set the cache size. Only effective when compiled with -DBGZF_CACHE.
*
* @param fp BGZF file handler
* @param size size of cache in bytes; 0 to disable caching (default)
*/
void bgzf_set_cache_size(BGZF *fp, int size);
/**
* Flush the file if the remaining buffer size is smaller than _size_
* @return 0 if flushing succeeded or was not needed; negative on error
*/
int bgzf_flush_try(BGZF *fp, ssize_t size);
/**
* Read one byte from a BGZF file. It is faster than bgzf_read()
* @param fp BGZF file handler
* @return byte read; -1 on end-of-file or error
*/
int bgzf_getc(BGZF *fp);
/**
* Read one line from a BGZF file. It is faster than bgzf_getc()
*
* @param fp BGZF file handler
* @param delim delimitor
* @param str string to write to; must be initialized
* @return length of the string; 0 on end-of-file; negative on error
*/
int bgzf_getline(BGZF *fp, int delim, kstring_t *str);
/**
* Read the next BGZF block.
*/
int bgzf_read_block(BGZF *fp);
/**
* Enable multi-threading (only effective on writing and when the
* library was compiled with -DBGZF_MT)
* Modified to use bigstack for main allocations.
*
* @param fp BGZF file handler; must be opened for writing
* @param n_threads #threads used for writing
* @param n_sub_blks #blocks processed by each thread; a value 64-256 is recommended
*/
int bgzf_mt2(unsigned char* arena_top, int n_threads, int n_sub_blks, unsigned char** arena_bottom_ptr, BGZF* fp);
/*******************
* bgzidx routines *
*******************/
/**
* Position BGZF at the uncompressed offset
*
* @param fp BGZF file handler; must be opened for reading
* @param uoffset file offset in the uncompressed data
* @param where SEEK_SET supported atm
*
* Returns 0 on success and -1 on error.
*/
// int bgzf_useek(BGZF *fp, long uoffset, int where);
/**
* Position in uncompressed BGZF
*
* @param fp BGZF file handler; must be opened for reading
*
* Returns the current offset on success and -1 on error.
*/
long bgzf_utell(BGZF *fp);
/**
* Tell BGZF to build index while compressing.
*
* @param fp BGZF file handler; can be opened for reading or writing.
*
* Returns 0 on success and -1 on error.
*/
int bgzf_index_build_init(BGZF *fp);
/**
* Load BGZF index
*
* @param fp BGZF file handler
* @param bname base name
* @param suffix suffix to add to bname (can be NULL)
*
* Returns 0 on success and -1 on error.
*/
int bgzf_index_load(BGZF *fp, const char *bname, const char *suffix);
/**
* Save BGZF index
*
* @param fp BGZF file handler
* @param bname base name
* @param suffix suffix to add to bname (can be NULL)
*
* Returns 0 on success and -1 on error.
*/
int bgzf_index_dump(BGZF *fp, const char *bname, const char *suffix);
#ifdef __cplusplus
}
#endif
#endif
#!/bin/sh -e
##########################################################################
# Script description:
# Wrapper to provide build options to generic Makefile
#
# Arguments:
# 1. Build template: dev
# 2. Additional arguments are passed to make
# (meant to be used for make targets install, clean, ...)
#
# History:
# Date Name Modification
# 2018-05-06 Jason Bacon Begin
##########################################################################
##########################################################################
# Main
##########################################################################
case $(uname) in
Darwin)
BLASFLAGS64='-framework Accelerate'
LDFLAGS='-ldl -lpthread -lz'
MAKE=make
;;
*)
MAKE=make
;;
esac
# Defaults
BLASFLAGS64="$BLASFLAGS64 -llapack -lcblas -lblas"
if [ $# -ge 1 ]; then
template=$1
case $template in
avx2+atlas)
CFLAGS="$CFLAGS -mavx2 -mbmi -mbmi2 -mlzcnt"
BLASFLAGS64="$BLASFLAGS64 -llapack -lf77blas -latlas"
shift
;;
atlas)
BLASFLAGS64="$BLASFLAGS64 -llapack -lf77blas -latlas"
shift
;;
netlib)
BLASFLAGS64="$BLASFLAGS64 -llapack -lcblas -lblas"
shift
;;
*)
# Pass arg 1 to make
;;
esac
fi
CFLAGS="$CFLAGS -DNDEBUG -DZSTD_MULTITHREAD"
CXXFLAGS="$CXXFLAGS -DNDEBUG -DZSTD_MULTITHREAD"
set -x
export CFLAGS CXXFLAGS BLASFLAGS64 LDFLAGS ZLIB BLASFLAGS64
$MAKE -f Makefile "$@"
# Linux/OS X Makefile for PLINK 2.00.
#
# Compilation options (leave blank after "=" to disable, put "= 1" to enable):
# Do not use AVX2 instructions: NO_AVX2
# Do not use SSE4.2 instructions: NO_SSE42
# Print clear error message if SSE42/AVX2 needed but missing: CPU_CHECK
# Do not link to LAPACK: NO_LAPACK
# Use cblas_f77 instead of cblas: FORCE_CBLAS_F77
# Use only -O2 optimization for zstd (may be necessary for gcc 4.x): ZSTD_O2
# Statically link zlib: STATIC_ZLIB
# Statically link zstd: STATIC_ZSTD
# Link to MKL with 64-bit indexes (dynamically): DYNAMIC_MKL
# (this also requires MKLROOT and MKL_IOMP5_DIR to be defined, and
# LD_LIBRARY_PATH to include the appropriate directories)
# 32-bit binary (also sets STATIC_ZLIB, ZSTD_O2, and NO_SSE42): FORCE_32BIT
# (warning: you may need to add a zconf.h symlink to make that work)
# 32-bit binary (also sets STATIC_ZLIB and ZSTD_O2):
# FORCE_32BIT (warning: you may need to add a zconf.h symlink to make that
# work)
# Debug symbols: set DEBUG to -g
NO_AVX2 = 1
NO_SSE42 =
CPU_CHECK = 1
NO_LAPACK =
PREFER_CBLAS_F77 =
ZSTD_O2 = 1
STATIC_ZLIB =
STATIC_ZSTD = 1
DYNAMIC_MKL =
MKLROOT = /home/ubuntu/intel/mkl
MKL_IOMP5_DIR = /home/ubuntu/intel/compilers_and_libraries_2017.2.174/linux/compiler/lib/intel64
FORCE_32BIT =
DEBUG =
BASEFLAGS=-Wall -Wextra
BASEFLAGS=-DZSTD_MULTITHREAD
# ***** end configuration *****
LINKFLAGS=-lm -lpthread
BASEFLAGS += ${DEBUG}
include ../Makefile.src
LINKFLAGS=-lm -lpthread ${DEBUG}
ZLIB=
ARCH32=
CPUCHECK_FLAGS = ${DEBUG}
ifdef FORCE_32BIT
# this is targeted at Scientific Linux 6.
NO_SSE42 = 1
STATIC_ZLIB = 1
ZSTD_O2 = 1
ARCH32 = -m32 -march=i686
CXXFLAGS = -std=c++0x
else
ifdef NO_AVX2
ifndef NO_SSE42
BASEFLAGS += -msse4.2
ifdef CPU_CHECK
BASEFLAGS += -DCPU_CHECK_SSE42
CPUCHECK_FLAGS = -O2 -DCPU_CHECK_SSE42 ${CXXWARN2}
endif
endif
else
BASEFLAGS += -mavx2 -mbmi -mbmi2 -mlzcnt
ifdef CPU_CHECK
BASEFLAGS += -DCPU_CHECK_AVX2
CPUCHECK_FLAGS = -O2 -DCPU_CHECK_AVX2 ${CXXWARN2}
endif
endif
CXXFLAGS = -std=c++11
endif
BASEFLAGS += ${ARCH32}
......@@ -51,10 +79,6 @@ endif
# this actually needs to be named "CXXFLAGS"
CXXFLAGS += -O2
ifndef NO_SSE42
BASEFLAGS += -msse4.2
endif
ifdef FORCE_CBLAS_F77
BASEFLAGS += -DFORCE_CBLAS_F77
BLASFLAGS=-llapack -lf77blas -latlas
......@@ -69,6 +93,17 @@ else
LINKFLAGS += -lz
endif
SKIP_STATIC_ZSTD =
ifdef STATIC_ZSTD
BASEFLAGS += -DSTATIC_ZSTD
else
ZCSRC2 =
SKIP_STATIC_ZSTD = echo
OBJ = $(CSRC:.c=.o) $(CCSRC:.cc=.o)
OBJ2 = $(notdir $(OBJ))
LINKFLAGS += -lzstd
endif
UNAME := $(shell uname)
ifeq ($(UNAME), Darwin)
ifdef FORCE_32BIT
......@@ -78,6 +113,7 @@ ifeq ($(UNAME), Darwin)
$(error MKL is not currently supported on OS X)
endif
BLASFLAGS=-framework Accelerate
CXXFLAGS += -stdlib=libc++
else
ifdef DYNAMIC_MKL
ifdef NO_LAPACK
......@@ -86,7 +122,7 @@ else
ifdef FORCE_32BIT
$(error DYNAMIC_MKL + FORCE_32BIT not supported)
endif
BASEFLAGS = -DDYNAMIC_MKL -DLAPACK_ILP64 -I${MKLROOT}/include
BASEFLAGS += -DDYNAMIC_MKL -DLAPACK_ILP64 -I${MKLROOT}/include
BLASFLAGS = -L${MKLROOT}/lib/intel64 -L${MKL_IOMP5_DIR} -Wl,--no-as-needed -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5
LINKFLAGS += -ldl
endif
......@@ -97,29 +133,25 @@ ifdef NO_LAPACK
BLASFLAGS=
endif
ZSTD_INCLUDE = -I../zstd/lib -I../zstd/lib/common -I../zstd/zlibWrapper
ZCFLAGS += ${ZSTD_INCLUDE}
CFLAGS += ${BASEFLAGS}
ZCFLAGS += ${BASEFLAGS}
CXXFLAGS += ${BASEFLAGS}
CSRC = ../SFMT.c ../hfile.c ../bgzf.c
ZCSRC = ../zstd/zlibWrapper/zstd_zlibwrapper.c ../zstd/zlibWrapper/gzclose.c ../zstd/zlibWrapper/gzlib.c ../zstd/zlibWrapper/gzread.c ../zstd/zlibWrapper/gzwrite.c ../zstd/lib/common/entropy_common.c ../zstd/lib/common/zstd_common.c ../zstd/lib/common/error_private.c ../zstd/lib/common/xxhash.c ../zstd/lib/common/fse_decompress.c ../zstd/lib/compress/fse_compress.c ../zstd/lib/compress/huf_compress.c ../zstd/lib/compress/zstd_compress.c ../zstd/lib/decompress/huf_decompress.c ../zstd/lib/decompress/zstd_decompress.c
CPPSRC = ../pgenlib_internal.cpp ../plink2.cpp ../plink2_adjust.cpp ../plink2_common.cpp ../plink2_compress_stream.cpp ../plink2_data.cpp ../plink2_decompress.cpp ../plink2_filter.cpp ../plink2_glm.cpp ../plink2_help.cpp ../plink2_ld.cpp ../plink2_matrix.cpp ../plink2_matrix_calc.cpp ../plink2_misc.cpp ../plink2_psam.cpp ../plink2_pvar.cpp ../plink2_random.cpp ../plink2_set.cpp ../plink2_stats.cpp
CFLAGS += ${BASEFLAGS} ${CWARN2} ${CINCLUDE2}
ZCFLAGS += ${BASEFLAGS} ${CWARN2} ${ZSTD_INCLUDE2}
CXXFLAGS += ${BASEFLAGS} ${CXXWARN2}
OBJ = SFMT.o hfile.o bgzf.o zstd_zlibwrapper.o gzclose.o gzlib.o gzread.o gzwrite.o entropy_common.o zstd_common.o error_private.o xxhash.o fse_decompress.o fse_compress.o huf_compress.o zstd_compress.o huf_decompress.o zstd_decompress.o pgenlib_internal.o plink2.o plink2_adjust.o plink2_common.o plink2_compress_stream.o plink2_data.o plink2_decompress.o plink2_filter.o plink2_glm.o plink2_help.o plink2_ld.o plink2_matrix.o plink2_matrix_calc.o plink2_misc.o plink2_psam.o plink2_pvar.o plink2_random.o plink2_set.o plink2_stats.o
ifdef FORCE_32BIT
CXXFLAGS += -Wno-sign-compare
endif
all: plink2 pgen_compress
plink2: $(CSRC) $(ZCSRC) $(CPPSRC)
gcc $(CFLAGS) $(CSRC) -c
gcc $(ZCFLAGS) $(ZCSRC) -c
g++ $(CXXFLAGS) $(CPPSRC) -c
g++ $(OBJ) $(ARCH32) -o plink2 $(BLASFLAGS) $(LINKFLAGS)
plink2: $(CSRC2) $(ZCSRC2) $(CCSRC2) ../plink2_cpu.cc
gcc $(CFLAGS) $(CSRC2) -c
$(SKIP_STATIC_ZSTD) gcc $(ZCFLAGS) $(ZCSRC2) -c
g++ $(CXXFLAGS) $(CCSRC2) -c
g++ $(CPUCHECK_FLAGS) ../plink2_cpu.cc -c
g++ $(OBJ2) plink2_cpu.o $(ARCH32) -o plink2 $(BLASFLAGS) $(LINKFLAGS)
pgen_compress: ../pgenlib_internal.cpp ../pgen_compress.cpp
g++ $(CXXFLAGS) ../pgenlib_internal.cpp ../pgen_compress.cpp -o pgen_compress
pgen_compress: ../plink2_base.cc ../pgenlib_internal.cc ../pgen_compress.cc
g++ $(CXXFLAGS) ../plink2_base.cc ../pgenlib_internal.cc ../pgen_compress.cc -o pgen_compress
.PHONY: clean
clean:
......
# MinGW/MinGW-w64 Makefile for PLINK 2.00.
#
# Compilation options (leave blank after "=" to disable, put "= 1" to enable):
# Do not use AVX2 instructions: NO_AVX2
# Do not use SSE 4.2 instructions: NO_SSE42
# Print clear error message if SSE42/AVX2 needed but missing: CPU_CHECK
# Do not link to OpenBLAS: NO_OPENBLAS
# Use only -O2 optimization for zstd: ZSTD_O2
NO_SSE42 = 1
# Debug symbols: set DEBUG to -g
NO_AVX2 = 1
NO_SSE42 =
CPU_CHECK = 1
NO_OPENBLAS =
ZSTD_O2 = 1
DEBUG =
OPENBLAS_ROOT = ../../openblas
ZLIB_STATIC = ../../zlib-1.2.11/libz.a
BASEFLAGS=-Wall -Wextra
BASEFLAGS=
# ***** end configuration *****
BASEFLAGS += -DSTATIC_ZLIB -fno-exceptions
LINKFLAGS=-lm -static-libgcc -L. ${ZLIB_STATIC}
BASEFLAGS += ${DEBUG}
include ../Makefile.src
BASEFLAGS += -DSTATIC_ZLIB -DSTATIC_ZSTD -fno-exceptions
LINKFLAGS=${DEBUG} -lm -static-libgcc -L. ${ZLIB_STATIC}
ifdef NO_OPENBLAS
BASEFLAGS += -DNOLAPACK
BLASFLAGS=
......@@ -26,9 +36,22 @@ endif
CFLAGS=-O2 -std=gnu99
CXXFLAGS=-O2 -std=gnu++11
CPUCHECK_FLAGS = ${DEBUG}
ifdef NO_AVX2
ifndef NO_SSE42
BASEFLAGS += -msse4.2
BASEFLAGS += -msse4.2 -DZSTD_MULTITHREAD
ifdef CPU_CHECK
BASEFLAGS += -DCPU_CHECK_SSE42
CPUCHECK_FLAGS = -O2 -DCPU_CHECK_SSE42 ${CXXWARN2}
endif
endif
else
BASEFLAGS += -mavx2 -mbmi2 -mlzcnt -DZSTD_MULTITHREAD
ifdef CPU_CHECK
BASEFLAGS += -DCPU_CHECK_AVX2
CPUCHECK_FLAGS = -O2 -DCPU_CHECK_AVX2 ${CXXWARN2}
endif
endif
ifdef ZSTD_O2
......@@ -39,29 +62,26 @@ endif
BASEFLAGS += -I${OPENBLAS_ROOT}/include
CFLAGS += ${BASEFLAGS}
ZCFLAGS += ${BASEFLAGS}
CXXFLAGS += ${BASEFLAGS}
ZSTD_INCLUDE = -I../zstd/lib -I../zstd/lib/common -I../zstd/zlibWrapper
ZCFLAGS += ${ZSTD_INCLUDE}
CSRC = ../SFMT.c ../hfile.c ../bgzf.c
ZCSRC = ../zstd/zlibWrapper/zstd_zlibwrapper.c ../zstd/zlibWrapper/gzclose.c ../zstd/zlibWrapper/gzlib.c ../zstd/zlibWrapper/gzread.c ../zstd/zlibWrapper/gzwrite.c ../zstd/lib/common/entropy_common.c ../zstd/lib/common/zstd_common.c ../zstd/lib/common/error_private.c ../zstd/lib/common/xxhash.c ../zstd/lib/common/fse_decompress.c ../zstd/lib/compress/fse_compress.c ../zstd/lib/compress/huf_compress.c ../zstd/lib/compress/zstd_compress.c ../zstd/lib/decompress/huf_decompress.c ../zstd/lib/decompress/zstd_decompress.c
CPPSRC = ../pgenlib_internal.cpp ../plink2.cpp ../plink2_adjust.cpp ../plink2_common.cpp ../plink2_compress_stream.cpp ../plink2_data.cpp ../plink2_decompress.cpp ../plink2_filter.cpp ../plink2_glm.cpp ../plink2_help.cpp ../plink2_ld.cpp ../plink2_matrix.cpp ../plink2_matrix_calc.cpp ../plink2_misc.cpp ../plink2_psam.cpp ../plink2_pvar.cpp ../plink2_random.cpp ../plink2_set.cpp ../plink2_stats.cpp
OBJ = SFMT.o hfile.o bgzf.o zstd_zlibwrapper.o gzclose.o gzlib.o gzread.o gzwrite.o entropy_common.o zstd_common.o error_private.o xxhash.o fse_decompress.o fse_compress.o huf_compress.o zstd_compress.o huf_decompress.o zstd_decompress.o pgenlib_internal.o plink2.o plink2_adjust.o plink2_common.o plink2_compress_stream.o plink2_data.o plink2_decompress.o plink2_filter.o plink2_glm.o plink2_help.o plink2_ld.o plink2_matrix.o plink2_matrix_calc.o plink2_misc.o plink2_psam.o plink2_pvar.o plink2_random.o plink2_set.o plink2_stats.o
ifndef NO_AVX2
BASEFLAGS += -mbmi
endif
CFLAGS += ${BASEFLAGS} ${CWARN2} ${CINCLUDE2}
ZCFLAGS += ${CWARN2} ${ZSTD_INCLUDE2}
CXXFLAGS += ${BASEFLAGS} ${CXXWARN2}
all: plink2 pgen_compress
plink2: $(CSRC) $(ZCSRC) $(CPPSRC)
gcc $(CFLAGS) $(CSRC) -c
gcc $(ZCFLAGS) $(ZCSRC) -c
g++ $(CXXFLAGS) $(CPPSRC) -c
gfortran $(OBJ) -o plink2 $(BLASFLAGS) $(LINKFLAGS)
plink2: $(CSRC2) $(ZCSRC2) $(CCSRC2) ../plink2_cpu.cc
gcc $(CFLAGS) $(CSRC2) -c
gcc $(ZCFLAGS) $(ZCSRC2) -c
g++ $(CXXFLAGS) $(CCSRC2) -c
g++ $(CPUCHECK_FLAGS) ../plink2_cpu.cc -c
gfortran $(OBJ2) plink2_cpu.o -o plink2 $(BLASFLAGS) $(LINKFLAGS)
pgen_compress: ../pgenlib_internal.cpp ../pgen_compress.cpp
g++ $(CXXFLAGS) ../pgenlib_internal.cpp ../pgen_compress.cpp -o pgen_compress
pgen_compress: ../plink2_base.cc ../pgenlib_internal.cc ../pgen_compress.cc
g++ $(CXXFLAGS) ../plink2_base.cc ../pgenlib_internal.cc ../pgen_compress.cc -o pgen_compress
.PHONY: clean
clean:
......
plink2 (2.00~a-170717-1) UNRELEASED; urgency=medium
plink2 (2.00~a2-191022-1) UNRELEASED; urgency=medium
* Initial release. (Closes: #858947)
* TODO:
- add autopkgtest
- remove embedded zstd
-- Dylan Aïssi <bob.dybian@gmail.com> Tue, 18 Jul 2017 23:32:40 +0200
-- Dylan Aïssi <daissi@debian.org> Fri, 25 Oct 2019 23:03:56 +0200
Source: plink2
Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.org>
Uploaders: Dylan Aïssi <daissi@debian.org>
Section: science
Priority: optional
Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.org>
Uploaders: Dylan Aïssi <bob.dybian@gmail.com>
Build-Depends: debhelper (>= 10),
Build-Depends: debhelper-compat (= 12),
help2man,
libatlas-base-dev,
liblapack-dev,
zlib1g-dev
Standards-Version: 4.0.0
Vcs-Browser: https://anonscm.debian.org/git/debian-med/plink2.git
Vcs-Git: https://anonscm.debian.org/git/debian-med/plink2.git
Standards-Version: 4.4.1
Vcs-Browser: https://salsa.debian.org/med-team/plink2
Vcs-Git: https://salsa.debian.org/med-team/plink2.git
Homepage: https://www.cog-genomics.org/plink/2.0/
Package: plink2
Architecture: any-amd64 any-i386 armel armhf mipsel
Depends: ${misc:Depends}, ${shlibs:Depends}
Depends: ${misc:Depends},
${shlibs:Depends}
Description: whole-genome association analysis toolset
plink expects as input the data from SNP (single nucleotide polymorphism)
chips of many individuals and their phenotypical description of a disease.
......
/* hfile.c -- buffered low-level input/output streams.
Copyright (C) 2013-2015 Genome Research Ltd.
Author: John Marshall <jm18@sanger.ac.uk>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
#define _FILE_OFFSET_BITS 64
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <pthread.h>
#include "hfile.h"
#include "hfile_internal.h"
/* hFILE fields are used as follows:
char *buffer; // Pointer to the start of the I/O buffer
char *begin; // First not-yet-read character / unused position
char *end; // First unfilled/unfillable position
char *limit; // Pointer to the first position past the buffer
const hFILE_backend *backend; // Methods to refill/flush I/O buffer
off_t offset; // Offset within the stream of buffer position 0
int at_eof:1; // For reading, whether EOF has been seen
int has_errno; // Error number from the last failure on this stream
For reading, begin is the first unread character in the buffer and end is the
first unfilled position:
-----------ABCDEFGHIJKLMNO---------------
^buffer ^begin ^end ^limit
For writing, begin is the first unused position and end is unused so remains
equal to buffer:
ABCDEFGHIJKLMNOPQRSTUVWXYZ---------------
^buffer ^begin ^limit
^end
Thus if begin > end then there is a non-empty write buffer, if begin < end
then there is a non-empty read buffer, and if begin == end then both buffers
are empty. In all cases, the stream's file position indicator corresponds
to the position pointed to by begin. */
hFILE *hfile_init(size_t struct_size, const char *mode, size_t capacity)
{
hFILE *fp = (hFILE *) malloc(struct_size);
if (fp == NULL) goto error;
if (capacity == 0) capacity = 32768;
// FIXME For now, clamp input buffer sizes so mpileup doesn't eat memory
if (strchr(mode, 'r') && capacity > 32768) capacity = 32768;
fp->buffer = (char *) malloc(capacity);
if (fp->buffer == NULL) goto error;
fp->begin = fp->end = fp->buffer;
fp->limit = &fp->buffer[capacity];
fp->offset = 0;
fp->at_eof = 0;
fp->has_errno = 0;
return fp;
error:
hfile_destroy(fp);
return NULL;
}
void hfile_destroy(hFILE *fp)
{
int save = errno;
if (fp) free(fp->buffer);
free(fp);
errno = save;
}
static inline int writebuffer_is_nonempty(hFILE *fp)
{
return fp->begin > fp->end;
}
/* Refills the read buffer from the backend (once, so may only partially
fill the buffer), returning the number of additional characters read
(which might be 0), or negative when an error occurred. */
static ssize_t refill_buffer(hFILE *fp)
{
ssize_t n;
// Move any unread characters to the start of the buffer
if (fp->begin > fp->buffer) {
fp->offset += fp->begin - fp->buffer;
memmove(fp->buffer, fp->begin, fp->end - fp->begin);
fp->end = &fp->buffer[fp->end - fp->begin];
fp->begin = fp->buffer;
}
// Read into the available buffer space at fp->[end,limit)
if (fp->at_eof || fp->end == fp->limit) n = 0;
else {
n = fp->backend->read(fp, fp->end, fp->limit - fp->end);
if (n < 0) { fp->has_errno = errno; return n; }
else if (n == 0) fp->at_eof = 1;
}
fp->end += n;
return n;
}
/* Called only from hgetc(), when our buffer is empty. */
int hgetc2(hFILE *fp)
{
return (refill_buffer(fp) > 0)? (unsigned char) *(fp->begin++) : EOF;
}
ssize_t hpeek(hFILE *fp, void *buffer, size_t nbytes)
{
size_t n = fp->end - fp->begin;
while (n < nbytes) {
ssize_t ret = refill_buffer(fp);
if (ret < 0) return ret;
else if (ret == 0) break;
else n += ret;
}
if (n > nbytes) n = nbytes;
memcpy(buffer, fp->begin, n);
return n;
}
/* Called only from hread(); when called, our buffer is empty and nread bytes
have already been placed in the destination buffer. */
ssize_t hread2(hFILE *fp, void *destv, size_t nbytes, size_t nread)
{
const size_t capacity = fp->limit - fp->buffer;
char *dest = (char *) destv;
dest += nread, nbytes -= nread;
// Read large requests directly into the destination buffer
while (nbytes * 2 >= capacity && !fp->at_eof) {
ssize_t n = fp->backend->read(fp, dest, nbytes);
if (n < 0) { fp->has_errno = errno; return n; }
else if (n == 0) fp->at_eof = 1;
fp->offset += n;
dest += n, nbytes -= n;
nread += n;
}
while (nbytes > 0 && !fp->at_eof) {
size_t n;
ssize_t ret = refill_buffer(fp);
if (ret < 0) return ret;
n = fp->end - fp->begin;
if (n > nbytes) n = nbytes;
memcpy(dest, fp->begin, n);
fp->begin += n;
dest += n, nbytes -= n;
nread += n;
}
return nread;
}
/* Flushes the write buffer, fp->[buffer,begin), out through the backend
returning 0 on success or negative if an error occurred. */
static ssize_t flush_buffer(hFILE *fp)
{
const char *buffer = fp->buffer;
while (buffer < fp->begin) {
ssize_t n = fp->backend->write(fp, buffer, fp->begin - buffer);
if (n < 0) { fp->has_errno = errno; return n; }
buffer += n;
fp->offset += n;
}
fp->begin = fp->buffer; // Leave the buffer empty
return 0;
}
int hflush(hFILE *fp)
{
if (flush_buffer(fp) < 0) return EOF;
if (fp->backend->flush(fp) < 0) { fp->has_errno = errno; return EOF; }
return 0;
}
/* Called only from hputc(), when our buffer is already full. */
int hputc2(int c, hFILE *fp)
{
if (flush_buffer(fp) < 0) return EOF;
*(fp->begin++) = c;
return c;
}
/* Called only from hwrite() and hputs2(); when called, our buffer is full and
ncopied bytes from the source have already been copied to our buffer. */
ssize_t hwrite2(hFILE *fp, const void *srcv, size_t totalbytes, size_t ncopied)
{
const char *src = (const char *) srcv;
ssize_t ret;
const size_t capacity = fp->limit - fp->buffer;
size_t remaining = totalbytes - ncopied;
src += ncopied;
ret = flush_buffer(fp);
if (ret < 0) return ret;
// Write large blocks out directly from the source buffer
while (remaining * 2 >= capacity) {
ssize_t n = fp->backend->write(fp, src, remaining);
if (n < 0) { fp->has_errno = errno; return n; }
fp->offset += n;
src += n, remaining -= n;
}
// Just buffer any remaining characters
memcpy(fp->begin, src, remaining);
fp->begin += remaining;
return totalbytes;
}
/* Called only from hputs(), when our buffer is already full. */
int hputs2(const char *text, size_t totalbytes, size_t ncopied, hFILE *fp)
{
return (hwrite2(fp, text, totalbytes, ncopied) >= 0)? 0 : EOF;
}
off_t hseek(hFILE *fp, off_t offset, int whence)
{
off_t pos;
if (writebuffer_is_nonempty(fp)) {
int ret = flush_buffer(fp);
if (ret < 0) return ret;
}
else {
// Convert relative offsets from being relative to the hFILE's stream
// position (at begin) to being relative to the backend's physical
// stream position (at end, due to the buffering read-ahead).
if (whence == SEEK_CUR) offset -= fp->end - fp->begin;
}
pos = fp->backend->seek(fp, offset, whence);
if (pos < 0) { fp->has_errno = errno; return pos; }
// Seeking succeeded, so discard any non-empty read buffer
fp->begin = fp->end = fp->buffer;
fp->at_eof = 0;
fp->offset = pos;
return pos;
}
int hclose(hFILE *fp)
{
int err = fp->has_errno;
if (writebuffer_is_nonempty(fp) && hflush(fp) < 0) err = fp->has_errno;
if (fp->backend->close(fp) < 0) err = errno;
hfile_destroy(fp);
if (err) {
errno = err;
return EOF;
}
else return 0;
}
void hclose_abruptly(hFILE *fp)
{
int save = errno;
if (fp->backend->close(fp) < 0) { /* Ignore subsequent errors */ }
hfile_destroy(fp);
errno = save;
}
/***************************
* File descriptor backend *
***************************/
// #include <sys/socket.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
// #ifdef _WIN32
// #define HAVE_CLOSESOCKET
// #endif
/* For Unix, it doesn't matter whether a file descriptor is a socket.
However Windows insists on send()/recv() and its own closesocket()
being used when fd happens to be a socket. */
typedef struct {
hFILE base;
int fd;
// int is_socket:1;
} hFILE_fd;
static ssize_t fd_read(hFILE *fpv, void *buffer, size_t nbytes)
{
hFILE_fd *fp = (hFILE_fd *) fpv;
ssize_t n;
do {
/*
n = fp->is_socket? recv(fp->fd, buffer, nbytes, 0)
: read(fp->fd, buffer, nbytes);
*/
n = read(fp->fd, buffer, nbytes);
} while (n < 0 && errno == EINTR);
return n;
}
static ssize_t fd_write(hFILE *fpv, const void *buffer, size_t nbytes)
{
hFILE_fd *fp = (hFILE_fd *) fpv;
ssize_t n;
do {
/*
n = fp->is_socket? send(fp->fd, buffer, nbytes, 0)
: write(fp->fd, buffer, nbytes);
*/
n = write(fp->fd, buffer, nbytes);
} while (n < 0 && errno == EINTR);
return n;
}
static off_t fd_seek(hFILE *fpv, off_t offset, int whence)
{
hFILE_fd *fp = (hFILE_fd *) fpv;
return lseek(fp->fd, offset, whence);
}
static int fd_flush(hFILE *fpv)
{
hFILE_fd *fp = (hFILE_fd *) fpv;
#ifdef _WIN32
// See the patch at
// https://lists.gnu.org/archive/html/bug-gnulib/2008-10/msg00004.html .
HANDLE hh = (HANDLE)_get_osfhandle(fp->fd);
DWORD err;
if (hh == INVALID_HANDLE_VALUE) {
errno = EBADF;
return -1;
}
if (!FlushFileBuffers(hh)) {
err = GetLastError();
switch (err) {
case ERROR_INVALID_HANDLE:
errno = EINVAL;
break;
default:
errno = EIO;
}
return -1;
}
return 0;
#else
int ret;
do {
ret = fsync(fp->fd);
// Ignore invalid-for-fsync(2) errors due to being, e.g., a pipe,
// and operation-not-supported errors (Mac OS X)
if (ret < 0 && (errno == EINVAL || errno == ENOTSUP)) ret = 0;
} while (ret < 0 && errno == EINTR);
return ret;
#endif
}
static int fd_close(hFILE *fpv)
{
hFILE_fd *fp = (hFILE_fd *) fpv;
int ret;
do {
#ifdef HAVE_CLOSESOCKET
ret = fp->is_socket? closesocket(fp->fd) : close(fp->fd);
#else
ret = close(fp->fd);
#endif
} while (ret < 0 && errno == EINTR);
return ret;
}
static const struct hFILE_backend fd_backend =
{
fd_read, fd_write, fd_seek, fd_flush, fd_close
};
static size_t blksize(int fd)
{
struct stat sbuf;
if (fstat(fd, &sbuf) != 0) return 0;
#ifdef _WIN32
return 512;
#else
return sbuf.st_blksize;
#endif
}
static hFILE *hopen_fd(const char *filename, const char *mode)
{
hFILE_fd *fp = NULL;
int fd = open(filename, hfile_oflags(mode), 0666);
if (fd < 0) goto error;
fp = (hFILE_fd *) hfile_init(sizeof (hFILE_fd), mode, blksize(fd));
if (fp == NULL) goto error;
fp->fd = fd;
// fp->is_socket = 0;
fp->base.backend = &fd_backend;
return &fp->base;
error:
if (fd >= 0) { int save = errno; (void) close(fd); errno = save; }
hfile_destroy((hFILE *) fp);
return NULL;
}
hFILE *hdopen(int fd, const char *mode)
{
hFILE_fd *fp = (hFILE_fd*) hfile_init(sizeof (hFILE_fd), mode, blksize(fd));
if (fp == NULL) return NULL;
fp->fd = fd;
// fp->is_socket = (strchr(mode, 's') != NULL);
fp->base.backend = &fd_backend;
return &fp->base;
}
static hFILE *hopen_fd_stdinout(const char *mode)
{
int fd = (strchr(mode, 'r') != NULL)? STDIN_FILENO : STDOUT_FILENO;
// TODO Set binary mode (for Windows)
return hdopen(fd, mode);
}
int hfile_oflags(const char *mode)
{
int rdwr = 0, flags = 0;
const char *s;
for (s = mode; *s; s++)
switch (*s) {
case 'r': rdwr = O_RDONLY; break;
case 'w': rdwr = O_WRONLY; flags |= O_CREAT | O_TRUNC; break;
case 'a': rdwr = O_WRONLY; flags |= O_CREAT | O_APPEND; break;
case '+': rdwr = O_RDWR; break;
default: break;
}
#ifdef O_BINARY
flags |= O_BINARY;
#endif
return rdwr | flags;
}
/*********************
* In-memory backend *
*********************/
typedef struct {
hFILE base;
const char *buffer;
size_t length, pos;
} hFILE_mem;
/*
static ssize_t mem_read(hFILE *fpv, void *buffer, size_t nbytes)
{
hFILE_mem *fp = (hFILE_mem *) fpv;
size_t avail = fp->length - fp->pos;
if (nbytes > avail) nbytes = avail;
memcpy(buffer, fp->buffer + fp->pos, nbytes);
fp->pos += nbytes;
return nbytes;
}
static off_t mem_seek(hFILE *fpv, off_t offset, int whence)
{
hFILE_mem *fp = (hFILE_mem *) fpv;
size_t absoffset = (offset >= 0)? offset : -offset;
size_t origin;
switch (whence) {
case SEEK_SET: origin = 0; break;
case SEEK_CUR: origin = fp->pos; break;
case SEEK_END: origin = fp->length; break;
default: errno = EINVAL; return -1;
}
if ((offset < 0 && absoffset > origin) ||
(offset >= 0 && absoffset > fp->length - origin)) {
errno = EINVAL;
return -1;
}
fp->pos = origin + offset;
return fp->pos;
}
static int mem_close(hFILE *fpv)
{
return 0;
}
static const struct hFILE_backend mem_backend =
{
mem_read, NULL, mem_seek, NULL, mem_close
};
static hFILE *hopen_mem(const char *data, const char *mode)
{
// TODO Implement write modes, which will require memory allocation
if (strchr(mode, 'r') == NULL) { errno = EINVAL; return NULL; }
hFILE_mem *fp = (hFILE_mem *) hfile_init(sizeof (hFILE_mem), mode, 0);
if (fp == NULL) return NULL;
fp->buffer = data;
fp->length = strlen(data);
fp->pos = 0;
fp->base.backend = &mem_backend;
return &fp->base;
}
*/
/******************************
* hopen() backend dispatcher *
******************************/
hFILE *hopen(const char *fname, const char *mode)
{
// if (strncmp(fname, "http://", 7) == 0 ||
// strncmp(fname, "ftp://", 6) == 0) return hopen_net(fname, mode);
#ifdef HAVE_IRODS
// else if (strncmp(fname, "irods:", 6) == 0) return hopen_irods(fname, mode);
#endif
// else if (strncmp(fname, "data:", 5) == 0) return hopen_mem(fname + 5, mode);
if (strcmp(fname, "-") == 0) return hopen_fd_stdinout(mode);
else return hopen_fd(fname, mode);
}
/*
int hisremote(const char *fname)
{
// FIXME Make a new backend entry to return this
if (strncmp(fname, "http://", 7) == 0 ||
strncmp(fname, "https://", 8) == 0 ||
strncmp(fname, "ftp://", 6) == 0) return 1;
#ifdef HAVE_IRODS
else if (strncmp(fname, "irods:", 6) == 0) return 1;
#endif
else return 0;
}
*/
/* hfile.h -- buffered low-level input/output streams.
Copyright (C) 2013-2015 Genome Research Ltd.
Author: John Marshall <jm18@sanger.ac.uk>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
#ifndef HTSLIB_HFILE_H
#define HTSLIB_HFILE_H
#include <string.h>
#include <sys/types.h>
#include "hts_defs.h"
#ifdef _WIN32
#include <windows.h>
#endif
// #ifdef __cplusplus
// extern "C" {
// #endif
/* These fields are declared here solely for the benefit of the inline functions
below. They may change in future releases. User code should not use them
directly; you should imagine that hFILE is an opaque incomplete type. */
struct hFILE_backend;
typedef struct hFILE {
char *buffer, *begin, *end, *limit;
const struct hFILE_backend *backend;
off_t offset;
int at_eof:1;
int has_errno;
} hFILE;
/*!
@abstract Open the named file or URL as a stream
@return An hFILE pointer, or NULL (with errno set) if an error occurred.
*/
hFILE *hopen(const char *filename, const char *mode) HTS_RESULT_USED;
/*!
@abstract Associate a stream with an existing open file descriptor
@return An hFILE pointer, or NULL (with errno set) if an error occurred.
@notes For socket descriptors (on Windows), mode should contain 's'.
*/
hFILE *hdopen(int fd, const char *mode) HTS_RESULT_USED;
/*!
@abstract Report whether the file name or URL denotes remote storage
@return 0 if local, 1 if remote.
@notes "Remote" means involving e.g. explicit network access, with the
implication that callers may wish to cache such files' contents locally.
*/
// int hisremote(const char *filename) HTS_RESULT_USED;
/*!
@abstract Flush (for output streams) and close the stream
@return 0 if successful, or EOF (with errno set) if an error occurred.
*/
int hclose(hFILE *fp) HTS_RESULT_USED;
/*!
@abstract Close the stream, without flushing or propagating errors
@notes For use while cleaning up after an error only. Preserves errno.
*/
void hclose_abruptly(hFILE *fp);
/*!
@abstract Return the stream's error indicator
@return Non-zero (in fact, an errno value) if an error has occurred.
@notes This would be called herror() and return true/false to parallel
ferror(3), but a networking-related herror(3) function already exists. */
static inline int herrno(hFILE *fp)
{
return fp->has_errno;
}
/*!
@abstract Clear the stream's error indicator
*/
static inline void hclearerr(hFILE *fp)
{
fp->has_errno = 0;
}
/*!
@abstract Reposition the read/write stream offset
@return The resulting offset within the stream (as per lseek(2)),
or negative if an error occurred.
*/
off_t hseek(hFILE *fp, off_t offset, int whence) HTS_RESULT_USED;
/*!
@abstract Report the current stream offset
@return The offset within the stream, starting from zero.
*/
static inline off_t htell(hFILE *fp)
{
return fp->offset + (fp->begin - fp->buffer);
}
/*!
@abstract Read one character from the stream
@return The character read, or EOF on end-of-file or error
*/
static inline int hgetc(hFILE *fp)
{
extern int hgetc2(hFILE *);
return (fp->end > fp->begin)? (unsigned char) *(fp->begin++) : hgetc2(fp);
}
/*!
@abstract Peek at characters to be read without removing them from buffers
@param fp The file stream
@param buffer The buffer to which the peeked bytes will be written
@param nbytes The number of bytes to peek at; limited by the size of the
internal buffer, which could be as small as 4K.
@return The number of bytes peeked, which may be less than nbytes if EOF
is encountered; or negative, if there was an I/O error.
@notes The characters peeked at remain in the stream's internal buffer,
and will be returned by later hread() etc calls.
*/
ssize_t hpeek(hFILE *fp, void *buffer, size_t nbytes) HTS_RESULT_USED;
/*!
@abstract Read a block of characters from the file
@return The number of bytes read, or negative if an error occurred.
@notes The full nbytes requested will be returned, except as limited
by EOF or I/O errors.
*/
static inline ssize_t HTS_RESULT_USED
hread(hFILE *fp, void *buffer, size_t nbytes)
{
extern ssize_t hread2(hFILE *, void *, size_t, size_t);
size_t n = fp->end - fp->begin;
if (n > nbytes) n = nbytes;
memcpy(buffer, fp->begin, n);
fp->begin += n;
return (n == nbytes)? (ssize_t) n : hread2(fp, buffer, nbytes, n);
}
/*!
@abstract Write a character to the stream
@return The character written, or EOF if an error occurred.
*/
static inline int hputc(int c, hFILE *fp)
{
extern int hputc2(int, hFILE *);
if (fp->begin < fp->limit) *(fp->begin++) = c;
else c = hputc2(c, fp);
return c;
}
/*!
@abstract Write a string to the stream
@return 0 if successful, or EOF if an error occurred.
*/
static inline int hputs(const char *text, hFILE *fp)
{
extern int hputs2(const char *, size_t, size_t, hFILE *);
size_t nbytes = strlen(text), n = fp->limit - fp->begin;
if (n > nbytes) n = nbytes;
memcpy(fp->begin, text, n);
fp->begin += n;
return (n == nbytes)? 0 : hputs2(text, nbytes, n, fp);
}
/*!
@abstract Write a block of characters to the file
@return Either nbytes, or negative if an error occurred.
@notes In the absence of I/O errors, the full nbytes will be written.
*/
static inline ssize_t HTS_RESULT_USED
hwrite(hFILE *fp, const void *buffer, size_t nbytes)
{
extern ssize_t hwrite2(hFILE *, const void *, size_t, size_t);
size_t n = fp->limit - fp->begin;
if (n > nbytes) n = nbytes;
memcpy(fp->begin, buffer, n);
fp->begin += n;
return (n==nbytes)? (ssize_t) n : hwrite2(fp, buffer, nbytes, n);
}
/*!
@abstract For writing streams, flush buffered output to the underlying stream
@return 0 if successful, or EOF if an error occurred.
*/
int hflush(hFILE *fp) HTS_RESULT_USED;
// #ifdef __cplusplus
// }
// #endif
#endif
/* hfile_internal.h -- internal parts of low-level input/output streams.
Copyright (C) 2013-2015 Genome Research Ltd.
Author: John Marshall <jm18@sanger.ac.uk>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
#ifndef HFILE_INTERNAL_H
#define HFILE_INTERNAL_H
#include "hfile.h"
struct hFILE_backend {
/* As per read(2), returning the number of bytes read (possibly 0) or
negative (and setting errno) on errors. Front-end code will call this
repeatedly if necessary to attempt to get the desired byte count. */
ssize_t (*read)(hFILE *fp, void *buffer, size_t nbytes) HTS_RESULT_USED;
/* As per write(2), returning the number of bytes written or negative (and
setting errno) on errors. Front-end code will call this repeatedly if
necessary until the desired block is written or an error occurs. */
ssize_t (*write)(hFILE *fp, const void *buffer, size_t nbytes)
HTS_RESULT_USED;
/* As per lseek(2), returning the resulting offset within the stream or
negative (and setting errno) on errors. */
off_t (*seek)(hFILE *fp, off_t offset, int whence) HTS_RESULT_USED;
/* Performs low-level flushing, if any, e.g., fsync(2); for writing streams
only. Returns 0 for success or negative (and sets errno) on errors. */
int (*flush)(hFILE *fp) HTS_RESULT_USED;
/* Closes the underlying stream (for output streams, the buffer will
already have been flushed), returning 0 for success or negative (and
setting errno) on errors, as per close(2). */
int (*close)(hFILE *fp) HTS_RESULT_USED;
};
/* These are called from the hopen() dispatcher, and should call hfile_init()
to malloc a struct "derived" from hFILE and initialise it appropriately,
including setting base.backend to their own backend vector. */
hFILE *hopen_irods(const char *filename, const char *mode);
hFILE *hopen_net(const char *filename, const char *mode);
/* May be called by hopen_*() functions to decode a fopen()-style mode into
open(2)-style flags. */
int hfile_oflags(const char *mode);
/* Must be called by hopen_*() functions to allocate the hFILE struct and set
up its base. Capacity is a suggested buffer size (e.g., via fstat(2))
or 0 for a default-sized buffer. */
hFILE *hfile_init(size_t struct_size, const char *mode, size_t capacity);
/* May be called by hopen_*() functions to undo the effects of hfile_init()
in the event opening the stream subsequently fails. (This is safe to use
even if fp is NULL. This takes care to preserve errno.) */
void hfile_destroy(hFILE *fp);
#endif
/* hts.h -- format-neutral I/O, indexing, and iterator API functions.
Copyright (C) 2012-2014 Genome Research Ltd.
Copyright (C) 2012 Broad Institute.
Author: Heng Li <lh3@sanger.ac.uk>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
#ifndef HTSLIB_HTS_H
#define HTSLIB_HTS_H
#include <stddef.h>
#include <stdint.h>
#ifndef HTS_BGZF_TYPEDEF
typedef struct BGZF BGZF;
#define HTS_BGZF_TYPEDEF
#endif
struct cram_fd;
struct hFILE;
#ifndef KSTRING_T
#define KSTRING_T kstring_t
typedef struct __kstring_t {
size_t l, m;
char *s;
} kstring_t;
#endif
#ifndef kroundup32
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif
/**
* hts_expand() - expands memory block pointed to by $ptr;
* hts_expand0() the latter sets the newly allocated part to 0.
*
* @param n requested number of elements of type type_t
* @param m size of memory allocated
*/
#define hts_expand(type_t, n, m, ptr) if ((n) > (m)) { \
(m) = (n); kroundup32(m); \
(ptr) = (type_t*)realloc((ptr), (m) * sizeof(type_t)); \
}
#define hts_expand0(type_t, n, m, ptr) if ((n) > (m)) { \
int t = (m); (m) = (n); kroundup32(m); \
(ptr) = (type_t*)realloc((ptr), (m) * sizeof(type_t)); \
memset(((type_t*)ptr)+t,0,sizeof(type_t)*((m)-t)); \
}
/************
* File I/O *
************/
// Add new entries only at the end (but before the *_maximum entry)
// of these enums, as their numbering is part of the htslib ABI.
enum htsFormatCategory {
unknown_category,
sequence_data, // Sequence data -- SAM, BAM, CRAM, etc
variant_data, // Variant calling data -- VCF, BCF, etc
index_file, // Index file associated with some data file
region_list, // Coordinate intervals or regions -- BED, etc
category_maximum = 32767
};
enum htsExactFormat {
unknown_format,
binary_format, text_format,
sam, bam, bai, cram, crai, vcf, bcf, csi, gzi, tbi, bed,
format_maximum = 32767
};
enum htsCompression {
no_compression, gzip, bgzf, custom,
compression_maximum = 32767
};
typedef struct htsFormat {
enum htsFormatCategory category;
enum htsExactFormat format;
struct { short major, minor; } version;
enum htsCompression compression;
short compression_level; // currently unused
void *specific; // currently unused
} htsFormat;
// Maintainers note htsFile cannot be an opaque structure because some of its
// fields are part of libhts.so's ABI (hence these fields must not be moved):
// - fp is used in the public sam_itr_next()/etc macros
// - is_bin is used directly in samtools <= 1.1 and bcftools <= 1.1
// - is_write and is_cram are used directly in samtools <= 1.1
// - fp is used directly in samtools (up to and including current develop)
// - line is used directly in bcftools (up to and including current develop)
typedef struct {
uint32_t is_bin:1, is_write:1, is_be:1, is_cram:1, dummy:28;
int64_t lineno;
kstring_t line;
char *fn, *fn_aux;
union {
BGZF *bgzf;
struct cram_fd *cram;
struct hFILE *hfile;
void *voidp;
} fp;
htsFormat format;
} htsFile;
// REQUIRED_FIELDS
enum sam_fields {
SAM_QNAME = 0x00000001,
SAM_FLAG = 0x00000002,
SAM_RNAME = 0x00000004,
SAM_POS = 0x00000008,
SAM_MAPQ = 0x00000010,
SAM_CIGAR = 0x00000020,
SAM_RNEXT = 0x00000040,
SAM_PNEXT = 0x00000080,
SAM_TLEN = 0x00000100,
SAM_SEQ = 0x00000200,
SAM_QUAL = 0x00000400,
SAM_AUX = 0x00000800,
SAM_RGAUX = 0x00001000,
};
enum cram_option {
CRAM_OPT_DECODE_MD,
CRAM_OPT_PREFIX,
CRAM_OPT_VERBOSITY,
CRAM_OPT_SEQS_PER_SLICE,
CRAM_OPT_SLICES_PER_CONTAINER,
CRAM_OPT_RANGE,
CRAM_OPT_VERSION,
CRAM_OPT_EMBED_REF,
CRAM_OPT_IGNORE_MD5,
CRAM_OPT_REFERENCE,
CRAM_OPT_MULTI_SEQ_PER_SLICE,
CRAM_OPT_NO_REF,
CRAM_OPT_USE_BZIP2,
CRAM_OPT_SHARED_REF,
CRAM_OPT_NTHREADS,
CRAM_OPT_THREAD_POOL,
CRAM_OPT_USE_LZMA,
CRAM_OPT_USE_RANS,
CRAM_OPT_REQUIRED_FIELDS,
};
/**********************
* Exported functions *
**********************/
extern int hts_verbose;
/*! @abstract Table for converting a nucleotide character to 4-bit encoding.
The input character may be either an IUPAC ambiguity code, '=' for 0, or
'0'/'1'/'2'/'3' for a result of 1/2/4/8. The result is encoded as 1/2/4/8
for A/C/G/T or combinations of these bits for ambiguous bases.
*/
extern const unsigned char seq_nt16_table[256];
/*! @abstract Table for converting a 4-bit encoded nucleotide to an IUPAC
ambiguity code letter (or '=' when given 0).
*/
extern const char seq_nt16_str[];
/*! @abstract Table for converting a 4-bit encoded nucleotide to about 2 bits.
Returns 0/1/2/3 for 1/2/4/8 (i.e., A/C/G/T), or 4 otherwise (0 or ambiguous).
*/
extern const int seq_nt16_int[];
#ifdef __cplusplus
extern "C" {
#endif
/*!
@abstract Get the htslib version number
@return For released versions, a string like "N.N[.N]"; or git describe
output if using a library built within a Git repository.
*/
const char *hts_version(void);
/*!
@abstract Determine format by peeking at the start of a file
@param fp File opened for reading, positioned at the beginning
@param fmt Format structure that will be filled out on return
@return 0 for success, or negative if an error occurred.
*/
int hts_detect_format(struct hFILE *fp, htsFormat *fmt);
/*!
@abstract Get a human-readable description of the file format
@return Description string, to be freed by the caller after use.
*/
char *hts_format_description(const htsFormat *format);
/*!
@abstract Open a SAM/BAM/CRAM/VCF/BCF/etc file
@param fn The file name or "-" for stdin/stdout
@param mode Mode matching /[rwa][bcuz0-9]+/
@discussion
With 'r' opens for reading; any further format mode letters are ignored
as the format is detected by checking the first few bytes or BGZF blocks
of the file. With 'w' or 'a' opens for writing or appending, with format
specifier letters:
b binary format (BAM, BCF, etc) rather than text (SAM, VCF, etc)
c CRAM format
g gzip compressed
u uncompressed
z bgzf compressed
[0-9] zlib compression level
Note that there is a distinction between 'u' and '0': the first yields
plain uncompressed output whereas the latter outputs uncompressed data
wrapped in the zlib format.
@example
[rw]b .. compressed BCF, BAM, FAI
[rw]u .. uncompressed BCF
[rw]z .. compressed VCF
[rw] .. uncompressed VCF
*/
htsFile *hts_open(const char *fn, const char *mode);
/*!
@abstract Open an existing stream as a SAM/BAM/CRAM/VCF/BCF/etc file
@param fn The already-open file handle
@param mode Open mode, as per hts_open()
*/
htsFile *hts_hopen(struct hFILE *fp, const char *fn, const char *mode);
/*!
@abstract Close a file handle, flushing buffered data for output streams
@param fp The file handle to be closed
@return 0 for success, or negative if an error occurred.
*/
int hts_close(htsFile *fp);
/*!
@abstract Returns the file's format information
@param fp The file handle
@return Read-only pointer to the file's htsFormat.
*/
const htsFormat *hts_get_format(htsFile *fp);
/*!
@abstract Sets a specified CRAM option on the open file handle.
@param fp The file handle open the open file.
@param opt The CRAM_OPT_* option.
@param ... Optional arguments, dependent on the option used.
@return 0 for success, or negative if an error occurred.
*/
int hts_set_opt(htsFile *fp, enum cram_option opt, ...);
int hts_getline(htsFile *fp, int delimiter, kstring_t *str);
char **hts_readlines(const char *fn, int *_n);
/*!
@abstract Parse comma-separated list or read list from a file
@param list File name or comma-separated list
@param is_file
@param _n Size of the output array (number of items read)
@return NULL on failure or pointer to newly allocated array of
strings
*/
char **hts_readlist(const char *fn, int is_file, int *_n);
/*!
@abstract Create extra threads to aid compress/decompression for this file
@param fp The file handle
@param n The number of worker threads to create
@return 0 for success, or negative if an error occurred.
@notes THIS THREADING API IS LIKELY TO CHANGE IN FUTURE.
*/
int hts_set_threads(htsFile *fp, int n);
/*!
@abstract Set .fai filename for a file opened for reading
@return 0 for success, negative on failure
@discussion
Called before *_hdr_read(), this provides the name of a .fai file
used to provide a reference list if the htsFile contains no @SQ headers.
*/
int hts_set_fai_filename(htsFile *fp, const char *fn_aux);
#ifdef __cplusplus
}
#endif
/************
* Indexing *
************/
/*!
These HTS_IDX_* macros are used as special tid values for hts_itr_query()/etc,
producing iterators operating as follows:
- HTS_IDX_NOCOOR iterates over unmapped reads sorted at the end of the file
- HTS_IDX_START iterates over the entire file
- HTS_IDX_REST iterates from the current position to the end of the file
- HTS_IDX_NONE always returns "no more alignment records"
When one of these special tid values is used, beg and end are ignored.
When REST or NONE is used, idx is also ignored and may be NULL.
*/
#define HTS_IDX_NOCOOR (-2)
#define HTS_IDX_START (-3)
#define HTS_IDX_REST (-4)
#define HTS_IDX_NONE (-5)
#define HTS_FMT_CSI 0
#define HTS_FMT_BAI 1
#define HTS_FMT_TBI 2
#define HTS_FMT_CRAI 3
struct __hts_idx_t;
typedef struct __hts_idx_t hts_idx_t;
typedef struct {
uint64_t u, v;
} hts_pair64_t;
typedef int hts_readrec_func(BGZF *fp, void *data, void *r, int *tid, int *beg, int *end);
typedef struct {
uint32_t read_rest:1, finished:1, dummy:29;
int tid, beg, end, n_off, i;
int curr_tid, curr_beg, curr_end;
uint64_t curr_off;
hts_pair64_t *off;
hts_readrec_func *readrec;
struct {
int n, m;
int *a;
} bins;
} hts_itr_t;
#ifdef __cplusplus
extern "C" {
#endif
#define hts_bin_first(l) (((1<<(((l)<<1) + (l))) - 1) / 7)
#define hts_bin_parent(l) (((l) - 1) >> 3)
hts_idx_t *hts_idx_init(int n, int fmt, uint64_t offset0, int min_shift, int n_lvls);
void hts_idx_destroy(hts_idx_t *idx);
int hts_idx_push(hts_idx_t *idx, int tid, int beg, int end, uint64_t offset, int is_mapped);
void hts_idx_finish(hts_idx_t *idx, uint64_t final_offset);
void hts_idx_save(const hts_idx_t *idx, const char *fn, int fmt);
hts_idx_t *hts_idx_load(const char *fn, int fmt);
uint8_t *hts_idx_get_meta(hts_idx_t *idx, int *l_meta);
void hts_idx_set_meta(hts_idx_t *idx, int l_meta, uint8_t *meta, int is_copy);
int hts_idx_get_stat(const hts_idx_t* idx, int tid, uint64_t* mapped, uint64_t* unmapped);
uint64_t hts_idx_get_n_no_coor(const hts_idx_t* idx);
const char *hts_parse_reg(const char *s, int *beg, int *end);
hts_itr_t *hts_itr_query(const hts_idx_t *idx, int tid, int beg, int end, hts_readrec_func *readrec);
void hts_itr_destroy(hts_itr_t *iter);
typedef int (*hts_name2id_f)(void*, const char*);
typedef const char *(*hts_id2name_f)(void*, int);
typedef hts_itr_t *hts_itr_query_func(const hts_idx_t *idx, int tid, int beg, int end, hts_readrec_func *readrec);
hts_itr_t *hts_itr_querys(const hts_idx_t *idx, const char *reg, hts_name2id_f getid, void *hdr, hts_itr_query_func *itr_query, hts_readrec_func *readrec);
int hts_itr_next(BGZF *fp, hts_itr_t *iter, void *r, void *data);
const char **hts_idx_seqnames(const hts_idx_t *idx, int *n, hts_id2name_f getid, void *hdr); // free only the array, not the values
/**
* hts_file_type() - Convenience function to determine file type
* DEPRECATED: This function has been replaced by hts_detect_format().
* It and these FT_* macros will be removed in a future HTSlib release.
*/
#define FT_UNKN 0
#define FT_GZ 1
#define FT_VCF 2
#define FT_VCF_GZ (FT_GZ|FT_VCF)
#define FT_BCF (1<<2)
#define FT_BCF_GZ (FT_GZ|FT_BCF)
#define FT_STDIN (1<<3)
int hts_file_type(const char *fname);
#ifdef __cplusplus
}
#endif
static inline int hts_reg2bin(int64_t beg, int64_t end, int min_shift, int n_lvls)
{
int l, s = min_shift, t = ((1<<((n_lvls<<1) + n_lvls)) - 1) / 7;
for (--end, l = n_lvls; l > 0; --l, s += 3, t -= 1<<((l<<1)+l))
if (beg>>s == end>>s) return t + (beg>>s);
return 0;
}
static inline int hts_bin_bot(int bin, int n_lvls)
{
int l, b;
for (l = 0, b = bin; b; ++l, b = hts_bin_parent(b)); // compute the level of bin
return (bin - hts_bin_first(l)) << (n_lvls - l) * 3;
}
/**************
* Endianness *
**************/
static inline int ed_is_big(void)
{
long one= 1;
return !(*((char *)(&one)));
}
static inline uint16_t ed_swap_2(uint16_t v)
{
return (uint16_t)(((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8));
}
static inline void *ed_swap_2p(void *x)
{
*(uint16_t*)x = ed_swap_2(*(uint16_t*)x);
return x;
}
static inline uint32_t ed_swap_4(uint32_t v)
{
v = ((v & 0x0000FFFFU) << 16) | (v >> 16);
return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8);
}
static inline void *ed_swap_4p(void *x)
{
*(uint32_t*)x = ed_swap_4(*(uint32_t*)x);
return x;
}
static inline uint64_t ed_swap_8(uint64_t v)
{
v = ((v & 0x00000000FFFFFFFFLLU) << 32) | (v >> 32);
v = ((v & 0x0000FFFF0000FFFFLLU) << 16) | ((v & 0xFFFF0000FFFF0000LLU) >> 16);
return ((v & 0x00FF00FF00FF00FFLLU) << 8) | ((v & 0xFF00FF00FF00FF00LLU) >> 8);
}
static inline void *ed_swap_8p(void *x)
{
*(uint64_t*)x = ed_swap_8(*(uint64_t*)x);
return x;
}
#endif
/* hts_defs.h -- Miscellaneous definitions.
Copyright (C) 2013-2015 Genome Research Ltd.
Author: John Marshall <jm18@sanger.ac.uk>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
#ifndef HTSLIB_HTS_DEFS_H
#define HTSLIB_HTS_DEFS_H
#if (__GNUC__ >= 3) || \
(defined __clang__ && __clang_major__ >= 2)
#define HTS_NORETURN __attribute__ ((__noreturn__))
#else
#define HTS_NORETURN
#endif
#if (defined __clang__ && __clang_major__ >= 3) || \
(defined __GNUC__ && (__GNUC__ > 4 || (__GNUC__==4 && __GNUC_MINOR__ >= 5)))
#define HTS_RESULT_USED __attribute__ ((__warn_unused_result__))
#else
#define HTS_RESULT_USED
#endif
#if defined __clang__ || \
(defined __GNUC__ && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 95)))
#define HTS_UNUSED __attribute__ ((__unused__))
#else
#define HTS_UNUSED
#endif
#if (defined __clang__ && (__clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 1))) || \
(defined __GNUC__ && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)))
#define HTS_DEPRECATED(x) __attribute__ ((__deprecated__(x)))
#else
#define HTS_DEPRECATED(x)
#endif
#endif
This diff is collapsed.