Skip to content
Commits on Source (5)
......@@ -5,10 +5,6 @@ version: 'vers.{build}'
# branches to build
branches:
# Whitelist
only:
- develop
# Blacklist
except:
- gh-pages
......
......@@ -29,6 +29,12 @@ CFLAGS = -g -Wall -O2
LDFLAGS =
LIBS =
LZ4DIR = ./lz4
LZ4_CPPFLAGS = -I$(LZ4DIR)
LZ4_LDFLAGS = -L$(LZ4DIR)
LOBJS= bam_aux.o bam.o bam_import.o sam.o \
sam_header.o bam_plbuf.o
AOBJS= bam_index.o bam_plcmd.o sam_view.o \
......@@ -38,7 +44,8 @@ AOBJS= bam_index.o bam_plcmd.o sam_view.o \
cut_target.o phase.o bam2depth.o padding.o bedcov.o bamshuf.o \
faidx.o dict.o stats.o stats_isize.o bam_flags.o bam_split.o \
bam_tview.o bam_tview_curses.o bam_tview_html.o bam_lpileup.o \
bam_quickcheck.o bam_addrprg.o bam_markdup.o
bam_quickcheck.o bam_addrprg.o bam_markdup.o tmp_file.o
LZ4OBJS = $(LZ4DIR)/lz4.o
prefix = /usr/local
exec_prefix = $(prefix)
......@@ -85,8 +92,8 @@ TEST_PROGRAMS = \
all: $(PROGRAMS) $(MISC_PROGRAMS) $(TEST_PROGRAMS)
ALL_CPPFLAGS = -I. $(HTSLIB_CPPFLAGS) $(CPPFLAGS)
ALL_LDFLAGS = $(HTSLIB_LDFLAGS) $(LDFLAGS)
ALL_CPPFLAGS = -I. $(HTSLIB_CPPFLAGS) $(LZ4_CPPFLAGS) $(CPPFLAGS)
ALL_LDFLAGS = $(HTSLIB_LDFLAGS) $(LZ4_LDFLAGS) $(LDFLAGS)
ALL_LIBS = -lz $(LIBS)
# Usually config.mk and config.h are generated by running configure
......@@ -125,7 +132,6 @@ print-version:
.c.o:
$(CC) $(CFLAGS) $(ALL_CPPFLAGS) -c -o $@ $<
LIBST_OBJS = sam_opts.o sam_utils.o
......@@ -134,8 +140,8 @@ lib:libbam.a
libbam.a:$(LOBJS)
$(AR) -csru $@ $(LOBJS)
samtools: $(AOBJS) libbam.a libst.a $(HTSLIB)
$(CC) $(ALL_LDFLAGS) -o $@ $(AOBJS) libbam.a libst.a $(HTSLIB_LIB) $(CURSES_LIB) -lm $(ALL_LIBS) -lpthread
samtools: $(AOBJS) $(LZ4OBJS) libbam.a libst.a $(HTSLIB)
$(CC) $(ALL_LDFLAGS) -o $@ $(AOBJS) $(LZ4OBJS) libbam.a libst.a $(HTSLIB_LIB) $(CURSES_LIB) -lm $(ALL_LIBS) -lpthread
# For building samtools and its test suite only: NOT to be installed.
libst.a: $(LIBST_OBJS)
......@@ -151,6 +157,7 @@ bam_tview_h = bam_tview.h $(htslib_hts_h) $(htslib_sam_h) $(htslib_faidx_h) $(ba
sam_h = sam.h $(htslib_sam_h) $(bam_h)
sam_opts_h = sam_opts.h $(htslib_hts_h)
sample_h = sample.h $(htslib_kstring_h)
tmp_file_h = tmp_file.h $(htslib_sam_h) $(LZ4DIR)/lz4.h
bam.o: bam.c config.h $(bam_h) $(htslib_kstring_h) sam_header.h
bam2bcf.o: bam2bcf.c config.h $(htslib_hts_h) $(htslib_sam_h) $(htslib_kstring_h) $(htslib_kfunc_h) $(bam2bcf_h)
......@@ -195,7 +202,8 @@ sam_view.o: sam_view.c config.h $(htslib_sam_h) $(htslib_faidx_h) $(htslib_kstri
sample.o: sample.c config.h $(sample_h) $(htslib_khash_h)
stats_isize.o: stats_isize.c config.h stats_isize.h $(htslib_khash_h)
stats.o: stats.c config.h $(htslib_faidx_h) $(htslib_sam_h) $(htslib_hts_h) sam_header.h $(htslib_khash_str2int_h) samtools.h $(htslib_khash_h) $(htslib_kstring_h) stats_isize.h $(sam_opts_h)
bam_markdup.o: bam_markdup.c config.h $(htslib_sam_h) $(sam_opts_h) samtools.h $(bam_h) $(htslib_khash_h)
bam_markdup.o: bam_markdup.c config.h $(htslib_sam_h) $(sam_opts_h) samtools.h $(bam_h) $(htslib_khash_h) $(tmp_file_h)
tmp_file.o: tmp_file.c config.h $(tmp_file_h)
# test programs
......@@ -302,7 +310,7 @@ testclean:
-cd test/mpileup && rm -f FAIL-*.out* PASS-*.out* anomalous.[bc]*am indels.[bc]*am mpileup.*.[cs]*am mpileup.*.crai overlap50.[bc]*am expected/1.out xx#depth*.bam*
mostlyclean: testclean
-rm -f *.o misc/*.o test/*.o test/*/*.o version.h
-rm -f *.o misc/*.o test/*.o test/*/*.o version.h $(LZ4OBJS)
clean: mostlyclean
-rm -f $(PROGRAMS) libbam.a libst.a $(MISC_PROGRAMS) $(TEST_PROGRAMS)
......
Release 1.7 (26th January 2018)
--------------------
* HTSlib, and so samtools, now support BAMs which include CIGARs with more
than 65535 operations as per HTS-Specs 18th November (dab57f4 and 2f915a8).
* samtools quickcheck will now write a warning to stderr if it finds
any problems. These messages can be suppressed with a new `-q` option.
* samtools markdup can now mark supplementary alignments of reads where
the primary alignment is found to be a duplicate. Supplementary marking
can be turned on by passing the `-S` option to markdup. When this
option is enabled, all the alignment data will be written to a temporary
file so that supplementary alignments that occur before a duplicated
primary can be correctly marked in the final output. The location
of this temporary file can be influenced using the new `-T` option.
* samtools view now supports HTSlib's new multi-region iterator.
This can be enabled by passing the `-M` option to view. When using
this option:
- The BED filter (`-L` option) will use the index to skip through the file
- Reads from overlapping regions will only be output once
* samtools bedcov will now ignore BED comment and header lines (#571; thanks
to Daniel Baker).
* samtools collate now updates the @HD SO: and GO: tags, and sort will
remove a GO: tag if present. (#757; reported by Imran Haque).
* Bug-fixes:
- maq2sam now checks for input files that end early. (#751; patch supplied
by Alexandre Rebert of the Mayhem team, via Andreas Tille from Debian.)
- Fixed incorrect check when looking up header tags that could lead
to a crash in samtools stats. (#208; thanks to Dave Larson.)
- Fixed bug in samtools fastq `-O` option where it would fail if
the OQ tag in the input file had an unexpected type. (#758;
reported by Taejeong Bae)
- The MD5 calculations in samtools dict and md5fa did not handle
non-alphabetic characters in the same way as the CRAM MD5 function.
They have now been updated to match. (#704; reported by Chris Norman).
- Fix possible infinite loop in samtools targetcut.
- Building bam_tview_curses should no longer fail if a curses header file
cannot be found.
Release 1.6 (28th September 2017)
--------------------
......
......@@ -9,7 +9,7 @@ Building samtools
The typical simple case of building Samtools using the HTSlib bundled within
this Samtools release tarball is done as follows:
cd .../samtools-1.6 # Within the unpacked release directory
cd .../samtools-1.7 # Within the unpacked release directory
./configure
make
......@@ -21,7 +21,7 @@ install samtools etc properly into a directory of your choosing. Building for
installation using the HTSlib bundled within this Samtools release tarball,
and building the various HTSlib utilities such as bgzip is done as follows:
cd .../samtools-1.6 # Within the unpacked release directory
cd .../samtools-1.7 # Within the unpacked release directory
./configure --prefix=/path/to/location
make all all-htslib
make install install-htslib
......
......@@ -38,7 +38,7 @@ DEALINGS IN THE SOFTWARE. */
@copyright Genome Research Ltd.
*/
#define BAM_VERSION "1.6"
#define BAM_VERSION "1.7"
#include <stdint.h>
#include <stdlib.h>
......
......@@ -29,15 +29,18 @@ DEALINGS IN THE SOFTWARE
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <zlib.h>
#include <unistd.h>
#include <ctype.h>
#include <time.h>
#include <sys/stat.h>
#include "htslib/thread_pool.h"
#include "htslib/sam.h"
#include "sam_opts.h"
#include "samtools.h"
#include "htslib/khash.h"
#include "htslib/klist.h"
#include "htslib/kstring.h"
#include "tmp_file.h"
typedef struct {
int32_t single;
......@@ -126,6 +129,7 @@ static int key_equal(key_data_t a, key_data_t b) {
KHASH_INIT(reads, key_data_t, in_hash_t, 1, hash_key, key_equal) // read map hash
KLIST_INIT(read_queue, read_queue_t, __free_queue_element) // the reads buffer
KHASH_MAP_INIT_STR(duplicates, int) // map of duplicates for supplementary dup id
/* Calculate the mate's unclipped start based on position and cigar string from MC tag. */
......@@ -438,24 +442,53 @@ static void make_single_key(key_data_t *key, bam1_t *bam) {
key->orientation = orientation;
}
/* Add the duplicate name to a hash if it does not exist. */
static int add_duplicate(khash_t(duplicates) *d_hash, bam1_t *dupe) {
khiter_t d;
int ret;
d = kh_get(duplicates, d_hash, bam_get_qname(dupe));
if (d == kh_end(d_hash)) {
d = kh_put(duplicates, d_hash, strdup(bam_get_qname(dupe)), &ret);
if (ret > 0) {
kh_value(d_hash, d) = 1;
} else if (ret == 0) {
kh_value(d_hash, d)++;
} else {
fprintf(stderr, "[markdup] error: unable to store supplementary duplicates.\n");
return 1;
}
}
return 0;
}
/* Compare the reads near each other (coordinate sorted) and try to spot the duplicates.
Generally the highest quality scoring is chosen as the original and all others the duplicates.
The score is based on the sum of the quality values (<= 15) of the read and its mate (if any).
While single reads are compared to only one read of a pair, the pair will chosen as the original.
The comparison is done on position and orientation, see above for details. */
The comparison is done on position and orientation, see above for details.
Marking the supplementary reads of a duplicate as also duplicates takes an extra file read/write
step. This is because the duplicate can occur before the primary read.*/
static int bam_mark_duplicates(samFile *in, samFile *out, int remove_dups, int32_t max_length, int do_stats) {
static int bam_mark_duplicates(samFile *in, samFile *out, char *prefix, int remove_dups, int32_t max_length, int do_stats, int supp, int tag) {
bam_hdr_t *header;
khiter_t k;
khash_t(reads) *pair_hash = kh_init(reads);
khash_t(reads) *single_hash = kh_init(reads);
klist_t(read_queue) *read_buffer = kl_init(read_queue);
kliter_t(read_queue) *rq;
khash_t(duplicates) *dup_hash = kh_init(duplicates);
int32_t prev_tid, prev_coord;
read_queue_t *in_read;
int ret;
int reading, writing, excluded, duplicate, single, pair, single_dup, examined;
tmp_file_t temp;
if ((header = sam_hdr_read(in)) == NULL) {
fprintf(stderr, "[markdup] error reading header\n");
......@@ -489,6 +522,13 @@ static int bam_mark_duplicates(samFile *in, samFile *out, int remove_dups, int32
// get the buffer going
in_read = kl_pushp(read_queue, read_buffer);
// handling supplementary reads needs a temporary file
if (supp) {
if (tmp_file_open_write(&temp, prefix, 1)) {
fprintf(stderr, "[markdup] error: unable to open tmp file %s.\n", prefix);
return 1;
}
}
if ((in_read->b = bam_init1()) == NULL) {
fprintf(stderr, "[markdup] error: unable to allocate memory for alignment.\n");
......@@ -519,6 +559,7 @@ static int bam_mark_duplicates(samFile *in, samFile *out, int remove_dups, int32
if (!(in_read->b->core.flag & (BAM_FSECONDARY | BAM_FSUPPLEMENTARY | BAM_FUNMAP | BAM_FQCFAIL))) {
examined++;
// look at the pairs first
if ((in_read->b->core.flag & BAM_FPAIRED) && !(in_read->b->core.flag & BAM_FMUNMAP)) {
int ret, mate_tmp;
......@@ -557,6 +598,21 @@ static int bam_mark_duplicates(samFile *in, samFile *out, int remove_dups, int32
bp->p = in_read->b;
dup->core.flag |= BAM_FDUP;
single_dup++;
if (tag) {
if (bam_aux_append(dup, "do", 'Z', strlen(bam_get_qname(bp->p)) + 1, (uint8_t*)bam_get_qname(bp->p))) {
fprintf(stderr, "[markdup] error: unable to append 'do' tag.\n");
return 1;
}
}
if (supp) {
if (bam_aux_get(dup, "SA") || (dup->core.flag & BAM_FMUNMAP)) {
if (add_duplicate(dup_hash, dup)) {
return 1;
}
}
}
}
} else {
fprintf(stderr, "[markdup] error: single hashing failure.\n");
......@@ -611,6 +667,22 @@ static int bam_mark_duplicates(samFile *in, samFile *out, int remove_dups, int32
dup->core.flag |= BAM_FDUP;
if (tag) {
if (bam_aux_append(dup, "do", 'Z', strlen(bam_get_qname(bp->p)) + 1, (uint8_t*)bam_get_qname(bp->p))) {
fprintf(stderr, "[markdup] error: unable to append 'do' tag.\n");
return 1;
}
}
if (supp) {
if (bam_aux_get(dup, "SA") || (dup->core.flag & BAM_FMUNMAP)) {
if (add_duplicate(dup_hash, dup)) {
return 1;
}
}
}
duplicate++;
} else {
fprintf(stderr, "[markdup] error: pair hashing failure.\n");
......@@ -637,6 +709,22 @@ static int bam_mark_duplicates(samFile *in, samFile *out, int remove_dups, int32
if ((bp->p->core.flag & BAM_FPAIRED) && !(bp->p->core.flag & BAM_FMUNMAP)) {
// if matched against one of a pair just mark as duplicate
if (tag) {
if (bam_aux_append(in_read->b, "do", 'Z', strlen(bam_get_qname(bp->p)) + 1, (uint8_t*)bam_get_qname(bp->p))) {
fprintf(stderr, "[markdup] error: unable to append 'do' tag.\n");
return 1;
}
}
if (supp) {
if (bam_aux_get(in_read->b, "SA") || (in_read->b->core.flag & BAM_FMUNMAP)) {
if (add_duplicate(dup_hash, in_read->b)) {
return 1;
}
}
}
in_read->b->core.flag |= BAM_FDUP;
} else {
int64_t old_score, new_score;
......@@ -655,6 +743,21 @@ static int bam_mark_duplicates(samFile *in, samFile *out, int remove_dups, int32
}
dup->core.flag |= BAM_FDUP;
if (tag) {
if (bam_aux_append(dup, "do", 'Z', strlen(bam_get_qname(bp->p)) + 1, (uint8_t*)bam_get_qname(bp->p))) {
fprintf(stderr, "[markdup] error: unable to append 'do' tag.\n");
return 1;
}
}
if (supp) {
if (bam_aux_get(dup, "SA") || (dup->core.flag & BAM_FMUNMAP)) {
if (add_duplicate(dup_hash, dup)) {
return 1;
}
}
}
}
single_dup++;
......@@ -680,10 +783,17 @@ static int bam_mark_duplicates(samFile *in, samFile *out, int remove_dups, int32
}
if (!remove_dups || !(in_read->b->core.flag & BAM_FDUP)) {
if (supp) {
if (tmp_file_write(&temp, in_read->b)) {
fprintf(stderr, "[markdup] error: writing temp output failed.\n");
return 1;
}
} else {
if (sam_write1(out, header, in_read->b) < 0) {
fprintf(stderr, "[markdup] error: writing output failed.\n");
return 1;
}
}
writing++;
}
......@@ -725,10 +835,17 @@ static int bam_mark_duplicates(samFile *in, samFile *out, int remove_dups, int32
if (bam_get_qname(in_read->b)) { // last entry will be blank
if (!remove_dups || !(in_read->b->core.flag & BAM_FDUP)) {
if (supp) {
if (tmp_file_write(&temp, in_read->b)) {
fprintf(stderr, "[markdup] error: writing temp output failed.\n");
return 1;
}
} else {
if (sam_write1(out, header, in_read->b) < 0) {
fprintf(stderr, "[markdup] error: writing final output failed.\n");
fprintf(stderr, "[markdup] error: writing output failed.\n");
return 1;
}
}
writing++;
}
......@@ -739,6 +856,56 @@ static int bam_mark_duplicates(samFile *in, samFile *out, int remove_dups, int32
rq = kl_begin(read_buffer);
}
if (supp) {
bam1_t *b;
if (tmp_file_end_write(&temp)) {
fprintf(stderr, "[markdup] error: unable to end tmp writing.\n");
return 1;
}
// read data from temp file and mark duplicate supplementary alignments
if (tmp_file_begin_read(&temp, NULL)) {
return 1;
}
b = bam_init1();
while ((ret = tmp_file_read(&temp, b)) > 0) {
if ((b->core.flag & BAM_FSUPPLEMENTARY) || (b->core.flag & BAM_FUNMAP)) {
k = kh_get(duplicates, dup_hash, bam_get_qname(b));
if (k != kh_end(dup_hash)) {
b->core.flag |= BAM_FDUP;
}
}
if (!remove_dups || !(b->core.flag & BAM_FDUP)) {
if (sam_write1(out, header, b) < 0) {
fprintf(stderr, "[markdup] error: writing final output failed.\n");
return 1;
}
}
}
if (ret == -1) {
fprintf(stderr, "[markdup] error: failed to read tmp file.\n");
return 1;
}
for (k = kh_begin(dup_hash); k != kh_end(dup_hash); ++k) {
if (kh_exist(dup_hash, k)) {
free((char *)kh_key(dup_hash, k));
}
}
tmp_file_destroy(&temp, b, 0);
kh_destroy(duplicates, dup_hash);
bam_destroy1(b);
}
if (do_stats) {
fprintf(stderr, "READ %d WRITTEN %d \n"
"EXCLUDED %d EXAMINED %d\n"
......@@ -762,8 +929,12 @@ static int markdup_usage(void) {
fprintf(stderr, "Usage: samtools markdup <input.bam> <output.bam>\n\n");
fprintf(stderr, "Option: \n");
fprintf(stderr, " -r Remove duplicate reads\n");
fprintf(stderr, " -l Max read length (default 300 bases)\n");
fprintf(stderr, " -l INT Max read length (default 300 bases)\n");
fprintf(stderr, " -S Mark supplemenary alignments of duplicates as duplicates (slower).\n");
fprintf(stderr, " -s Report stats.\n");
fprintf(stderr, " -T PREFIX Write temporary files to PREFIX.samtools.nnnn.nnnn.tmp.\n");
fprintf(stderr, " -t Mark primary duplicates with the name of the original in a \'do\' tag."
" Mainly for information and debugging.\n");
sam_global_opt_help(stderr, "-.O..@");
......@@ -775,23 +946,29 @@ static int markdup_usage(void) {
int bam_markdup(int argc, char **argv) {
int c, ret, remove_dups = 0, report_stats = 0;
int c, ret, remove_dups = 0, report_stats = 0, include_supplementary = 0, tag_dup = 0;
int32_t max_length = 300;
samFile *in = NULL, *out = NULL;
char wmode[3] = {'w', 'b', 0};
sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
htsThreadPool p = {NULL, 0};
kstring_t tmpprefix = {0, 0, NULL};
struct stat st;
unsigned int t;
static const struct option lopts[] = {
SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 0, '@'),
{NULL, 0, NULL, 0}
};
while ((c = getopt_long(argc, argv, "rsl:O:@:", lopts, NULL)) >= 0) {
while ((c = getopt_long(argc, argv, "rsl:StT:O:@:", lopts, NULL)) >= 0) {
switch (c) {
case 'r': remove_dups = 1; break;
case 'l': max_length = atoi(optarg); break;
case 's': report_stats = 1; break;
case 'T': kputs(optarg, &tmpprefix); break;
case 'S': include_supplementary = 1; break;
case 't': tag_dup = 1; break;
default: if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
/* else fall-through */
case '?': return markdup_usage();
......@@ -827,7 +1004,24 @@ int bam_markdup(int argc, char **argv) {
}
// actual stuff happens here
ret = bam_mark_duplicates(in, out, remove_dups, max_length, report_stats);
// we need temp files so fix up the name here
if (tmpprefix.l == 0) {
if (strcmp(argv[optind + 1], "-") != 0)
ksprintf(&tmpprefix, "%s.", argv[optind + 1]);
else
kputc('.', &tmpprefix);
}
if (stat(tmpprefix.s, &st) == 0 && S_ISDIR(st.st_mode)) {
if (tmpprefix.s[tmpprefix.l-1] != '/') kputc('/', &tmpprefix);
}
t = ((unsigned) time(NULL)) ^ ((unsigned) clock());
ksprintf(&tmpprefix, "samtools.%d.%u.tmp", (int) getpid(), t % 10000);
ret = bam_mark_duplicates(in, out, tmpprefix.s, remove_dups, max_length, report_stats, include_supplementary, tag_dup);
sam_close(in);
......@@ -838,6 +1032,7 @@ int bam_markdup(int argc, char **argv) {
if (p.pool) hts_tpool_destroy(p.pool);
free(tmpprefix.s);
sam_global_args_free(&ga);
return ret;
......
......@@ -30,43 +30,66 @@ DEALINGS IN THE SOFTWARE. */
#include <stdlib.h>
#include <unistd.h>
/* File status flags (zero means OK). It's possible for more than one to be
* set on a single file. The final exit status is the bitwise-or of the
* status of all the files. */
#define QC_FAIL_OPEN 2
#define QC_NOT_SEQUENCE 4
#define QC_BAD_HEADER 8
#define QC_NO_EOF_BLOCK 16
#define QC_FAIL_CLOSE 32
static void usage_quickcheck(FILE *write_to)
{
fprintf(write_to,
"Usage: samtools quickcheck [options] <input> [...]\n"
"Options:\n"
" -v verbose output (repeat for more verbosity)\n"
" -q suppress warning messages\n"
"\n"
"Notes:\n"
"\n"
"1. In order to use this command effectively, you should check its exit status;\n"
" without any -v options it will NOT print any output, even when some files\n"
" fail the check. One way to use quickcheck might be as a check that all\n"
" BAM files in a directory are okay:\n"
"1. By default quickcheck will emit a warning message if and only if a file\n"
" fails the checks, in which case the exit status is non-zero. Under normal\n"
" behaviour with valid data it will be silent and has a zero exit status.\n"
" The warning messages are purely for manual inspection and should not be \n"
" parsed by scripts.\n"
"\n"
"2. In order to use this command programmatically, you should check its exit\n"
" status. One way to use quickcheck might be as a check that all BAM files in\n"
" a directory are okay:\n"
"\n"
"\tsamtools quickcheck *.bam && echo 'all ok' \\\n"
"\t || echo 'fail!'\n"
"\n"
" To also determine which files have failed, use the -v option:\n"
" The first level of verbosity lists only files that fail to stdout.\n"
" To obtain a parsable list of files that have failed, use this option:\n"
"\n"
"\tsamtools quickcheck -v *.bam > bad_bams.fofn \\\n"
"\tsamtools quickcheck -qv *.bam > bad_bams.fofn \\\n"
"\t && echo 'all ok' \\\n"
"\t || echo 'some files failed check, see bad_bams.fofn'\n"
);
}
#define QC_ERR(state, v, msg, arg1) \
file_state |= (state); \
if (!quiet || verbose >= (v)) fprintf(stderr, (msg), (arg1))
int main_quickcheck(int argc, char** argv)
{
int verbose = 0;
int verbose = 0, quiet = 0;
hts_verbose = 0;
const char* optstring = "v";
const char* optstring = "vq";
int opt;
while ((opt = getopt(argc, argv, optstring)) != -1) {
switch (opt) {
case 'v':
verbose++;
break;
case 'q':
quiet = 1;
break;
default:
usage_quickcheck(stderr);
return 1;
......@@ -101,28 +124,24 @@ int main_quickcheck(int argc, char** argv)
// attempt to open
htsFile *hts_fp = hts_open(fn, "r");
if (hts_fp == NULL) {
if (verbose >= 2) fprintf(stderr, "%s could not be opened for reading.\n", fn);
file_state |= 2;
QC_ERR(QC_FAIL_OPEN, 2, "%s could not be opened for reading.\n", fn);
}
else {
if (verbose >= 3) fprintf(stderr, "opened %s\n", fn);
// make sure we have sequence data
const htsFormat *fmt = hts_get_format(hts_fp);
if (fmt->category != sequence_data ) {
if (verbose >= 2) fprintf(stderr, "%s was not identified as sequence data.\n", fn);
file_state |= 4;
QC_ERR(QC_NOT_SEQUENCE, 2, "%s was not identified as sequence data.\n", fn);
}
else {
if (verbose >= 3) fprintf(stderr, "%s is sequence data\n", fn);
// check header
bam_hdr_t *header = sam_hdr_read(hts_fp);
if (header == NULL) {
if (verbose >= 2) fprintf(stderr, "%s caused an error whilst reading its header.\n", fn);
file_state |= 8;
QC_ERR(QC_BAD_HEADER, 2, "%s caused an error whilst reading its header.\n", fn);
} else {
if (header->n_targets <= 0) {
if (verbose >= 2) fprintf(stderr, "%s had no targets in header.\n", fn);
file_state |= 8;
QC_ERR(QC_BAD_HEADER, 2, "%s had no targets in header.\n", fn);
}
else {
if (verbose >= 3) fprintf(stderr, "%s has %d targets in header.\n", fn, header->n_targets);
......@@ -133,14 +152,12 @@ int main_quickcheck(int argc, char** argv)
// check EOF on formats that support this
int ret;
if ((ret = hts_check_EOF(hts_fp)) < 0) {
if (verbose >= 2) fprintf(stderr, "%s caused an error whilst checking for EOF block.\n", fn);
file_state |= 16;
QC_ERR(QC_NO_EOF_BLOCK, 2, "%s caused an error whilst checking for EOF block.\n", fn);
}
else {
switch (ret) {
case 0:
if (verbose >= 2) fprintf(stderr, "%s was missing EOF block when one should be present.\n", fn);
file_state |= 16;
QC_ERR(QC_NO_EOF_BLOCK, 2, "%s was missing EOF block when one should be present.\n", fn);
break;
case 1:
if (verbose >= 3) fprintf(stderr, "%s has good EOF block.\n", fn);
......@@ -155,8 +172,7 @@ int main_quickcheck(int argc, char** argv)
}
if (hts_close(hts_fp) < 0) {
file_state |= 32;
if (verbose >= 2) fprintf(stderr, "%s did not close cleanly.\n", fn);
QC_ERR(QC_FAIL_CLOSE, 2, "%s did not close cleanly.\n", fn);
}
}
......
......@@ -1787,41 +1787,6 @@ static int bam_merge_simple(int by_qname, char *sort_tag, const char *out,
return -1;
}
static int change_SO(bam_hdr_t *h, const char *so)
{
char *p, *q, *beg = NULL, *end = NULL, *newtext;
if (h->l_text > 3) {
if (strncmp(h->text, "@HD", 3) == 0) {
if ((p = strchr(h->text, '\n')) == 0) return -1;
*p = '\0';
if ((q = strstr(h->text, "\tSO:")) != 0) {
*p = '\n'; // change back
if (strncmp(q + 4, so, p - q - 4) != 0) {
beg = q;
for (q += 4; *q != '\n' && *q != '\t'; ++q);
end = q;
} else return 0; // no need to change
} else beg = end = p, *p = '\n';
}
}
if (beg == NULL) { // no @HD
h->l_text += strlen(so) + 15;
newtext = (char*)malloc(h->l_text + 1);
if (!newtext) return -1;
snprintf(newtext, h->l_text + 1,
"@HD\tVN:1.3\tSO:%s\n%s", so, h->text);
} else { // has @HD but different or no SO
h->l_text = (beg - h->text) + (4 + strlen(so)) + (h->text + h->l_text - end);
newtext = (char*)malloc(h->l_text + 1);
if (!newtext) return -1;
snprintf(newtext, h->l_text + 1, "%.*s\tSO:%s%s",
(int) (beg - h->text), h->text, so, end);
}
free(h->text);
h->text = newtext;
return 0;
}
// Function to compare reads and determine which one is < or > the other
// Handle sort-by-pos and sort-by-name. Used as the secondary sort in bam1_lt_by_tag, if reads are equivalent by tag.
// Returns a value less than, equal to or greater than zero if a is less than,
......@@ -2120,11 +2085,16 @@ int bam_sort_core_ext(int is_by_qname, char* sort_by_tag, const char *fn, const
else
new_so = "coordinate";
if (change_SO(header, new_so) != 0) {
if (sam_hdr_change_HD(header, "SO", new_so) != 0) {
print_error("sort",
"failed to change sort order header to '%s'\n", new_so);
goto err;
}
if (sam_hdr_change_HD(header, "GO", NULL) != 0) {
print_error("sort",
"failed to delete group order header\n");
goto err;
}
// No gain to using the thread pool here as the flow of this code
// is such that we are *either* reading *or* sorting. Hence a shared
......
......@@ -39,7 +39,16 @@ DEALINGS IN THE SOFTWARE. */
#include <ncurses.h>
#elif defined HAVE_CURSES_H
#include <curses.h>
#else
// Have the library, but no header file
#warning "Curses header file not found; tview with curses is disabled."
#undef HAVE_CURSES
#endif
#else
#warning "No curses library is available; tview with curses is disabled."
#endif
#ifdef HAVE_CURSES
typedef struct CursesTview {
tview_t view;
......@@ -340,8 +349,6 @@ tview_t* curses_tv_init(const char *fn, const char *fn_fa, const char *samples,
#else // !HAVE_CURSES
#warning "No curses library is available; tview with curses is disabled."
extern tview_t* text_tv_init(const char *fn, const char *fn_fa, const char *samples,
const htsFormat *fmt);
......
......@@ -110,6 +110,18 @@ static int bamshuf(const char *fn, int n_files, const char *pre, int clevel,
fprintf(stderr, "Couldn't read header for '%s'\n", fn);
goto fail;
}
if (sam_hdr_change_HD(h, "SO", "unsorted") != 0) {
print_error("collate",
"failed to change sort order header to 'unsorted'\n");
goto fail;
}
if (sam_hdr_change_HD(h, "GO", "query") != 0) {
print_error("collate",
"failed to change group order header to 'query'\n");
goto fail;
}
fnt = (char**)calloc(n_files, sizeof(char*));
if (!fnt) goto mem_fail;
fpt = (samFile**)calloc(n_files, sizeof(samFile*));
......
/* bamtk.c -- main samtools command front-end.
Copyright (C) 2008-2017 Genome Research Ltd.
Copyright (C) 2008-2018 Genome Research Ltd.
Author: Heng Li <lh3@sanger.ac.uk>
......@@ -31,7 +31,6 @@ DEALINGS IN THE SOFTWARE. */
#include "htslib/hts.h"
#include "samtools.h"
#include "version.h"
int bam_taf2baf(int argc, char *argv[]);
int bam_mpileup(int argc, char *argv[]);
......@@ -64,10 +63,6 @@ int main_addreplacerg(int argc, char *argv[]);
int faidx_main(int argc, char *argv[]);
int dict_main(int argc, char *argv[]);
const char *samtools_version()
{
return SAMTOOLS_VERSION;
}
static void usage(FILE *fp)
{
......@@ -90,7 +85,6 @@ static void usage(FILE *fp)
" calmd recalculate MD/NM tags and '=' bases\n"
" fixmate fix mate information\n"
" reheader replace BAM header\n"
" rmdup remove PCR duplicates\n"
" targetcut cut fosmid regions (for fosmid pool only)\n"
" addreplacerg adds or replaces RG tags\n"
" markdup mark duplicates\n"
......@@ -201,7 +195,7 @@ int main(int argc, char *argv[])
printf(
"samtools %s\n"
"Using htslib %s\n"
"Copyright (C) 2017 Genome Research Ltd.\n",
"Copyright (C) 2018 Genome Research Ltd.\n",
samtools_version(), hts_version());
}
else if (strcmp(argv[1], "--version-only") == 0) {
......
......@@ -128,6 +128,12 @@ int main_bedcov(int argc, char *argv[])
int tid, beg, end, pos;
bam_mplp_t mplp;
if (str.l == 0 || *str.s == '#') continue; /* empty or comment line */
/* Track and browser lines. Also look for a trailing *space* in
case someone has badly-chosen a chromosome name (it would
be followed by a tab in that case). */
if (strncmp(str.s, "track ", 6) == 0) continue;
if (strncmp(str.s, "browser ", 8) == 0) continue;
for (p = q = str.s; *p && *p != '\t'; ++p);
if (*p != '\t') goto bed_error;
*p = 0; tid = bam_name2id(aux[0]->header, q); *p = '\t';
......
......@@ -31,6 +31,7 @@ DEALINGS IN THE SOFTWARE. */
#include <stdio.h>
#include <errno.h>
#include <zlib.h>
#include "bedidx.h"
#include "htslib/ksort.h"
KSORT_INIT_GENERIC(uint64_t)
......@@ -38,48 +39,91 @@ KSORT_INIT_GENERIC(uint64_t)
#include "htslib/kseq.h"
KSTREAM_INIT(gzFile, gzread, 8192)
/*! @typedef
* @abstract bed_reglist_t - value type of the BED hash table
* This structure encodes the list of intervals (ranges) for the regions provided via BED file or
* command line arguments.
* @field *a pointer to the array of intervals (kept as 64 bit integers). The upper 32 bits
* encode the beginning of the interval, while the lower 32 bits encode the end, for easy sorting.
* |-- 32 bits --|-- 32 bits --|
* |---- beg ----|---- end ----|
* @field n actual number of elements contained by a
* @field m number of allocated elements to a (n <= m)
* @field *idx index array for computing the minimum offset
*/
typedef struct {
int n, m;
uint64_t *a;
int *idx;
int filter;
} bed_reglist_t;
#include "htslib/khash.h"
KHASH_MAP_INIT_STR(reg, bed_reglist_t)
#define LIDX_SHIFT 13
typedef kh_reg_t reghash_t;
void bed_destroy(void *_h);
#if 0
// Debug function
static void bed_print(void *reg_hash) {
reghash_t *h = (reghash_t *)reg_hash;
bed_reglist_t *p;
khint_t k;
int i;
const char *reg;
uint32_t beg, end;
if (!h) {
printf("Hash table is empty!\n");
return;
}
for (k = kh_begin(h); k < kh_end(h); k++) {
if (kh_exist(h,k)) {
reg = kh_key(h,k);
printf("Region: '%s'\n", reg);
if ((p = &kh_val(h,k)) != NULL && p->n > 0) {
printf("Filter: %d\n", p->filter);
for (i=0; i<p->n; i++) {
beg = (uint32_t)(p->a[i]>>32);
end = (uint32_t)(p->a[i]);
printf("\tinterval[%d]: %d-%d\n",i,beg,end);
}
} else {
printf("Region '%s' has no intervals!\n", reg);
}
}
}
}
#endif
int *bed_index_core(int n, uint64_t *a, int *n_idx)
static int *bed_index_core(int n, uint64_t *a)
{
int i, j, m, *idx;
m = *n_idx = 0; idx = 0;
int i, j, l, *idx;
l = 0; idx = 0;
for (i = 0; i < n; ++i) {
int beg, end;
beg = a[i]>>32 >> LIDX_SHIFT; end = ((uint32_t)a[i]) >> LIDX_SHIFT;
if (m < end + 1) {
int oldm = m;
m = end + 1;
kroundup32(m);
idx = realloc(idx, m * sizeof(int));
for (j = oldm; j < m; ++j) idx[j] = -1;
}
if (beg == end) {
if (idx[beg] < 0) idx[beg] = i;
} else {
for (j = beg; j <= end; ++j)
if (idx[j] < 0) idx[j] = i;
if (l < end + 1) {
int old_l = l;
l = end + 1;
kroundup32(l);
idx = realloc(idx, l * sizeof(int));
if (!idx)
return NULL;
for (j = old_l; j < l; ++j)
idx[j] = -1;
}
*n_idx = end + 1;
for (j = beg; j < end+1; ++j)
if (idx[j] < 0)
idx[j] = i;
}
return idx;
}
void bed_index(void *_h)
static void bed_index(void *_h)
{
reghash_t *h = (reghash_t*)_h;
khint_t k;
......@@ -88,23 +132,36 @@ void bed_index(void *_h)
bed_reglist_t *p = &kh_val(h, k);
if (p->idx) free(p->idx);
ks_introsort(uint64_t, p->n, p->a);
p->idx = bed_index_core(p->n, p->a, &p->m);
p->idx = bed_index_core(p->n, p->a);
}
}
}
int bed_overlap_core(const bed_reglist_t *p, int beg, int end)
{
int i, min_off;
if (p->n == 0) return 0;
static int bed_minoff(const bed_reglist_t *p, unsigned int beg, unsigned int end) {
int i, min_off=0;
if (p && p->idx) {
min_off = (beg>>LIDX_SHIFT >= p->n)? p->idx[p->n-1] : p->idx[beg>>LIDX_SHIFT];
if (min_off < 0) { // TODO: this block can be improved, but speed should not matter too much here
int n = beg>>LIDX_SHIFT;
if (n > p->n) n = p->n;
if (n > p->n)
n = p->n;
for (i = n - 1; i >= 0; --i)
if (p->idx[i] >= 0) break;
if (p->idx[i] >= 0)
break;
min_off = i >= 0? p->idx[i] : 0;
}
}
return min_off;
}
static int bed_overlap_core(const bed_reglist_t *p, int beg, int end)
{
int i, min_off;
if (p->n == 0) return 0;
min_off = bed_minoff(p, beg, end);
for (i = min_off; i < p->n; ++i) {
if ((int)(p->a[i]>>32) >= end) break; // out of range; no need to proceed
if ((int32_t)p->a[i] > beg && (int32_t)(p->a[i]>>32) < end)
......@@ -123,6 +180,40 @@ int bed_overlap(const void *_h, const char *chr, int beg, int end)
return bed_overlap_core(&kh_val(h, k), beg, end);
}
/** @brief Trim a sorted interval list, inside a region hash table,
* by removing completely contained intervals and merging adjacent or
* overlapping intervals.
* @param reg_hash the region hash table with interval lists as values
*/
static void bed_unify(void *reg_hash) {
int i, j, new_n;
reghash_t *h;
bed_reglist_t *p;
if (!reg_hash)
return;
h = (reghash_t *)reg_hash;
for (i = kh_begin(h); i < kh_end(h); i++) {
if (!kh_exist(h,i) || !(p = &kh_val(h,i)) || !(p->n))
continue;
for (new_n = 0, j = 1; j < p->n; j++) {
if ((uint32_t)p->a[new_n] < (uint32_t)(p->a[j]>>32)) {
p->a[++new_n] = p->a[j];
} else {
if ((uint32_t)p->a[new_n] < (uint32_t)p->a[j])
p->a[new_n] = (p->a[new_n] & 0xFFFFFFFF00000000) | (uint32_t)(p->a[j]);
}
}
p->n = ++new_n;
}
}
/* "BED" file reader, which actually reads two different formats.
BED files contain between three and nine fields per line, of which
......@@ -218,7 +309,7 @@ void *bed_read(const char *fn)
// Add begin,end to the list
if (p->n == p->m) {
p->m = p->m ? p->m<<1 : 4;
p->a = realloc(p->a, p->m * 8);
p->a = realloc(p->a, p->m * sizeof(uint64_t));
if (NULL == p->a) goto fail;
}
p->a[p->n++] = (uint64_t)beg<<32 | end;
......@@ -230,6 +321,7 @@ void *bed_read(const char *fn)
gzclose(fp);
free(str.s);
bed_index(h);
//bed_unify(h);
return h;
fail:
fprintf(stderr, "[bed_read] Error reading %s : %s\n", fn, strerror(errno));
......@@ -243,8 +335,13 @@ void *bed_read(const char *fn)
void bed_destroy(void *_h)
{
reghash_t *h = (reghash_t*)_h;
reghash_t *h;
khint_t k;
if (!_h)
return;
h = (reghash_t*)_h;
for (k = 0; k < kh_end(h); ++k) {
if (kh_exist(h, k)) {
free(kh_val(h, k).a);
......@@ -254,3 +351,250 @@ void bed_destroy(void *_h)
}
kh_destroy(reg, h);
}
static void *bed_insert(void *reg_hash, char *reg, unsigned int beg, unsigned int end) {
reghash_t *h;
khint_t k;
bed_reglist_t *p;
if (!reg_hash)
return NULL;
h = (reghash_t *)reg_hash;
// Put reg in the hash table if not already there
k = kh_get(reg, h, reg); //looks strange, but only the second reg is the actual region name.
if (k == kh_end(h)) { // absent from the hash table
int ret;
char *s = strdup(reg);
if (NULL == s) goto fail;
k = kh_put(reg, h, s, &ret);
if (-1 == ret) {
free(s);
goto fail;
}
memset(&kh_val(h, k), 0, sizeof(bed_reglist_t));
}
p = &kh_val(h, k);
// Add beg and end to the list
if (p->n == p->m) {
p->m = p->m ? p->m<<1 : 4;
p->a = realloc(p->a, p->m * sizeof(uint64_t));
if (NULL == p->a) goto fail;
}
p->a[p->n++] = (uint64_t)beg<<32 | end;
fail:
return h;
}
/* @brief Filter a region hash table (coming from the BED file) by another
* region hash table (coming from CLI), so that only intervals contained in
* both hash tables are kept.
* @param reg_hash the target region hash table
* @param tmp_hash the filter region hash table
* @return pointer to the filtered hash table
*/
static void *bed_filter(void *reg_hash, void *tmp_hash) {
reghash_t *h;
reghash_t *t;
bed_reglist_t *p, *q;
khint_t l, k;
uint64_t *new_a;
int i, j, new_n, min_off;
const char *reg;
uint32_t beg, end;
h = (reghash_t *)reg_hash;
t = (reghash_t *)tmp_hash;
if (!h)
return NULL;
if (!t)
return h;
for (l = kh_begin(t); l < kh_end(t); l++) {
if (!kh_exist(t,l) || !(q = &kh_val(t,l)) || !(q->n))
continue;
reg = kh_key(t,l);
k = kh_get(reg, h, reg); //looks strange, but only the second reg is a proper argument.
if (k == kh_end(h) || !(p = &kh_val(h, k)) || !(p->n))
continue;
new_a = (uint64_t *)calloc(q->n + p->n, sizeof(uint64_t));
if (!new_a)
return NULL;
new_n = 0;
for (i = 0; i < q->n; i++) {
beg = (uint32_t)(q->a[i]>>32);
end = (uint32_t)(q->a[i]);
min_off = bed_minoff(p, beg, end);
for (j = min_off; j < p->n; ++j) {
if ((uint32_t)(p->a[j]>>32) >= end) break; // out of range; no need to proceed
if ((uint32_t)(p->a[j]) > beg && (uint32_t)(p->a[j]>>32) < end) {
new_a[new_n++] = ((uint64_t)MAX((uint32_t)(p->a[j]>>32), beg) << 32) | MIN((uint32_t)p->a[j], end);
}
}
}
if (new_n > 0) {
free(p->a);
p->a = new_a;
p->n = new_n;
p->m = new_n;
p->filter = FILTERED;
} else {
free(new_a);
p->filter = ALL;
}
}
return h;
}
void *bed_hash_regions(void *reg_hash, char **regs, int first, int last, int *op) {
reghash_t *h = (reghash_t *)reg_hash;
reghash_t *t = NULL;
int i;
char reg[1024];
const char *q;
int beg, end;
if (h) {
t = kh_init(reg);
if (!t) {
fprintf(stderr, "Error when creating the temporary region hash table!\n");
return NULL;
}
} else {
h = kh_init(reg);
if (!h) {
fprintf(stderr, "Error when creating the region hash table!\n");
return NULL;
}
*op = 1;
}
for (i=first; i<last; i++) {
q = hts_parse_reg(regs[i], &beg, &end);
if (q) {
if ((int)(q - regs[i] + 1) > 1024) {
fprintf(stderr, "Region name '%s' is too long (bigger than %d).\n", regs[i], 1024);
continue;
}
strncpy(reg, regs[i], q - regs[i]);
reg[q - regs[i]] = 0;
} else {
// not parsable as a region, but possibly a sequence named "foo:a"
if (strlen(regs[i]) + 1 > 1024) {
fprintf(stderr, "Region name '%s' is too long (bigger than %d).\n", regs[i], 1024);
continue;
}
strcpy(reg, regs[i]);
beg = 0; end = INT_MAX;
}
//if op==1 insert reg to the bed hash table
if (*op && !(bed_insert(h, reg, beg, end))) {
fprintf(stderr, "Error when inserting region='%s' in the bed hash table at address=%p!\n", regs[i], h);
}
//if op==0, first insert the regions in the temporary hash table,
//then filter the bed hash table using it
if (!(*op) && !(bed_insert(t, reg, beg, end))) {
fprintf(stderr, "Error when inserting region='%s' in the temporary hash table at address=%p!\n", regs[i], t);
}
}
if (!(*op)) {
bed_index(t);
bed_unify(t);
h = bed_filter(h, t);
bed_destroy(t);
}
if (h) {
bed_index(h);
bed_unify(h);
}
return h;
}
const char* bed_get(void *reg_hash, int i, int filter) {
reghash_t *h;
bed_reglist_t *p;
if (!reg_hash)
return NULL;
h = (reghash_t *)reg_hash;
if (!kh_exist(h,i) || !(p = &kh_val(h,i)) || (p->filter < filter))
return NULL;
return kh_key(h, i);
}
hts_reglist_t *bed_reglist(void *reg_hash, int filter, int *n_reg) {
reghash_t *h;
bed_reglist_t *p;
khint_t i;
hts_reglist_t *reglist = NULL;
int count = 0;
int j;
if (!reg_hash)
return NULL;
h = (reghash_t *)reg_hash;
for (i = kh_begin(h); i < kh_end(h); i++) {
if (!kh_exist(h,i) || !(p = &kh_val(h,i)) || (p->filter < filter))
continue;
count++;
}
if (!count)
return NULL;
reglist = (hts_reglist_t *)calloc(count, sizeof(hts_reglist_t));
if (!reglist)
return NULL;
*n_reg = count;
count = 0;
for (i = kh_begin(h); i < kh_end(h) && count < *n_reg; i++) {
if (!kh_exist(h,i) || !(p = &kh_val(h,i)) || (p->filter < filter))
continue;
reglist[count].reg = kh_key(h,i);
reglist[count].intervals = (hts_pair32_t *)calloc(p->n, sizeof(hts_pair32_t));
if(!(reglist[count].intervals)) {
hts_reglist_free(reglist, count);
return NULL;
}
reglist[count].count = p->n;
reglist[count].max_end = 0;
for (j = 0; j < p->n; j++) {
reglist[count].intervals[j].beg = (uint32_t)(p->a[j]>>32);
reglist[count].intervals[j].end = (uint32_t)(p->a[j]);
if (reglist[count].intervals[j].end > reglist[count].max_end)
reglist[count].max_end = reglist[count].intervals[j].end;
}
count++;
}
return reglist;
}
#ifndef BEDIDX_H
#define BEDIDX_H
#include "htslib/hts.h"
#define LIDX_SHIFT 13
#define ALL 0
#define FILTERED 1
#define MIN(A,B) ( ( (A) < (B) ) ? (A) : (B) )
#define MAX(A,B) ( ( (A) > (B) ) ? (A) : (B) )
void *bed_read(const char *fn);
void bed_destroy(void *_h);
int bed_overlap(const void *_h, const char *chr, int beg, int end);
void *bed_hash_regions(void *reg_hash, char **regs, int first, int last, int *op);
const char* bed_get(void *reg_hash, int index, int filter);
hts_reglist_t *bed_reglist(void *reg_hash, int filter, int *count_regs);
#endif
......@@ -123,7 +123,7 @@ static void process_cns(bam_hdr_t *h, int tid, int l, uint16_t *cns)
s = b[i]>>s&1;
}
// print
for (i = 0, s = -1; i <= l; ++i) {
for (i = 0, s = -1; i < INT_MAX && i <= l; ++i) {
if (i == l || ((b[i]>>2&3) == 0 && s >= 0)) {
if (s >= 0) {
int j;
......
samtools (1.7-1) UNRELEASED; urgency=medium
* New upstream version
TODO: Adpapt patches
* Standards-Version: 4.1.3
-- Andreas Tille <tille@debian.org> Wed, 14 Feb 2018 09:50:58 +0100
samtools (1.6-4) unstable; urgency=medium
* Reduce per-thread memory limit of sort tests
......
......@@ -16,7 +16,7 @@ Build-Depends: debhelper (>= 10),
pkg-config,
tabix (>= 1.0)
# tabix is needed for the regression tests.
Standards-Version: 4.1.2
Standards-Version: 4.1.3
Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/samtools.git
Vcs-Git: https://anonscm.debian.org/git/debian-med/samtools.git
Homepage: http://www.htslib.org/
......
......@@ -71,8 +71,8 @@ static void write_dict(const char *fn, args_t *args)
if (args->header) fprintf(out, "@HD\tVN:1.0\tSO:unsorted\n");
while ((l = kseq_read(seq)) >= 0) {
for (i = k = 0; i < seq->seq.l; ++i) {
if (islower(seq->seq.s[i])) seq->seq.s[k++] = toupper(seq->seq.s[i]);
else if (isupper(seq->seq.s[i])) seq->seq.s[k++] = seq->seq.s[i];
if (seq->seq.s[i] >= '!' && seq->seq.s[i] <= '~')
seq->seq.s[k++] = toupper(seq->seq.s[i]);
}
hts_md5_reset(md5);
hts_md5_update(md5, (unsigned char*)seq->seq.s, k);
......
LZ4 Library
Copyright (c) 2011-2016, Yann Collet
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
This diff is collapsed.