Skip to content
Commits on Source (9)
src/*.o
dwgsim*
sudo: required
language: c
compiler:
- clang
- gcc
script: make && make test
......@@ -71,3 +71,7 @@ dist:clean
tar -vcf dwgsim-${PACKAGE_VERSION}.tar dwgsim-${PACKAGE_VERSION}; \
gzip -9 dwgsim-${PACKAGE_VERSION}.tar; \
rm -rv dwgsim-${PACKAGE_VERSION};
test:
if [ -d tmp ]; then rm -r tmp; fi
/bin/bash testdata/test.sh
[![Build Status](https://travis-ci.org/nh13/DWGSIM.svg?branch=master)](https://travis-ci.org/nh13/DWGSIM)
Welcome to DWGSIM.
Please see the file LICENSE for details.
Please see the file INSTALL for installation instructions;
This software package has limited support since it is in active development.
This software package has limited support since it is not longer in active development.
Please use the following command when cloning this repository:
```git clone --recursive```
Please see the following page for more details:
https://sourceforge.net/apps/mediawiki/dnaa/index.php?title=Whole_Genome_Simulation
https://github.com/nh13/DWGSIM/wiki
dwgsim (0.1.12-1) unstable; urgency=medium
* Team upload.
* New upstream version
* cme fix dpkg-control
* debhelper 11
* build time test needs example data from samtools
* Upstream converted README to README.md -> install html
* hardening=+all
-- Andreas Tille <tille@debian.org> Thu, 29 Mar 2018 10:25:11 +0200
dwgsim (0.1.11-3) unstable; urgency=medium
[Andreas Tille]
......
README.html
9
11
\ No newline at end of file
Source: dwgsim
Section: science
Priority: optional
Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.org>
Uploaders: Kevin Murray <spam@kdmurray.id.au>
Build-Depends: debhelper (>= 9),
Section: science
Priority: optional
Build-Depends: debhelper (>= 11~),
samtools (>= 1.7-2),
python-markdown,
zlib1g-dev
Standards-Version: 3.9.6
Standards-Version: 4.1.3
Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/dwgsim.git
Vcs-Git: https://anonscm.debian.org/git/debian-med/dwgsim.git
Homepage: https://github.com/nh13/DWGSIM/
Vcs-Git: git://anonscm.debian.org/debian-med/dwgsim.git
Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/dwgsim.git/
Package: dwgsim
Architecture: any
......
......@@ -4,5 +4,15 @@
DPKG_EXPORT_BUILDFLAGS = 1
include /usr/share/dpkg/default.mk
export DEB_BUILD_MAINT_OPTIONS=hardening=+all
%:
dh $@ --parallel
dh $@
override_dh_auto_build:
dh_auto_build
markdown_py -f README.html README.md
override_dh_auto_test:
ln -s /usr/share/doc/samtools/examples samtools
dh_auto_test
#!/bin/perl
#!/usr/bin/env perl
use strict;
use warnings;
......
#!/usr/bin/perl -w
#!/usr/bin/env perl -w
# Copied from SAMtools (http://samtools.sourceforge.net)
......
......@@ -50,6 +50,8 @@
#include "dwgsim.h"
//#include <config.h>
#define QUAL_MAX 40
uint8_t nst_nt4_table[256] = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
......@@ -95,18 +97,18 @@ uint8_t nst_nt4_table[256] = {
n_indel_first[x]++; \
if(1 == mut_get_ins(currseq, i, &n, &ins)) { \
if(0 == strand[x]) { \
if(k < s[x]) tmp_seq[x][k++] = c & 0xf; \
while(n > 0 && k < s[x]) { \
tmp_seq[x][k++] = ins & 0x3; \
--n, ins >>= 2; \
} \
if(k < s[x]) tmp_seq[x][k++] = c & 0xf; \
} else { \
tmp_seq[x][k++] = c & 0xf; \
while(n > 0 && k < s[x]) { \
ext_coor[x]++; \
tmp_seq[x][k++] = (ins >> ((n-1) << 1) & 0x3); \
--n; \
} \
if (k < s[x]) tmp_seq[x][k++] = c & 0xf; \
} \
} else { \
int32_t byte_index, bit_index; \
......@@ -114,7 +116,8 @@ uint8_t nst_nt4_table[256] = {
uint8_t *insertion = NULL; \
insertion = mut_get_ins_long_n(currseq->ins[ins], &num_ins); \
if(0 == strand[x]) { \
byte_index = mut_packed_len(num_ins) - 1; bit_index = 3 - (num_ins & 3); \
byte_index = mut_packed_len(num_ins) - 1; bit_index = (num_ins-1) & 0x3 ; \
if(k < s[x]) tmp_seq[x][k++] = c & 0xf; \
while(num_ins > 0 && k < s[x]) { \
assert(0 <= byte_index); \
tmp_seq[x][k++] = (insertion[byte_index] >> (bit_index << 1)) & 0x3; \
......@@ -125,9 +128,7 @@ uint8_t nst_nt4_table[256] = {
byte_index--; \
} \
} \
if(k < s[x]) tmp_seq[x][k++] = c & 0xf; \
} else { \
tmp_seq[x][k++] = c & 0xf; \
byte_index = 0; bit_index = 0; \
while(num_ins > 0 && k < s[x]) { \
ext_coor[x]++; \
......@@ -139,6 +140,7 @@ uint8_t nst_nt4_table[256] = {
byte_index++; \
} \
} \
if (k < s[x]) tmp_seq[x][k++] = c & 0xf; \
} \
} \
} \
......@@ -419,7 +421,7 @@ void dwgsim_core(dwgsim_opt_t * opt)
{
seq_t seq;
mutseq_t *mutseq[2]={NULL,NULL};
uint64_t tot_len, ii=0, ctr=0;
uint64_t tot_len, ii=0, ctr=0, rand_ii=0;
int i, l, m, n_ref, contig_i;
char name[1024], *qstr;
int32_t name_len_max=0;
......@@ -486,7 +488,7 @@ void dwgsim_core(dwgsim_opt_t * opt)
}
}
}
fprintf(stderr, "[dwgsim_core] %d sequences, total length: %llu\n", n_ref, (long long)tot_len);
fprintf(stderr, "[dwgsim_core] %d sequences, total length: %llu\n", n_ref, (unsigned long long)tot_len);
rewind(opt->fp_fa);
mut_print_header_post(opt->fp_vcf);
......@@ -841,12 +843,16 @@ void dwgsim_core(dwgsim_opt_t * opt)
}
else {
for (i = 0; i < s[j]; ++i) {
qstr[i] = (int)(-10.0 * log(e[j]->start + e[j]->by*i) / log(10.0) + 0.499) + 33;
if (e[j]->start+e[j]->by*i>0) {
qstr[i] = (int)(-10.0 * log(e[j]->start + e[j]->by*i) / log(10.0) + 0.499) + '!';
} else {
qstr[i] = QUAL_MAX + '!';
}
if(0 < opt->quality_std) {
qstr[i] += (int)((ran_normal() * opt->quality_std) + 0.5);
if(qstr[i] < 33) qstr[i] = 33;
if(73 < qstr[i]) qstr[i] = 73;
}
if(qstr[i] < '!') qstr[i] = '!';
if(QUAL_MAX + '!' < qstr[i]) qstr[i] = QUAL_MAX + '!';
}
}
qstr[i] = 0;
......@@ -859,7 +865,7 @@ void dwgsim_core(dwgsim_opt_t * opt)
name, ext_coor[0]+1, ext_coor[1]+1, strand[0], strand[1], 0, 0,
n_err[0], n_sub[0], n_indel[0],
n_err[1], n_sub[1],n_indel[1],
(long long)ii, j+1);
(unsigned long long)ii, j+1);
for (i = 0; i < s[j]; ++i)
fputc("ACGTN"[(int)tmp_seq[j][i]], fpo);
fprintf(fpo, "\n+\n%s\n", qstr);
......@@ -877,7 +883,7 @@ void dwgsim_core(dwgsim_opt_t * opt)
name, ext_coor[0]+1, ext_coor[1]+1, strand[0], strand[1], 0, 0,
n_err[0] - n_err_first[0], n_sub[0] - n_sub_first[0], n_indel[0] - n_indel_first[0],
n_err[1] - n_err_first[1], n_sub[1] - n_sub_first[1], n_indel[1] - n_indel_first[1],
(long long)ii, 2 - j);
(unsigned long long)ii, 2 - j);
//fputc('A', fpo);
for (i = 1; i < s[j]; ++i)
fputc("ACGTN"[(int)tmp_seq[j][i]], fpo);
......@@ -893,7 +899,7 @@ void dwgsim_core(dwgsim_opt_t * opt)
(NULL == opt->read_prefix) ? "" : "_",
name, ext_coor[0]+1, ext_coor[1]+1, strand[0], strand[1], 0, 0,
n_err[0], n_sub[0], n_indel[0], n_err[1], n_sub[1], n_indel[1],
(long long)ii);
(unsigned long long)ii);
if(ILLUMINA == opt->data_type || IONTORRENT == opt->data_type) {
for (i = 0; i < s[j]; ++i)
fputc("ACGTN"[(int)tmp_seq[j][i]], opt->fp_bfast);
......@@ -909,7 +915,6 @@ void dwgsim_core(dwgsim_opt_t * opt)
fprintf(opt->fp_bfast, "\n");
}
}
n_sim++;
}
else { // random DNA read
for(j=0;j<2;j++) {
......@@ -931,12 +936,16 @@ void dwgsim_core(dwgsim_opt_t * opt)
}
else {
for (i = 0; i < s[j]; ++i) {
qstr[i] = (int)(-10.0 * log(e[j]->start + e[j]->by*i) / log(10.0) + 0.499) + 33;
if (e[j]->start+e[j]->by*i>0) {
qstr[i] = (int)(-10.0 * log(e[j]->start + e[j]->by*i) / log(10.0) + 0.499) + '!';
} else {
qstr[i] = QUAL_MAX + '!';
}
if(0 < opt->quality_std) {
qstr[i] += (int)((ran_normal() * opt->quality_std) + 0.5);
if(qstr[i] < 33) qstr[i] = 33;
if(73 < qstr[i]) qstr[i] = 73;
}
if(qstr[i] < '!') qstr[i] = '!';
if(QUAL_MAX + '!' < qstr[i]) qstr[i] = '!';
}
}
qstr[i] = 0;
......@@ -959,7 +968,7 @@ void dwgsim_core(dwgsim_opt_t * opt)
(NULL == opt->read_prefix) ? "" : "_",
"rand", 0, 0, 0, 0, 1, 1,
0, 0, 0, 0, 0, 0,
(long long)ii,
(unsigned long long)rand_ii,
j+1);
for (i = 0; i < s[j]; ++i)
fputc("ACGTN"[(int)tmp_seq[j][i]], fpo);
......@@ -977,7 +986,8 @@ void dwgsim_core(dwgsim_opt_t * opt)
(NULL == opt->read_prefix) ? "" : "_",
"rand", 0, 0, 0, 0, 1, 1,
0, 0, 0, 0, 0, 0,
(long long)ii, 2 - j);
(unsigned long long)rand_ii,
2 - j);
//fputc('A', fpo);
for (i = 1; i < s[j]; ++i)
fputc("ACGTN"[(int)tmp_seq[j][i]], fpo);
......@@ -993,7 +1003,7 @@ void dwgsim_core(dwgsim_opt_t * opt)
(NULL == opt->read_prefix) ? "" : "_",
"rand", 0, 0, 0, 0, 1, 1,
0, 0, 0, 0, 0, 0,
(long long)ii);
(unsigned long long)rand_ii);
if(ILLUMINA == opt->data_type || IONTORRENT == opt->data_type) {
for (i = 0; i < s[j]; ++i)
fputc("ACGTN"[(int)tmp_seq[j][i]], opt->fp_bfast);
......@@ -1009,8 +1019,9 @@ void dwgsim_core(dwgsim_opt_t * opt)
fprintf(opt->fp_bfast, "\n");
}
}
n_sim++;
rand_ii++;
}
n_sim++;
}
fprintf(stderr, "\r[dwgsim_core] %llu",
(unsigned long long int)ctr);
......
......@@ -614,28 +614,30 @@ dwgsim_eval_counts_print(dwgsim_eval_counts_t *counts, int32_t a, int32_t d, int
// header
fprintf(stdout, "# thr | the minimum %s threshold\n", (0 == a) ? "mapping quality" : "alignment score");
fprintf(stdout, "# mc | the number of reads mapped correctly that should be mapped at the threshold\n");
fprintf(stdout, "# mi | the number of reads mapped incorrectly that should be mapped be mapped at the threshold\n");
fprintf(stdout, "# mu | the number of reads unmapped that should be mapped be mapped at the threshold\n");
fprintf(stdout, "# mc | the number of correctly mapped reads that should be mapped at the threshold\n");
fprintf(stdout, "# mi | the number of incorrectly mapped reads that should be mapped at the threshold\n");
fprintf(stdout, "# mu | the number of unmapped reads that should be mapped at the threshold\n");
fprintf(stdout, "# um | the number of reads mapped that should be unmapped be mapped at the threshold\n");
fprintf(stdout, "# uu | the number of reads unmapped that should be unmapped be mapped at the threshold\n");
fprintf(stdout, "# mc' + mi' + mu' + um' + uu' | the total number of reads mapped at the threshold\n");
fprintf(stdout, "# mc' | the number of reads mapped correctly that should be mapped at or greater than that threshold\n");
fprintf(stdout, "# mi' | the number of reads mapped incorrectly that should be mapped be mapped at or greater than that threshold\n");
fprintf(stdout, "# um | the number of mapped reads that should be unmapped at the threshold\n");
fprintf(stdout, "# uu | the number of unmapped reads that should be unmapped at the threshold\n");
fprintf(stdout, "# mu' | the number of reads unmapped that should be mapped be mapped at or greater than that threshold\n");
fprintf(stdout, "# mc + mi + mu + um + uu | the total number of reads at the threshold\n");
fprintf(stdout, "# um' | the number of reads mapped that should be unmapped be mapped at or greater than that threshold\n");
fprintf(stdout, "# uu' | the number of reads unmapped that should be unmapped be mapped at or greater than that threshold\n");
fprintf(stdout, "# mc' + mi' + mu' + um' + uu' | the total number of reads mapped at or greater than the threshold\n");
fprintf(stdout, "# mc' | the number of correctly mapped reads that should be mapped at or greater than that threshold\n");
fprintf(stdout, "# mi' | the number of incorrectly mapped reads that should be mapped at or greater than that threshold\n");
fprintf(stdout, "# mu' | the number of unmapped reads that should be mapped at or greater than that threshold\n");
fprintf(stdout, "# (mc / (mc' + mi' + mu')) | sensitivity: the fraction of reads that should be mapped that are mapped correctly at the threshold\n");
fprintf(stdout, "# (mc / mc' + mi') | positive predictive value: the fraction of mapped reads that are mapped correctly at the threshold\n");
fprintf(stdout, "# um' | the number of mapped reads that should be unmapped at or greater than that threshold\n");
fprintf(stdout, "# uu' | the number of unmapped reads that should be unmapped at or greater than that threshold\n");
fprintf(stdout, "# mc' + mi' + mu' + um' + uu' | the total number of reads at or greater than the threshold\n");
fprintf(stdout, "# (mc / (mc' + mi' + mu')) | sensitivity: the fraction of mappable reads that are mapped correctly at the threshold\n");
fprintf(stdout, "# (mc / (mc' + mi')) | positive predictive value: the fraction of mapped mappable reads that are mapped correctly at the threshold\n");
fprintf(stdout, "# (um / (um' + uu')) | false discovery rate: the fraction of random reads that are mapped at the threshold\n");
fprintf(stdout, "# (mc' / (mc' + mi' + mu')) | sensitivity: the fraction of reads that should be mapped that are mapped correctly at or greater than the threshold\n");
fprintf(stdout, "# (mc' / mc' + mi') | positive predictive value: the fraction of mapped reads that are mapped correctly at or greater than the threshold\n");
fprintf(stdout, "# (mc' / (mc' + mi' + mu')) | sensitivity: the fraction of mappable reads that are mapped correctly at or greater than the threshold\n");
fprintf(stdout, "# (mc' / (mc' + mi')) | positive predictive value: the fraction of mapped mappable reads that are mapped correctly at or greater than the threshold\n");
fprintf(stdout, "# (um' / (um' + uu')) | false discovery rate: the fraction of random reads that are mapped at or greater than the threshold\n");
......
......@@ -163,19 +163,22 @@ static void get_error_rate(const char *str, error_t *e)
}
int32_t
dwgsim_opt_is_int(char *optarg)
dwgsim_opt_is_int(char *optarg, int32_t neg_ok)
{
int32_t i;
for(i=0;i<strlen(optarg);i++) {
int32_t len = strlen(optarg);
if (len == 0) return 0;
if ('+' != optarg[0] && (neg_ok == 0 || '-' != optarg[0]) && !isdigit(optarg[0])) return 0;
for(i=1;i<len;i++) {
if(!isdigit(optarg[i])) return 0;
}
return 1;
}
int32_t
dwgsim_atoi(char *optarg, char flag)
dwgsim_atoi(char *optarg, char flag, int32_t neg_ok)
{
if(0 == dwgsim_opt_is_int(optarg)) {
if(0 == dwgsim_opt_is_int(optarg, neg_ok)) {
fprintf(stderr, "Error: command line option -%c is not a number [%s]\n", flag, optarg);
exit(1);
}
......@@ -192,22 +195,22 @@ dwgsim_opt_parse(dwgsim_opt_t *opt, int argc, char *argv[])
while ((c = getopt(argc, argv, "id:s:N:C:1:2:e:E:r:F:R:X:I:c:S:n:y:BHf:z:Mm:b:v:x:P:q:Q:h")) >= 0) {
switch (c) {
case 'i': opt->is_inner = 1; break;
case 'd': opt->dist = dwgsim_atoi(optarg, 'd'); break;
case 'd': opt->dist = dwgsim_atoi(optarg, 'd', 0); break;
case 's': opt->std_dev = atof(optarg); break;
case 'N': opt->N = dwgsim_atoi(optarg, 'N'); opt->C = -1; break;
case 'N': opt->N = dwgsim_atoi(optarg, 'N', 1); opt->C = -1; break;
case 'C': opt->C = atof(optarg); opt->N = -1; break;
case '1': opt->length[0] = dwgsim_atoi(optarg, '1'); break;
case '2': opt->length[1] = dwgsim_atoi(optarg, '2'); break;
case '1': opt->length[0] = dwgsim_atoi(optarg, '1', 0); break;
case '2': opt->length[1] = dwgsim_atoi(optarg, '2', 0); break;
case 'e': get_error_rate(optarg, &opt->e[0]); break;
case 'E': get_error_rate(optarg, &opt->e[1]); break;
case 'r': opt->mut_rate = atof(optarg); break;
case 'F': opt->mut_freq = atof(optarg); break;
case 'R': opt->indel_frac = atof(optarg); break;
case 'X': opt->indel_extend = atof(optarg); break;
case 'I': opt->indel_min = dwgsim_atoi(optarg, 'I'); break;
case 'c': opt->data_type = dwgsim_atoi(optarg, 'c'); break;
case 'S': opt->strandedness = dwgsim_atoi(optarg, 'S'); break;
case 'n': opt->max_n = dwgsim_atoi(optarg, 'n'); break;
case 'I': opt->indel_min = dwgsim_atoi(optarg, 'I', 0); break;
case 'c': opt->data_type = dwgsim_atoi(optarg, 'c', 0); break;
case 'S': opt->strandedness = dwgsim_atoi(optarg, 'S', 0); break;
case 'n': opt->max_n = dwgsim_atoi(optarg, 'n', 0); break;
case 'y': opt->rand_read = atof(optarg); break;
case 'f':
if(NULL != opt->flow_order) free(opt->flow_order);
......@@ -216,7 +219,7 @@ dwgsim_opt_parse(dwgsim_opt_t *opt, int argc, char *argv[])
case 'B': opt->use_base_error = 1; break;
case 'H': opt->is_hap = 1; break;
case 'h': return 0;
case 'z': opt->seed = dwgsim_atoi(optarg, 'n'); break;
case 'z': opt->seed = dwgsim_atoi(optarg, 'z', 1); break;
case 'M': opt->muts_only = 1; break;
case 'm': free(opt->fn_muts_input); opt->fn_muts_input = strdup(optarg); opt->fn_muts_input_type = MUT_INPUT_TXT; muts_input_type |= 0x1; break;
case 'b': free(opt->fn_muts_input); opt->fn_muts_input = strdup(optarg); opt->fn_muts_input_type = MUT_INPUT_BED; muts_input_type |= 0x2; break;
......@@ -293,6 +296,18 @@ dwgsim_opt_parse(dwgsim_opt_t *opt, int argc, char *argv[])
fprintf(stderr, "Warning: remember to use the -P option with dwgsim_eval\n");
}
if(0 < opt->length[1]){ //paired end / mate pair
int s = opt->length[0] + opt->length[1] + opt->is_inner;
double r = (s-opt->dist)*1.0/opt->std_dev;
if(r>6){
fprintf(stderr, "Error: command line option -d is too small for the read length (%d)\n",opt->dist);
return 0;
}
if(r>4){
fprintf(stderr, "Warning: command line option -d is small for the read length (%d). Generation speed could be affected.\n",opt->dist);
}
}
switch(muts_input_type) {
case 0x0:
case 0x1:
......
......@@ -840,7 +840,7 @@ void mut_print(const char *name, const seq_t *seq, mutseq_t *hap1, mutseq_t *hap
}
if (0 < i) fprintf(fpout_vcf, "\t%c", "ACGTN"[nst_nt4_table[(int)seq->s[i-1]]]);
else fprintf(fpout_vcf, "\t.");
fprintf(fpout_vcf, "\t100\tPASS\tAF=1.0;pl=1;mt=DELETE\n");
fprintf(fpout_vcf, "\t100\tPASS\tAF=0.5;pl=1;mt=DELETE\n");
}
// NB: convert back 'c'
c[0] = nst_nt4_table[(int)seq->s[i]];
......@@ -860,7 +860,7 @@ void mut_print(const char *name, const seq_t *seq, mutseq_t *hap1, mutseq_t *hap
}
if (0 < i) fprintf(fpout_vcf, "\t%c", "ACGTN"[nst_nt4_table[(int)seq->s[i-1]]]);
else fprintf(fpout_vcf, "\t.");
fprintf(fpout_vcf, "\t100\tPASS\tAF=1.0;pl=2;mt=DELETE\n");
fprintf(fpout_vcf, "\t100\tPASS\tAF=0.5;pl=2;mt=DELETE\n");
}
// NB: convert back 'c'
c[0] = nst_nt4_table[(int)seq->s[i]];
......