Skip to content
Commits on Source (7)
CFLAGS = -Wall -O3 -std=c99
LIBS = align.o alnfrags.o ankers.o assembly.o chain.o compdna.o compkmers.o compress.o decon.o ef.o filebuff.o frags.o hashmap.o hashmapindex.o hashmapkma.o hashmapkmers.o hashtable.o index.o kma.o kmapipe.o kmers.o kmmap.o loadupdate.o makeindex.o mt1.o nw.o pherror.o printconsensus.o qseqs.o qualcheck.o runinput.o runkma.o sam.o savekmers.o seq2fasta.o seqparse.o shm.o sparse.o spltdb.o stdnuc.o stdstat.o update.o updateindex.o updatescores.o valueshash.o vcf.o
LIBS = align.o alnfrags.o ankers.o assembly.o chain.o compdna.o compkmers.o compress.o decon.o ef.o filebuff.o frags.o hashmap.o hashmapindex.o hashmapkma.o hashmapkmers.o hashtable.o index.o kma.o kmapipe.o kmers.o kmmap.o loadupdate.o makeindex.o mt1.o nw.o pherror.o printconsensus.o qseqs.o qualcheck.o runinput.o runkma.o sam.o savekmers.o seq2fasta.o seqparse.o shm.o sparse.o spltdb.o stdnuc.o stdstat.o tmp.o update.o updateindex.o updatescores.o valueshash.o vcf.o
PROGS = kma kma_index kma_shm kma_update
.c .o:
......@@ -43,7 +43,7 @@ hashmapkma.o: hashmapkma.h pherror.h
hashmapkmers.o: hashmapkmers.h pherror.h
hashtable.o: hashtable.h hashmapkma.h hashmapkmers.h pherror.h
index.o: index.h compress.h decon.h hashmap.h hashmapkma.h loadupdate.h makeindex.h pherror.h stdstat.h version.h
kma.o: kma.h ankers.h assembly.h chain.h hashmapkma.h kmers.h mt1.h penalties.h pherror.h qseqs.h runinput.h runkma.h savekmers.h sparse.h spltdb.h version.h
kma.o: kma.h ankers.h assembly.h chain.h hashmapkma.h kmers.h mt1.h penalties.h pherror.h qseqs.h runinput.h runkma.h savekmers.h sparse.h spltdb.h tmp.h version.h
kmapipe.o: kmapipe.h pherror.h
kmers.o: kmers.h ankers.h compdna.h hashmapkma.h kmapipe.h pherror.h qseqs.h savekmers.h spltdb.h
kmmap.o: kmmap.h hashmapkma.h
......@@ -56,16 +56,17 @@ printconsensus.o: printconsensus.h assembly.h
qseqs.o: qseqs.h pherror.h
qualcheck.o: qualcheck.h compdna.h hashmap.h pherror.h stdnuc.h stdstat.h
runinput.o: runinput.h compdna.h filebuff.h pherror.h qseqs.h seqparse.h
runkma.o: runkma.h align.h alnfrags.h assembly.h chain.h compdna.h ef.h filebuff.h frags.h hashmapindex.h kmapipe.h nw.h pherror.h printconsensus.h qseqs.h stdnuc.h stdstat.h vcf.h
runkma.o: runkma.h align.h alnfrags.h assembly.h chain.h compdna.h ef.h filebuff.h frags.h hashmapindex.h kmapipe.h nw.h pherror.h printconsensus.h qseqs.h stdnuc.h stdstat.h tmp.h vcf.h
sam.o: sam.h nw.h pherror.h qseqs.h runkma.h
savekmers.o: savekmers.h ankers.h compdna.h hashmapkma.h penalties.h pherror.h qseqs.h stdnuc.h stdstat.h threader.h
seq2fasta.o: seq2fasta.h pherror.h qseqs.h runkma.h stdnuc.h
seqparse.o: seqparse.h filebuff.h qseqs.h
shm.o: shm.h pherror.h hashmapkma.h version.h
sparse.o: sparse.h compkmers.h hashtable.h kmapipe.h pherror.h runinput.h savekmers.h stdnuc.h stdstat.h
spltdb.o: spltdb.h align.h alnfrags.h assembly.h chain.h compdna.h ef.h filebuff.h frags.h hashmapindex.h kmapipe.h nw.h pherror.h printconsensus.h qseqs.h runkma.h stdnuc.h stdstat.h vcf.h
spltdb.o: spltdb.h align.h alnfrags.h assembly.h chain.h compdna.h ef.h filebuff.h frags.h hashmapindex.h kmapipe.h nw.h pherror.h printconsensus.h qseqs.h runkma.h stdnuc.h stdstat.h tmp.h vcf.h
stdnuc.o: stdnuc.h
stdstat.o: stdstat.h
tmp.o: tmp.h pherror.h threader.h
update.o: update.h hashmapkma.h pherror.h stdnuc.h
updateindex.o: updateindex.h compdna.h hashmap.h hashmapindex.h pherror.h qualcheck.h stdnuc.h pherror.h
updatescores.o: updatescores.h qseqs.h
......
......@@ -48,6 +48,7 @@ AlnScore KMA(const HashMap_index *template_index, const unsigned char *qseq, int
mask = 0;
mask = (~mask) >> (sizeof(long unsigned) * sizeof(long unsigned) - (kmersize << 1));
key = 0;
/* circular, skip boundaries */
if(min < max) {
min = 0;
......@@ -757,6 +758,7 @@ int anker_rc(const HashMap_index *template_index, unsigned char *qseq, int q_len
if(end == -1) {
end = q_len;
}
if(i < end - kmersize) {
key = makeKmer(qseq, i, kmersize - 1);
i += (kmersize - 1);
......@@ -767,6 +769,7 @@ int anker_rc(const HashMap_index *template_index, unsigned char *qseq, int q_len
while(i < end) {
key = ((key << 2) | qseq[i]) & mask;
value = hashMap_index_get_bound(template_index, key, 0, t_len, shifter);
if(value == 0) {
++i;
} else if(0 < value) {
......@@ -870,6 +873,8 @@ int anker_rc(const HashMap_index *template_index, unsigned char *qseq, int q_len
stop = hashMap_index_getNextDubPos(template_index, key, 0, t_len, stop, shifter);
}
/* add best anker score */
score_r += (bias - i);
i = bias + 1;
/* update position */
......@@ -883,6 +888,7 @@ int anker_rc(const HashMap_index *template_index, unsigned char *qseq, int q_len
}
i = end + 1;
}
if(bestScore < score_r) {
bestScore = score_r;
}
......
......@@ -702,6 +702,7 @@ HashMapKMA * compressKMA_megaDB(HashMap *templates, FILE *out) {
fprintf(stderr, "# Overflow bypassed.\n");
}
free(templates->values);
/* convert valuesHash to a linked list */
table = 0;
i = shmValues->size;
......
kma (1.2.10a-1) unstable; urgency=medium
* New upstream version
* debhelper-compat 12
* Standards-Version: 4.4.0
-- Andreas Tille <tille@debian.org> Thu, 01 Aug 2019 13:58:31 +0200
kma (1.2.6-1) unstable; urgency=medium
* New upstream version
......
......@@ -3,9 +3,9 @@ Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.
Uploaders: Andreas Tille <tille@debian.org>
Section: science
Priority: optional
Build-Depends: debhelper (>= 12~),
Build-Depends: debhelper-compat (= 12),
zlib1g-dev
Standards-Version: 4.3.0
Standards-Version: 4.4.0
Vcs-Browser: https://salsa.debian.org/med-team/kma
Vcs-Git: https://salsa.debian.org/med-team/kam.git
Homepage: https://bitbucket.org/genomicepidemiology/kma
......
......@@ -7,7 +7,7 @@ Description: Propagate hardening options
@@ -1,4 +1,4 @@
-CFLAGS = -Wall -O3 -std=c99
+CFLAGS += -Wall -O3 -std=c99
LIBS = align.o alnfrags.o ankers.o assembly.o chain.o compdna.o compkmers.o compress.o decon.o ef.o filebuff.o frags.o hashmap.o hashmapindex.o hashmapkma.o hashmapkmers.o hashtable.o index.o kma.o kmapipe.o kmers.o kmmap.o loadupdate.o makeindex.o mt1.o nw.o pherror.o printconsensus.o qseqs.o qualcheck.o runinput.o runkma.o sam.o savekmers.o seq2fasta.o seqparse.o shm.o sparse.o spltdb.o stdnuc.o stdstat.o update.o updateindex.o updatescores.o valueshash.o vcf.o
LIBS = align.o alnfrags.o ankers.o assembly.o chain.o compdna.o compkmers.o compress.o decon.o ef.o filebuff.o frags.o hashmap.o hashmapindex.o hashmapkma.o hashmapkmers.o hashtable.o index.o kma.o kmapipe.o kmers.o kmmap.o loadupdate.o makeindex.o mt1.o nw.o pherror.o printconsensus.o qseqs.o qualcheck.o runinput.o runkma.o sam.o savekmers.o seq2fasta.o seqparse.o shm.o sparse.o spltdb.o stdnuc.o stdstat.o tmp.o update.o updateindex.o updatescores.o valueshash.o vcf.o
PROGS = kma kma_index kma_shm kma_update
@@ -8,16 +8,16 @@ PROGS = kma kma_index kma_shm kma_update
......
......@@ -24,6 +24,7 @@
#include "pherror.h"
#include "qseqs.h"
#include "threader.h"
#include "tmp.h"
FILE * printFrags(Frag **alignFrags, int DB_size) {
......@@ -31,7 +32,7 @@ FILE * printFrags(Frag **alignFrags, int DB_size) {
FILE *OUT;
Frag *alignFrag, *next;
if(!(OUT = tmpfile())) {
if(!(OUT = tmpF(0))) {
fprintf(stderr, "Could not create tmp files.\n");
ERROR();
}
......
......@@ -497,9 +497,13 @@ int index_main(int argc, char *argv[]) {
if(templatefilename != 0) {
/* load */
fprintf(stderr, "# Loading database: %s\n", outputfilename);
finalDB = load_DBs(templatefilename, outputfilename, &template_lengths, &template_ulengths, &template_slengths);
finalDB = smalloc(sizeof(HashMapKMA));
kmerindex = load_DBs(templatefilename, outputfilename, &template_lengths, &template_ulengths, &template_slengths, finalDB);
kmersize = finalDB->kmersize;
kmerindex = *template_lengths;
mask = 0;
mask = (~mask) >> (sizeof(long unsigned) * sizeof(long unsigned) - (kmersize << 1));
prefix = finalDB->prefix;
prefix_len = finalDB->prefix_len;
/* determine params based on loaded DB */
if(prefix_len == 0 && prefix == 0) {
......@@ -598,7 +602,6 @@ int index_main(int argc, char *argv[]) {
*template_lengths = 1024;
}
}
fprintf(stderr, "# Indexing databases.\n");
t0 = clock();
makeDB(templates, kmerindex, inputfiles, filecount, outputfilename, appender, to2Bit, MinLen, MinKlen, homQ, homT, &template_lengths, &template_ulengths, &template_slengths);
......
......@@ -41,6 +41,7 @@
#include "sparse.h"
#include "spltdb.h"
#include "stdstat.h"
#include "tmp.h"
#include "vcf.h"
#include "version.h"
......@@ -137,6 +138,7 @@ static void helpMessage(int exeStatus) {
fprintf(helpOut, "#\t-apm\t\tSets both pm and fpm\t\tu\n");
fprintf(helpOut, "#\t-shm\t\tUse shared DB made by kma_shm\t0 (lvl)\n");
fprintf(helpOut, "#\t-mmap\t\tMemory map *.comp.by\n");
fprintf(helpOut, "#\t-tmp\t\tSet directory for temporary files.\n");
fprintf(helpOut, "#\t-1t1\t\tForce end to end mapping\tFalse\n");
fprintf(helpOut, "#\t-ck\t\tCount kmers instead of\n#\t\t\tpseudo alignment\t\tFalse\n");
fprintf(helpOut, "#\t-ca\t\tMake circular alignments\tFalse\n");
......@@ -167,13 +169,13 @@ int kma_main(int argc, char *argv[]) {
static int minPhred, fiveClip, sparse_run, mem_mode, Mt1, ConClave, bcd;
static int fileCounter, fileCounter_PE, fileCounter_INT, targetNum, vcf;
static int extendedFeatures, spltDB, mq, thread_num, kmersize, one2one;
static int ref_fsa, print_matrix, print_all, sam, **d, W1, U, M, MM, PE;
static int ref_fsa, print_matrix, print_all, sam, Ts, Tv, **d;
static unsigned nc, nf, shm, exhaustive;
static char *outputfilename, *templatefilename, **templatefilenames;
static char **inputfiles, **inputfiles_PE, **inputfiles_INT, ss;
static double ID_t, scoreT, evalue;
static Penalties *rewards;
int i, j, args, exe_len, status, size, escape, step1, step2;
int i, j, args, exe_len, status, size, escape, tmp, step1, step2;
unsigned totFrags;
char *to2Bit, *exeBasic, *myTemplatefilename;
double support;
......@@ -223,16 +225,22 @@ int kma_main(int argc, char *argv[]) {
one2one = 0;
ss = 'q';
mem_mode = 0;
M = 1;
MM = -2;
W1 = -3;
U = -1;
PE = 7;
rewards = smalloc(sizeof(Penalties));
rewards->M = 1;
rewards->MM = -2;
rewards->U = -1;
rewards->W1 = -3;
rewards->Wl = -6;
rewards->Mn = -1;
rewards->PE = 7;
Tv = -2;
Ts = -2;
thread_num = 1;
inputfiles_PE = 0;
inputfiles_INT = 0;
inputfiles = 0;
templatefilenames = 0;
tmp = 0;
Mt1 = 0;
inputfiles = 0;
deConPrintPtr = printPtr;
......@@ -618,8 +626,8 @@ int kma_main(int argc, char *argv[]) {
} else if(strcmp(argv[args], "-reward") == 0) {
++args;
if(args < argc) {
M = strtol(argv[args], &exeBasic, 10);
M = abs(M);
rewards->M = strtol(argv[args], &exeBasic, 10);
rewards->M = abs(rewards->M);
if(*exeBasic != 0) {
fprintf(stderr, "Invalid argument at \"-reward\".\n");
exit(4);
......@@ -628,8 +636,8 @@ int kma_main(int argc, char *argv[]) {
} else if(strcmp(argv[args], "-penalty") == 0) {
++args;
if(args < argc) {
MM = strtol(argv[args], &exeBasic, 10);
MM = MIN(-MM, MM);
rewards->MM = strtol(argv[args], &exeBasic, 10);
rewards->MM = MIN(-rewards->MM, rewards->MM);
if(*exeBasic != 0) {
fprintf(stderr, "Invalid argument at \"-penalty\".\n");
exit(4);
......@@ -638,8 +646,8 @@ int kma_main(int argc, char *argv[]) {
} else if(strcmp(argv[args], "-gapopen") == 0) {
++args;
if(args < argc) {
W1 = strtol(argv[args], &exeBasic, 10);
W1 = MIN(-W1, W1);
rewards->W1 = strtol(argv[args], &exeBasic, 10);
rewards->W1 = MIN(-rewards->W1, rewards->W1);
if(*exeBasic != 0) {
fprintf(stderr, "Invalid argument at \"-gapopen\".\n");
exit(4);
......@@ -648,23 +656,71 @@ int kma_main(int argc, char *argv[]) {
} else if(strcmp(argv[args], "-gapextend") == 0) {
++args;
if(args < argc) {
U = strtol(argv[args], &exeBasic, 10);
U = MIN(-U, U);
rewards->U = strtol(argv[args], &exeBasic, 10);
rewards->U = MIN(-rewards->U, rewards->U);
if(*exeBasic != 0) {
fprintf(stderr, "Invalid argument at \"-gapextend\".\n");
exit(4);
}
}
} else if(strcmp(argv[args], "-localopen") == 0) {
/* here */
/* add to help */
++args;
if(args < argc) {
rewards->Wl = strtol(argv[args], &exeBasic, 10);
rewards->Wl = MIN(-rewards->Wl, rewards->Wl);
if(*exeBasic != 0) {
fprintf(stderr, "Invalid argument at \"-localopen\".\n");
exit(4);
}
}
} else if(strcmp(argv[args], "-Npenalty") == 0) {
/* here */
/* add to help */
++args;
if(args < argc) {
rewards->Mn = strtol(argv[args], &exeBasic, 10);
rewards->Mn = MIN(-rewards->Mn, rewards->Mn);
if(*exeBasic != 0) {
fprintf(stderr, "Invalid argument at \"-localopen\".\n");
exit(4);
}
}
} else if(strcmp(argv[args], "-per") == 0) {
++args;
if(args < argc) {
PE = strtol(argv[args], &exeBasic, 10);
PE = abs(PE);
rewards->PE = strtol(argv[args], &exeBasic, 10);
rewards->PE = abs(rewards->PE);
if(*exeBasic != 0) {
fprintf(stderr, "Invalid argument at \"-per\".\n");
exit(4);
}
}
} else if(strcmp(argv[args], "-transition") == 0) {
/* here */
/* add to help */
++args;
if(args < argc) {
Ts = strtol(argv[args], &exeBasic, 10);
Ts = MIN(-Ts, Ts);
if(*exeBasic != 0) {
fprintf(stderr, "Invalid argument at \"-localopen\".\n");
exit(4);
}
}
} else if(strcmp(argv[args], "-transversion") == 0) {
/* here */
/* add to help */
++args;
if(args < argc) {
Tv = strtol(argv[args], &exeBasic, 10);
Tv = MIN(-Tv, Tv);
if(*exeBasic != 0) {
fprintf(stderr, "Invalid argument at \"-localopen\".\n");
exit(4);
}
}
} else if(strcmp(argv[args], "-and") == 0) {
cmp = &cmp_and;
} else if(strcmp(argv[args], "-boot") == 0) {
......@@ -727,11 +783,21 @@ int kma_main(int argc, char *argv[]) {
nf = 1;
} else if(strcmp(argv[args], "-cge") == 0) {
scoreT = 0.75;
M = 1;
MM = -3;
W1 = -5;
U = -1;
PE = 17;
rewards->M = 1;
rewards->MM = -3;
rewards->W1 = -5;
rewards->U = -1;
rewards->PE = 17;
} else if(strcmp(argv[args], "-tmp") == 0) {
tmp = 1;
if(++args < argc) {
if(argv[args][0] != '-') {
tmpF(argv[args]);
tmp = 0;
} else {
--args;
}
}
} else if(strcmp(argv[args], "-spltDB") == 0) {
spltDB = 1;
} else if(strcmp(argv[args], "-status") == 0) {
......@@ -782,40 +848,40 @@ int kma_main(int argc, char *argv[]) {
fprintf(stderr, " Too few arguments handed\n");
fprintf(stderr, " Printing help message:\n");
helpMessage(1);
} else if(tmp) {
/* set tmp files */
tmpF(outputfilename);
}
if(fileCounter == 0 && fileCounter_PE == 0 && fileCounter_INT == 0) {
inputfiles = malloc(sizeof(char*));
if(!inputfiles) {
ERROR();
}
inputfiles = smalloc(sizeof(char*));
inputfiles[0] = "--";
fileCounter = 1;
}
status = 0;
/* set scoring matrix */
d = smalloc(5 * sizeof(int *));
for(i = 0; i < 4; ++i) {
d[i] = smalloc(5 * sizeof(int));
for(j = 0; j < 4; ++j) {
d[i][j] = MM;
}
d[i][i] = M;
}
d[4] = smalloc(5 * sizeof(int));
for(i = 0; i < 5; ++i) {
d[4][i] = U;
d[i][4] = U;
rewards->MM = (Ts + Tv - 1) / 2; /* avg. of transition and transversion, rounded down */
d = smalloc(5 * sizeof(int *) + 25 * sizeof(int));
*d = (int *) (d + 5);
i = 0;
while(i < 4) {
j = 4;
d[i][j] = rewards->Mn;
while(j--) {
d[i][j] = Tv;
}
d[i][(i - 2) < 0 ? (i + 2) : (i - 2)] = Ts;
d[i][i] = rewards->M;
j = i++;
d[i] = d[j] + 5;
}
i = 5;
while(i--) {
d[4][i] = rewards->Mn;
}
d[4][4] = 0;
rewards = smalloc(sizeof(Penalties));
rewards->d = (int **) d;
rewards->W1 = W1;
rewards->U = U;
rewards->M = M;
rewards->MM = MM;
rewards->PE = PE;
if(spltDB && targetNum != 1) {
/* allocate space for commands */
......@@ -827,8 +893,8 @@ int kma_main(int argc, char *argv[]) {
} else if(escape) {
size += 2;
}
size += strlen(argv[i]);
if(strncmp(argv[i], "-i", 2) == 0) {
size += strlen(argv[args]);
if(strncmp(argv[args], "-i", 2) == 0) {
escape = 1;
}
}
......
......@@ -148,17 +148,15 @@ unsigned ** hashMapKMA_openValues(HashMapKMA *src) {
return Values;
}
HashMapKMA * load_DBs(char *templatefilename, char *outputfilename, unsigned **template_lengths, unsigned **template_ulengths, unsigned **template_slengths) {
unsigned load_DBs(char *templatefilename, char *outputfilename, unsigned **template_lengths, unsigned **template_ulengths, unsigned **template_slengths, HashMapKMA *finalDB) {
int file_len, out_len, DB_size;
int file_len, out_len, DB_size, kmerindex;
FILE *infile;
HashMapKMA *finalDB;
file_len = strlen(templatefilename);
out_len = strlen(outputfilename);
/* load hash */
finalDB = smalloc(sizeof(HashMapKMA));
strcat(templatefilename, ".comp.b");
infile = sfopen(templatefilename, "rb");
if(hashMapKMAload(finalDB, infile)) {
......@@ -178,13 +176,14 @@ HashMapKMA * load_DBs(char *templatefilename, char *outputfilename, unsigned **t
}
templatefilename[file_len] = 0;
fread(&DB_size, sizeof(unsigned), 1, infile);
if(finalDB->prefix_len) {
if(finalDB->prefix) {
*template_lengths = smalloc((DB_size << 1) * sizeof(unsigned));
*template_slengths = smalloc((DB_size << 1) * sizeof(unsigned));
*template_ulengths = smalloc((DB_size << 1) * sizeof(unsigned));
fread(*template_lengths, sizeof(unsigned), DB_size, infile);
fread(*template_slengths, sizeof(unsigned), DB_size, infile);
fread(*template_ulengths, sizeof(unsigned), DB_size, infile);
fread(*template_lengths, sizeof(unsigned), DB_size, infile);
kmerindex = **template_slengths;
**template_ulengths = DB_size << 1;
**template_slengths = DB_size << 1;
} else {
......@@ -192,6 +191,7 @@ HashMapKMA * load_DBs(char *templatefilename, char *outputfilename, unsigned **t
*template_slengths = 0;
*template_ulengths = 0;
fread(*template_lengths, sizeof(unsigned), DB_size, infile);
kmerindex = **template_lengths;
**template_lengths = DB_size << 1;
}
fclose(infile);
......@@ -222,5 +222,5 @@ HashMapKMA * load_DBs(char *templatefilename, char *outputfilename, unsigned **t
}
templatefilename[file_len] = 0;
return finalDB;
return kmerindex;
}
......@@ -24,4 +24,4 @@ void * memdup(const void * src, size_t size);
int CP(char *templatefilename, char *outputfilename);
HashMap * hashMapKMA_openChains(HashMapKMA *src);
unsigned ** hashMapKMA_openValues(HashMapKMA *src);
HashMapKMA * load_DBs(char *templatefilename, char *outputfilename, unsigned **template_lengths, unsigned **template_ulengths, unsigned **template_slengths);
unsigned load_DBs(char *templatefilename, char *outputfilename, unsigned **template_lengths, unsigned **template_ulengths, unsigned **template_slengths, HashMapKMA *finalDB);
......@@ -71,7 +71,6 @@ void makeDB(HashMap *templates, int kmerindex, char **inputfiles, int fileCount,
seq_out = sfopen(outputfilename, "wb");
outputfilename[file_len] = 0;
}
if(dumpIndex == &makeIndexing) {
if(appender) {
strcat(outputfilename, ".index.b");
......@@ -109,16 +108,16 @@ void makeDB(HashMap *templates, int kmerindex, char **inputfiles, int fileCount,
while(isspace(*--seq)) {
*seq = 0;
}
if(bias > 0) {
fprintf(name_out, "%s B%d\n", header->seq + 1, bias);
} else {
fprintf(name_out, "%s\n", header->seq + 1);
}
updateAnnotsPtr(compressor, templates->DB_size, kmerindex, seq_out, index_out, template_lengths, template_ulengths, template_slengths);
/* here */
fprintf(stderr, "# Added:\t%s\n", header->seq + 1);
if(++templates->DB_size == USHRT_MAX) {
if(++(templates->DB_size) == USHRT_MAX) {
/* convert values to unsigned */
convertToU(templates);
}
......
......@@ -21,10 +21,12 @@
typedef struct penalties Penalties;
struct penalties {
int **d;
int W1;
int U;
int M;
int MM;
int U;
int W1;
int Wl;
int Mn;
int PE;
};
#endif
......@@ -43,6 +43,7 @@
#include "sam.h"
#include "stdnuc.h"
#include "stdstat.h"
#include "tmp.h"
#include "updatescores.h"
#include "vcf.h"
#ifndef _WIN32
......@@ -254,7 +255,7 @@ int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConC
alignment_out = 0;
consensus_out = 0;
}
frag_out_raw = tmpfile();
frag_out_raw = tmpF(0);
if(!frag_out_raw) {
ERROR();
}
......@@ -1394,7 +1395,7 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
alignment_out = 0;
consensus_out = 0;
}
frag_out_raw = tmpfile();
frag_out_raw = tmpF(0);
if(!frag_out_raw) {
ERROR();
}
......
......@@ -393,17 +393,13 @@ int getProxiMatchSparse(int *bestTemplates, int *Score, int kmersize, int n_kmer
return bestScore;
}
int clearScore(int *bestTemplates, int *Score) {
static int clearScore(int *bestTemplates, int *Score) {
int i;
i = *bestTemplates + 1;
while(--i) {
Score[*++bestTemplates] = 0;
if(*bestTemplates < 0) {
fprintf(stderr, "Moher fucker.\n");
exit(1);
}
}
return 0;
......@@ -4759,3 +4755,299 @@ void ankerAndClean_MEM(int *regionTemplates, int *Score, int *Score_r, char *inc
header->seq[l] = 0;
header->len = l;
}
int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int *bestTemplates, int *bestTemplates_r, int *Score, int *Score_r, CompDNA *qseq, CompDNA *qseq_r, const Qseqs *header, int *extendScore, const int exhaustive, volatile int *excludeOut, FILE *out) {
/* return 0 for match, 1 otherwise */
static int *Sizes = 0;
static KmerAnker **tVF_scores, **tVR_scores;
int Wl, W1, U, M, MM, Ms, MMs, Us, W1s, score, gaps, template, maxS;
int *bests;
unsigned i, j, rc, shifter, kmersize, DB_size, SU, HIT, start, end, pos;
unsigned hitCounter, hitCounter_r;
unsigned *values, *last;
short unsigned *values_s;
char *include;
CompDNA *qseqP;
KmerAnker *V_score, *V_scores, *VF_scores, *VR_scores, *tmp_score;
if(qseq == 0) {
if(Sizes) {
free(Sizes);
Sizes = 0;
i = *bestTemplates;
while(i--) {
free(tVF_scores[i]);
}
free(tVF_scores);
} else {
i = *bestTemplates;
Sizes = smalloc(i * sizeof(int));
tVF_scores = smalloc(2 * i * sizeof(KmerAnker *));
tVR_scores = tVF_scores + i;
while(i--) {
Sizes[i] = 256;
tVF_scores[i] = calloc(512, sizeof(KmerAnker));
if(!tVF_scores[i]) {
ERROR();
} else {
tVR_scores[i] = tVF_scores[i] + 256;
}
}
}
return 0;
} else if(qseq->seqlen < (kmersize = templates->kmersize)) {
return 1;
} else if((DB_size = templates->DB_size) < USHRT_MAX) {
SU = 1;
} else {
SU = 0;
}
shifter = sizeof(long unsigned) * sizeof(long unsigned) - (templates->kmersize << 1);
M = rewards->M;
MM = rewards->MM;
U = rewards->U;
W1 = rewards->W1;
Wl = rewards->Wl;
include = (char *) (extendScore + (templates->DB_size + 1));
hitCounter = 0;
hitCounter_r = 0;
values = 0;
values_s = 0;
if(Sizes[*Score] < qseq->size) {
Sizes[*Score] = qseq->size << 1;
free(tVF_scores[*Score]);
tVF_scores[*Score] = calloc(qseq->size, sizeof(KmerAnker));
if(!tVF_scores[*Score]) {
ERROR();
} else {
tVR_scores[*Score] = tVF_scores[*Score] + qseq->size;
}
}
VF_scores = tVF_scores[*Score];
VR_scores = tVR_scores[*Score];
V_scores = VF_scores;
qseqP = qseq;
rc = 2;
while(rc--) {
if(rc == 0) {
V_scores = VR_scores;
qseqP = qseq_r;
rc_comp(qseq, qseq_r);
hitCounter = hitCounter_r;
hitCounter_r = 0;
}
HIT = exhaustive;
j = 0;
++*(qseqP->N);
qseqP->N[*(qseqP->N)] = qseqP->seqlen;
for(i = 1; i <= *(qseqP->N) && !HIT; ++i) {
end = qseqP->N[i] - kmersize + 1;
while(j < end && hashMap_get(templates, getKmer(qseqP->seq, j, shifter)) == 0) {
j += kmersize;
}
if(j < end) {
HIT = 1;
} else {
j = qseqP->N[i] + 1;
}
}
if(HIT) {
V_score = V_scores;
V_score->start = (j = 0);
V_score->values = (last = 0);
V_score->next = 0;
Ms = 0;
MMs = 0;
Us = 0;
W1s = 0;
gaps = 0;
for(i = 1; i <= *(qseqP->N); ++i) {
end = qseqP->N[i] - kmersize + 1;
while(j < end) {
if((values = hashMap_get(templates, getKmer(qseqP->seq, j, shifter)))) {
if(values == last) {
if(kmersize < gaps) {
Ms += kmersize;
gaps -= kmersize;
if(gaps) {
/* go for best scenario */
if(gaps == 1) {
MMs += 2;
} else {
gaps -= 2;
if((MM << 1) + gaps * M < 0) {
Ms += gaps;
MMs += 2;
}
}
} else {
++MMs;
}
} else if(gaps) {
--gaps;
++W1s;
Us += gaps;
} else {
++Ms;
}
} else {
if(last) {
/* update and link between ankers */
V_score->weight = Ms * M + MMs * MM + Us * U + W1s * W1;
V_score->end = j - gaps + kmersize;
++V_score;
}
V_score->start = j;
V_score->values = (last = values);
Ms = 0;
MMs = 0;
Us = 0;
W1s = 0;
++hitCounter_r;
}
gaps = 0;
} else {
++gaps;
}
j += kmersize;
}
j = qseqP->N[i] + 1;
}
if(last) {
/* update anker */
V_score->weight = Ms * M + MMs * MM + Us * U + W1s * W1;
}
}
--*(qseqP->N);
}
/* here */
/* chaining */
/*
while(chain is good) {
1. make chain. OK
2. choose best.
3. silence "used" ankers.
4. redo chaining if next best hits a silenced anker.
}
*/
/*
for each anker
mark last anker w.r.t. highest score on anker
*/
/* make chains */
maxS = 0;
V_score = VF_scores;
HIT = hitCounter + 1;
bests = bestTemplates;
rc = 2;
while(rc--) {
if(rc == 0) {
V_score = VR_scores;
HIT = hitCounter_r + 1;
bests = bestTemplates_r;
}
*bests = 0;
while(--HIT) {
/*
Save pos of best score, and chain it (0 if local).
Score is always current / w.r.t. the anker you are on.
extendScore contains endpos of last hit anker.
*/
start = V_score->start;
end = V_score->end;
/* chain anker */
V_score->score = 0;
if(SU) {
values_s = (short unsigned *) V_score->values;
i = *values_s + 1;
values_s += i;
} else {
values = V_score->values;
i = *values + 1;
values += i;
}
while(--i) {
template = SU ? *--values_s : *--values;
if(!include[template]) {
include[template] = 1;
bests[++*bests] = template;
}
score = Score[template];
gaps = start - (pos = extendScore[template]);
if(gaps == 0) {
/* continued match */
score += M;
} else if(gaps < 0) {
/* deletion */
score += (W1 + abs(gaps + 1) * U);
} else if(gaps == 1) {
/* one unmatched between */
score += MM;
} else {
/* unmatched bases between */
if((gaps = (gaps - 2) * M) < 0) {
score += gaps;
}
score += (MM << 1);
}
/* check local chain is needed */
if(score < Wl) {
score = Wl + V_score->weight;
/* mark as terminating */
V_score->next = 0;
extendScore[template] = 0;
} else {
score += V_score->weight;
/* mark descendant */
tmp_score = V_score;
while((--tmp_score)->end != pos);
V_score->next = tmp_score;
extendScore[template] = end;
}
/* update Scores */
if(V_score->score < score) {
V_score->score = score;
/* here */
/* consider several bests */
if(maxS < score) {
maxS = score;
}
}
Score[template] = score;
}
++V_score;
}
/* clear Score */
i = *bests + 1;
while(--i) {
include[*++bests] = 0;
}
clearScore(bests, Score);
clearScore(bests, extendScore);
}
return 1;
}
......@@ -25,6 +25,8 @@
#ifndef SAVEKMERS
typedef struct kmerScan_thread KmerScan_thread;
typedef struct kmerAnker KmerAnker;
struct kmerScan_thread {
pthread_t id;
int num;
......@@ -40,6 +42,16 @@ struct kmerScan_thread {
Penalties *rewards;
struct kmerScan_thread *next;
};
struct kmerAnker {
int score;
int weight;
unsigned start;
unsigned end;
unsigned *values;
struct kmerAnker *next;
};
#define SAVEKMERS 1;
#endif
......@@ -84,3 +96,4 @@ int save_kmers_forcePair(const HashMapKMA *templates, const Penalties *rewards,
int save_kmers_HMM(const HashMapKMA *templates, const Penalties *rewards, int *bestTemplates, int *bestTemplates_r, int *Score, int *Score_r, CompDNA *qseq, CompDNA *qseq_r, const Qseqs *header, int *extendScore, const int exhaustive, volatile int *excludeOut, FILE *out);
void ankerAndClean(int *regionTemplates, int *Score, int *Score_r, char *include, int *template_lengths, unsigned **VF_scores, unsigned **VR_scores, int *tmpNs, CompDNA *qseq, int HIT, int bestScore, int start_cut, int end_cut, Qseqs *header, volatile int *excludeOut, FILE *out);
void ankerAndClean_MEM(int *regionTemplates, int *Score, int *Score_r, char *include, int *template_lengths, unsigned **VF_scores, unsigned **VR_scores, int *tmpNs, CompDNA *qseq, int HIT, int bestScore, int start_cut, int end_cut, Qseqs *header, volatile int *excludeOut, FILE *out);
int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int *bestTemplates, int *bestTemplates_r, int *Score, int *Score_r, CompDNA *qseq, CompDNA *qseq_r, const Qseqs *header, int *extendScore, const int exhaustive, volatile int *excludeOut, FILE *out);
......@@ -80,8 +80,7 @@ void printFastas(char *filename, int *template_lengths) {
if(!seq || !compseq) {
ERROR();
}
for(i = 1; i < DB_size; i++) {
for(i = 1; i < DB_size; ++i) {
fread(compseq, sizeof(long unsigned), (template_lengths[i] >> 5) + 1, seqfile);
j = template_lengths[i];
......@@ -103,7 +102,7 @@ void printFastaList(char *filename, int *template_lengths, int *seqlist) {
const char bases[6] = "ACGTN-";
int i, j, n, max, DB_size, file_len;
long unsigned *compseq;
long unsigned skip, *compseq;
char *seq;
FILE *seqfile, *namefile;
Qseqs *template_name;
......@@ -139,11 +138,13 @@ void printFastaList(char *filename, int *template_lengths, int *seqlist) {
if(!seq || !compseq) {
ERROR();
}
for(i = 1; n && i < DB_size; i++) {
skip = 0;
for(i = 1; n && i < DB_size; ++i) {
if(i == *seqlist) {
/* get seq */
fseek(seqfile, skip, SEEK_CUR);
fread(compseq, sizeof(long unsigned), (template_lengths[i] >> 5) + 1, seqfile);
j = template_lengths[i];
*(seq += j) = '\n';
while(j--) {
......@@ -156,8 +157,10 @@ void printFastaList(char *filename, int *template_lengths, int *seqlist) {
/* get next target */
while(--n && i == *++seqlist);
skip = 0;
} else {
nameSkip(namefile, max);
skip += (((template_lengths[i] >> 5) + 1) * sizeof(long unsigned));
}
}
}
......
......@@ -42,6 +42,7 @@
#include "spltdb.h"
#include "stdnuc.h"
#include "stdstat.h"
#include "tmp.h"
#include "updatescores.h"
#include "version.h"
#include "vcf.h"
......@@ -522,7 +523,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
alignment_out = 0;
consensus_out = 0;
}
frag_out_raw = tmpfile();
frag_out_raw = tmpF(0);
if(!frag_out_raw) {
ERROR();
}
......