Skip to content
Commits on Source (4)
CFLAGS = -Wall -O3 -std=c99
LIBS = align.o alnfrags.o ankers.o assembly.o chain.o compdna.o compkmers.o compress.o decon.o ef.o filebuff.o frags.o hashmap.o hashmapindex.o hashmapkma.o hashmapkmers.o hashtable.o index.o kma.o kmapipe.o kmers.o kmmap.o loadupdate.o makeindex.o mt1.o nw.o pherror.o printconsensus.o qseqs.o qualcheck.o runinput.o runkma.o sam.o savekmers.o seq2fasta.o seqparse.o shm.o sparse.o spltdb.o stdnuc.o stdstat.o tmp.o update.o updateindex.o updatescores.o valueshash.o vcf.o
LIBS = align.o alnfrags.o ankers.o assembly.o chain.o compdna.o compkmers.o compress.o decon.o ef.o filebuff.o frags.o hashmap.o hashmapindex.o hashmapkma.o hashmapkmers.o hashtable.o index.o kma.o kmapipe.o kmeranker.o kmers.o kmmap.o loadupdate.o makeindex.o mt1.o nw.o pherror.o printconsensus.o qseqs.o qualcheck.o runinput.o runkma.o sam.o savekmers.o seq2fasta.o seqmenttree.o seqparse.o shm.o sparse.o spltdb.o stdnuc.o stdstat.o tmp.o update.o updateindex.o updatescores.o valueshash.o vcf.o
PROGS = kma kma_index kma_shm kma_update
.c .o:
......@@ -45,6 +45,7 @@ hashtable.o: hashtable.h hashmapkma.h hashmapkmers.h pherror.h
index.o: index.h compress.h decon.h hashmap.h hashmapkma.h loadupdate.h makeindex.h pherror.h stdstat.h version.h
kma.o: kma.h ankers.h assembly.h chain.h hashmapkma.h kmers.h mt1.h penalties.h pherror.h qseqs.h runinput.h runkma.h savekmers.h sparse.h spltdb.h tmp.h version.h
kmapipe.o: kmapipe.h pherror.h
kmeranker.o: kmeranker.h penalties.h
kmers.o: kmers.h ankers.h compdna.h hashmapkma.h kmapipe.h pherror.h qseqs.h savekmers.h spltdb.h
kmmap.o: kmmap.h hashmapkma.h
loadupdate.o: loadupdate.h pherror.h hashmap.h hashmapkma.h updateindex.h
......@@ -58,8 +59,9 @@ qualcheck.o: qualcheck.h compdna.h hashmap.h pherror.h stdnuc.h stdstat.h
runinput.o: runinput.h compdna.h filebuff.h pherror.h qseqs.h seqparse.h
runkma.o: runkma.h align.h alnfrags.h assembly.h chain.h compdna.h ef.h filebuff.h frags.h hashmapindex.h kmapipe.h nw.h pherror.h printconsensus.h qseqs.h stdnuc.h stdstat.h tmp.h vcf.h
sam.o: sam.h nw.h pherror.h qseqs.h runkma.h
savekmers.o: savekmers.h ankers.h compdna.h hashmapkma.h penalties.h pherror.h qseqs.h stdnuc.h stdstat.h threader.h
savekmers.o: savekmers.h ankers.h compdna.h hashmapkma.h kmeranker.h penalties.h pherror.h qseqs.h stdnuc.h stdstat.h threader.h
seq2fasta.o: seq2fasta.h pherror.h qseqs.h runkma.h stdnuc.h
seqmenttree.o: seqmenttree.h pherror.h
seqparse.o: seqparse.h filebuff.h qseqs.h
shm.o: shm.h pherror.h hashmapkma.h version.h
sparse.o: sparse.h compkmers.h hashtable.h kmapipe.h pherror.h runinput.h savekmers.h stdnuc.h stdstat.h
......
kma (1.2.11-1) UNRELEASED; urgency=medium
* Team upload.
* New upstream version
-- Steffen Moeller <moeller@debian.org> Sat, 31 Aug 2019 11:18:21 +0200
kma (1.2.10a-1) unstable; urgency=medium
* New upstream version
......
......@@ -2,12 +2,14 @@ Author: Andreas Tille <tille@debian.org>
Last-Update: Thu, 07 Jun 2018 21:55:00 +0200
Description: Propagate hardening options
--- a/Makefile
+++ b/Makefile
Index: kma/Makefile
===================================================================
--- kma.orig/Makefile
+++ kma/Makefile
@@ -1,4 +1,4 @@
-CFLAGS = -Wall -O3 -std=c99
+CFLAGS += -Wall -O3 -std=c99
LIBS = align.o alnfrags.o ankers.o assembly.o chain.o compdna.o compkmers.o compress.o decon.o ef.o filebuff.o frags.o hashmap.o hashmapindex.o hashmapkma.o hashmapkmers.o hashtable.o index.o kma.o kmapipe.o kmers.o kmmap.o loadupdate.o makeindex.o mt1.o nw.o pherror.o printconsensus.o qseqs.o qualcheck.o runinput.o runkma.o sam.o savekmers.o seq2fasta.o seqparse.o shm.o sparse.o spltdb.o stdnuc.o stdstat.o tmp.o update.o updateindex.o updatescores.o valueshash.o vcf.o
LIBS = align.o alnfrags.o ankers.o assembly.o chain.o compdna.o compkmers.o compress.o decon.o ef.o filebuff.o frags.o hashmap.o hashmapindex.o hashmapkma.o hashmapkmers.o hashtable.o index.o kma.o kmapipe.o kmeranker.o kmers.o kmmap.o loadupdate.o makeindex.o mt1.o nw.o pherror.o printconsensus.o qseqs.o qualcheck.o runinput.o runkma.o sam.o savekmers.o seq2fasta.o seqmenttree.o seqparse.o shm.o sparse.o spltdb.o stdnuc.o stdstat.o tmp.o update.o updateindex.o updatescores.o valueshash.o vcf.o
PROGS = kma kma_index kma_shm kma_update
@@ -8,16 +8,16 @@ PROGS = kma kma_index kma_shm kma_update
......
......@@ -39,7 +39,7 @@ void reallocHashMap_kmers(HashMap_kmers *dest) {
/* save buckets */
table = 0;
index = ++dest->size;
index = dest->size;
while(index--) {
for(node = dest->table[index]; node; node = node_next) {
node_next = node->next;
......@@ -54,12 +54,11 @@ void reallocHashMap_kmers(HashMap_kmers *dest) {
if(!dest->table) {
ERROR();
}
--dest->size;
/* refill table */
for(node = table; node; node = node_next) {
node_next = node->next;
index = node->key & dest->size;
index = node->key % dest->size;
node->next = dest->table[index];
dest->table[index] = node;
}
......@@ -96,7 +95,7 @@ int hashMap_CountKmer(HashMap_kmers *dest, long unsigned key) {
long unsigned index;
HashTable_kmers *node;
index = key & dest->size;
index = key % dest->size;
for(node = dest->table[index]; node != 0; node = node->next) {
if(node->key == key) {
return 0;
......
......@@ -816,7 +816,7 @@ int kma_main(int argc, char *argv[]) {
}
preseed(0, 0, exhaustive);
if(sam) {
if(sam && kmaPipe != &kmaPipeThread) {
fprintf(stderr, "\"-sam\" and \"-status\" cannot coincide.\n");
kmaPipe = &kmaPipeThread;
}
......
/* Philip T.L.C. Clausen Jan 2017 plan@dtu.dk */
/*
* Copyright (c) 2017, Philip Clausen, Technical University of Denmark
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define _XOPEN_SOURCE 600
#include <stdlib.h>
#include "kmeranker.h"
#include "penalties.h"
KmerAnker * getBestChainTemplates(KmerAnker *src, const Penalties *rewards, int *bests, int *Score, int *extendScore, char *include) {
/* get set of best templates and silences the chain, except for the initial anker */
int template, score, bestScore, Wl, W1, U, M, MM, gaps;
unsigned i, j, start, end, SU;
unsigned *values;
short unsigned *values_s;
KmerAnker *node, *prev;
/* mark template candidates */
if(!src) {
return 0;
} else if((SU = *include)) {
values_s = (short unsigned *) src->values;
i = *values_s + 1;
*bests += i;
values_s += i;
bests += i;
while(--i) {
include[(*--bests = *--values_s)] ^= 1;
}
} else {
values = src->values;
i = *values + 1;
*bests += i;
values += i;
bests += i;
while(--i) {
include[(*--bests = *--values)] ^= 1;
}
}
M = rewards->M;
MM = rewards->MM;
U = rewards->U;
W1 = rewards->W1;
Wl = rewards->Wl;
bestScore = src->score;
/* get chaining scores */
for(node = src; node != 0; node = node->next) {
if(SU) {
values_s = (short unsigned *) node->values;
i = *values_s + 1;
values_s += i;
} else {
values = node->values;
i = *values + 1;
values += i;
}
start = node->start;
end = node->end;
while(--i) {
template = SU ? *--values_s : *--values;
if(!include[template]) {
score = Score[template];
gaps = start - extendScore[template];
if(gaps == 0) {
/* continued match */
score += M;
} else if(gaps < 0) {
/* deletion */
score += (W1 + abs(gaps + 1) * U);
} else if(gaps == 1) {
/* one unmatched between */
score += MM;
} else {
/* unmatched bases between */
if((gaps = (gaps - 2) * M) < 0) {
score += gaps;
}
score += (MM << 1);
}
/* check local chain is needed */
if(score < Wl) {
score = Wl + node->weight;
/* mark as terminating */
extendScore[template] = 0;
} else {
score += node->weight;
/* mark descendant */
extendScore[template] = end;
}
/* update Scores */
Score[template] = score;
}
}
/* mark as used */
node->score = 0;
prev = node;
}
src->score = bestScore;
/* get best templates */
j = 0;
for(i = 1; i <= *bests; ++i) {
if(Score[(template = bests[i])] == bestScore) {
bests[++j] = template;
}
/* clear Score arrays */
Score[template] = 0;
extendScore[template] = 0;
include[template] = 0;
}
*bests = j;
return prev;
}
KmerAnker * pruneAnkers(KmerAnker *V_score, int kmersize) {
KmerAnker *node, *prev;
while(V_score->score < kmersize) {
++V_score;
}
prev = V_score;
node = V_score->descend;
while(node) {
if(kmersize <= node->score) {
prev->descend = node;
prev = node;
}
node = node->descend;
}
prev->descend = 0;
return V_score;
}
unsigned getStartAnker(KmerAnker *src) {
while(src->next) {
src = src->next;
}
return src->start;
}
KmerAnker * getBestAnker(KmerAnker **src, unsigned *ties) {
KmerAnker *node, *prev, *best;
*ties = 0;
prev = *src;
while(prev && prev->score == 0) {
prev = prev->descend;
}
*src = prev;
best = prev;
node = prev->descend;
while(node) {
if(best->score < node->score) {
best = node;
*ties = 0;
} else if(best->score == node->score) {
if((node->end - node->start) < (best->end - best->start)) {
best = node;
*ties = 0;
} else if((node->end - node->start) == (best->end - best->start)) {
best = node;
++*ties;
}
} else if(node->score != 0) {
prev->descend = node;
prev = node;
}
node = node->descend;
}
prev->descend = 0;
return best;
}
KmerAnker * getTieAnker(KmerAnker *stop, KmerAnker *src, int score, int len) {
/* search downwards */
while(--src != stop) {
if(src->score == score && (src->end - src->start) == len) {
return src;
}
}
return 0;
}
/* Philip T.L.C. Clausen Jan 2017 plan@dtu.dk */
/*
* Copyright (c) 2017, Philip Clausen, Technical University of Denmark
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define _XOPEN_SOURCE 600
#include "penalties.h"
#ifndef KMERANKER
#define KMERANKER 1;
typedef struct kmerAnker KmerAnker;
struct kmerAnker {
int score;
int weight;
unsigned start;
unsigned end;
unsigned flag;
unsigned *values;
struct kmerAnker *descend; /* descending anker */
struct kmerAnker *next; /* next anker in chain */
};
#endif
KmerAnker * getBestChainTemplates(KmerAnker *src, const Penalties *rewards, int *bests, int *Score, int *extendScore, char *include);
KmerAnker * pruneAnkers(KmerAnker *V_score, int kmersize);
unsigned getStartAnker(KmerAnker *src);
KmerAnker * getBestAnker(KmerAnker **src, unsigned *ties);
KmerAnker * getTieAnker(KmerAnker *stop, KmerAnker *src, int score, int len);
......@@ -37,3 +37,22 @@ void destroyQseqs(Qseqs *dest) {
free(dest->seq);
free(dest);
}
void insertKmerBound(Qseqs *header, int start, int end) {
int *seq;
if((header->len + 3 * sizeof(int)) < header->size) {
header->size = (header->len + 3 * sizeof(int)) << 1;
if(!(header->seq = realloc(header->seq, header->size))) {
ERROR();
}
}
seq = (int *) (header->seq + header->len + 1);
*seq = start;
*++seq = end;
*++seq = 0;
header->len += (2 * sizeof(int) + 1);
}
......@@ -31,3 +31,4 @@ struct qseqs {
Qseqs * setQseqs(int size);
/* destroy Qseqs */
void destroyQseqs(Qseqs *dest);
void insertKmerBound(Qseqs *header, int start, int end);
......@@ -32,8 +32,8 @@ char * makeCigar(Qseqs *Cigar, const Aln *aligned) {
char op, pop, *s, *cigar;
unsigned char *t, *q;
if(Cigar->size < aligned->len << 1) {
Cigar->size <<= 1;
if(Cigar->size < (aligned->len << 1)) {
Cigar->size = (aligned->len << 1);
free(Cigar->seq);
Cigar->seq = smalloc(Cigar->size);
} else if(aligned->len == 0) {
......
......@@ -25,11 +25,13 @@
#include "ankers.h"
#include "compdna.h"
#include "hashmapkma.h"
#include "kmeranker.h"
#include "penalties.h"
#include "pherror.h"
#include "qseqs.h"
#include "sam.h"
#include "savekmers.h"
#include "seqmenttree.h"
#include "stdnuc.h"
#include "stdstat.h"
#include "threader.h"
......@@ -4760,17 +4762,19 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
/* return 0 for match, 1 otherwise */
static int *Sizes = 0;
static double mrs = 0.5, coverT = 0.5;
static KmerAnker **tVF_scores, **tVR_scores;
static SeqmentTree **tSeqments;
int Wl, W1, U, M, MM, Ms, MMs, Us, W1s, score, gaps, template, maxS;
int *bests;
int Wl, W1, U, M, MM, Ms, MMs, Us, W1s, score, gaps, template, *bests;
unsigned i, j, rc, shifter, kmersize, DB_size, SU, HIT, start, end, pos;
unsigned hitCounter, hitCounter_r;
unsigned *values, *last;
unsigned hitCounter, hitCounter_r, ties, len, *values, *last;
short unsigned *values_s;
char *include;
CompDNA *qseqP;
KmerAnker *V_score, *V_scores, *VF_scores, *VR_scores, *tmp_score;
KmerAnker *best_score, *best_score_r;
SeqmentTree *chainSeqments;
if(qseq == 0) {
if(Sizes) {
......@@ -4782,10 +4786,18 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
}
free(tVF_scores);
} else {
/* here */
/* set mrs and coverT */
i = *bestTemplates;
Sizes = smalloc(i * sizeof(int));
tVF_scores = smalloc(2 * i * sizeof(KmerAnker *));
tVR_scores = tVF_scores + i;
tSeqments = calloc(i, sizeof(SeqmentTree *));
if(!tSeqments) {
ERROR();
}
while(i--) {
Sizes[i] = 256;
tVF_scores[i] = calloc(512, sizeof(KmerAnker));
......@@ -4828,6 +4840,7 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
}
VF_scores = tVF_scores[*Score];
VR_scores = tVR_scores[*Score];
chainSeqments = tSeqments[*Score];
V_scores = VF_scores;
qseqP = qseq;
......@@ -4857,16 +4870,20 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
}
}
if(HIT) {
V_score = V_scores;
V_score->start = (j = 0);
V_score->values = (last = 0);
V_score->start = 0;
V_score->end = 0;
V_score->values = 0;
V_score->next = 0;
V_score->descend = 0;
if(HIT) {
Ms = 0;
MMs = 0;
Us = 0;
W1s = 0;
gaps = 0;
last = 0;
j = 0;
for(i = 1; i <= *(qseqP->N); ++i) {
end = qseqP->N[i] - kmersize + 1;
while(j < end) {
......@@ -4901,10 +4918,12 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
/* update and link between ankers */
V_score->weight = Ms * M + MMs * MM + Us * U + W1s * W1;
V_score->end = j - gaps + kmersize;
V_score->descend = V_score + 1;
++V_score;
}
V_score->start = j;
V_score->values = (last = values);
V_score->descend = 0;
Ms = 0;
MMs = 0;
Us = 0;
......@@ -4927,34 +4946,27 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
--*(qseqP->N);
}
/* here */
/* chaining */
/*
while(chain is good) {
1. make chain. OK
2. choose best.
3. silence "used" ankers.
4. redo chaining if next best hits a silenced anker.
/* no matches */
if(!hitCounter && !hitCounter_r) {
*include = 0;
return 1;
}
*/
/*
for each anker
mark last anker w.r.t. highest score on anker
*/
/* make chains */
maxS = 0;
V_score = VF_scores;
best_score = 0;
best_score_r = V_score;
HIT = hitCounter + 1;
bests = bestTemplates;
ties = 0;
rc = 2;
while(rc--) {
if(rc == 0) {
V_score = VR_scores;
HIT = hitCounter_r + 1;
bests = bestTemplates_r;
best_score = best_score_r;
best_score_r = V_score;
}
*bests = 0;
while(--HIT) {
......@@ -5006,48 +5018,556 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
if(score < Wl) {
score = Wl + V_score->weight;
/* mark as terminating */
V_score->next = 0;
tmp_score = 0;
extendScore[template] = 0;
} else {
score += V_score->weight;
/* mark descendant */
tmp_score = V_score;
while((--tmp_score)->end != pos);
V_score->next = tmp_score;
extendScore[template] = end;
}
/* update Scores */
if(V_score->score < score) {
V_score->score = score;
/* here */
/* consider several bests */
if(maxS < score) {
maxS = score;
/* find link chain */
if(tmp_score) {
while((--tmp_score)->end != pos);
}
V_score->next = tmp_score;
}
Score[template] = score;
}
/* Mark (first) best hit */
if(best_score_r->score < V_score->score) {
best_score_r = V_score;
ties = 0;
} else if(best_score_r->score == V_score->score) {
/* lower uncertainty in shorter chains */
if((V_score->end - V_score->start) < (best_score_r->end - best_score_r->start)) {
best_score_r = V_score;
ties = 0;
} else if((V_score->end - V_score->start) == (best_score_r->end - best_score_r->start)) {
/* equals */
++ties;
/* first hit on rc is likely last hit on forward */
if(rc) {
best_score_r = V_score;
}
}
}
V_score->flag = 0;
++V_score;
}
/* clear Score */
/* clear Score arrays */
clearScore(bests, Score);
clearScore(bests, extendScore);
i = *bests + 1;
while(--i) {
include[*++bests] = 0;
}
}
/* no good hits */
if(best_score->score < kmersize && best_score_r->score < kmersize) {
*include = 0;
return 1;
}
/* get best chain,
start/end, score, template(s)
and zero out ankers */
*include = SU;
*bestTemplates = 0;
if(best_score_r->score < best_score->score ||
(best_score_r->score < best_score->score &&
(best_score->end - best_score->start) <= (best_score_r->end - best_score->start))) {
tmp_score = getBestChainTemplates(best_score, rewards, bestTemplates, Score, extendScore, include);
score = best_score->score;
start = tmp_score->start;
len = best_score->end - start;
rc = 0;
} else {
tmp_score = getBestChainTemplates(best_score_r, rewards, bestTemplates, Score, extendScore, include);
score = best_score_r->score;
start = tmp_score->start;
len = best_score_r->end - start;
rc = 1;
}
if(score < mrs * len) {
*include = 0;
return 1;
}
/* prune hits */
VF_scores = pruneAnkers(VF_scores, kmersize);
VR_scores = pruneAnkers(VR_scores, kmersize);
/* get best chains */
while(best_score || best_score_r) {
/* get equal ankers inside chain,
that lies under the threshold */
/* here */
if(ties && best_score != tmp_score) {
V_score = best_score;
while(V_score && (V_score = getTieAnker(tmp_score, V_score, best_score->score, len))) {
/*
1. tie anker -> tie_len == best_len
2. Best match is last on seq -> tie_start < best_start
1. && 2. -> cover = |tie_start - best_end| / len
*/
if((V_score->end - start) < coverT * len ) {
/* not a co-match / overlap is insufficient ->
no more equal matches */
V_score = 0;
} else { /* Anker is equal, update with templates */
/* Mark current templates to avoid double hitting */
i = *bestTemplates + 1;
bests = bestTemplates;
while(--i) {
include[*++bests] = 1;
}
/* update best template candidates */
template = *bests;
*bests = 0;
getBestChainTemplates(V_score, rewards, bests, Score, extendScore, include);
*bestTemplates += *bests;
*bests = template;
start = V_score->end - len;
}
}
}
}
/*
Best template(s) must in values on best anker,
remove non-union of A and B, while backtracking.
Where A and B are templates on ankerA and ankerB.
For proxi scoring the score has to be imputed while tracking.
*/
/* here */
/* what happens when two equal chains overlap */
/*
------------- ------------- -------------
------------- -------------
------------- ------------- -------------
------------- -------------
*/
/* no rechaining */
/*
Q = set of best chains
while(score is good)
1. if in Q.
if(<50%)
add to Q
else if(scores equal)
add with match
else
skip chain
else
add to Q.
2. Silence it.
3. find best chain.
*/
*include = 0;
return 1;
}
int save_kmers_sparse_chain(const HashMapKMA *templates, const Penalties *rewards, int *bestTemplates, int *bestTemplates_r, int *Score, int *Score_r, CompDNA *qseq, CompDNA *qseq_r, const Qseqs *header, int *extendScore, const int exhaustive, volatile int *excludeOut, FILE *out) {
/* return 0 for match, 1 otherwise */
static int *Sizes = 0;
static double mrs = 0.5, coverT = 0.5;
static KmerAnker **tVF_scores;
static SeqmentTree **tSeqments;
int Wl, W1, U, M, MM, Ms, MMs, Us, W1s, score, gaps, template, *bests;
unsigned i, j, shifter, kmersize, DB_size, SU, HIT, start, end, pos;
unsigned len, cover, hitCounter, ties, *values, *last;
short unsigned *values_s;
char *include;
KmerAnker *V_score, *VF_scores, *tmp_score, *best_score;
SeqmentTree *chainSeqments;
if(qseq == 0) {
if(Sizes) {
free(Sizes);
Sizes = 0;
i = *bestTemplates;
while(i--) {
free(tVF_scores[i]);
}
free(tVF_scores);
} else {
/* here */
/* set mrs and coverT */
i = *bestTemplates;
Sizes = smalloc(i * sizeof(int));
tVF_scores = smalloc(i * sizeof(KmerAnker *));
tSeqments = calloc(i, sizeof(SeqmentTree *));
if(!tSeqments) {
ERROR();
}
while(i--) {
Sizes[i] = 256;
tVF_scores[i] = calloc(512, sizeof(KmerAnker));
if(!tVF_scores[i]) {
ERROR();
}
}
}
return 0;
} else if(qseq->seqlen < (kmersize = templates->kmersize)) {
return 1;
} else if((DB_size = templates->DB_size) < USHRT_MAX) {
SU = 1;
} else {
SU = 0;
}
shifter = sizeof(long unsigned) * sizeof(long unsigned) - (templates->kmersize << 1);
M = rewards->M;
MM = rewards->MM;
U = rewards->U;
W1 = rewards->W1;
Wl = rewards->Wl;
include = (char *) (extendScore + (templates->DB_size + 1));
hitCounter = 0;
values = 0;
values_s = 0;
if(Sizes[*Score] < qseq->size) {
Sizes[*Score] = qseq->size << 1;
free(tVF_scores[*Score]);
tVF_scores[*Score] = calloc(qseq->size, sizeof(KmerAnker));
if(!tVF_scores[*Score]) {
ERROR();
}
}
VF_scores = tVF_scores[*Score];
chainSeqments = tSeqments[*Score];
HIT = exhaustive;
j = 0;
++*(qseq->N);
qseq->N[*(qseq->N)] = qseq->seqlen;
for(i = 1; i <= *(qseq->N) && !HIT; ++i) {
end = qseq->N[i] - kmersize + 1;
while(j < end && hashMap_get(templates, getKmer(qseq->seq, j, shifter)) == 0) {
j += kmersize;
}
if(j < end) {
HIT = 1;
} else {
j = qseq->N[i] + 1;
}
}
if(HIT) {
V_score = VF_scores;
V_score->start = (j = 0);
V_score->values = (last = 0);
V_score->next = 0;
Ms = 0;
MMs = 0;
Us = 0;
W1s = 0;
gaps = 0;
for(i = 1; i <= *(qseq->N); ++i) {
end = qseq->N[i] - kmersize + 1;
while(j < end) {
if((values = hashMap_get(templates, getKmer(qseq->seq, j, shifter)))) {
if(values == last) {
if(kmersize < gaps) {
Ms += kmersize;
gaps -= kmersize;
if(gaps) {
/* go for best scenario */
if(gaps == 1) {
MMs += 2;
} else {
gaps -= 2;
if((MM << 1) + gaps * M < 0) {
Ms += gaps;
MMs += 2;
}
}
} else {
++MMs;
}
} else if(gaps) {
--gaps;
++W1s;
Us += gaps;
} else {
++Ms;
}
} else {
if(last) {
/* update and link between ankers */
V_score->weight = Ms * M + MMs * MM + Us * U + W1s * W1;
V_score->end = j - gaps + kmersize;
V_score->descend = V_score + 1;
++V_score;
}
V_score->start = j;
V_score->values = (last = values);
V_score->descend = 0;
Ms = 0;
MMs = 0;
Us = 0;
W1s = 0;
++hitCounter;
}
gaps = 0;
} else {
++gaps;
}
j += kmersize;
}
j = qseq->N[i] + 1;
}
if(last) {
/* update anker */
V_score->weight = Ms * M + MMs * MM + Us * U + W1s * W1;
}
}
--*(qseq->N);
/* no matches */
if(!hitCounter) {
return 1;
}
/* make chains */
V_score = VF_scores;
best_score = V_score;
HIT = hitCounter + 1;
bests = bestTemplates;
*bests = (ties = 0);
while(--HIT) {
/*
Save pos of best score, and chain it (0 if local).
Score is always current / w.r.t. the anker you are on.
extendScore contains endpos of last hit anker.
*/
start = V_score->start;
end = V_score->end;
/* chain anker */
V_score->score = 0;
if(SU) {
values_s = (short unsigned *) V_score->values;
i = *values_s + 1;
values_s += i;
} else {
values = V_score->values;
i = *values + 1;
values += i;
}
while(--i) {
template = SU ? *--values_s : *--values;
if(!include[template]) {
include[template] = 1;
bests[++*bests] = template;
}
score = Score[template];
gaps = start - (pos = extendScore[template]);
if(gaps == 0) {
/* continued match */
score += M;
} else if(gaps < 0) {
/* deletion */
score += (W1 + abs(gaps + 1) * U);
} else if(gaps == 1) {
/* one unmatched between */
score += MM;
} else {
/* unmatched bases between */
if((gaps = (gaps - 2) * M) < 0) {
score += gaps;
}
score += (MM << 1);
}
/* check local chain is needed */
if(score < Wl) {
score = Wl + V_score->weight;
/* mark as terminating */
tmp_score = 0;
extendScore[template] = 0;
} else {
score += V_score->weight;
/* mark descendant */
tmp_score = V_score;
extendScore[template] = end;
}
/* update Scores */
if(V_score->score < score) {
V_score->score = score;
/* find link chain */
if(tmp_score) {
while((--tmp_score)->end != pos);
}
V_score->next = tmp_score;
}
Score[template] = score;
}
/* Mark (last) best hit */
if(best_score->score < V_score->score) {
best_score = V_score;
ties = 0;
} else if(best_score->score == V_score->score) {
if((V_score->end - V_score->start) < (best_score->end - best_score->start)) {
/* lower uncertainty in shorter chains */
best_score = V_score;
ties = 0;
} else if((V_score->end - V_score->start) == (best_score->end - best_score->start)) {
/* equals */
best_score = V_score;
++ties;
}
}
V_score->flag = 0;
++V_score;
}
/* clear Score arrays */
clearScore(bests, Score);
clearScore(bests, extendScore);
i = *bests + 1;
while(--i) {
include[*++bests] = 0;
}
/* no good hits */
if((score = best_score->score) < kmersize) {
*include = 0;
return 1;
}
/* get best chain,
start/end, score, template(s)
and zero out ankers */
*include = SU;
*bestTemplates = 0;
tmp_score = getBestChainTemplates(best_score, rewards, bestTemplates, Score, extendScore, include);
start = tmp_score->start;
len = best_score->end - start;
if(score < mrs * len) {
*include = 0;
return 1;
}
/* prune hits */
VF_scores = pruneAnkers(VF_scores, kmersize);
/* get best chains */
while(best_score) {
/* get equal ankers inside chain,
that lies under the threshold */
if(ties && best_score != tmp_score) {
V_score = best_score;
while(V_score && (V_score = getTieAnker(tmp_score, V_score, best_score->score, len))) {
/*
1. tie anker -> tie_len == best_len
2. Best match is last on seq -> tie_start < best_start
1. && 2. -> cover = |tie_start - best_end| / len
*/
if((V_score->end - start) < coverT * len ) {
/* not a co-match / overlap is insufficient ->
no more equal matches */
V_score = 0;
} else { /* Anker is equal, update with templates */
/* Mark current templates to avoid double hitting */
i = *bestTemplates + 1;
bests = bestTemplates;
while(--i) {
include[*++bests] = 1;
}
/* update best template candidates */
template = *bests;
*bests = 0;
getBestChainTemplates(V_score, rewards, bests, Score, extendScore, include);
*bestTemplates += *bests;
*bests = template;
start = V_score->end - len;
}
}
}
/* use segment trees to mark "used" regions of query.
Allow for quick check of intersection with chosen chains. */
growSeqmentTree(chainSeqments, start, best_score->end);
/* insert anker-boundaries */
insertKmerBound((Qseqs *) header, start, best_score->end);
/* print match */
lock(excludeOut);
i = deConPrintPtr(bestTemplates, qseq, best_score->score, header, 0, out);
unlock(excludeOut);
/* get next match */
best_score->score = 0;
*bestTemplates = 0;
while(best_score->score == 0) {
/* find best anker */
if(!(best_score = getBestAnker(&VF_scores, &ties))) {
*include = 0;
return 0;
}
/* check chain */
start = getStartAnker(best_score);
cover = queSeqmentTree(chainSeqments->root, start, best_score->end);
/* verify chain */
if(cover < coverT * (best_score->end - start)) {
/* get chain */
tmp_score = getBestChainTemplates(best_score, rewards, bestTemplates, Score, extendScore, include);
len = best_score->end - start;
if(best_score->score < mrs * len) {
/* silence anker */
best_score->score = 0;
}
} else {
/* silence anker */
best_score->score = 0;
}
}
}
*include = 0;
return 1;
}
......@@ -25,7 +25,6 @@
#ifndef SAVEKMERS
typedef struct kmerScan_thread KmerScan_thread;
typedef struct kmerAnker KmerAnker;
struct kmerScan_thread {
pthread_t id;
......@@ -43,15 +42,6 @@ struct kmerScan_thread {
struct kmerScan_thread *next;
};
struct kmerAnker {
int score;
int weight;
unsigned start;
unsigned end;
unsigned *values;
struct kmerAnker *next;
};
#define SAVEKMERS 1;
#endif
......
/* Philip T.L.C. Clausen Jan 2017 plan@dtu.dk */
/*
* Copyright (c) 2017, Philip Clausen, Technical University of Denmark
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define _XOPEN_SOURCE 600
#include <stdlib.h>
#include "pherror.h"
#include "seqmenttree.h"
SeqmentTree * initializeSeqmentTree(const long unsigned size) {
SeqmentTree *dest;
dest = smalloc(sizeof(SeqmentTree));
dest->n = 0;
dest->root = smalloc((dest->size = size) * sizeof(SeqmentTrees));
return dest;
}
SeqmentTree * initSeqmentTree(SeqmentTree *src, const unsigned start, const unsigned end) {
SeqmentTrees *node;
if(!src) {
src = initializeSeqmentTree(64);
}
src->n = 1;
node = src->root;
node->start = start;
node->end = end;
node->covered = end - start;
node->branch[0] = 0;
node->branch[1] = 0;
return src;
}
void reallocSeqmentTree(SeqmentTree *src) {
src->root = realloc(src->root, (src->size <<= 1) * sizeof(SeqmentTrees));
if(!src->root) {
ERROR();
}
}
unsigned addSeqmentTrees(SeqmentTrees *root, SeqmentTrees *node) {
unsigned pos, covered;
SeqmentTrees *bud;
if(*(root->branch)) { /* search */
/* adjust limits of root */
if(node->start < root->start && root->end < node->end) {
root->start = node->start;
root->end = node->end;
root->covered = node->covered;
node->covered = 0;
*(root->branch) = 0;
return root->covered;
} else if(root->end < node->end) {
root->end = node->end;
} else if(node->start < root->start) {
root->start = node->start;
}
/* search tree */
if(node->end < (pos = root->branch[1]->start)) { /* left */
root->covered = root->branch[1]->covered + addSeqmentTrees(root->branch[0], node);
} else if(pos <= node->start) { /* right */
root->covered = root->branch[0]->covered + addSeqmentTrees(root->branch[1], node);
} else { /* split */
/* calculate right side */
pos = node->start;
node->start = root->branch[0]->end + 1;
node->covered = node->end - node->start;
covered = addSeqmentTrees(root->branch[1], node);
/* here */
if(node->covered) {
fprintf(stderr, "I was wrong about split seqmenttrees.\n");
exit(1);
}
/* calculate left side */
node->start = pos;
node->end = root->branch[0]->end;
node->covered = node->end - node->start;
root->covered = covered + addSeqmentTrees(root->branch[0], node);
/* here */
if(node->covered) {
fprintf(stderr, "I was wrong about split seqmenttrees.\n");
exit(1);
}
}
} else if((pos = node->end < root->start) || (pos = root->end < node->start)) { /* new leaf */
/* create and grow bud */
bud = node + 1;
root->branch[pos] = node;
root->branch[pos] = bud;
/* form new leaf */
bud->start = root->start;
bud->end = root->end;
bud->covered = root->covered;
*(bud->branch) = 0;
/* update the root */
root->covered += node->covered;
} else { /* extend leaf */
if(node->start < root->start) { /* extend left */
root->start = node->start;
} else if(root->end < node->end) { /* extend right */
root->end = node->end;
}
node->covered = 0;
root->covered = root->end - root->start;
}
return root->covered;
}
void growSeqmentTree(SeqmentTree *src, const unsigned start, const unsigned end) {
SeqmentTrees *node;
/* make room for new anker */
if(src->size <= src->n + 2) {
reallocSeqmentTree(src);
}
/* make new leaf */
node = src->root + src->n;
node->start = start;
node->end = end;
node->covered = end - start;
*(node->branch) = 0;
src->root->covered = addSeqmentTrees(src->root, node);
if(node->covered) {
src->n += 2;
}
}
unsigned queSeqmentTree(SeqmentTrees *src, const unsigned start, const unsigned end) {
if(end < src->start || src->end < start) {
return 0;
} else if(src->start <= start && end <= src->end) {
return src->covered;
} else {
return queSeqmentTree(src->branch[0], start, end) + queSeqmentTree(src->branch[1], start, end);
}
}
/* Philip T.L.C. Clausen Jan 2017 plan@dtu.dk */
/*
* Copyright (c) 2017, Philip Clausen, Technical University of Denmark
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define _XOPEN_SOURCE 600
//cc -Wall -O3 -std=c99 -c -o seqmenttree seqmenttree.c
#ifndef SEQMENTTREE
#define SEQMENTTREE 1
typedef struct seqmentTree SeqmentTree;
typedef struct seqmentTrees SeqmentTrees;
struct seqmentTree {
unsigned n;
unsigned size;
struct seqmentTrees *root;
};
struct seqmentTrees {
unsigned start;
unsigned end;
unsigned covered;
struct seqmentTrees *branch[2];
};
#endif
SeqmentTree * initializeSeqmentTree(long unsigned size);
SeqmentTree * initSeqmentTree(SeqmentTree *src, const unsigned start, const unsigned end);
void reallocSeqmentTree(SeqmentTree *src);
unsigned addSeqmentTrees(SeqmentTrees *root, SeqmentTrees *node);
void growSeqmentTree(SeqmentTree *src, const unsigned start, const unsigned end);
unsigned queSeqmentTree(SeqmentTrees *src, const unsigned start, const unsigned end);
......@@ -400,14 +400,12 @@ int save_kmers_sparse_batch(char *templatefilename, char *outputfilename, char *
Kmers = smalloc(sizeof(CompKmers));
allocCompKmers(Kmers, 1024);
Ntot = 0;
/* count kmers */
while((Kmers->n = fread(Kmers->kmers, sizeof(long unsigned), Kmers->size, inputfile))) {
Ntot += Kmers->n;
save_kmers_sparse(templates, foundKmers, Kmers);
}
kmaPipe(0, 0, inputfile, &status);
if(kmaPipe == &kmaPipeFork) {
t1 = clock();
fprintf(stderr, "#\n# Total time used to identify k-mers in query: %.2f s.\n#\n", difftime(t1, t0) / 1000000);
......
......@@ -17,4 +17,4 @@
* limitations under the License.
*/
#define KMA_VERSION "1.2.10a"
#define KMA_VERSION "1.2.11"