Skip to content
Commits on Source (5)
kma
kma_index
kma_shm
kma_update
*.o
*.a
This diff is collapsed.
This diff is collapsed.
CFLAGS = -w -O3
BINS = kma kma_index kma_shm kma_update
CFLAGS = -Wall -O3 -std=c99
LIBS = align.o alnfrags.o ankers.o assembly.o chain.o compdna.o compkmers.o compress.o decon.o ef.o filebuff.o frags.o hashmap.o hashmapindex.o hashmapkma.o hashmapkmers.o hashtable.o index.o kma.o kmapipe.o kmers.o kmmap.o loadupdate.o makeindex.o mt1.o nw.o pherror.o printconsensus.o qseqs.o qualcheck.o runinput.o runkma.o sam.o savekmers.o seq2fasta.o seqparse.o shm.o sparse.o spltdb.o stdnuc.o stdstat.o update.o updateindex.o updatescores.o valueshash.o vcf.o
PROGS = kma kma_index kma_shm kma_update
all: $(BINS)
# Compile one C source file into its object file.
# The header must be the double-suffix form ".c.o" with no space: written as
# ".c .o:" it declares two unrelated single-suffix targets (".c" and ".o"),
# so this recipe is never the one used to build the objects listed in LIBS.
.c.o:
	$(CC) $(CFLAGS) -c -o $@ $<
kma: KMA.c
$(CC) $(CFLAGS) -o $@ $< -lm -lpthread -lz
all: $(PROGS)
kma_index: KMA_index.c
$(CC) $(CFLAGS) -o $@ $< -lm -lz
kma: main.c libkma.a
$(CC) $(CFLAGS) -o $@ main.c libkma.a -lm -lpthread -lz
kma_shm: KMA_SHM.c
$(CC) $(CFLAGS) -o $@ $<
kma_index: kma_index.c libkma.a
$(CC) $(CFLAGS) -o $@ kma_index.c libkma.a -lm -lz
kma_update: KMA_update.c
$(CC) $(CFLAGS) -o $@ $<
kma_shm: kma_shm.c libkma.a
$(CC) $(CFLAGS) -o $@ kma_shm.c libkma.a
kma_update: kma_update.c libkma.a
$(CC) $(CFLAGS) -o $@ kma_update.c libkma.a
libkma.a: $(LIBS)
$(AR) -csru $@ $(LIBS)
clean:
$(RM) $(BINS)
$(RM) $(LIBS) $(PROGS) libkma.a
align.o: align.h chain.h compdna.h hashmapindex.h nw.h stdnuc.h stdstat.h
alnfrags.o: alnfrags.h align.h ankers.h compdna.h hashmapindex.h qseqs.h threader.h updatescores.h
ankers.o: ankers.h compdna.h pherror.h qseqs.h
assembly.o: assembly.h align.h filebuff.h kmapipe.h pherror.h stdnuc.h stdstat.h threader.h
chain.o: chain.h penalties.h pherror.h stdstat.h
compdna.o: compdna.h pherror.h stdnuc.h
compkmers.o: compkmers.h pherror.h
compress.o: compress.h hashmap.h hashmapkma.h pherror.h valueshash.h
decon.o: decon.h compdna.h filebuff.h hashmapkma.h seqparse.h stdnuc.h qseqs.h updateindex.h
ef.o: ef.h assembly.h stdnuc.h vcf.h version.h
filebuff.o: filebuff.h pherror.h qseqs.h
frags.o: frags.h filebuff.h pherror.h qseqs.h
hashmap.o: hashmap.h hashtable.h pherror.h
hashmapindex.o: hashmapindex.h pherror.h stdnuc.h
hashmapkma.o: hashmapkma.h pherror.h
hashmapkmers.o: hashmapkmers.h pherror.h
hashtable.o: hashtable.h hashmapkma.h hashmapkmers.h pherror.h
index.o: index.h compress.h decon.h hashmap.h hashmapkma.h loadupdate.h makeindex.h pherror.h stdstat.h version.h
kma.o: kma.h ankers.h assembly.h chain.h hashmapkma.h kmers.h mt1.h penalties.h pherror.h qseqs.h runinput.h runkma.h savekmers.h sparse.h spltdb.h version.h
kmapipe.o: kmapipe.h pherror.h
kmers.o: kmers.h ankers.h compdna.h hashmapkma.h kmapipe.h pherror.h qseqs.h savekmers.h spltdb.h
kmmap.o: kmmap.h hashmapkma.h
loadupdate.o: loadupdate.h pherror.h hashmap.h hashmapkma.h updateindex.h
makeindex.o: makeindex.h compdna.h filebuff.h hashmap.h pherror.h qseqs.h seqparse.h updateindex.h
mt1.o: mt1.h assembly.h chain.h filebuff.h hashmapindex.h kmapipe.h nw.h penalties.h pherror.h printconsensus.h qseqs.h runkma.h stdstat.h vcf.h
nw.o: nw.h pherror.h stdnuc.h penalties.h
pherror.o: pherror.h
printconsensus.o: printconsensus.h assembly.h
qseqs.o: qseqs.h pherror.h
qualcheck.o: qualcheck.h compdna.h hashmap.h pherror.h stdnuc.h stdstat.h
runinput.o: runinput.h compdna.h filebuff.h pherror.h qseqs.h seqparse.h
runkma.o: runkma.h align.h alnfrags.h assembly.h chain.h compdna.h ef.h filebuff.h frags.h hashmapindex.h kmapipe.h nw.h pherror.h printconsensus.h qseqs.h stdnuc.h stdstat.h vcf.h
sam.o: sam.h nw.h pherror.h qseqs.h runkma.h
savekmers.o: savekmers.h ankers.h compdna.h hashmapkma.h penalties.h pherror.h qseqs.h stdnuc.h stdstat.h threader.h
seq2fasta.o: seq2fasta.h pherror.h qseqs.h runkma.h stdnuc.h
seqparse.o: seqparse.h filebuff.h qseqs.h
shm.o: shm.h pherror.h hashmapkma.h version.h
sparse.o: sparse.h compkmers.h hashtable.h kmapipe.h pherror.h runinput.h savekmers.h stdnuc.h stdstat.h
spltdb.o: spltdb.h align.h alnfrags.h assembly.h chain.h compdna.h ef.h filebuff.h frags.h hashmapindex.h kmapipe.h nw.h pherror.h printconsensus.h qseqs.h runkma.h stdnuc.h stdstat.h vcf.h
stdnuc.o: stdnuc.h
stdstat.o: stdstat.h
update.o: update.h hashmapkma.h pherror.h stdnuc.h
updateindex.o: updateindex.h compdna.h hashmap.h hashmapindex.h pherror.h qualcheck.h stdnuc.h pherror.h
updatescores.o: updatescores.h qseqs.h
valueshash.o: valueshash.h pherror.h
vcf.o: vcf.h assembly.h filebuff.h stdnuc.h stdstat.h version.h
......@@ -4,7 +4,7 @@
git clone https://bitbucket.org/genomicepidemiology/kma.git
cd kma && make
./kma_index -i templates.fsa.gz -o templates
./kma index -i templates.fsa.gz -o templates
./kma -i reads_se.fq.gz -o output/name -t_db templates
./kma -ipe reads_1.fq.gz reads_2.fq.gz -o output/name -t_db templates
```
......@@ -24,19 +24,18 @@ BMC Bioinformatics, 2018;19:307.
# Usage #
For practical reasons you might want to add the three programs (kma, kma_index and kma_shm)
to your path, this is usually done with:
For practical reasons you might want to add kma to your path; this is usually done with:
```
mv kma* ~/bin/
mv kma ~/bin/
```
## Indexing ##
In order to use KMA for mapping, the databases need to be indexed.
This is done with “kma_index”, the most important options are described below:
This is done with “kma index”, the most important options are described below:
```
-i Input fasta file(s), space separated. By default kma_index reads from stdin.
-i Input fasta file(s), space separated. By default kma index reads from stdin.
-o Output name, the name given to the database.
-k kmersize used for indexing the database.
-k_t kmersize used to identify template candidates when running KMA.
......@@ -46,7 +45,7 @@ This is done with “kma_index”, the most important options are described belo
Example of use:
```
kma_index -i templates.fsa.gz -o database/name
kma index -i templates.fsa.gz -o database/name
```
## Mapping ##
......@@ -117,7 +116,7 @@ the computer is restarted or computer breaks down.
Example of setting up a database in shared memory.
```
kma_shm –t_db templates –shmLvl 1
kma shm -t_db templates -shmLvl 1
```
“-shmLvl” specifies how much of the database there should be stored in shared memory, use.
......@@ -125,7 +124,7 @@ kma_shm –t_db templates –shmLvl 1
Example of taking it down again; always remember to do this when it is no longer needed:
```
kma_shm –t_db database/name –shmLvl 1 –destroy
kma shm -t_db database/name -shmLvl 1 -destroy
```
# Installation Requirements #
......
This diff is collapsed.
/* Philip T.L.C. Clausen Jan 2017 plan@dtu.dk */
/*
* Copyright (c) 2017, Philip Clausen, Technical University of Denmark
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define _XOPEN_SOURCE 600
#include "chain.h"
#include "compdna.h"
#include "hashmapindex.h"
#include "nw.h"
/* Alignment entry points. Only the declarations are visible here, so the notes
   below are limited to what the signatures themselves show. */

/* Takes a template index, a query of q_len bytes, two Aln buffers, seed points
   and NW matrices, and returns an AlnScore.
   NOTE(review): presumably the full alignment of the query against one
   template; meaning of min/max/mq/scoreT to be confirmed in the .c file. */
AlnScore KMA(const HashMap_index *template_index, const unsigned char *qseq, int q_len, Aln *aligned, Aln *Frag_align, int min, int max, int mq, double scoreT, AlnPoints *points, NWmat *matrices);
/* Same return type as KMA, but takes no Aln buffers and additionally receives
   the query in CompDNA form.
   NOTE(review): presumably a score-only variant; confirm. */
AlnScore KMA_score(const HashMap_index *template_index, const unsigned char *qseq, int q_len, const CompDNA *qseq_comp, int mq, double scoreT, AlnPoints *points, NWmat *matrices);
/* Takes the index and a mutable query and returns an int.
   NOTE(review): return value semantics not visible here. */
int preseed(const HashMap_index *template_index, unsigned char *qseq, int q_len);
/* NOTE(review): by name and signature, copies `size` ints from src to dest; confirm. */
void intcpy(int *dest, int *src, int size);
/* Takes the index, a mutable query and the seed-point container; returns an int.
   NOTE(review): "rc" presumably refers to the reverse complement; confirm. */
int anker_rc(const HashMap_index *template_index, unsigned char *qseq, int q_len, AlnPoints *points);
This diff is collapsed.
/* Philip T.L.C. Clausen Jan 2017 plan@dtu.dk */
/*
* Copyright (c) 2017, Philip Clausen, Technical University of Denmark
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define _XOPEN_SOURCE 600
#include <pthread.h>
#include <stdio.h>
#include "chain.h"
#include "compdna.h"
#include "filebuff.h"
#include "hashmapindex.h"
#include "qseqs.h"
#ifndef ALNTHREAD
typedef struct aln_thread Aln_thread;
/* Per-thread bundle of buffers and settings for fragment alignment.
   The field names mirror the parameters of the alnFrags* functions declared
   in this header. NOTE(review): presumably the argument handed to
   alnFrags_threaded; confirm in the implementation. */
struct aln_thread {
	pthread_t id;                          /* thread handle */
	int *matched_templates;
	int *bestTemplates;
	int *bestTemplates_r;
	int *best_start_pos;
	int *best_end_pos;
	int *template_lengths;
	long unsigned *alignment_scores;
	long unsigned *uniq_alignment_scores;
	long *index_indexes;
	long *seq_indexes;
	FILE *inputfile;
	FILE *frag_out_raw;
	FileBuff *frag_out_all;
	int index_in;
	int seq_in;
	int kmersize;
	int mq;
	int sam;
	double scoreT;
	CompDNA *qseq_comp;
	CompDNA *qseq_r_comp;
	Qseqs *qseq;
	Qseqs *qseq_r;
	Qseqs *header;
	Qseqs *header_r;
	AlnPoints *points;
	NWmat *NWmatrices;
	HashMap_index **templates_index;
	struct aln_thread *next;               /* link to another aln_thread */
};
/* Guard macro for the type definitions above. No trailing semicolon: a ';'
   here would become part of the macro's replacement text. */
#define ALNTHREAD 1
#endif
/* Pointer to the paired-end alignment routine in use. Its parameter list
   matches alnFragsUnionPE, alnFragsPenaltyPE and alnFragsForcePE declared
   below, so any of the three can be assigned to it. */
extern int (*alnFragsPE)(HashMap_index**, int*, int*, int, double, CompDNA*, CompDNA*, unsigned char*, unsigned char*, Qseqs*, Qseqs*, int, int*, int*, long unsigned*, long unsigned*, int*, int*, int*, int*, int*, int*, int, int, long*, long*, FILE*, AlnPoints*, NWmat*, volatile int*, volatile int*);
/* Single-read variant: one header, one bestTemplates list, one flag and one
   best_read_score, plus an rc_flag and q_len that the paired variants lack. */
int alnFragsSE(HashMap_index **templates_index, int *matched_templates, int *template_lengths, int mq, double scoreT, int rc_flag, CompDNA *qseq_comp, CompDNA *qseq_r_comp, unsigned char *qseq, unsigned char *qseq_r, int q_len, int kmersize, Qseqs *header, int *bestTemplates, long unsigned *alignment_scores, long unsigned *uniq_alignment_scores, int *best_start_pos, int *best_end_pos, int *flag, int *best_read_score, int seq_in, int index_in, long *seq_indexes, long *index_indexes, FILE *frag_out_raw, AlnPoints *points, NWmat *NWmatrices, volatile int *excludeOut, volatile int *excludeDB);
/* The three paired-end variants share one signature; each carries a second
   header, bestTemplates list, flag and best_read_score (the *_r arguments).
   NOTE(review): how Union/Penalty/Force differ is not visible from the
   declarations; see the implementation. */
int alnFragsUnionPE(HashMap_index **templates_index, int *matched_templates, int *template_lengths, int mq, double scoreT, CompDNA *qseq_comp, CompDNA *qseq_r_comp, unsigned char *qseq, unsigned char *qseq_r, Qseqs *header, Qseqs *header_r, int kmersize, int *bestTemplates, int *bestTemplates_r, long unsigned *alignment_scores, long unsigned *uniq_alignment_scores, int *best_start_pos, int *best_end_pos, int *flag, int *flag_r, int *best_read_score, int *best_read_score_r, int seq_in, int index_in, long *seq_indexes, long *index_indexes, FILE *frag_out_raw, AlnPoints *points, NWmat *NWmatrices, volatile int *excludeOut, volatile int *excludeDB);
int alnFragsPenaltyPE(HashMap_index **templates_index, int *matched_templates, int *template_lengths, int mq, double scoreT, CompDNA *qseq_comp, CompDNA *qseq_r_comp, unsigned char *qseq, unsigned char *qseq_r, Qseqs *header, Qseqs *header_r, int kmersize, int *bestTemplates, int *bestTemplates_r, long unsigned *alignment_scores, long unsigned *uniq_alignment_scores, int *best_start_pos, int *best_end_pos, int *flag, int *flag_r, int *best_read_score, int *best_read_score_r, int seq_in, int index_in, long *seq_indexes, long *index_indexes, FILE *frag_out_raw, AlnPoints *points, NWmat *NWmatrices, volatile int *excludeOut, volatile int *excludeDB);
int alnFragsForcePE(HashMap_index **templates_index, int *matched_templates, int *template_lengths, int mq, double scoreT, CompDNA *qseq_comp, CompDNA *qseq_r_comp, unsigned char *qseq, unsigned char *qseq_r, Qseqs *header, Qseqs *header_r, int kmersize, int *bestTemplates, int *bestTemplates_r, long unsigned *alignment_scores, long unsigned *uniq_alignment_scores, int *best_start_pos, int *best_end_pos, int *flag, int *flag_r, int *best_read_score, int *best_read_score_r, int seq_in, int index_in, long *seq_indexes, long *index_indexes, FILE *frag_out_raw, AlnPoints *points, NWmat *NWmatrices, volatile int *excludeOut, volatile int *excludeDB);
/* Has the void *(*)(void *) shape of a pthread start routine.
   NOTE(review): presumably expects an Aln_thread * as arg; confirm. */
void * alnFrags_threaded(void * arg);
/* Philip T.L.C. Clausen Jan 2017 plan@dtu.dk */
/*
* Copyright (c) 2017, Philip Clausen, Technical University of Denmark
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include "ankers.h"
#include "compdna.h"
#include "pherror.h"
#include "qseqs.h"
/* Output hooks. All three start out pointing at the writers in this file that
   do not filter contamination (print_ankers / printPair). */
/* Single-record writer; deConPrint, deConPrintPair and printPair call through it. */
int (*printPtr)(int*, CompDNA*, int, const Qseqs*, const int, FILE *out) = &print_ankers;
/* Writer for a pair of records. */
int (*printPairPtr)(int*, CompDNA*, int, const Qseqs*, CompDNA*, int, const Qseqs*, const int flag, const int flag_r, FILE *out) = &printPair;
/* NOTE(review): despite its name this defaults to print_ankers, not deConPrint;
   presumably it is re-pointed elsewhere when decontamination is enabled. Confirm. */
int (*deConPrintPtr)(int*, CompDNA*, int, const Qseqs*, const int flag, FILE *out) = &print_ankers;
int print_ankers(int *out_Tem, CompDNA *qseq, int rc_flag, const Qseqs *header, const int flag, FILE *out) {
	
	/* Serialise one read as a binary record on `out`:
	     7 ints      : seqlen, complen, N count, rc_flag, template count,
	                   header length, flag
	     complen     : long unsigned words of qseq->seq
	     N count     : ints taken from qseq->N + 1 (skipped when the count is 0)
	     templates   : ints taken from out_Tem + 1
	     header len  : bytes of header->seq
	   out_Tem[0] and qseq->N[0] hold the respective counts.
	   Always returns 0; writing goes through sfwrite. */
	const int nCount = *(qseq->N);
	const int templateCount = *out_Tem;
	int record[7];
	
	record[0] = qseq->seqlen;
	record[1] = qseq->complen;
	record[2] = nCount;
	record[3] = rc_flag;
	record[4] = templateCount;
	record[5] = header->len;
	record[6] = flag;
	
	sfwrite(record, sizeof(int), 7, out);
	sfwrite(qseq->seq, sizeof(long unsigned), qseq->complen, out);
	if(nCount) {
		sfwrite(qseq->N + 1, sizeof(int), nCount, out);
	}
	sfwrite(out_Tem + 1, sizeof(int), templateCount, out);
	sfwrite(header->seq, 1, header->len, out);
	
	return 0;
}
int print_ankers_Sparse(int *out_Tem, CompDNA *qseq, int rc_flag, const Qseqs *header, const int flag, FILE *out) {
	
	/* Same record layout as print_ankers, except that the fourth header int
	   is stored as -|rc_flag|, i.e. never positive. Always returns 0. */
	const int nCount = *(qseq->N);
	const int templateCount = *out_Tem;
	int record[7];
	
	record[0] = qseq->seqlen;
	record[1] = qseq->complen;
	record[2] = nCount;
	record[3] = -(abs(rc_flag));
	record[4] = templateCount;
	record[5] = header->len;
	record[6] = flag;
	
	sfwrite(record, sizeof(int), 7, out);
	sfwrite(qseq->seq, sizeof(long unsigned), qseq->complen, out);
	if(nCount) {
		sfwrite(qseq->N + 1, sizeof(int), nCount, out);
	}
	sfwrite(out_Tem + 1, sizeof(int), templateCount, out);
	sfwrite(header->seq, 1, header->len, out);
	
	return 0;
}
int find_contamination(int *out_Tem, const int contamination) {
	
	/* out_Tem[0] is the number of entries stored in out_Tem[1..].
	   Scan them from the last entry towards the first and return the
	   1-based position of the first entry equal to `contamination`,
	   or 0 when no entry matches (including an empty list). */
	int pos;
	
	for(pos = *out_Tem; pos != 0; --pos) {
		if(out_Tem[pos] == contamination) {
			return pos;
		}
	}
	
	return 0;
}
int find_contamination2(int *out_Tem, const int contamination) {
	
	/* Like find_contamination, but the backwards scan over out_Tem[1..n]
	   (n = out_Tem[0]) gives up as soon as it meets a strictly positive
	   entry that is not the one searched for. Returns the 1-based position
	   of the match, or 0 when the scan ends without one. */
	int pos;
	
	for(pos = *out_Tem; pos != 0; --pos) {
		const int entry = out_Tem[pos];
		if(entry == contamination) {
			return pos;
		}
		if(0 < entry) {
			break;
		}
	}
	
	return 0;
}
int deConPrint(int *out_Tem, CompDNA *qseq, int rc_flag, const Qseqs *header, const int flag, FILE *out) {
	
	/* Remove the contamination id from the template list, then write the
	   record through printPtr if any template is left.
	   The id is read from out_Tem[-3], i.e. three ints before the list.
	   It is looked up once as-is (find_contamination) and once negated
	   (find_contamination2). A hit is removed by overwriting it with the
	   last entry and shrinking the count in out_Tem[0].
	   Returns printPtr's result, or 1 when the list ended up empty. */
	const int contamination = out_Tem[-3];
	int hit;
	
	hit = find_contamination(out_Tem, contamination);
	if(hit != 0) {
		out_Tem[hit] = out_Tem[*out_Tem];
		--*out_Tem;
	}
	
	hit = find_contamination2(out_Tem, -contamination);
	if(hit != 0) {
		out_Tem[hit] = out_Tem[*out_Tem];
		--*out_Tem;
	}
	
	if(*out_Tem <= 0) {
		return 1;
	}
	
	return printPtr(out_Tem, qseq, rc_flag, header, flag, out);
}
int deConPrintPair(int *out_Tem, CompDNA *qseq, int bestScore, const Qseqs *header, CompDNA *qseq_r, int bestScore_r, const Qseqs *header_r, const int flag, const int flag_r, FILE *out) {
	
	/* Paired counterpart of deConPrint. The contamination id found at
	   out_Tem[-3] is removed from the list (plain and negated lookup).
	   If templates remain, two records are written through printPtr: the
	   first read with the count temporarily set to 0, then the second read
	   with the real count restored.
	   Returns the result of the second printPtr call, or 1 when nothing
	   was written. */
	const int contamination = out_Tem[-3];
	int hit, templateCount;
	
	hit = find_contamination(out_Tem, contamination);
	if(hit != 0) {
		out_Tem[hit] = out_Tem[*out_Tem];
		--*out_Tem;
	}
	
	hit = find_contamination2(out_Tem, -contamination);
	if(hit != 0) {
		out_Tem[hit] = out_Tem[*out_Tem];
		--*out_Tem;
	}
	
	templateCount = *out_Tem;
	if(templateCount <= 0) {
		return 1;
	}
	
	/* first mate carries no template list */
	*out_Tem = 0;
	printPtr(out_Tem, qseq, bestScore, header, flag, out);
	
	/* second mate carries the whole list */
	*out_Tem = templateCount;
	return printPtr(out_Tem, qseq_r, bestScore_r, header_r, flag_r, out);
}
int printPair(int *out_Tem, CompDNA *qseq, int bestScore, const Qseqs *header, CompDNA *qseq_r, int bestScore_r, const Qseqs *header_r, const int flag, const int flag_r, FILE *out) {
	
	/* Write both reads of a pair through printPtr. The first record is
	   written with a template count of 0, the second with the real count,
	   so the list is stored once. The printPtr results are not inspected;
	   always returns 0. */
	const int templateCount = *out_Tem;
	
	*out_Tem = 0;
	printPtr(out_Tem, qseq, bestScore, header, flag, out);
	
	*out_Tem = templateCount;
	printPtr(out_Tem, qseq_r, bestScore_r, header_r, flag_r, out);
	
	return 0;
}
int get_ankers(int *out_Tem, CompDNA *qseq, Qseqs *header, int *flag, FILE *inputfile) {
	
	/* Read back one record in the layout written by print_ankers.
	   Fills qseq (seqlen, complen, seq, N), out_Tem (count in [0], entries
	   after it), header (len, seq) and *flag, growing the qseq and header
	   buffers when they are too small.
	   Returns the fourth header int of the record. When a full 7-int header
	   cannot be read, stores infoSize[0] in *out_Tem and returns 0.
	   NOTE(review): infoSize is static, so the call is not reentrant, and on
	   a short read infoSize[0] may still hold data from an earlier call.
	   NOTE(review): the results of the four payload fread calls are not
	   checked, so a truncated record goes unnoticed. */
	static int infoSize[7];
	
	if(fread(infoSize, sizeof(int), 7, inputfile) != 7) {
		*out_Tem = infoSize[0];
		return 0;
	}
	
	qseq->seqlen = infoSize[0];
	qseq->complen = infoSize[1];
	*out_Tem = infoSize[4];
	header->len = infoSize[5];
	*flag = infoSize[6];
	
	/* grow the sequence buffers if this read does not fit */
	if(qseq->size <= qseq->seqlen) {
		free(qseq->N);
		free(qseq->seq);
		qseq->size = (qseq->seqlen & 31)
			? (((qseq->seqlen >> 5) + 1) << 6)
			: (qseq->seqlen << 1);
		qseq->seq = calloc(qseq->size >> 5, sizeof(long unsigned));
		qseq->N = malloc((qseq->size + 1) * sizeof(int));
		if(!(qseq->seq && qseq->N)) {
			ERROR();
		}
	}
	qseq->N[0] = infoSize[2];
	
	/* grow the header buffer to twice the needed length */
	if(header->size <= header->len) {
		free(header->seq);
		header->size = header->len << 1;
		header->seq = malloc(header->size);
		if(!header->seq) {
			ERROR();
		}
	}
	
	/* payload, in the order it was written */
	fread(qseq->seq, sizeof(long unsigned), qseq->complen, inputfile);
	fread(qseq->N + 1, sizeof(int), qseq->N[0], inputfile);
	fread(out_Tem + 1, sizeof(int), *out_Tem, inputfile);
	fread(header->seq, 1, header->len, inputfile);
	
	/* return score */
	return infoSize[3];
}
/* Philip T.L.C. Clausen Jan 2017 plan@dtu.dk */
/*
* Copyright (c) 2017, Philip Clausen, Technical University of Denmark
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include "compdna.h"
#include "qseqs.h"
/* Output hooks, defined in the matching .c file. */
extern int (*printPtr)(int*, CompDNA*, int, const Qseqs*, const int, FILE *out);
extern int (*printPairPtr)(int*, CompDNA*, int, const Qseqs*, CompDNA*, int, const Qseqs*, const int flag, const int flag_r, FILE *out);
extern int (*deConPrintPtr)(int*, CompDNA*, int, const Qseqs*, const int flag, FILE *out);
/* Single-record writers; both have the signature of printPtr. */
int print_ankers(int *out_Tem, CompDNA *qseq, int rc_flag, const Qseqs *header, const int flag, FILE *out);
int print_ankers_Sparse(int *out_Tem, CompDNA *qseq, int rc_flag, const Qseqs *header, const int flag, FILE *out);
/* Look up `contamination` in a template list whose count is out_Tem[0]. */
int find_contamination(int *out_Tem, const int contamination);
int find_contamination2(int *out_Tem, const int contamination);
/* Writers that take a template list; deConPrint has the signature of
   deConPrintPtr, deConPrintPair and printPair that of printPairPtr. */
int deConPrint(int *out_Tem, CompDNA *qseq, int rc_flag, const Qseqs *header, const int flag, FILE *out);
int deConPrintPair(int *out_Tem, CompDNA *qseq, int bestScore, const Qseqs *header, CompDNA *qseq_r, int bestScore_r, const Qseqs *header_r, const int flag, const int flag_r, FILE *out);
int printPair(int *out_Tem, CompDNA *qseq, int bestScore, const Qseqs *header, CompDNA *qseq_r, int bestScore_r, const Qseqs *header_r, const int flag, const int flag_r, FILE *out);
/* Reader taking the output buffers and an input stream. */
int get_ankers(int *out_Tem, CompDNA *qseq, Qseqs *header, int *flag, FILE *inputfile);
This diff is collapsed.
/* Philip T.L.C. Clausen Jan 2017 plan@dtu.dk */
/*
* Copyright (c) 2017, Philip Clausen, Technical University of Denmark
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define _XOPEN_SOURCE 600
#include <pthread.h>
#include <stdio.h>
#include "chain.h"
#include "filebuff.h"
#include "hashmapindex.h"
#include "nw.h"
#include "qseqs.h"
/* Type definitions for the assembly step, guarded so that they are only
   defined once even if this header is included repeatedly. */
#ifndef ASSEMBLY
typedef struct assem Assem;
typedef struct assembly Assembly;
typedef struct assemInfo AssemInfo;
typedef struct assemble_thread Assemble_thread;
/* Three parallel strings (template, score, query) plus summary counters.
   NOTE(review): presumably the result of assembling one template; confirm. */
struct assem {
unsigned char *t; /* template */
char *s; /* score */
unsigned char *q; /* query */
long unsigned depth;
long unsigned depthVar;
long unsigned score;
unsigned cover;
unsigned len;
unsigned aln_len;
unsigned size;
};
/* Six counters and an index to a following element.
   NOTE(review): presumably one alignment column with per-symbol counts; confirm. */
struct assembly {
short unsigned counts[6];
unsigned next;
};
/* Array of struct assembly together with its used length and capacity. */
struct assemInfo {
int len;
int size;
struct assembly *assmb;
};
/* Per-thread bundle of settings and buffers.
   NOTE(review): presumably the argument of the assemble_KMA*_threaded
   routines declared in this header; confirm. */
struct assemble_thread {
pthread_t id;
int num;
int template;
int file_count;
int spin;
int mq;
int bcd;
int sam;
int thread_num;
double scoreT;
double evalue;
char *template_name;
FILE **files;
FileBuff *frag_out;
Assem *aligned_assem;
Aln *aligned, *gap_align;
Qseqs *qseq, *header;
AssemInfo *matrix;
AlnPoints *points;
NWmat *NWmatrices;
HashMap_index *template_index;
Assemble_thread *next; /* link to another Assemble_thread */
};
#define ASSEMBLY 1
#endif
/* Selectable implementations, held in function pointers. */
/* Has the signature of assemble_KMA_threaded / assemble_KMA_dense_threaded. */
extern void * (*assembly_KMA_Ptr)(void *);
/* Has the signature of the significant* functions below. */
extern int (*significantBase)(int, int, double);
/* Has the signature of the *Caller functions below. */
extern unsigned char (*baseCall)(unsigned char, unsigned char, int, int, double, Assembly*);
void updateMatrix(FileBuff *dest, char *template_name, long unsigned *template_seq, AssemInfo *matrix, int t_len);
/* Candidates for significantBase: two ints and an evalue in, int out.
   NOTE(review): the decision rule of each is not visible from the declarations. */
int significantNuc(int X, int Y, double evalue);
int significantAnd90Nuc(int X, int Y, double evalue);
int significantAndSupport(int X, int Y, double evalue);
/* Candidates for baseCall; all five share one signature.
   NOTE(review): how the callers differ is not visible from the declarations. */
unsigned char baseCaller(unsigned char bestNuc, unsigned char tNuc, int bestScore, int depthUpdate, double evalue, Assembly *calls);
unsigned char orgBaseCaller(unsigned char bestNuc, unsigned char tNuc, int bestScore, int depthUpdate, double evalue, Assembly *calls);
unsigned char refCaller(unsigned char bestNuc, unsigned char tNuc, int bestScore, int depthUpdate, double evalue, Assembly *calls);
unsigned char nanoCaller(unsigned char bestNuc, unsigned char tNuc, int bestScore, int depthUpdate, double evalue, Assembly *calls);
unsigned char refNanoCaller(unsigned char bestNuc, unsigned char tNuc, int bestScore, int depthUpdate, double evalue, Assembly *calls);
/* Candidates for assembly_KMA_Ptr; pthread start-routine shape. */
void * assemble_KMA_threaded(void *arg);
void * assemble_KMA_dense_threaded(void *arg);
void skip_assemble_KMA(int template, int sam, int t_len, char *template_name, int file_count, FILE **files, Assem *aligned_assem, Qseqs *qseq, Qseqs *header);
This diff is collapsed.
/* Philip T.L.C. Clausen Jan 2017 plan@dtu.dk */
/*
* Copyright (c) 2017, Philip Clausen, Technical University of Denmark
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "penalties.h"
/* Seed-point container, guarded so that it is only defined once. */
#ifndef CHAIN
typedef struct alnPoints AlnPoints;
/* Seven parallel int arrays describing seeds, with a capacity (size), a used
   length (len) and a pointer to the scoring scheme.
   NOTE(review): presumably entry i of each array describes seed i, with
   tStart/tEnd on the template and qStart/qEnd on the query; confirm. */
struct alnPoints {
int size;
int len;
int *tStart;
int *tEnd;
int *qStart;
int *qEnd;
int *weight;
int *score;
int *next;
Penalties *rewards;
};
#define CHAIN 1
#endif
/* pointer to chaining method; has the signature of chainSeeds and
   chainSeeds_circular declared below */
extern int (*chainSeedsPtr)(AlnPoints *, int, int, int, unsigned *);
/* FUNCTIONS */
/* Lifecycle of an AlnPoints container: create, resize, release. */
AlnPoints * seedPoint_init(int size, Penalties *rewards);
void seedPoint_realloc(AlnPoints *dest, int size);
void seedPoint_free(AlnPoints *src);
/* Chaining methods; both take the seed points, query and template lengths,
   the k-mer size and an output mapQ.
   NOTE(review): meaning of the int return value is not visible here. */
int chainSeeds(AlnPoints *points, int q_len, int t_len, int kmersize, unsigned *mapQ);
int chainSeeds_circular(AlnPoints *points, int q_len, int t_len, int kmersize, unsigned *mapQ);
This diff is collapsed.
/* Philip T.L.C. Clausen Jan 2017 plan@dtu.dk */
/*
* Copyright (c) 2017, Philip Clausen, Technical University of Denmark
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include "qseqs.h"
/* Compressed sequence container, guarded so that it is only defined once. */
#ifndef COMPDNA
typedef struct compDNA CompDNA;
struct compDNA {
int seqlen; /* sequence length. NOTE(review): presumably in bases; confirm */
int size; /* capacity. NOTE(review): presumably in the same unit as seqlen; confirm */
int complen; /* number of long unsigned words used in seq */
long unsigned *seq; /* packed sequence words */
int *N; /* N[0] is the number of entries stored in N[1..] */
};
#define COMPDNA 1
#endif
/* Lifecycle of a CompDNA: allocate, release, reset. */
void allocComp(CompDNA *compressor, int size);
void freeComp(CompDNA *compressor);
void resetComp(CompDNA *compressor);
/* Conversions between a byte sequence and its CompDNA form.
   NOTE(review): direction inferred from the names (comp / unComp); confirm. */
void compDNA(CompDNA *compressor, unsigned char *seq, int seqlen);
int compDNAref(CompDNA *compressor, unsigned char *qseq, int seqlen);
void unCompDNA(CompDNA *compressor, unsigned char *seq);
void qseqCompDNA(CompDNA *compressor, Qseqs *qseq);
/* NOTE(review): "rc"/"Rev" presumably refer to the reverse complement; confirm. */
long unsigned binRev(long unsigned mer);
void rc_comp(CompDNA *compressor, CompDNA *compressor_rc);
void comp_rc(CompDNA *compressor);
/* File I/O on a CompDNA; the int results of loadComp/getComp are not
   documented by the declarations. */
void dumpComp(CompDNA *compressor, FILE* file);
int loadComp(CompDNA *compressor, FILE* file);
int getComp(CompDNA *compressor, FILE* file);
/* Philip T.L.C. Clausen Jan 2017 plan@dtu.dk */
/*
* Copyright (c) 2017, Philip Clausen, Technical University of Denmark
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdlib.h>
#include "compkmers.h"
#include "pherror.h"
void allocCompKmers(CompKmers *compressor, int size) {
	
	/* Set up an empty container with room for `size` k-mers.
	   Storage comes from smalloc. */
	compressor->kmers = smalloc(size * sizeof(long unsigned));
	compressor->size = size;
	compressor->n = 0;
}
void reallocCompKmers(CompKmers *compressor, int size) {
	
	/* Resize the k-mer array to `size` entries and record the new capacity.
	   ERROR() is invoked when realloc fails. */
	long unsigned *resized;
	
	resized = realloc(compressor->kmers, size * sizeof(long unsigned));
	compressor->kmers = resized;
	if(resized == NULL) {
		ERROR();
	}
	compressor->size = size;
}
int pushCompKmers(CompKmers *compressor, char *qseq, int kmersize) {
	
	/* Pack the first `kmersize` codes of qseq, two bits each, into one key
	   and store it at kmers[n].
	   Returns 0 without storing anything if any of the codes is 4,
	   otherwise 1. compressor->n itself is not changed here.
	   NOTE(review): presumably the caller advances n by the return value;
	   confirm at the call site. */
	long unsigned packed = 0;
	int pos;
	
	for(pos = 0; pos < kmersize; ++pos) {
		const char code = qseq[pos];
		if(code == 4) {
			return 0;
		}
		packed = (packed << 2) | code;
	}
	
	compressor->kmers[compressor->n] = packed;
	
	return 1;
}
/* Philip T.L.C. Clausen Jan 2017 plan@dtu.dk */
/*
* Copyright (c) 2017, Philip Clausen, Technical University of Denmark
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* K-mer list container, guarded so that it is only defined once. */
#ifndef COMPKMERS
typedef struct compKmers CompKmers;
struct compKmers {
long unsigned n; /* index of the slot the next k-mer is stored in */
long unsigned size; /* number of entries kmers has room for */
long unsigned *kmers; /* k-mer keys, one long unsigned each */
};
#define COMPKMERS 1
#endif
/* Allocate room for `size` k-mers and start with an empty list. */
void allocCompKmers(CompKmers *compressor, int size);
/* Change the capacity to `size` k-mers. */
void reallocCompKmers(CompKmers *compressor, int size);
/* Pack `kmersize` codes from qseq into one key stored at kmers[n];
   returns 1 if stored, 0 if a code equal to 4 was encountered. */
int pushCompKmers(CompKmers *compressor, char *qseq, int kmersize);
\ No newline at end of file