Skip to content
Commits on Source (2)
*.o
kmerresistance
......@@ -16,18 +16,21 @@
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <stdio.h>
#define _XOPEN_SOURCE 600
#include <ctype.h>
#include <errno.h>
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <ctype.h>
#include <errno.h>
#include "pherror.h"
#include "qseqs.h"
#define VERSION "2.2.0"
int strpos_last(const char* str1, const char* str2) {
char* strp;
char *strp;
int i, len1, len2;
len1 = strlen(str1);
......@@ -36,48 +39,58 @@ int strpos_last(const char* str1, const char* str2) {
return -1;
}
strp = (char*)(str1 + len1 - len2);
strp = (char*)(str1 + len1 - len2 + 1);
for(i = len1 - len2; i >= 0; i--) {
if(*strp == *str2) {
if(strncmp(strp,str2,len2)==0)
return i;
if(*--strp == *str2 && strncmp(strp,str2,len2) == 0) {
return i;
}
strp--;
}
return -1;
}
long unsigned fget_to(char *line, long unsigned line_size, FILE *file, char stopChar) {
int c, stop;
long unsigned i;
int fget2(Qseqs *line, FILE *file, char stopChar) {
stop = 0;
i = 0;
while(!stop) {
while(i < line_size) {
if((c = fgetc(file)) != EOF) {
line[i] = c;
} else {
line[i] = '\0';
return line_size;
}
if(c != stopChar) {
i++;
int c, len, size;
unsigned char *ptr;
len = 0;
size = line->size;
ptr = line->seq;
while((c = fgetc(file)) != EOF) {
if(++len == size) {
if(!(line->seq = realloc(line->seq, (size <<= 1)))) {
ERROR();
} else {
break;
ptr = line->seq + len;
line->size = size;
}
}
if(line[i] != stopChar) {
line_size *= 2;
line = realloc(line, line_size);
} else {
stop = 1;
if((*ptr++ = c) == stopChar) {
*--ptr = 0;
line->len = --len;
return 1;
}
}
line[i] = '\0';
return line_size;
*ptr = 0;
line->len = len;
return 0;
}
char * stripSpace(const unsigned char *src) {
char *ptr;
ptr = (char *) src;
while(*ptr != 0 && isspace(*ptr)) {
++ptr;
}
return ptr;
}
#define skipLine(file, c, stopChar) while((c = fgetc(file)) != EOF && c != stopChar)
void helpMessage(int exeStatus) {
FILE *helpOut;
if(exeStatus == 0) {
......@@ -93,35 +106,10 @@ void helpMessage(int exeStatus) {
fprintf(helpOut, "#\t-t_db\t\tTemplate DB\t\t\t\t\t\tREQUIRED\n");
fprintf(helpOut, "#\t-s_db\t\tSpecies DB\t\t\t\t\t\tREQUIRED\n");
fprintf(helpOut, "#\t-id\t\tID threshhold\t\t\t70.0\n");
fprintf(helpOut, "#\t-hid\t\tHard ID threshhold\t\t\t1.0\n");
fprintf(helpOut, "#\t-dct\t\tDepth correction threshhold\t0.1\n");
fprintf(helpOut, "#\t-kma\t\talternative KMA\t\tkma\n");
fprintf(helpOut, "#\n# KMA options:\t\tDesc:\t\t\t\tDefault:\t\tRequirements:\n");
fprintf(helpOut, "#\t-i\t\tInput/query file name\t\tNone\t\tREQUIRED\n");
fprintf(helpOut, "#\t-o\t\tOutput file\t\t\tNone\t\tREQUIRED\n");
fprintf(helpOut, "#\t-t_db\t\tTemplate DB\t\t\tNone\t\tREQUIRED\n");
fprintf(helpOut, "#\t-k\t\tKmersize\t\t\t16\n");
fprintf(helpOut, "#\t-e\t\tevalue\t\t\t\t0.05\n");
fprintf(helpOut, "#\t-delta\t\tAlign in pieces of delta\t511\n");
fprintf(helpOut, "#\t-mem_mode\tUse kmers to choose best\n#\t\t\ttemplate, and save memory\tFalse\n");
fprintf(helpOut, "#\t-ex_mode\tSearh kmers exhaustively\tFalse\n");
fprintf(helpOut, "#\t-deCon\t\tRemove contamination\t\tFalse\n");
fprintf(helpOut, "#\t-dense\t\tDo not allow insertions\n#\t\t\tin assembly\t\t\tFalse\n");
fprintf(helpOut, "#\t-ref_fsa\tConsensus sequnce will\n#\t\t\thave \"n\" instead of gaps\tFalse\n");
fprintf(helpOut, "#\t-matrix\t\tPrint assembly matrix\t\tFalse\n");
fprintf(helpOut, "#\t-mp\t\tMinimum phred score\t\t30\n");
fprintf(helpOut, "#\t-5p\t\tCut a constant number of\n#\t\t\tnucleotides from the 5 prime.\t0\n");
fprintf(helpOut, "#\t-Sparse\t\tRun KmerFinder\t\t\tFalse\n");
fprintf(helpOut, "#\t-ID\t\tMinimum ID\t\t\t1.0%%\n");
fprintf(helpOut, "#\t-ss\t\tSparse sorting (q,c,d)\t\tq\n");
fprintf(helpOut, "#\t-shm\t\tUse shared DB made by kma_shm\t0 (lvl)\n");
fprintf(helpOut, "#\t-swap\t\tSwap DB to disk\t\t\t0 (lvl)\n");
fprintf(helpOut, "#\t-1t1\t\tSkip HMM\t\t\tFalse\n");
fprintf(helpOut, "#\t-boot\t\tBootstrap sequence\t\tFalse\n");
fprintf(helpOut, "#\t-mrs\t\tMinimum alignment score,\n#\t\t\tnormalized to alignment length\t0.5\n");
fprintf(helpOut, "#\t-reward\t\tScore for match\t\t\t1\n");
fprintf(helpOut, "#\t-penalty\tPenalty for mismatch\t\t-2\n");
fprintf(helpOut, "#\t-gapopen\tPenalty for gap opening\t\t-3\n");
fprintf(helpOut, "#\t-gapextend\tPenalty for gap extension\t-1\n");
fprintf(helpOut, "#\t-v\t\tVersion\n");
fprintf(helpOut, "#\t-h\t\tShows this help message\n");
fprintf(helpOut, "#\n");
exit(exeStatus);
......@@ -129,22 +117,22 @@ void helpMessage(int exeStatus) {
int main(int argc, char *argv[]) {
int i, args, KMA_call_len, pos, line_size, out_len, HMM, stop;
char *outputfilename, *t_db, *s_db, *KMA_dest, *KMA_call, *line, *q_id;
char *template_name, *score, *expected, *t_len, *q, *p, *t_cov, *q_cov;
double depth_corr_t, ID_t, ID, depth, depth_corr, expDepth;
int args, KMA_call_len, out_len, stop;
char *outputfilename, *t_db, *s_db, *KMA_dest, *KMA_call, *exeBasic;
double depth_corr_t, ID_t, HID_t, ID, depth, depth_corr, expDepth;
FILE *output, *input;
Qseqs *line, *template_name, *score, *expected, *t_len, *q, *p, *t_cov;
Qseqs *q_cov, *q_id;
/* set defaults */
depth_corr_t = 0.1;
ID_t = 70.0;
HID_t = 1.0;
outputfilename = 0;
t_db = 0;
s_db = 0;
KMA_dest = strdup("kma");
if(!KMA_dest) {
fprintf(stderr, "OOM\n");
exit(-1);
}
KMA_dest = smalloc(4);
strcpy(KMA_dest, "kma");
KMA_call_len = 11;
/* parse cmd-line */
......@@ -155,7 +143,10 @@ int main(int argc, char *argv[]) {
if(args < argc) {
KMA_call_len += (strlen(argv[args]) + 4);
out_len = strlen(argv[args]);
outputfilename = (char*) calloc(out_len + strlen(".KmerRes") + 1, sizeof(char));
outputfilename = (char*) calloc(out_len + 16, sizeof(char));
if(!outputfilename) {
ERROR();
}
strcpy(outputfilename, argv[args]);
strcat(outputfilename, ".KmerRes");
}
......@@ -163,6 +154,9 @@ int main(int argc, char *argv[]) {
args++;
if(args < argc) {
t_db = (char*) calloc(strlen(argv[args]) + 1, sizeof(char));
if(!t_db) {
ERROR();
}
strcpy(t_db, argv[args]);
KMA_call_len += (strlen(argv[args]) + 1);
}
......@@ -170,26 +164,47 @@ int main(int argc, char *argv[]) {
args++;
if(args < argc) {
s_db = (char*) calloc(strlen(argv[args]) + 1, sizeof(char));
if(!s_db) {
ERROR();
}
strcpy(s_db, argv[args]);
KMA_call_len += (strlen(argv[args]) + 1);
}
} else if(strcmp(argv[args], "-id") == 0) {
args++;
if(args < argc) {
ID_t = atof(argv[args]);
ID_t = strtod(argv[args], &exeBasic);
if(*exeBasic != 0) {
fprintf(stderr, "%s\n", exeBasic);
exit(1);
} else if(ID_t < 0) {
fprintf(stderr, "Invalid threshold\n");
exit(1);
}
}
if(ID_t < 0) {
fprintf(stderr, "Invalid threshold\n");
exit(1);
} else if(strcmp(argv[args], "-hid") == 0) {
args++;
if(args < argc) {
HID_t = strtod(argv[args], &exeBasic);
if(*exeBasic != 0) {
fprintf(stderr, "%s\n", exeBasic);
exit(1);
} else if(HID_t < 0) {
fprintf(stderr, "Invalid threshold\n");
exit(1);
}
}
} else if(strcmp(argv[args], "-dct") == 0) {
args++;
if(args < argc) {
depth_corr_t = atof(argv[args]);
}
if(depth_corr_t < 0) {
fprintf(stderr, "Invalid threshold\n");
exit(1);
depth_corr_t = strtod(argv[args], &exeBasic);
if(*exeBasic != 0) {
fprintf(stderr, "%s\n", exeBasic);
exit(1);
} else if(depth_corr_t < 0) {
fprintf(stderr, "Invalid threshold\n");
exit(1);
}
}
} else if(strcmp(argv[args], "-kma") == 0) {
args++;
......@@ -197,24 +212,24 @@ int main(int argc, char *argv[]) {
free(KMA_dest);
KMA_dest = strdup(argv[args]);
if(!KMA_dest) {
fprintf(stderr, "OOM\n");
exit(-1);
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
exit(errno);
}
KMA_call_len += strlen(argv[args]);
}
} else if(strcmp(argv[args], "-v") == 0) {
fprintf(stderr, "KmerResistance-%s\n", VERSION);
exit(0);
} else if(strcmp(argv[args], "-h") == 0) {
helpMessage(0);
} else {
KMA_call_len += strlen(argv[args]) + 1;
if(HMM == 0 && strcmp(argv[args], "-i") == 0) {
if(strcmp(argv[args], "-i") == 0) {
stop = 0;
args++;
while(stop == 0 && args < argc) {
if(strncmp(argv[args], "-", 1) != 0) {
if(*(argv[args]) != '-') {
KMA_call_len += strlen(argv[args]) + 1;
if(strpos_last(argv[args], "fsa") != -1 || strpos_last(argv[args], "fna") != -1 || strpos_last(argv[args], "fasta") != -1) {
HMM = 1;
}
args++;
} else {
stop = 1;
......@@ -231,18 +246,12 @@ int main(int argc, char *argv[]) {
helpMessage(1);
}
line_size = 4096 * sizeof(char);
line = malloc(line_size);
/* prepare KMA alignment */
KMA_call_len += 5;
pos = strpos_last(argv[0], "/") + 1;
KMA_call = calloc(KMA_call_len + pos + 2, sizeof(char));
KMA_call_len += 8;
KMA_call = smalloc(KMA_call_len + strpos_last(*argv, "/") + 4);
/* add options to KMA */
*KMA_call = 0;
strcat(KMA_call, KMA_dest);
strcat(KMA_call, " ");
KMA_call_len = sprintf(KMA_call, "%s ", KMA_dest);
args = 1;
while(args < argc) {
if(strcmp(argv[args], "-t_db") == 0) {
......@@ -255,137 +264,126 @@ int main(int argc, char *argv[]) {
args++;
} else if(strcmp(argv[args], "-kma") == 0) {
args++;
} else if(strcmp(argv[args], "-Sparse") == 0) {
args++;
} else if(strcmp(argv[args], "-h") != 0) {
strcat(KMA_call, argv[args]);
strcat(KMA_call, " ");
KMA_call_len += sprintf(KMA_call + KMA_call_len, "%s ", argv[args]);
}
args++;
}
strcat(KMA_call, "-t_db ");
KMA_call_len += sprintf(KMA_call + KMA_call_len, "-t_db ");
/* mapp genes */
fprintf(stderr, "# Mapping gene data.\n");
KMA_call_len = strlen(KMA_call);
/* map genes */
fprintf(stderr, "# Aligning gene data.\n");
sprintf(KMA_call, "%s%s", KMA_call, t_db);
//fprintf(stderr, "%s\n", KMA_call);
input = popen(KMA_call, "r");
if(!input) {
fprintf(stderr, "Invalid KMA specified\n");
exit(-1);
}
if(pclose(input) != 0) {
fprintf(stderr, "# Gene mapping failed\n");
if(system(KMA_call) != 0) {
fprintf(stderr, "# Gene alignment failed\n");
exit(1);
}
KMA_call[KMA_call_len] = '\0';
KMA_call[KMA_call_len] = 0;
/* mapp species */
/* map species */
fprintf(stderr, "# Finding species\n");
sprintf(KMA_call, "%s%s -Sparse\n", KMA_call, s_db);
KMA_call[strpos_last(KMA_call, "\n")] = '\0';
input = popen(KMA_call, "r");
if(!input) {
fprintf(stderr, "Invalid KMA specified\n");
exit(-1);
}
if(pclose(input) != 0) {
sprintf(KMA_call, "%s%s -Sparse", KMA_call, s_db);
if(system(KMA_call) != 0) {
fprintf(stderr, "# Species mapping failed\n");
exit(1);
}
KMA_call[KMA_call_len] = '\0';
KMA_call[KMA_call_len] = 0;
/* correlate species */
fprintf(stderr, "# Mapping done\n");
fprintf(stderr, "# Correlating results\n");
template_name = malloc(256 * sizeof(char));
score = malloc(256 * sizeof(char));
expected = malloc(256 * sizeof(char));
t_len = malloc(256 * sizeof(char));
q = malloc(256 * sizeof(char));
p = malloc(256 * sizeof(char));
t_cov = malloc(256 * sizeof(char));
q_id = malloc(256 * sizeof(char));
q_cov = malloc(256 * sizeof(char));
output = fopen(outputfilename, "w");
if(!output) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
exit(-1);
}
line = setQseqs(256);
template_name = setQseqs(256);
score = setQseqs(32);
expected = setQseqs(32);
t_len = setQseqs(32);
q = setQseqs(32);
p = setQseqs(32);
t_cov = setQseqs(32);
q_id = setQseqs(32);
q_cov = setQseqs(32);
output = sfopen(outputfilename, "wb");
fprintf(output, "#Template\tScore\tExpected\ttemplate length\tq_value\tp_value\ttemplate_id\ttemplate_coverage\tquery_id\tquery_coverage\tdepth\tdepth_corr\n");
strncpy((outputfilename + out_len), ".spa", 4);
outputfilename[out_len + 4] = '\0';
input = fopen(outputfilename, "r");
if(!output) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
exit(-1);
}
strcpy((outputfilename + out_len), ".spa");
input = sfopen(outputfilename, "rb");
/* retrieve species information */
fgets(line, line_size, input);
fget_to(template_name, 256, input, '\t');
line_size = fget_to(line, line_size, input, '\t');
fget_to(score, 256, input, '\t');
fget_to(expected, 256, input, '\t');
fget_to(t_len, 256, input, '\t');
fget_to(q_cov, 256, input, '\t');
fget_to(t_cov, 256, input, '\t');
ID = atof(t_cov);
line_size = fget_to(line, line_size, input, '\t');
expDepth = atof(line);
line_size = fget_to(line, line_size, input, '\t');
line_size = fget_to(line, line_size, input, '\t');
line_size = fget_to(line, line_size, input, '\t');
fget_to(q, 256, input, '\t');
fget_to(p, 256, input, '\n');
ID_t *= (1 - pow(2.718281828, (-0.5) * expDepth));
fprintf(output, "%s\t%s\t%s\t%s\t%s\t%s\t%3.2f\t%s\t%s\t%s\t%5.2f\t%1.4f\n", template_name, score, expected, t_len, q, p, ID, t_cov, q_cov, q_cov, expDepth, (1 - pow(2.718281828, -1)));
skipLine(input, args, '\n');
fget2(template_name, input, '\t');
if(template_name->len) {
skipLine(input, args, '\t');
fget2(score, input, '\t');
fget2(expected, input, '\t');
fget2(t_len, input, '\t');
fget2(q_cov, input, '\t');
fget2(t_cov, input, '\t');
ID = strtod(stripSpace(t_cov->seq), &exeBasic);
if(*exeBasic != 0) {
fprintf(stderr, "%s\n", exeBasic);
exit(1);
}
fget2(line, input, '\t');
expDepth = strtod(stripSpace(line->seq), &exeBasic);
if(*exeBasic != 0) {
fprintf(stderr, "%s\n", exeBasic);
exit(1);
}
skipLine(input, args, '\t');
skipLine(input, args, '\t');
skipLine(input, args, '\t');
fget2(q, input, '\t');
fget2(p, input, '\n');
ID_t *= (1 - pow(2.718281828, (-0.5) * expDepth));
ID_t = ID_t < HID_t ? HID_t : ID_t;
fprintf(output, "%s\t%s\t%s\t%s\t%s\t%s\t%3.2f\t%s\t%s\t%s\t%5.2f\t%1.4f\n", template_name->seq, score->seq, expected->seq, t_len->seq, q->seq, p->seq, ID, t_cov->seq, q_cov->seq, q_cov->seq, expDepth, (1 - pow(2.718281828, -1)));
} else {
fprintf(stderr, "Could not determine host.\n");
expDepth = 1.0e-6;
ID_t = HID_t;
}
fclose(input);
fprintf(stderr, "# Adjusted ID threshold: %f\n", ID_t);
fprintf(stderr, "# Adjusted ID threshold: %.2f\n", ID_t);
/* retrieve template information */
strncpy((outputfilename + out_len), ".res", 4);
outputfilename[out_len + 4] = '\0';
input = fopen(outputfilename, "r");
if(!input) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
exit(-1);
}
fgets(line, line_size, input);
while(! feof(input)) {
fget_to(template_name, 256, input, '\t');
fget_to(score, 256, input, '\t');
fget_to(expected, 256, input, '\t');
fget_to(t_len, 256, input, '\t');
line_size = fget_to(line, line_size, input, '\t');
ID = atof(line);
fget_to(t_cov, 256, input, '\t');
fget_to(q_id, 256, input, '\t');
fget_to(q_cov, 256, input, '\t');
line_size = fget_to(line, line_size, input, '\t');
depth = atof(line);
fget_to(q, 256, input, '\t');
fget_to(p, 256, input, '\n');
//fgets(line, line_size, input);
strcpy((outputfilename + out_len), ".res");
input = sfopen(outputfilename, "rb");
skipLine(input, args, '\n');
while(!feof(input)) {
fget2(template_name, input, '\t');
fget2(score, input, '\t');
fget2(expected, input, '\t');
fget2(t_len, input, '\t');
fget2(line, input, '\t');
ID = strtod(stripSpace(line->seq), &exeBasic);
if(*exeBasic != 0) {
fprintf(stderr, "%s\n", exeBasic);
exit(1);
}
fget2(t_cov, input, '\t');
fget2(q_id, input, '\t');
fget2(q_cov, input, '\t');
fget2(line, input, '\t');
depth = strtod(stripSpace(line->seq), &exeBasic);
if(*exeBasic != 0) {
fprintf(stderr, "%s\n", exeBasic);
exit(1);
}
fget2(q, input, '\t');
fget2(p, input, '\n');
depth_corr = (1 - pow(2.718281828, (-1) * depth / expDepth));
if(depth_corr > depth_corr_t && ID > ID_t) {
fprintf(output, "%s\t%s\t%s\t%s\t%s\t%s\t%3.2f\t%s\t%s\t%s\t%5.2f\t%1.4f\n", template_name, score, expected, t_len, q, p, ID, t_cov, q_id, q_cov, depth, depth_corr);
if(depth_corr_t <= depth_corr && ID_t <= ID) {
fprintf(output, "%s\t%s\t%s\t%s\t%s\t%s\t%3.2f\t%s\t%s\t%s\t%5.2f\t%1.4f\n", template_name->seq, score->seq, expected->seq, t_len->seq, q->seq, p->seq, ID, t_cov->seq, q_id->seq, q_cov->seq, depth, depth_corr);
}
}
fclose(input);
fprintf(stdout, "DONE");
fclose(output);
exit(0);
fprintf(stderr, "# DONE\n");
return 0;
}
CFLAGS = -Wall -O3 -std=c99
BINS = kmerresistance
LIBS = pherror.o qseqs.o
.c .o:
$(CC) $(CFLAGS) -c -o $@ $<
all: $(BINS)
kmerresistance: KmerResistance.c $(LIBS)
$(CC) $(CFLAGS) -o $@ $< $(LIBS) -lm
clean:
$(RM) $(BINS) $(LIBS)
pherror.o: pherror.h
qseqs.o: qseqs.h pherror.h
/* Philip T.L.C. Clausen Jan 2017 plan@dtu.dk */
# Getting Started #
```
git clone https://bitbucket.org/genomicepidemiology/kmerresistance.git
cd kmerresistance && make;
/*
Copyright (c) 2017, Philip Clausen, Technical University of Denmark
All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
Introduction
./kmerresistance -i reads_se.fq.gz -o output/name -t_db templates -s_db species
./kmerresistance -ipe reads_1.fq.gz reads_2.fq.gz -o output/name -t_db templates -s_db species
```
# Introduction #
KmerResistance correlates mapped genes with the predicted species of WGS
samples, where this this allows for identification of genes in samples which
have been poorly sequenced or high accuracy predictions for samples with
contamination.
KmerResistance has one dependency, namely KMA to perform the mapping, which is
also freely available.
If you use KmerResistance for your published research, then please cite the
KmerResistance and KMA paper.
Compilation
Any c-compiler can be used, here GCC.
gcc -O3 -o kmerresistance KmerResistance.c -lm
Indexing
If you use KmerResistance for your published research, then please cite:
Philip T.L.C. Clausen, Ea Zankari, Frank M. Aarestrup & Ole Lund,
"Benchmarking of methods for identification of antimicrobial resistance genes in bacterial whole genome data",
J Antimicrob Chemother. 2016 Sep;71(9):2484-8.
Philip T.L.C. Clausen, Frank M. Aarestrup & Ole Lund,
"Rapid and precise alignment of raw reads against redundant databases with KMA",
BMC Bioinformatics, 2018;19:307.
# Indexing #
Databases has to set up with KMA. The resistance genes needs standard indexing
while the species database needs to Sparse.
# Templates, e.g. resistance genes:
kma_index -i ResFinder.fsa -o ResFinder
# Species database (or other host database):
kma_index -i bacteria.fsa -o bacteria -Sparse ATG
Mapping with KmerResistance
# KmerResistance-2.0 correlates genes to species from WGS data, by mapping reads with KMA
# Options are: Desc: Default: Requirements:
#
# -i Input/query file name REQUIRED
# -o Output file REQUIRED
# -t_db Template DB REQUIRED
# -s_db Species DB REQUIRED
# -t_k Template kmersize 16
# -s_k Species kmersize 16
# -id ID threshhold 80.0
# -dct Depth correction 0.1
# -kma alternative KMA kma
#
# KMA options: Desc: Default: Requirements:
# -i Input/query file name None REQUIRED
# -o Output file None REQUIRED
# -t_db Template DB None REQUIRED
# -k Kmersize 16
# -e evalue 0.05
# -delta Allocation size for sequences 512
# -mem_mode Use kmers to choose best
# template, and save memory False
# -ex_mode Searh kmers exhaustively False
# -deCon Remove contamination False
# -dense Do not allow insertions
# in assembly False
# -ref_fsa Consensus sequnce will
# have "n" instead of gaps False
# -matrix Print assembly matrix False
# -mp Minimum phred score 30
# -5p Cut a constant number of
# nucleotides from the 5 prime. 0
# -CS Chain size 8 MB
# -MS Max chain size 14 GB
# -Sparse Run KmerFinder False
# -ID Minimum ID 1.0%
# -ss Sparse sorting (q,c,d) q
# -NW Use Needleman-Wunsch False
# -shm Use shared DB made by kma_ssm False
# -1t1 Skip HMM False
# -mrs Minimum alignment score score,
# normalized to alignment length 0.0
# -h Shows this help message
#
Mapping reads against resistance genes:
kmerresistance -i sample_1.fastq sample_2.fastq -o out /
-t_db ResFinder -s_db bacteria
Output:
*.KmerRes KmerResistance output
*.res Result file, containing summary of output.
*.aln Consensus alignment.
*.fsa Consensus sequences drawn from mappings.
*.frag Information about each mapping read, containing:
Read, #matches, aln score, start, end, template, read name
*.mat.gz Count of each called nucleotide on each position in all mapped
templates, requires that the "-matrix" option is enabled when
mapping. The columns are:
Ref. nucleotide, #A, #T, #C, #G, #N, #-.
Questions
If you have any questions regarding KmerResistance or KMA, or wishes for
changes in future versions of KmerResistance or KMA, then feel free to mail me
at: plan@dtu.dk.
# Templates, e.g. resistance genes: #
```
kma index -i ResFinder.fsa -o ResFinder
```
# Species database (or other host database): #
```
kma index -i bacteria.fsa -o bacteria -Sparse ATG
```
# Gene identification #
Aligning reads against resistance genes:
```
kmerresistance -i sample_1.fastq sample_2.fastq -o out -t_db ResFinder -s_db bacteria
```
# Output: #
1. *.KmerRes KmerResistance output
2. *.res Result file, containing summary of output.
3. *.aln Consensus alignment.
4. *.fsa Consensus sequences drawn from mappings.
5. *.frag Information about each mapping read, containing: Read, #matches, aln score, start, end, template, read name
6. *.mat.gz Count of each called nucleotide on each position in all mapped templates, requires that the "-matrix" option is enabled when mapping. The columns are: Ref. nucleotide, #A, #T, #C, #G, #N, #-.
# Installation Requirements #
In order to run KmerResistance, you will need to install KMA. Available here:
*https://bitbucket.org/genomicepidemiology/kma.git*
# Help #
Usage and options are available with the "-h" option on all three programs.
If in doubt, please mail any concerns or problems to: *plan@dtu.dk*.
# Citation #
1. Philip T.L.C. Clausen, Ea Zankari, Frank M. Aarestrup & Ole Lund, "Benchmarking of methods for identification of antimicrobial resistance genes in bacterial whole genome data", J Antimicrob Chemother. 2016 Sep;71(9):2484-8.
2. Philip T.L.C. Clausen, Frank M. Aarestrup & Ole Lund, "Rapid and precise alignment of raw reads against redundant databases with KMA", BMC Bioinformatics, 2018;19:307.
# License #
Copyright (c) 2017, Philip Clausen, Technical University of Denmark
All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
This diff is collapsed.
/* Philip T.L.C. Clausen Jan 2017 plan@dtu.dk */
/*
* Copyright (c) 2017, Philip Clausen, Technical University of Denmark
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define _XOPEN_SOURCE 600
#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include "pherror.h"
void * smalloc(const size_t size) {
void *dest = malloc(size);
if(!dest) {
ERROR();
}
return dest;
}
FILE * sfopen(char *filename, char *mode) {
FILE *file = fopen(filename, mode);
if(!file) {
fprintf(stderr, "Filename:\t%s\n", filename);
ERROR();
}
return file;
}
void cfread(void *src, size_t size, size_t nmemb, FILE *stream) {
unsigned char *ptr = src;
nmemb = fread(ptr, 1, (size *= nmemb), stream);
if((size -= nmemb)) {
ptr += nmemb;
while(size) {
if(0 < (nmemb = read(fileno(stream), ptr, size))) {
size -= nmemb;
ptr += nmemb;
} else if(nmemb == 0 || (nmemb == -1 && (errno & EAGAIN))) {
usleep(1000);
} else {
ERROR();
}
}
}
}
void cfwrite(const void *src, size_t size, size_t nmemb, FILE *stream) {
unsigned char *ptr;
size *= nmemb;
ptr = (unsigned char *)(src);
while(size) {
nmemb = fwrite(ptr, 1, size, stream);
if(nmemb == 0) {
ERROR();
}
size -= nmemb;
ptr += nmemb;
}
}
/* Philip T.L.C. Clausen Jan 2017 plan@dtu.dk */
/*
* Copyright (c) 2017, Philip Clausen, Technical University of Denmark
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define _XOPEN_SOURCE 600
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
/* errno error message */
#define ERROR() fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno)); exit(errno);
/* succes or exit */
#define sfwrite(ptr, size, nmemb, stream) if(fwrite(ptr, size, nmemb, stream) != nmemb) {if(errno) {ERROR();} else {fprintf(stderr, "Writing error.\n"); exit(1);}}
#define sfread(ptr, size, nmemb, stream) if(fread(ptr, size, nmemb, stream) != nmemb) {if(errno) {ERROR();} else {fprintf(stderr, "Reading error.\n"); exit(1);}}
void * smalloc(const size_t size);
FILE * sfopen(char *filename, char *mode);
/* cyclic */
void cfread(void *src, size_t size, size_t nmemb, FILE *stream);
void cfwrite(const void *src, size_t size, size_t nmemb, FILE *stream);
/* Philip T.L.C. Clausen Jan 2017 plan@dtu.dk */
/*
* Copyright (c) 2017, Philip Clausen, Technical University of Denmark
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdlib.h>
#include "pherror.h"
#include "qseqs.h"
Qseqs * setQseqs(int size) {
Qseqs *dest;
dest = smalloc(sizeof(Qseqs));
dest->len = 0;
dest->size = size;
dest->seq = smalloc(size);
return dest;
}
void destroyQseqs(Qseqs *dest) {
free(dest->seq);
free(dest);
}
/* Philip T.L.C. Clausen Jan 2017 plan@dtu.dk */
/*
* Copyright (c) 2017, Philip Clausen, Technical University of Denmark
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef QSEQS
typedef struct qseqs Qseqs;
struct qseqs {
int size;
int len;
unsigned char *seq;
};
#define QSEQS 1
#endif
/* initialize Qseqs */
Qseqs * setQseqs(int size);
/* destroy Qseqs */
void destroyQseqs(Qseqs *dest);