Skip to content
Commits on Source (7)
This diff is collapsed.
/* Philip T.L.C. Clausen Jan 2017 plan@dtu.dk */
/*
Copyright (c) 2017, Philip Clausen, Technical University of Denmark
All rights reserved.
* Copyright (c) 2017, Philip Clausen, Technical University of Denmark
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <stdio.h>
#include <ctype.h>
#include <errno.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <time.h>
#include <unistd.h>
#include <errno.h>
#define HU_LIMIT 65535
#define U_LIMIT 4294967295
/*
STRUCTURES
*/
struct hashMapKMA {
/* end product of script */
unsigned kmersize; // k
unsigned size; // size of DB
unsigned n; // k-mers stored
unsigned null_index; // null value
unsigned seqsize; // size of seq
unsigned v_index; // size of values
unsigned prefix_len; // prefix length
long unsigned *prefix; // prefix
unsigned *exist; // size long
long unsigned *seq; // compressed sequence of k-mers
unsigned *values; // compressed values
unsigned *key_index ; // Relative
unsigned *value_index; // Relative
long unsigned size; // size of DB
long unsigned n; // k-mers stored
long unsigned null_index; // null value
long unsigned v_index; // size of values
unsigned kmersize; // k
unsigned prefix_len; // prefix length
long unsigned prefix; // prefix
unsigned *exist; // size long
long unsigned *exist_l; // size long, big DBs
unsigned *values; // compressed values
short unsigned *values_s; // compressed values, few templates
unsigned *key_index; // Relative
long unsigned *key_index_l; // Relative, 16 < k
unsigned *value_index; // Relative
long unsigned *value_index_l; // Relative, big DBs
};
int version[3] = {1, 0, 0};
/*
FUNCTIONS
*/
void hashMap_shm_detach(struct hashMapKMA *dest) {
shmdt(dest->exist);
shmdt(dest->seq);
shmdt(dest->values);
shmdt(dest->key_index);
shmdt(dest->value_index);
}
void hashMapKMA_setupSHM(struct hashMapKMA *dest, FILE *file, const char *filename) {
int hashMapKMA_setupSHM(struct hashMapKMA *dest, FILE *file, const char *filename) {
int shmid;
long unsigned mask;
int shmid, kmersize, status;
unsigned DB_size;
long unsigned mask, size;
key_t key;
/* load sizes */
fseek(file, sizeof(int), SEEK_CUR);
fread(&DB_size, sizeof(unsigned), 1, file);
fread(&dest->kmersize, sizeof(unsigned), 1, file);
fread(&dest->prefix_len, sizeof(unsigned), 1, file);
fread(&dest->prefix, sizeof(long unsigned), 1, file);
fread(&dest->size, sizeof(long unsigned), 1, file);
fread(&dest->n, sizeof(unsigned), 1, file);
fread(&dest->seqsize, sizeof(unsigned), 1, file);
fread(&dest->v_index, sizeof(unsigned), 1, file);
fread(&dest->null_index, sizeof(unsigned), 1, file);
fread(&dest->n, sizeof(long unsigned), 1, file);
fread(&dest->v_index, sizeof(long unsigned), 1, file);
fread(&dest->null_index, sizeof(long unsigned), 1, file);
kmersize = dest->kmersize;
mask = 0;
mask = (~mask) >> (sizeof(long unsigned) * sizeof(long unsigned) - (kmersize << 1));
status = 0;
/* check shared memory, else load */
size = dest->size;
if((dest->size - 1) == mask) {
if(dest->v_index <= U_LIMIT) {
size *= sizeof(unsigned);
} else {
size *= sizeof(long unsigned);
}
} else {
if(dest->n <= U_LIMIT) {
size *= sizeof(unsigned);
} else {
size *= sizeof(long unsigned);
}
}
key = ftok(filename, 'e');
shmid = shmget(key, dest->size * sizeof(unsigned), IPC_CREAT | 0666);
shmid = shmget(key, size, IPC_CREAT | 0666);
if(shmid < 0) {
fprintf(stderr, "Could not setup the shared hashMap e\n");
fseek(file, dest->size * sizeof(unsigned), SEEK_CUR);
fseek(file, size, SEEK_CUR);
dest->exist = 0;
status = 1;
} else {
dest->exist = shmat(shmid, NULL, 0);
fread(dest->exist, sizeof(unsigned), dest->size, file);
fread(dest->exist, 1, size, file);
}
mask = 0;
mask = (~mask) >> (sizeof(long unsigned) * sizeof(long unsigned) - (dest->kmersize << 1));
/* values */
size = dest->v_index;
if(DB_size < HU_LIMIT) {
size *= sizeof(short unsigned);
} else {
size *= sizeof(unsigned);
}
key = ftok(filename, 'v');
shmid = shmget(key, size, IPC_CREAT | 0666);
if(shmid < 0) {
fprintf(stderr, "Could not setup the shared hashMap v\n");
fseek(file, size, SEEK_CUR);
dest->values = 0;
status = 1;
} else {
/* found */
dest->values = shmat(shmid, NULL, 0);
fread(dest->values, 1, size, file);
}
if((dest->size - 1) == mask) {
key = ftok(filename, 'v');
shmid = shmget(key, dest->v_index * sizeof(unsigned), IPC_CREAT | 0666);
if(shmid < 0) {
fprintf(stderr, "Could not setup the shared hashMap v\n");
fseek(file, dest->v_index * sizeof(unsigned), SEEK_CUR);
dest->values = 0;
} else {
/* found */
dest->values = shmat(shmid, NULL, 0);
fread(dest->values, sizeof(unsigned), dest->v_index, file);
}
return status;
}
/* kmers */
size = dest->n + 1;
if(dest->kmersize <= 16) {
size *= sizeof(unsigned);
} else {
key = ftok(filename, 's');
shmid = shmget(key, dest->seqsize * sizeof(long unsigned), IPC_CREAT | 0666);
if(shmid < 0) {
fprintf(stderr, "Could not setup the shared hashMap s\n");
fseek(file, dest->seqsize * sizeof(long unsigned), SEEK_CUR);
dest->seq = 0;
} else {
/* found */
dest->seq = shmat(shmid, NULL, 0);
fread(dest->seq, sizeof(long unsigned), dest->seqsize, file);
}
key = ftok(filename, 'v');
shmid = shmget(key, dest->v_index * sizeof(unsigned), IPC_CREAT | 0666);
if(shmid < 0) {
fprintf(stderr, "Could not setup the shared hashMap v\n");
fseek(file, dest->v_index * sizeof(unsigned), SEEK_CUR);
dest->values = 0;
} else {
/* found */
dest->values = shmat(shmid, NULL, 0);
fread(dest->values, sizeof(unsigned), dest->v_index, file);
}
key = ftok(filename, 'k');
shmid = shmget(key, (dest->n + 1) * sizeof(unsigned), IPC_CREAT | 0666);
if(shmid < 0) {
fprintf(stderr, "Could not setup the shared hashMap k\n");
fseek(file, (dest->n + 1) * sizeof(unsigned), SEEK_CUR);
dest->key_index = 0;
} else {
/* found */
dest->key_index = shmat(shmid, NULL, 0);
fread(dest->key_index, sizeof(unsigned), dest->n + 1, file);
}
key = ftok(filename, 'i');
shmid = shmget(key, dest->n * sizeof(unsigned), IPC_CREAT | 0666);
if(shmid < 0) {
fprintf(stderr, "Could not setup the shared hashMap i\n");
fseek(file, dest->n * sizeof(unsigned), SEEK_CUR);
dest->value_index = 0;
} else {
/* found */
dest->value_index = shmat(shmid, NULL, 0);
fread(dest->value_index, sizeof(unsigned), dest->n, file);
}
size *= sizeof(long unsigned);
}
key = ftok(filename, 'k');
shmid = shmget(key, size, IPC_CREAT | 0666);
if(shmid < 0) {
fprintf(stderr, "Could not setup the shared hashMap k\n");
fseek(file, size, SEEK_CUR);
dest->values = 0;
status = 1;
} else {
/* found */
dest->key_index = shmat(shmid, NULL, 0);
fread(dest->key_index, 1, size, file);
}
/* value indexes */
size = dest->n;
if(dest->v_index < U_LIMIT) {
size *= sizeof(unsigned);
} else {
size *= sizeof(long unsigned);
}
key = ftok(filename, 'i');
shmid = shmget(key, size, IPC_CREAT | 0666);
if(shmid < 0) {
fprintf(stderr, "Could not setup the shared hashMap i\n");
fseek(file, size, SEEK_CUR);
dest->value_index = 0;
status = 1;
} else {
/* found */
dest->value_index = shmat(shmid, NULL, 0);
fread(dest->value_index, 1, size, file);
}
return status;
}
void hashMapKMA_destroySHM(struct hashMapKMA *dest, FILE *file, const char *filename) {
int shmid;
long unsigned mask;
int shmid, kmersize;
unsigned DB_size;
long unsigned mask, size;
key_t key;
/* load sizes */
fseek(file, sizeof(int), SEEK_CUR);
fread(&DB_size, sizeof(unsigned), 1, file);
fread(&dest->kmersize, sizeof(unsigned), 1, file);
fread(&dest->prefix_len, sizeof(unsigned), 1, file);
fread(&dest->prefix, sizeof(long unsigned), 1, file);
fread(&dest->size, sizeof(long unsigned), 1, file);
fread(&dest->n, sizeof(unsigned), 1, file);
fread(&dest->seqsize, sizeof(unsigned), 1, file);
fread(&dest->v_index, sizeof(unsigned), 1, file);
fread(&dest->null_index, sizeof(unsigned), 1, file);
fread(&dest->n, sizeof(long unsigned), 1, file);
fread(&dest->v_index, sizeof(long unsigned), 1, file);
fread(&dest->null_index, sizeof(long unsigned), 1, file);
kmersize = dest->kmersize;
mask = 0;
mask = (~mask) >> (sizeof(long unsigned) * sizeof(long unsigned) - (kmersize << 1));
/* check shared memory, and destroy */
size = dest->size;
if((dest->size - 1) == mask) {
if(dest->v_index <= U_LIMIT) {
size *= sizeof(unsigned);
} else {
size *= sizeof(long unsigned);
}
} else {
if(dest->n <= U_LIMIT) {
size *= sizeof(unsigned);
} else {
size *= sizeof(long unsigned);
}
}
key = ftok(filename, 'e');
shmid = shmget(key, dest->size * sizeof(unsigned), 0666);
shmid = shmget(key, size, 0666);
if(shmid >= 0) {
shmctl(shmid, IPC_RMID, NULL);
}
mask = 0;
mask = (~mask) >> (sizeof(long unsigned) * sizeof(long unsigned) - (dest->kmersize << 1));
/* values */
size = dest->v_index;
if(DB_size < HU_LIMIT) {
size *= sizeof(short unsigned);
} else {
size *= sizeof(unsigned);
}
key = ftok(filename, 'v');
shmid = shmget(key, size, 0666);
if(shmid >= 0) {
shmctl(shmid, IPC_RMID, NULL);
}
if((dest->size - 1) == mask) {
key = ftok(filename, 'v');
shmid = shmget(key, dest->v_index * sizeof(unsigned), 0666);
if(shmid >= 0) {
shmctl(shmid, IPC_RMID, NULL);
}
/* kmers */
size = dest->n + 1;
if(dest->kmersize <= 16) {
size *= sizeof(unsigned);
} else {
key = ftok(filename, 's');
shmid = shmget(key, dest->seqsize * sizeof(long unsigned), 0666);
if(shmid >= 0) {
shmctl(shmid, IPC_RMID, NULL);
}
key = ftok(filename, 'v');
shmid = shmget(key, dest->v_index * sizeof(unsigned), 0666);
if(shmid >= 0) {
shmctl(shmid, IPC_RMID, NULL);
}
key = ftok(filename, 'k');
shmid = shmget(key, (dest->n + 1) * sizeof(unsigned), 0666);
if(shmid >= 0) {
shmctl(shmid, IPC_RMID, NULL);
}
key = ftok(filename, 'i');
shmid = shmget(key, dest->n * sizeof(unsigned), 0666);
if(shmid >= 0) {
shmctl(shmid, IPC_RMID, NULL);
}
size *= sizeof(long unsigned);
}
key = ftok(filename, 'k');
shmid = shmget(key, size, 0666);
if(shmid >= 0) {
shmctl(shmid, IPC_RMID, NULL);
}
/* value indexes */
size = dest->n;
if(dest->v_index < U_LIMIT) {
size *= sizeof(unsigned);
} else {
size *= sizeof(long unsigned);
}
key = ftok(filename, 'i');
shmid = shmget(key, size, 0666);
if(shmid >= 0) {
shmctl(shmid, IPC_RMID, NULL);
}
}
......@@ -354,7 +403,7 @@ char * name_setupSHM(FILE *file, const char *filename) {
} else {
template_names = shmat(shmid, NULL, 0);
fread(template_names, 1, size, file);
for(i = 0; i < size; i++) {
for(i = 0; i < size; ++i) {
if(template_names[i] == '\n') {
template_names[i] = 0;
}
......@@ -395,6 +444,7 @@ void helpMessage(int exeStatus) {
fprintf(helpOut, "#\t-destroy\tDestroy shared DB\t\tFalse\n");
fprintf(helpOut, "#\t-shmLvl\t\tLevel of shared memory\t\t1\n");
fprintf(helpOut, "#\t-shm-h\t\tExplain shm levels\n");
fprintf(helpOut, "#\t-v\t\tVersion\n");
fprintf(helpOut, "#\t-h\t\tShows this help message\n");
fprintf(helpOut, "#\n");
exit(exeStatus);
......@@ -402,7 +452,7 @@ void helpMessage(int exeStatus) {
int main(int argc, char *argv[]) {
int args, file_len, destroy, *template_lengths, *index;
int args, file_len, destroy, status, *template_lengths, *index;
unsigned shmLvl;
long unsigned *seq;
char *templatefilename, *template_names;
......@@ -413,12 +463,13 @@ int main(int argc, char *argv[]) {
templatefilename = 0;
destroy = 0;
shmLvl = 1;
status = 0;
/* PARSE COMMAND LINE OPTIONS */
args = 1;
while(args < argc) {
if(strcmp(argv[args], "-t_db") == 0) {
args++;
++args;
if(args < argc) {
templatefilename = malloc(strlen(argv[args]) + 64);
if(!templatefilename) {
......@@ -430,7 +481,7 @@ int main(int argc, char *argv[]) {
} else if(strcmp(argv[args], "-destroy") == 0) {
destroy = 1;
} else if(strcmp(argv[args], "-shmLvl") == 0) {
args++;
++args;
if(args < argc) {
shmLvl = atoi(argv[args]);
if(!shmLvl) {
......@@ -438,6 +489,9 @@ int main(int argc, char *argv[]) {
exit(0);
}
}
} else if(strcmp(argv[args], "-v") == 0) {
fprintf(stdout, "KMA_SHM-%d.%d.%d\n", version[0], version[1], version[2]);
exit(0);
} else if(strcmp(argv[args], "-h") == 0) {
helpMessage(0);
} else if(strcmp(argv[args], "-shm-h") == 0) {
......@@ -457,7 +511,7 @@ int main(int argc, char *argv[]) {
fprintf(stderr, "# Printing help message:\n");
helpMessage(-1);
}
args++;
++args;
}
if(templatefilename == 0) {
fprintf(stderr, "# Too few arguments handed\n");
......@@ -480,6 +534,7 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
status |= errno;
} else {
hashMapKMA_destroySHM(templates, file, templatefilename);
fclose(file);
......@@ -493,6 +548,7 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
status |= errno;
} else {
hashMapKMA_destroySHM(templates, file, templatefilename);
fclose(file);
......@@ -506,6 +562,7 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
status |= errno;
} else {
length_destroySHM(file, templatefilename);
fclose(file);
......@@ -520,6 +577,7 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
status |= errno;
} else {
seq_destroySHM(file, templatefilename);
fclose(file);
......@@ -531,6 +589,7 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
status |= errno;
} else {
index_destroySHM(file, templatefilename);
fclose(file);
......@@ -544,6 +603,7 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
status |= errno;
} else {
name_destroySHM(file, templatefilename);
fclose(file);
......@@ -557,8 +617,9 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
status |= errno;
} else {
hashMapKMA_setupSHM(templates, file, templatefilename);
status |= hashMapKMA_setupSHM(templates, file, templatefilename);
hashMap_shm_detach(templates);
fclose(file);
}
......@@ -571,8 +632,9 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
status |= errno;
} else {
hashMapKMA_setupSHM(templates, file, templatefilename);
status |= hashMapKMA_setupSHM(templates, file, templatefilename);
hashMap_shm_detach(templates);
fclose(file);
}
......@@ -585,10 +647,14 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
status |= errno;
} else {
template_lengths = length_setupSHM(file, templatefilename);
if(template_lengths)
if(template_lengths) {
shmdt(template_lengths);
} else {
status |= 1;
}
fclose(file);
}
templatefilename[file_len] = 0;
......@@ -601,10 +667,14 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
status |= errno;
} else {
seq = seq_setupSHM(file, templatefilename);
if(seq)
if(seq) {
shmdt(seq);
} else {
status |= 1;
}
fclose(file);
}
templatefilename[file_len] = 0;
......@@ -614,10 +684,14 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
status |= errno;
} else {
index = index_setupSHM(file, templatefilename);
if(index)
if(index) {
shmdt(index);
} else {
status |= 1;
}
fclose(file);
}
templatefilename[file_len] = 0;
......@@ -629,10 +703,14 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
status |= errno;
} else {
template_names = name_setupSHM(file, templatefilename);
if(template_names)
if(template_names) {
shmdt(template_names);
} else {
status |= 1;
}
fclose(file);
}
templatefilename[file_len] = 0;
......@@ -655,5 +733,5 @@ int main(int argc, char *argv[]) {
* ipcs -a
*/
return 0;
return status;
}
This diff is collapsed.
/* Philip T.L.C. Clausen Jan 2017 plan@dtu.dk */
/*
* Copyright (c) 2017, Philip Clausen, Technical University of Denmark
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define HU_LIMIT 65535
#define U_LIMIT 4294967295
/*
 * In-memory image of a KMA ".comp.b" k-mer hash map.
 * The scalar fields mirror the file header; the pointer fields hold the
 * arrays that follow it. Several arrays exist in a narrow and a wide
 * flavour; in this file the wide/short flavours are only ever used as
 * aliases of the narrow buffer (see hashMapKMA_014to015).
 */
struct hashMapKMA {
long unsigned size; // number of entries in exist
long unsigned n; // k-mers stored
long unsigned null_index; // null value
long unsigned v_index; // number of entries in values
unsigned kmersize; // k
unsigned prefix_len; // prefix length
long unsigned prefix; // prefix
unsigned *exist; // array of `size` entries
long unsigned *exist_l; // wide variant of exist, big DBs (not referenced by the converter code in this file)
unsigned *values; // compressed values, `v_index` entries
short unsigned *values_s; // compressed values, few templates (DB_size < HU_LIMIT); aliases values
unsigned *key_index; // Relative, `n + 1` entries
long unsigned *key_index_l; // Relative, 16 < k; aliases key_index after widening
unsigned *value_index; // Relative, `n` entries
long unsigned *value_index_l; // Relative, big DBs (not referenced by the converter code in this file)
};
/*
 * File-scope state shared between the converter and getKmer():
 *   DB_size - first header word of the ".comp.b" file, read by hashMapKMA_014to015().
 *   shifter - set by hashMapKMA_014to015() to
 *             sizeof(long unsigned) * sizeof(long unsigned) - 2 * kmersize;
 *             getKmer() right-shifts by it to keep only the k-mer bits.
 */
unsigned DB_size, shifter;
/* BASIC FUNCTIONS */
/*
 * Print the current errno (number and message) on stderr and terminate.
 *
 * errno is captured before any stdio call, because fprintf() is allowed to
 * modify it. The process never exits with status 0 from here: when errno
 * is 0 the exit status falls back to 1 so callers of the program still see
 * a failure.
 */
void ERROR() {
	
	int err;
	
	err = errno;
	fprintf(stderr, "Error: %d (%s)\n", err, strerror(err));
	exit(err ? err : 1);
}
/*
 * "Safe" malloc: returns a block of `size` bytes, or hands control to
 * ERROR() when malloc() yields a null pointer.
 */
void * smalloc(size_t size) {
	
	void *block = malloc(size);
	
	if(block) {
		return block;
	}
	
	/* allocation failed, report through the common error handler */
	ERROR();
	return block;
}
/*
 * "Safe" fopen: opens `filename` with `mode` and returns the stream.
 * On failure the offending filename is printed to stderr and ERROR() is
 * called to report errno.
 *
 * The errno left by fopen() is saved and restored around the diagnostic
 * fprintf(), since that call may overwrite it before ERROR() reads it.
 */
FILE * sfopen(char *filename, char *mode) {
	
	int err;
	FILE *file;
	
	file = fopen(filename, mode);
	if(!file) {
		err = errno;
		fprintf(stderr, "Filename:\t%s\n", filename);
		errno = err;
		ERROR();
	}
	
	return file;
}
/*
 * Extract the k-mer starting at position `cPos` from the packed sequence
 * `compressor`.
 *
 * Each word holds 32 positions of 2 bits (bit offset = 2 * (cPos % 32),
 * word index = cPos / 32). The k-mer is left-aligned by the bit offset and
 * then shifted right by the global `shifter`; when it straddles a word
 * boundary the missing low bits are taken from the following word.
 * The literal 64 assumes a 64-bit `long unsigned`.
 */
long unsigned getKmer(long unsigned *compressor, unsigned cPos) {
	
	unsigned bitOffset, word;
	long unsigned kmer;
	
	bitOffset = (cPos & 31) << 1;
	word = cPos >> 5;
	
	kmer = compressor[word] << bitOffset;
	if(shifter < bitOffset) {
		/* k-mer spills into the next word */
		kmer |= compressor[word + 1] >> (64 - bitOffset);
	}
	
	return kmer >> shifter;
}
/*
 * Rewrite "<filename>.length.b" in place for the 0.15 layout.
 *
 * The file starts with an unsigned count `size`, followed by one, two or
 * three arrays of `size` unsigned entries:
 *   1 array  - left untouched, returns 0.
 *   2 arrays - warns that the DB only works for "-Sparse" mapping, then
 *              writes the first array followed by both arrays, returns 0.
 *   3 arrays - writes the third array first, followed by the first two,
 *              returns 1.
 * Anything else (including a missing header) is reported as malformed and
 * terminates the program with status 1.
 *
 * `filename` is extended temporarily with strcat(), so the caller must
 * provide a buffer with room for the ".length.b" suffix; it is restored to
 * its original content before returning.
 */
unsigned convertLength_014to015(char *filename) {
	
	unsigned size, file_len, converted;
	size_t n, loaded, written;
	int *lengths;
	FILE *file;
	
	file_len = strlen(filename);
	strcat(filename, ".length.b");
	file = sfopen(filename, "rb+");
	filename[file_len] = 0;
	
	/* a file without even the count cannot be a valid length file */
	if(fread(&size, sizeof(unsigned), 1, file) != 1) {
		fprintf(stderr, "DB is malformed.\n");
		exit(1);
	}
	
	/* do the size arithmetic in size_t so 3 * size cannot wrap in unsigned */
	n = size;
	lengths = smalloc(3 * n * sizeof(unsigned));
	loaded = fread(lengths, sizeof(unsigned), 3 * n, file);
	
	/* reposition just after the count; also required between read and write */
	fseek(file, sizeof(unsigned), SEEK_SET);
	
	converted = 0;
	if(loaded == n) {
		/* single array, nothing to rewrite */
	} else if(loaded == 2 * n) {
		fprintf(stderr, "DB is old.\n");
		fprintf(stderr, "It will only work for \"-Sparse\" mapping!!!\n");
		written = fwrite(lengths, sizeof(unsigned), n, file);
		written += fwrite(lengths, sizeof(unsigned), 2 * n, file);
		if(written != 3 * n) {
			ERROR();
		}
	} else if(loaded == 3 * n) {
		written = fwrite(lengths + 2 * n, sizeof(unsigned), n, file);
		written += fwrite(lengths, sizeof(unsigned), 2 * n, file);
		if(written != 3 * n) {
			ERROR();
		}
		converted = 1;
	} else {
		fprintf(stderr, "DB is malformed.\n");
		exit(1);
	}
	
	free(lengths);
	if(fclose(file) != 0) {
		/* buffered data could not be flushed to disk */
		ERROR();
	}
	
	return converted;
}
/*
 * Convert "<filename>.comp.b" from the 0.14 layout to the 0.15 layout.
 *
 * Steps:
 *   1. remove "<filename>.b";
 *   2. load the whole 0.14 file (header, exist, optional packed sequence,
 *      values, and - unless size - 1 equals the k-mer mask - key_index and
 *      value_index);
 *   3. overwrite the same path with the 0.15 layout: n, v_index and
 *      null_index become `long unsigned`, the sequence-size field and the
 *      packed sequence are dropped, values are narrowed to `short unsigned`
 *      when DB_size < HU_LIMIT, and for kmersize > 16 every key_index entry
 *      is replaced by the k-mer it pointed to (widened to `long unsigned`).
 *
 * filename - DB base name; extended temporarily with strcat(), so the
 *            buffer needs spare room for the suffixes. Restored on return.
 * prefix   - when non-zero and the stored prefix_len is 0, the stored
 *            prefix is set to 1.
 *
 * Returns 0 on success and 1 when the input is truncated or the output
 * cannot be written completely. Terminates the program (status 1) when
 * the header does not look like a 0.14 DB. Sets the globals DB_size and
 * shifter as a side effect.
 *
 * All buffers and both file handles are released on every return path.
 */
int hashMapKMA_014to015(char *filename, unsigned prefix) {
	
	unsigned i, tmp, size, kmersize, seqsize, file_len;
	int status;
	long unsigned mask, *seq;
	FILE *file;
	struct hashMapKMA *dest;
	
	/* rm filename.b */
	file_len = strlen(filename);
	strcat(filename, ".b");
	remove(filename);
	filename[file_len] = 0;
	
	/* load DB */
	strcat(filename, ".comp.b");
	file = sfopen(filename, "rb");
	filename[file_len] = 0;
	
	/* zero the map so the cleanup code can free unconditionally */
	dest = smalloc(sizeof(struct hashMapKMA));
	memset(dest, 0, sizeof(struct hashMapKMA));
	seq = 0;
	status = 1;
	
	/* load sizes */
	if(fread(&DB_size, sizeof(unsigned), 1, file) != 1 ||
		fread(&dest->kmersize, sizeof(unsigned), 1, file) != 1 ||
		fread(&dest->prefix_len, sizeof(unsigned), 1, file) != 1 ||
		fread(&dest->prefix, sizeof(long unsigned), 1, file) != 1 ||
		fread(&dest->size, sizeof(long unsigned), 1, file) != 1) {
		goto cleanup;
	}
	kmersize = dest->kmersize;
	mask = 0;
	mask = (~mask) >> (sizeof(long unsigned) * sizeof(long unsigned) - (kmersize << 1));
	shifter = sizeof(long unsigned) * sizeof(long unsigned) - (kmersize << 1);
	
	/* load changed size: these fields were plain unsigned in 0.14 */
	if(fread(&tmp, sizeof(unsigned), 1, file) != 1) {
		goto cleanup;
	}
	dest->n = tmp;
	if(fread(&seqsize, sizeof(unsigned), 1, file) != 1) {
		goto cleanup;
	}
	if(fread(&tmp, sizeof(unsigned), 1, file) != 1) {
		goto cleanup;
	}
	dest->v_index = tmp;
	if(fread(&tmp, sizeof(unsigned), 1, file) != 1) {
		goto cleanup;
	}
	dest->null_index = tmp;
	
	/* make checks */
	if(dest->size < dest->n || dest->n == 0) {
		fprintf(stderr, "DB is not of version 0.14\n");
		exit(1);
	}
	
	/* load arrays */
	dest->exist = smalloc(dest->size * sizeof(unsigned));
	if(dest->size != fread(dest->exist, sizeof(unsigned), dest->size, file)) {
		goto cleanup;
	}
	if(mask != (dest->size - 1)) {
		seq = smalloc(seqsize * sizeof(long unsigned));
		if(seqsize != fread(seq, sizeof(long unsigned), seqsize, file)) {
			goto cleanup;
		}
	}
	dest->values = smalloc(dest->v_index * sizeof(int));
	if(dest->v_index != fread(dest->values, sizeof(int), dest->v_index, file)) {
		goto cleanup;
	}
	if(mask != (dest->size - 1)) {
		dest->key_index = smalloc((dest->n + 1) * sizeof(unsigned));
		if((dest->n + 1) != fread(dest->key_index, sizeof(unsigned), dest->n + 1, file)) {
			goto cleanup;
		}
		dest->value_index = smalloc(dest->n * sizeof(unsigned));
		if(dest->n != fread(dest->value_index, sizeof(unsigned), dest->n, file)) {
			goto cleanup;
		}
	}
	
	/* everything is in memory, release the input before truncating it */
	fclose(file);
	file = 0;
	
	/* convert to new format */
	/* change prefix if sparse - */
	if(prefix && dest->prefix_len == 0) {
		dest->prefix = 1;
	}
	
	strcat(filename, ".comp.b");
	file = sfopen(filename, "wb");
	filename[file_len] = 0;
	
	/* header */
	if(fwrite(&DB_size, sizeof(unsigned), 1, file) != 1 ||
		fwrite(&dest->kmersize, sizeof(unsigned), 1, file) != 1 ||
		fwrite(&dest->prefix_len, sizeof(unsigned), 1, file) != 1 ||
		fwrite(&dest->prefix, sizeof(long unsigned), 1, file) != 1 ||
		fwrite(&dest->size, sizeof(long unsigned), 1, file) != 1 ||
		fwrite(&dest->n, sizeof(long unsigned), 1, file) != 1 ||
		fwrite(&dest->v_index, sizeof(long unsigned), 1, file) != 1 ||
		fwrite(&dest->null_index, sizeof(long unsigned), 1, file) != 1) {
		goto cleanup;
	}
	
	/* exist */
	if(fwrite(dest->exist, sizeof(unsigned), dest->size, file) != dest->size) {
		goto cleanup;
	}
	
	/* values */
	if(DB_size < HU_LIMIT) {
		/* narrow in place, front to back: entry i is written at or before the bytes it was read from */
		dest->values_s = (short unsigned *)(dest->values);
		for(i = 0; i < dest->v_index; ++i) {
			dest->values_s[i] = dest->values[i];
		}
		size = sizeof(short unsigned);
	} else {
		size = sizeof(unsigned);
	}
	if(fwrite(dest->values, size, dest->v_index, file) != dest->v_index) {
		goto cleanup;
	}
	
	/* no key / value indexes are stored when size - 1 equals the k-mer mask */
	if(mask == (dest->size - 1)) {
		status = 0;
		goto cleanup;
	}
	
	/* key_index */
	if(dest->kmersize <= 16) {
		if(fwrite(dest->key_index, sizeof(unsigned), dest->n + 1, file) != (dest->n + 1)) {
			goto cleanup;
		}
	} else {
		dest->key_index_l = realloc(dest->key_index, (dest->n + 1) * sizeof(long unsigned));
		if(dest->key_index_l) {
			dest->key_index = (unsigned *)(dest->key_index_l);
		} else {
			ERROR();
		}
		/* widen in place, back to front, replacing each position by its k-mer */
		i = dest->n + 1;
		while(i--) {
			dest->key_index_l[i] = getKmer(seq, dest->key_index[i]);
		}
		if(fwrite(dest->key_index_l, sizeof(long unsigned), dest->n + 1, file) != (dest->n + 1)) {
			goto cleanup;
		}
	}
	
	/* value_index */
	if(fwrite(dest->value_index, sizeof(unsigned), dest->n, file) != dest->n) {
		goto cleanup;
	}
	status = 0;
	
cleanup:
	/* a failed close of the output means buffered data never reached disk */
	if(file && fclose(file) != 0) {
		status = 1;
	}
	free(dest->exist);
	free(dest->values);
	free(dest->key_index);
	free(dest->value_index);
	free(seq);
	free(dest);
	
	return status;
}
/*
 * Upgrade a complete 0.14 index to 0.15.
 *
 * Converts the length file first; its result is passed on as the `prefix`
 * argument of the hash map conversion. The main hash map is converted
 * next, and when "<filename>.decon.b" can be opened for reading the
 * "<filename>.decon" hash map is converted as well.
 *
 * `filename` is modified temporarily (suffixes are appended in place) and
 * restored before returning. Returns the sum of the hash map conversion
 * results, i.e. 0 when every conversion succeeded.
 */
int index_014to015(char *filename) {
	
	unsigned baseLen, sparsePrefix, failures;
	FILE *decon;
	
	baseLen = strlen(filename);
	
	/* change prefix if sparse - */
	sparsePrefix = convertLength_014to015(filename);
	failures = hashMapKMA_014to015(filename, sparsePrefix);
	
	/* check for deCon */
	strcat(filename, ".decon.b");
	decon = fopen(filename, "rb");
	if(decon != NULL) {
		fclose(decon);
		/* cut the trailing ".b" so the name reads "filename.decon" */
		filename[baseLen + 6] = 0;
		failures += hashMapKMA_014to015(filename, sparsePrefix);
	}
	filename[baseLen] = 0;
	
	return failures;
}
/*
 * Print the usage text and terminate with `exeStatus`.
 * The text goes to stdout when exeStatus is 0 and to stderr otherwise.
 */
void helpMessage(int exeStatus) {
	
	FILE *helpOut = exeStatus ? stderr : stdout;
	
	fputs("# KMA_update syncronises kma-indexes to the needed version.\n", helpOut);
	fputs("# Options are:\t\tDesc:\t\t\t\t\tRequirements:\n", helpOut);
	fputs("#\n", helpOut);
	fputs("#\t-t_db\t\tTemplate DB\t\t\t\tREQUIRED\n", helpOut);
	fputs("#\t-v\t\t[XXYY], from version major version XX\n#\t\t\tto major version YY. Use minor version,\n#\t\t\tif major version is 0.\t\t\tREQUIRED\n", helpOut);
	fputs("#\t-h\t\tShows this help message\n", helpOut);
	fputs("#\n", helpOut);
	
	exit(exeStatus);
}
/*
 * kma_update entry point.
 *
 * Options:
 *   -t_db <name>  base name of the template DB (required)
 *   -v <XXYY>     conversion to perform, e.g. 1415 for 0.14 -> 0.15 (required)
 *   -h            print the help text and exit
 *
 * Exit status: 0 on success, 1 when a required argument is missing or an
 * option is unknown, 2 for an invalid version, 3 when the conversion fails.
 */
int main(int argc, char *argv[]) {
	
	int args;
	unsigned version;
	char *filename, *error;
	
	/* set defaults */
	filename = 0;
	version = 0;
	
	/* parse command line; args is int so it compares cleanly with argc */
	args = 1;
	while(args < argc) {
		if(strcmp(argv[args], "-t_db") == 0) {
			if(++args < argc) {
				/* drop the buffer of an earlier -t_db, the last one wins */
				free(filename);
				/* spare room for the suffixes the converters append in place */
				filename = smalloc(strlen(argv[args]) + 64);
				strcpy(filename, argv[args]);
			}
		} else if(strcmp(argv[args], "-v") == 0) {
			if(++args < argc) {
				version = strtoul(argv[args], &error, 10);
				if(*error != 0) {
					fprintf(stderr, " Invalid version specified.\n");
					exit(2);
				}
			}
		} else if(strcmp(argv[args], "-h") == 0) {
			helpMessage(0);
		} else {
			fprintf(stderr, " Invalid option:\t%s\n", argv[args]);
			fprintf(stderr, " Printing help message:\n");
			helpMessage(1);
		}
		++args;
	}
	
	if(!filename || !version) {
		fprintf(stderr, "Insuffient amount of arguments handed!!!\n");
		free(filename);
		/* nothing was converted, so do not report success */
		return 1;
	} else if(version == 1415) {
		if(index_014to015(filename)) {
			fprintf(stderr, "Conversion error.\n");
			exit(3);
		}
	} else {
		fprintf(stderr, "Invalid version swifting specified.\n");
		fprintf(stderr, "Valid conversions:\n");
		fprintf(stderr, "\t%d\t%.2f -> %.2f\n", 1415, 0.14, 0.15);
		free(filename);
		return 2;
	}
	
	free(filename);
	
	return 0;
}
CFLAGS = -std=c99 -w -O3
BINS = kma kma_index kma_shm
CFLAGS = -w -O3
BINS = kma kma_index kma_shm kma_update
all: $(BINS)
......@@ -7,10 +7,13 @@ kma: KMA.c
$(CC) $(CFLAGS) -o $@ $< -lm -lpthread -lz
kma_index: KMA_index.c
$(CC) $(CFLAGS) -o $@ $< -lm
$(CC) $(CFLAGS) -o $@ $< -lm -lz
kma_shm: KMA_SHM.c
$(CC) $(CFLAGS) -o $@ $<
kma_update: KMA_update.c
$(CC) $(CFLAGS) -o $@ $<
clean:
$(RM) $(BINS)
......@@ -17,7 +17,10 @@ not exist. It works for long low quality reads as well, such as those from Nanop
Non-unique matches are resolved using the "ConClave" sorting scheme, and a consensus sequence is output
in addition to other common attributes, based on our users' demands.
If you use KMA for your published research, then please cite the KMA paper.
If you use KMA for your published research, then please cite:
Philip T.L.C. Clausen, Frank M. Aarestrup & Ole Lund,
"Rapid and precise alignment of raw reads against redundant databases with KMA",
BMC Bioinformatics, 2018;19:307.
# Usage #
......@@ -65,6 +68,7 @@ Some of the most important options:
-Mt1 Match to only one template in the database.
-ID Minimum identity to output template match.
-apm Paired end method, “p”: Reward if pairing the reads, “u”: unite best template matches in each read if possible, “f” force paired reads to pair.
-1t1 One read to one template, no splicing performed. Well suited for short reads and whole genome mapping.
-bc90 Basecalls should be significantly overrepresented, and have at least 90% agreement.
-bcNano Basecalls optimized for nanopore sequencing.
-mrs minimum alignment score normalized to alignment length.
......@@ -136,7 +140,7 @@ Usage and options are available with the "-h" option on all three programs.
If in doubt, please mail any concerns or problems to: *plan@dtu.dk*.
# Citation #
1. Philip T.L.C. Clausen, Frank M. Aarestrup & Ole Lund, "Rapid and precise alignment of raw reads against redundant databases with KMA", under review.
1. Philip T.L.C. Clausen, Frank M. Aarestrup & Ole Lund, "Rapid and precise alignment of raw reads against redundant databases with KMA", BMC Bioinformatics, 2018;19:307.
# License #
Copyright (c) 2017, Philip Clausen, Technical University of Denmark
......
kma (0.14.4+git20180611.7e14ef6-1) UNRELEASED; urgency=medium
kma (1.1.7-1) UNRELEASED; urgency=medium
* Initial release (Closes: #<bug>)
......
......@@ -10,7 +10,7 @@ Vcs-Browser: https://salsa.debian.org/med-team/kma
Vcs-Git: https://salsa.debian.org/med-team/kma.git
Homepage: https://bitbucket.org/genomicepidemiology/kma
Package: kme
Package: kma
Architecture: any
Depends: ${shlibs:Depends},
${misc:Depends}
......
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.6.
.TH KMA "1" "June 2018" "kma 0.14.4+git20180611.7e14ef6" "User Commands"
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.8.
.TH KMA "1" "February 2019" "kma 1.1.7" "User Commands"
.SH NAME
kma \- mapps raw reads to a template database, for optimal performance it is designed to use 3 threads
kma \- mapps raw reads to a template database
.SH DESCRIPTION
.IP
KMA\-0.14.4 mapps raw reads to a template database, for optimal performance it is designed to use 3 threads.
Options are: Desc: Default: Requirements:
mapps raw reads to a template database.
.SH OPTIONS
.TP
\fB\-o\fR
Output file None REQUIRED
......@@ -28,6 +27,9 @@ Kmersize DB defined
\fB\-e\fR
evalue 0.05
.TP
\fB\-ConClave\fR
ConClave version 1
.TP
\fB\-mem_mode\fR
Use kmers to choose best
template, and save memory False
......@@ -35,6 +37,12 @@ template, and save memory False
\fB\-ex_mode\fR
Searh kmers exhaustively False
.TP
\fB\-ef\fR
Print additional features False
.TP
\fB\-vcf\fR
Make vcf file, 2 to apply FT False/0
.TP
\fB\-deCon\fR
Remove contamination False
.TP
......@@ -89,12 +97,19 @@ Swap DB to disk 0 (lvl)
\fB\-1t1\fR
Skip HMM False
.TP
\fB\-ck\fR
Count kmers instead of
pseudo alignment False
.TP
\fB\-ca\fR
Make circular alignments False
.TP
\fB\-boot\fR
Bootstrap sequence False
.TP
\fB\-bc\fR
Base calls should be
significantly overrepresented. True
significantly overrepresented. [True]
.TP
\fB\-bc90\fR
Base calls should be both
......@@ -110,6 +125,9 @@ Both mrs and p_value thresholds
has to reached to in order to
report a template hit. or
.TP
\fB\-mq\fR
Minimum mapping quality 0
.TP
\fB\-mrs\fR
Minimum alignment score,
normalized to alignment length 0.50
......@@ -129,6 +147,9 @@ Penalty for gap extension \fB\-1\fR
\fB\-per\fR
Reward for pairing reads 7
.TP
\fB\-cge\fR
Set CGE penalties and rewards False
.TP
\fB\-t\fR
Number of threads 1
.TP
......
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.6.
.TH KMA_INDEX "1" "June 2018" "kma_index 0.14.4+git20180611.7e14ef6" "User Commands"
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.8.
.TH KMA_INDEX "1" "February 2019" "kma_index 1.1.7" "User Commands"
.SH NAME
kma_index \- creates the databases needed to run KMA, from a list of fasta files given
.SH DESCRIPTION
.IP
kma_index creates the databases needed to run KMA, from a list of fasta files given.
Options are: Desc: Default:
.SH OPTIONS
.TP
\fB\-i\fR
Input/query file name (STDIN: "\-\-") None
......@@ -38,11 +37,14 @@ Kmersize for indexing 16
Minimum length of templates kmersize (16)
.TP
\fB\-CS\fR
Start Chain size 1 M
Start Chain size 1 M
.TP
\fB\-ME\fR
Mega DB False
.TP
\fB\-NI\fR
Do not dump *.index.b False
.TP
\fB\-Sparse\fR
Make Sparse DB ('\-' for no prefix) None/False
.TP
......@@ -56,6 +58,9 @@ Homology query 1.0
Both homolgy thresholds
has to be reached or
.TP
\fB\-v\fR
Version
.TP
\fB\-h\fR
Shows this help message
.SH AUTHOR
......
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.6.
.TH KMA_SHM "1" "June 2018" "kma_shm 0.14.4+git20180611.7e14ef6" "User Commands"
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.8.
.TH KMA_SHM "1" "February 2019" "kma_shm 1.1.7" "User Commands"
.SH NAME
kma_shm \- sets up a shared database (sysV) for mapping with KMA
.SH DESCRIPTION
.IP
kma_shm sets up a shared database (sysV) for mapping with KMA.
Options are: Desc: Default: Requirements:
.SH OPTIONS
.TP
\fB\-t_db\fR
Template DB None REQUIRED
......@@ -19,6 +18,9 @@ Level of shared memory 1
\fB\-shm\-h\fR
Explain shm levels
.TP
\fB\-v\fR
Version
.TP
\fB\-h\fR
Shows this help message
.SH AUTHOR
......
......@@ -4,24 +4,28 @@ Description: Propagate hardening options
--- a/Makefile
+++ b/Makefile
@@ -1,16 +1,16 @@
-CFLAGS = -std=c99 -w -O3
+CFLAGS += -std=c99 -w -O3
BINS = kma kma_index kma_shm
@@ -1,19 +1,19 @@
-CFLAGS = -w -O3
+CFLAGS += -w -O3
BINS = kma kma_index kma_shm kma_update
all: $(BINS)
kma: KMA.c
- $(CC) $(CFLAGS) -o $@ $< -lm -lpthread -lz
+ $(CC) $(CFLAGS) -g -o $@ $< -lm -lpthread -lz $(LDFLAGS)
+ $(CC) $(CFLAGS) -o $@ $< -lm -lpthread -lz $(LDFLAGS)
kma_index: KMA_index.c
- $(CC) $(CFLAGS) -o $@ $< -lm
+ $(CC) $(CFLAGS) -g -o $@ $< -lm $(LDFLAGS)
- $(CC) $(CFLAGS) -o $@ $< -lm -lz
+ $(CC) $(CFLAGS) -o $@ $< -lm -lz $(LDFLAGS)
kma_shm: KMA_SHM.c
- $(CC) $(CFLAGS) -o $@ $<
+ $(CC) $(CFLAGS) -g -o $@ $< $(LDFLAGS)
+ $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
kma_update: KMA_update.c
- $(CC) $(CFLAGS) -o $@ $<
+ $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
clean:
$(RM) $(BINS)
version=4
opts="mode=git,pretty=0.14.4+git%cd.%h" \
https://bitbucket.org/genomicepidemiology/kma.git HEAD
https://bitbucket.org/genomicepidemiology/kma/downloads/?tab=tags .*/@ANY_VERSION@@ARCHIVE_EXT@
#opts="mode=git,pretty=1.1.7+git%cd.%h" \
# https://bitbucket.org/genomicepidemiology/kma.git HEAD