Skip to content
Commits on Source (2)
This diff is collapsed.
/* Philip T.L.C. Clausen Jan 2017 plan@dtu.dk */
/*
Copyright (c) 2017, Philip Clausen, Technical University of Denmark
All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
* Copyright (c) 2017, Philip Clausen, Technical University of Denmark
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <ctype.h>
#include <errno.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <time.h>
#include <unistd.h>
#include <errno.h>
#define HU_LIMIT 65535
#define U_LIMIT 4294967295
/*
STRUCTURES
*/
struct hashMapKMA {
/* end product of script */
long unsigned size; // size of DB
long unsigned n; // k-mers stored
long unsigned null_index; // null value
long unsigned v_index; // size of values
unsigned kmersize; // k
unsigned size; // size of DB
unsigned n; // k-mers stored
unsigned null_index; // null value
unsigned seqsize; // size of seq
unsigned v_index; // size of values
unsigned prefix_len; // prefix length
long unsigned *prefix; // prefix
long unsigned prefix; // prefix
unsigned *exist; // size long
long unsigned *seq; // compressed sequence of k-mers
long unsigned *exist_l; // size long, big DBs
unsigned *values; // compressed values
short unsigned *values_s; // compressed values, few templates
unsigned *key_index; // Relative
long unsigned *key_index_l; // Relative, 16 < k
unsigned *value_index; // Relative
long unsigned *value_index_l; // Relative, big DBs
};
int version[3] = {1, 0, 0};
/*
FUNCTIONS
*/
/*
 * Detach the shared-memory attachments referenced by dest.
 *
 * shmdt() is called once for each of the exist, seq, values, key_index and
 * value_index pointers. The return values are ignored, so a pointer that is
 * not a valid attachment is not reported here.
 *
 * NOTE(review): this text comes from a diff view; confirm that the `seq`
 * member still exists in the current struct hashMapKMA before relying on
 * the second call.
 */
void hashMap_shm_detach(struct hashMapKMA *dest) {
shmdt(dest->exist);
shmdt(dest->seq);
shmdt(dest->values);
shmdt(dest->key_index);
shmdt(dest->value_index);
}
void hashMapKMA_setupSHM(struct hashMapKMA *dest, FILE *file, const char *filename) {
int hashMapKMA_setupSHM(struct hashMapKMA *dest, FILE *file, const char *filename) {
int shmid;
long unsigned mask;
int shmid, kmersize, status;
unsigned DB_size;
long unsigned mask, size;
key_t key;
/* load sizes */
fseek(file, sizeof(int), SEEK_CUR);
fread(&DB_size, sizeof(unsigned), 1, file);
fread(&dest->kmersize, sizeof(unsigned), 1, file);
fread(&dest->prefix_len, sizeof(unsigned), 1, file);
fread(&dest->prefix, sizeof(long unsigned), 1, file);
fread(&dest->size, sizeof(long unsigned), 1, file);
fread(&dest->n, sizeof(unsigned), 1, file);
fread(&dest->seqsize, sizeof(unsigned), 1, file);
fread(&dest->v_index, sizeof(unsigned), 1, file);
fread(&dest->null_index, sizeof(unsigned), 1, file);
fread(&dest->n, sizeof(long unsigned), 1, file);
fread(&dest->v_index, sizeof(long unsigned), 1, file);
fread(&dest->null_index, sizeof(long unsigned), 1, file);
kmersize = dest->kmersize;
mask = 0;
mask = (~mask) >> (sizeof(long unsigned) * sizeof(long unsigned) - (kmersize << 1));
status = 0;
/* check shared memory, else load */
size = dest->size;
if((dest->size - 1) == mask) {
if(dest->v_index <= U_LIMIT) {
size *= sizeof(unsigned);
} else {
size *= sizeof(long unsigned);
}
} else {
if(dest->n <= U_LIMIT) {
size *= sizeof(unsigned);
} else {
size *= sizeof(long unsigned);
}
}
key = ftok(filename, 'e');
shmid = shmget(key, dest->size * sizeof(unsigned), IPC_CREAT | 0666);
shmid = shmget(key, size, IPC_CREAT | 0666);
if(shmid < 0) {
fprintf(stderr, "Could not setup the shared hashMap e\n");
fseek(file, dest->size * sizeof(unsigned), SEEK_CUR);
fseek(file, size, SEEK_CUR);
dest->exist = 0;
status = 1;
} else {
dest->exist = shmat(shmid, NULL, 0);
fread(dest->exist, sizeof(unsigned), dest->size, file);
fread(dest->exist, 1, size, file);
}
mask = 0;
mask = (~mask) >> (sizeof(long unsigned) * sizeof(long unsigned) - (dest->kmersize << 1));
if((dest->size - 1) == mask) {
/* values */
size = dest->v_index;
if(DB_size < HU_LIMIT) {
size *= sizeof(short unsigned);
} else {
size *= sizeof(unsigned);
}
key = ftok(filename, 'v');
shmid = shmget(key, dest->v_index * sizeof(unsigned), IPC_CREAT | 0666);
shmid = shmget(key, size, IPC_CREAT | 0666);
if(shmid < 0) {
fprintf(stderr, "Could not setup the shared hashMap v\n");
fseek(file, dest->v_index * sizeof(unsigned), SEEK_CUR);
fseek(file, size, SEEK_CUR);
dest->values = 0;
status = 1;
} else {
/* found */
dest->values = shmat(shmid, NULL, 0);
fread(dest->values, sizeof(unsigned), dest->v_index, file);
fread(dest->values, 1, size, file);
}
} else {
key = ftok(filename, 's');
shmid = shmget(key, dest->seqsize * sizeof(long unsigned), IPC_CREAT | 0666);
if(shmid < 0) {
fprintf(stderr, "Could not setup the shared hashMap s\n");
fseek(file, dest->seqsize * sizeof(long unsigned), SEEK_CUR);
dest->seq = 0;
} else {
/* found */
dest->seq = shmat(shmid, NULL, 0);
fread(dest->seq, sizeof(long unsigned), dest->seqsize, file);
if((dest->size - 1) == mask) {
return status;
}
key = ftok(filename, 'v');
shmid = shmget(key, dest->v_index * sizeof(unsigned), IPC_CREAT | 0666);
if(shmid < 0) {
fprintf(stderr, "Could not setup the shared hashMap v\n");
fseek(file, dest->v_index * sizeof(unsigned), SEEK_CUR);
dest->values = 0;
/* kmers */
size = dest->n + 1;
if(dest->kmersize <= 16) {
size *= sizeof(unsigned);
} else {
/* found */
dest->values = shmat(shmid, NULL, 0);
fread(dest->values, sizeof(unsigned), dest->v_index, file);
size *= sizeof(long unsigned);
}
key = ftok(filename, 'k');
shmid = shmget(key, (dest->n + 1) * sizeof(unsigned), IPC_CREAT | 0666);
shmid = shmget(key, size, IPC_CREAT | 0666);
if(shmid < 0) {
fprintf(stderr, "Could not setup the shared hashMap k\n");
fseek(file, (dest->n + 1) * sizeof(unsigned), SEEK_CUR);
dest->key_index = 0;
fseek(file, size, SEEK_CUR);
dest->values = 0;
status = 1;
} else {
/* found */
dest->key_index = shmat(shmid, NULL, 0);
fread(dest->key_index, sizeof(unsigned), dest->n + 1, file);
fread(dest->key_index, 1, size, file);
}
/* value indexes */
size = dest->n;
if(dest->v_index < U_LIMIT) {
size *= sizeof(unsigned);
} else {
size *= sizeof(long unsigned);
}
key = ftok(filename, 'i');
shmid = shmget(key, dest->n * sizeof(unsigned), IPC_CREAT | 0666);
shmid = shmget(key, size, IPC_CREAT | 0666);
if(shmid < 0) {
fprintf(stderr, "Could not setup the shared hashMap i\n");
fseek(file, dest->n * sizeof(unsigned), SEEK_CUR);
fseek(file, size, SEEK_CUR);
dest->value_index = 0;
status = 1;
} else {
/* found */
dest->value_index = shmat(shmid, NULL, 0);
fread(dest->value_index, sizeof(unsigned), dest->n, file);
}
fread(dest->value_index, 1, size, file);
}
return status;
}
void hashMapKMA_destroySHM(struct hashMapKMA *dest, FILE *file, const char *filename) {
int shmid;
long unsigned mask;
int shmid, kmersize;
unsigned DB_size;
long unsigned mask, size;
key_t key;
/* load sizes */
fseek(file, sizeof(int), SEEK_CUR);
fread(&DB_size, sizeof(unsigned), 1, file);
fread(&dest->kmersize, sizeof(unsigned), 1, file);
fread(&dest->prefix_len, sizeof(unsigned), 1, file);
fread(&dest->prefix, sizeof(long unsigned), 1, file);
fread(&dest->size, sizeof(long unsigned), 1, file);
fread(&dest->n, sizeof(unsigned), 1, file);
fread(&dest->seqsize, sizeof(unsigned), 1, file);
fread(&dest->v_index, sizeof(unsigned), 1, file);
fread(&dest->null_index, sizeof(unsigned), 1, file);
fread(&dest->n, sizeof(long unsigned), 1, file);
fread(&dest->v_index, sizeof(long unsigned), 1, file);
fread(&dest->null_index, sizeof(long unsigned), 1, file);
kmersize = dest->kmersize;
mask = 0;
mask = (~mask) >> (sizeof(long unsigned) * sizeof(long unsigned) - (kmersize << 1));
/* check shared memory, and destroy */
size = dest->size;
if((dest->size - 1) == mask) {
if(dest->v_index <= U_LIMIT) {
size *= sizeof(unsigned);
} else {
size *= sizeof(long unsigned);
}
} else {
if(dest->n <= U_LIMIT) {
size *= sizeof(unsigned);
} else {
size *= sizeof(long unsigned);
}
}
key = ftok(filename, 'e');
shmid = shmget(key, dest->size * sizeof(unsigned), 0666);
shmid = shmget(key, size, 0666);
if(shmid >= 0) {
shmctl(shmid, IPC_RMID, NULL);
}
mask = 0;
mask = (~mask) >> (sizeof(long unsigned) * sizeof(long unsigned) - (dest->kmersize << 1));
if((dest->size - 1) == mask) {
key = ftok(filename, 'v');
shmid = shmget(key, dest->v_index * sizeof(unsigned), 0666);
if(shmid >= 0) {
shmctl(shmid, IPC_RMID, NULL);
}
/* values */
size = dest->v_index;
if(DB_size < HU_LIMIT) {
size *= sizeof(short unsigned);
} else {
key = ftok(filename, 's');
shmid = shmget(key, dest->seqsize * sizeof(long unsigned), 0666);
if(shmid >= 0) {
shmctl(shmid, IPC_RMID, NULL);
size *= sizeof(unsigned);
}
key = ftok(filename, 'v');
shmid = shmget(key, dest->v_index * sizeof(unsigned), 0666);
shmid = shmget(key, size, 0666);
if(shmid >= 0) {
shmctl(shmid, IPC_RMID, NULL);
}
/* kmers */
size = dest->n + 1;
if(dest->kmersize <= 16) {
size *= sizeof(unsigned);
} else {
size *= sizeof(long unsigned);
}
key = ftok(filename, 'k');
shmid = shmget(key, (dest->n + 1) * sizeof(unsigned), 0666);
shmid = shmget(key, size, 0666);
if(shmid >= 0) {
shmctl(shmid, IPC_RMID, NULL);
}
/* value indexes */
size = dest->n;
if(dest->v_index < U_LIMIT) {
size *= sizeof(unsigned);
} else {
size *= sizeof(long unsigned);
}
key = ftok(filename, 'i');
shmid = shmget(key, dest->n * sizeof(unsigned), 0666);
shmid = shmget(key, size, 0666);
if(shmid >= 0) {
shmctl(shmid, IPC_RMID, NULL);
}
}
}
int * length_setupSHM(FILE *file, const char *filename) {
......@@ -354,7 +403,7 @@ char * name_setupSHM(FILE *file, const char *filename) {
} else {
template_names = shmat(shmid, NULL, 0);
fread(template_names, 1, size, file);
for(i = 0; i < size; i++) {
for(i = 0; i < size; ++i) {
if(template_names[i] == '\n') {
template_names[i] = 0;
}
......@@ -395,6 +444,7 @@ void helpMessage(int exeStatus) {
fprintf(helpOut, "#\t-destroy\tDestroy shared DB\t\tFalse\n");
fprintf(helpOut, "#\t-shmLvl\t\tLevel of shared memory\t\t1\n");
fprintf(helpOut, "#\t-shm-h\t\tExplain shm levels\n");
fprintf(helpOut, "#\t-v\t\tVersion\n");
fprintf(helpOut, "#\t-h\t\tShows this help message\n");
fprintf(helpOut, "#\n");
exit(exeStatus);
......@@ -402,7 +452,7 @@ void helpMessage(int exeStatus) {
int main(int argc, char *argv[]) {
int args, file_len, destroy, *template_lengths, *index;
int args, file_len, destroy, status, *template_lengths, *index;
unsigned shmLvl;
long unsigned *seq;
char *templatefilename, *template_names;
......@@ -413,12 +463,13 @@ int main(int argc, char *argv[]) {
templatefilename = 0;
destroy = 0;
shmLvl = 1;
status = 0;
/* PARSE COMMAND LINE OPTIONS */
args = 1;
while(args < argc) {
if(strcmp(argv[args], "-t_db") == 0) {
args++;
++args;
if(args < argc) {
templatefilename = malloc(strlen(argv[args]) + 64);
if(!templatefilename) {
......@@ -430,7 +481,7 @@ int main(int argc, char *argv[]) {
} else if(strcmp(argv[args], "-destroy") == 0) {
destroy = 1;
} else if(strcmp(argv[args], "-shmLvl") == 0) {
args++;
++args;
if(args < argc) {
shmLvl = atoi(argv[args]);
if(!shmLvl) {
......@@ -438,6 +489,9 @@ int main(int argc, char *argv[]) {
exit(0);
}
}
} else if(strcmp(argv[args], "-v") == 0) {
fprintf(stdout, "KMA_SHM-%d.%d.%d\n", version[0], version[1], version[2]);
exit(0);
} else if(strcmp(argv[args], "-h") == 0) {
helpMessage(0);
} else if(strcmp(argv[args], "-shm-h") == 0) {
......@@ -457,7 +511,7 @@ int main(int argc, char *argv[]) {
fprintf(stderr, "# Printing help message:\n");
helpMessage(-1);
}
args++;
++args;
}
if(templatefilename == 0) {
fprintf(stderr, "# Too few arguments handed\n");
......@@ -480,6 +534,7 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
status |= errno;
} else {
hashMapKMA_destroySHM(templates, file, templatefilename);
fclose(file);
......@@ -493,6 +548,7 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
status |= errno;
} else {
hashMapKMA_destroySHM(templates, file, templatefilename);
fclose(file);
......@@ -506,6 +562,7 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
status |= errno;
} else {
length_destroySHM(file, templatefilename);
fclose(file);
......@@ -520,6 +577,7 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
status |= errno;
} else {
seq_destroySHM(file, templatefilename);
fclose(file);
......@@ -531,6 +589,7 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
status |= errno;
} else {
index_destroySHM(file, templatefilename);
fclose(file);
......@@ -544,6 +603,7 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
status |= errno;
} else {
name_destroySHM(file, templatefilename);
fclose(file);
......@@ -557,8 +617,9 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
status |= errno;
} else {
hashMapKMA_setupSHM(templates, file, templatefilename);
status |= hashMapKMA_setupSHM(templates, file, templatefilename);
hashMap_shm_detach(templates);
fclose(file);
}
......@@ -571,8 +632,9 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
status |= errno;
} else {
hashMapKMA_setupSHM(templates, file, templatefilename);
status |= hashMapKMA_setupSHM(templates, file, templatefilename);
hashMap_shm_detach(templates);
fclose(file);
}
......@@ -585,10 +647,14 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
status |= errno;
} else {
template_lengths = length_setupSHM(file, templatefilename);
if(template_lengths)
if(template_lengths) {
shmdt(template_lengths);
} else {
status |= 1;
}
fclose(file);
}
templatefilename[file_len] = 0;
......@@ -601,10 +667,14 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
status |= errno;
} else {
seq = seq_setupSHM(file, templatefilename);
if(seq)
if(seq) {
shmdt(seq);
} else {
status |= 1;
}
fclose(file);
}
templatefilename[file_len] = 0;
......@@ -614,10 +684,14 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
status |= errno;
} else {
index = index_setupSHM(file, templatefilename);
if(index)
if(index) {
shmdt(index);
} else {
status |= 1;
}
fclose(file);
}
templatefilename[file_len] = 0;
......@@ -629,10 +703,14 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
status |= errno;
} else {
template_names = name_setupSHM(file, templatefilename);
if(template_names)
if(template_names) {
shmdt(template_names);
} else {
status |= 1;
}
fclose(file);
}
templatefilename[file_len] = 0;
......@@ -655,5 +733,5 @@ int main(int argc, char *argv[]) {
* ipcs -a
*/
return 0;
return status;
}
This diff is collapsed.
/* Philip T.L.C. Clausen Jan 2017 plan@dtu.dk */
/*
* Copyright (c) 2017, Philip Clausen, Technical University of Denmark
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define HU_LIMIT 65535
#define U_LIMIT 4294967295
/*
 * Descriptor of a compressed k-mer hash map as it is loaded from and written
 * back to a "<db>.comp.b" file by hashMapKMA_014to015().
 *
 * Each array comes in two flavours (narrow / wide element type); the paired
 * pointers are used as aliases of one buffer in this file.
 */
struct hashMapKMA {
    /* counters stored in the file header */
    unsigned long size;            /* size of DB */
    unsigned long n;               /* k-mers stored */
    unsigned long null_index;      /* null value */
    unsigned long v_index;         /* size of values */
    unsigned kmersize;             /* k */
    unsigned prefix_len;           /* prefix length */
    unsigned long prefix;          /* prefix */

    /* payload arrays */
    unsigned *exist;               /* `size` entries */
    unsigned long *exist_l;        /* `size` entries, big DBs */
    unsigned *values;              /* compressed values */
    unsigned short *values_s;      /* compressed values, few templates */
    unsigned *key_index;           /* relative */
    unsigned long *key_index_l;    /* relative, 16 < k */
    unsigned *value_index;         /* relative */
    unsigned long *value_index_l;  /* relative, big DBs */
};
/* First header field of the ".comp.b" file; compared against HU_LIMIT to pick the element width of the values array. */
unsigned DB_size;
/* Right shift applied by getKmer(); set in hashMapKMA_014to015() from the k-mer size. */
unsigned shifter;
/* BASIC FUNCTIONS */
/*
 * Print the current errno (number and message) to stderr and terminate the
 * process. Does not return.
 *
 * errno is captured before anything else is called, because fprintf() is
 * allowed to modify it. If errno is 0 the process still exits with a failure
 * status, so an error path can never be reported as success.
 */
void ERROR() {
    int err = errno;

    fprintf(stderr, "Error: %d (%s)\n", err, strerror(err));
    exit(err ? err : EXIT_FAILURE);
}
/*
 * malloc() wrapper: returns the allocated block, or hands over to ERROR()
 * (which terminates the program) when malloc() yields a null pointer.
 */
void * smalloc(size_t size) {
    void *block = malloc(size);

    if(block == NULL) {
        ERROR();
    }

    return block;
}
/*
 * fopen() wrapper: returns the opened stream, or prints the offending
 * filename and hands over to ERROR() (which terminates the program) when the
 * file cannot be opened.
 *
 * filename: path passed to fopen().
 * mode:     fopen() mode string.
 */
FILE * sfopen(char *filename, char *mode) {
    FILE *file;
    int err;

    file = fopen(filename, mode);
    if(!file) {
        /* fprintf may overwrite errno; keep fopen's value for ERROR() */
        err = errno;
        fprintf(stderr, "Filename:\t%s\n", filename);
        errno = err;
        ERROR();
    }

    return file;
}
/*
 * Extract a k-mer from a 2-bit packed sequence.
 *
 * compressor: array of 64-bit words, 32 positions per word.
 * cPos:       position of the first base of the k-mer.
 *
 * The word holding cPos is shifted left so the k-mer starts at the top bit;
 * when the k-mer spills into the following word (bit offset larger than the
 * global `shifter`), the leading bits of that word are merged in. The result
 * is then shifted right by `shifter`.
 */
long unsigned getKmer(long unsigned *compressor, unsigned cPos) {
    const unsigned bitOffset = (cPos % 32) * 2;
    const unsigned word = cPos / 32;
    long unsigned window;

    window = compressor[word] << bitOffset;
    if(shifter < bitOffset) {
        /* k-mer crosses the word boundary */
        window |= compressor[word + 1] >> (64 - bitOffset);
    }

    return window >> shifter;
}
/*
 * Rewrite "<filename>.length.b" in place so that it holds three arrays of
 * `size` entries after the leading count.
 *
 * filename: DB base name; must have room for the appended suffix. It is
 *           restored to the base name before returning.
 *
 * Layout handling, by number of entries found after the count:
 *   size      file is left untouched, returns 0
 *   2 * size  old DB: the first array is written once more in front of the
 *             two existing arrays, returns 0
 *   3 * size  the last array is moved in front of the first two, returns 1
 *   other     "DB is malformed." and exit(1)
 *
 * The return value is passed on by index_014to015() as the `prefix` flag of
 * hashMapKMA_014to015().
 */
unsigned convertLength_014to015(char *filename) {
    unsigned size, file_len, complete;
    size_t total, got, written;
    int *lengths;
    FILE *file;

    file_len = strlen(filename);
    strcat(filename, ".length.b");
    file = sfopen(filename, "rb+");
    filename[file_len] = 0;

    /* without a readable count nothing below is meaningful */
    if(fread(&size, sizeof(unsigned), 1, file) != 1) {
        fprintf(stderr, "DB is malformed.\n");
        fclose(file);
        exit(1);
    }

    /* do the arithmetic in size_t so 3 * size cannot wrap in 32 bits */
    total = (size_t) 3 * size;
    lengths = smalloc(total * sizeof(unsigned));
    got = fread(lengths, sizeof(unsigned), total, file);

    /* reposition right after the count; also required between read and write */
    fseek(file, sizeof(unsigned), SEEK_SET);

    complete = 0;
    if(got == size) {
        /* nothing to rewrite */
    } else if(got == (size_t) 2 * size) {
        fprintf(stderr, "DB is old.\n");
        fprintf(stderr, "It will only work for \"-Sparse\" mapping!!!\n");
        written = fwrite(lengths, sizeof(unsigned), size, file);
        written += fwrite(lengths, sizeof(unsigned), (size_t) 2 * size, file);
        if(written != total) {
            ERROR();
        }
    } else if(got == total) {
        written = fwrite(lengths + (size_t) 2 * size, sizeof(unsigned), size, file);
        written += fwrite(lengths, sizeof(unsigned), (size_t) 2 * size, file);
        if(written != total) {
            ERROR();
        }
        complete = 1;
    } else {
        fprintf(stderr, "DB is malformed.\n");
        free(lengths);
        fclose(file);
        exit(1);
    }

    free(lengths);
    /* fclose flushes the rewritten arrays; a failure here means data loss */
    if(fclose(file) != 0) {
        ERROR();
    }

    return complete;
}
/*
 * Convert "<filename>.comp.b" from the 0.14 layout to the 0.15 layout.
 *
 * filename: DB base name; must have room for the appended suffixes. It is
 *           restored to the base name before returning.
 * prefix:   when non-zero and the stored prefix length is 0, the stored
 *           prefix is set to 1 in the converted file.
 *
 * Steps:
 *   1. remove "<filename>.b";
 *   2. read the whole old file into memory (header counters n, v_index and
 *      null_index are 32 bit in the old layout, the seq size is read and
 *      only used to load the packed sequence);
 *   3. rewrite the file with 64-bit counters, values narrowed to 16 bit when
 *      DB_size < HU_LIMIT, and - for 16 < k - key indexes replaced by the
 *      k-mers themselves, extracted with getKmer().
 *
 * Returns 0 on success and 1 when the old file is truncated or the new file
 * could not be written completely. Exits with status 1 when the header does
 * not look like a 0.14 DB. Sets the globals DB_size and shifter.
 */
int hashMapKMA_014to015(char *filename, unsigned prefix) {
    unsigned i, tmp, size, kmersize, seqsize, file_len;
    int status;
    size_t header;
    long unsigned mask, *seq;
    FILE *file;
    struct hashMapKMA *dest;

    /* rm filename.b */
    file_len = strlen(filename);
    strcat(filename, ".b");
    remove(filename);
    filename[file_len] = 0;

    /* load DB */
    strcat(filename, ".comp.b");
    file = sfopen(filename, "rb");
    filename[file_len] = 0;

    /* everything released at `done` starts out empty */
    dest = smalloc(sizeof(struct hashMapKMA));
    dest->exist = 0;
    dest->values = 0;
    dest->key_index = 0;
    dest->value_index = 0;
    seq = 0;
    tmp = 0;
    seqsize = 0;
    status = 1;

    /* load sizes */
    header = fread(&DB_size, sizeof(unsigned), 1, file);
    header += fread(&dest->kmersize, sizeof(unsigned), 1, file);
    header += fread(&dest->prefix_len, sizeof(unsigned), 1, file);
    header += fread(&dest->prefix, sizeof(long unsigned), 1, file);
    header += fread(&dest->size, sizeof(long unsigned), 1, file);

    /* load changed size */
    header += fread(&tmp, sizeof(unsigned), 1, file);
    dest->n = tmp;
    header += fread(&seqsize, sizeof(unsigned), 1, file); //seq size
    header += fread(&tmp, sizeof(unsigned), 1, file);
    dest->v_index = tmp;
    header += fread(&tmp, sizeof(unsigned), 1, file);
    dest->null_index = tmp;
    if(header != 9) {
        /* truncated header */
        goto done;
    }

    /* make checks; k outside 1..32 would make the shifts below undefined */
    kmersize = dest->kmersize;
    if(dest->size < dest->n || dest->n == 0 || kmersize == 0 || 32 < kmersize) {
        fprintf(stderr, "DB is not of version 0.14\n");
        exit(1);
    }

    /* sizeof * sizeof is 64 where long unsigned is 8 bytes, as getKmer() assumes */
    mask = 0;
    mask = (~mask) >> (sizeof(long unsigned) * sizeof(long unsigned) - (kmersize << 1));
    shifter = sizeof(long unsigned) * sizeof(long unsigned) - (kmersize << 1);

    /* load arrays */
    dest->exist = smalloc(dest->size * sizeof(unsigned));
    if(dest->size != fread(dest->exist, sizeof(unsigned), dest->size, file)) {
        goto done;
    }
    if(mask != (dest->size - 1)) {
        seq = smalloc(seqsize * sizeof(long unsigned));
        if(seqsize != fread(seq, sizeof(long unsigned), seqsize, file)) {
            goto done;
        }
    }
    dest->values = smalloc(dest->v_index * sizeof(int));
    if(dest->v_index != fread(dest->values, sizeof(int), dest->v_index, file)) {
        goto done;
    }
    if(mask != (dest->size - 1)) {
        dest->key_index = smalloc((dest->n + 1) * sizeof(unsigned));
        if((dest->n + 1) != fread(dest->key_index, sizeof(unsigned), dest->n + 1, file)) {
            goto done;
        }
        dest->value_index = smalloc(dest->n * sizeof(unsigned));
        if(dest->n != fread(dest->value_index, sizeof(unsigned), dest->n, file)) {
            goto done;
        }
    }

    /* input fully loaded; release the handle before the file is truncated */
    fclose(file);
    file = 0;

    /* convert to new format */
    /* change prefix if sparse - */
    if(prefix && dest->prefix_len == 0) {
        dest->prefix = 1;
    }

    strcat(filename, ".comp.b");
    file = sfopen(filename, "wb");
    filename[file_len] = 0;

    fwrite(&DB_size, sizeof(unsigned), 1, file);
    fwrite(&dest->kmersize, sizeof(unsigned), 1, file);
    fwrite(&dest->prefix_len, sizeof(unsigned), 1, file);
    fwrite(&dest->prefix, sizeof(long unsigned), 1, file);
    fwrite(&dest->size, sizeof(long unsigned), 1, file);
    fwrite(&dest->n, sizeof(long unsigned), 1, file);
    fwrite(&dest->v_index, sizeof(long unsigned), 1, file);
    fwrite(&dest->null_index, sizeof(long unsigned), 1, file);

    /* exist */
    fwrite(dest->exist, sizeof(unsigned), dest->size, file);

    /* values */
    if(DB_size < HU_LIMIT) {
        /* narrow in place, front to back: slot i is written at or before the bytes it was read from */
        dest->values_s = (short unsigned *)(dest->values);
        for(i = 0; i < dest->v_index; ++i) {
            dest->values_s[i] = dest->values[i];
        }
        size = sizeof(short unsigned);
    } else {
        size = sizeof(unsigned);
    }
    fwrite(dest->values, size, dest->v_index, file);

    if(mask != (dest->size - 1)) {
        /* kmers */
        if(dest->kmersize <= 16) {
            fwrite(dest->key_index, sizeof(unsigned), dest->n + 1, file);
        } else {
            dest->key_index_l = realloc(dest->key_index, (dest->n + 1) * sizeof(long unsigned));
            if(dest->key_index_l) {
                dest->key_index = (unsigned *)(dest->key_index_l);
            } else {
                ERROR();
            }
            /* widen in place, back to front, so unread 32-bit slots are not overwritten */
            i = dest->n + 1;
            while(i--) {
                dest->key_index_l[i] = getKmer(seq, dest->key_index[i]);
            }
            fwrite(dest->key_index_l, sizeof(long unsigned), dest->n + 1, file);
        }

        /* value_index */
        fwrite(dest->value_index, sizeof(unsigned), dest->n, file);
    }

    /* any failed fwrite above leaves the stream error flag set */
    status = ferror(file) ? 1 : 0;

done:
    if(file && fclose(file) != 0) {
        status = 1;
    }
    /* values_s and key_index_l alias values and key_index; free each buffer once */
    free(seq);
    free(dest->value_index);
    free(dest->key_index);
    free(dest->values);
    free(dest->exist);
    free(dest);

    return status;
}
/*
 * Convert one DB from version 0.14 to 0.15.
 *
 * filename: DB base name; must have room for the appended suffixes. It is
 *           restored to the base name before returning.
 *
 * The length file is converted first; its result is handed to the hash map
 * conversion as the prefix flag. If "<filename>.decon.b" can be opened, the
 * hash map stored under the base name "<filename>.decon" is converted too.
 *
 * Returns the sum of the hash map conversion results (0 when all succeeded).
 */
int index_014to015(char *filename) {
    static const char deconSuffix[] = ".decon";
    unsigned baseLen, sparse, failures;
    FILE *probe;

    baseLen = strlen(filename);

    /* change prefix if sparse - */
    sparse = convertLength_014to015(filename);
    failures = hashMapKMA_014to015(filename, sparse);

    /* check for deCon */
    strcat(filename, ".decon.b");
    probe = fopen(filename, "rb");
    if(probe != NULL) {
        fclose(probe);
        /* cut the trailing ".b": the base name becomes "filename.decon" */
        filename[baseLen + (sizeof(deconSuffix) - 1)] = 0;
        failures += hashMapKMA_014to015(filename, sparse);
    }
    filename[baseLen] = 0;

    return failures;
}
/*
 * Print the usage text and terminate with exeStatus.
 * The text goes to stdout when exeStatus is 0 and to stderr otherwise.
 */
void helpMessage(int exeStatus) {
    static const char *const usage[] = {
        "# KMA_update syncronises kma-indexes to the needed version.\n",
        "# Options are:\t\tDesc:\t\t\t\t\tRequirements:\n",
        "#\n",
        "#\t-t_db\t\tTemplate DB\t\t\t\tREQUIRED\n",
        "#\t-v\t\t[XXYY], from version major version XX\n#\t\t\tto major version YY. Use minor version,\n#\t\t\tif major version is 0.\t\t\tREQUIRED\n",
        "#\t-h\t\tShows this help message\n",
        "#\n"
    };
    FILE *helpOut = (exeStatus == 0) ? stdout : stderr;
    size_t line;

    for(line = 0; line < sizeof(usage) / sizeof(usage[0]); ++line) {
        fputs(usage[line], helpOut);
    }

    exit(exeStatus);
}
/*
 * KMA_update entry point.
 *
 * Options:
 *   -t_db <name>   DB base name (required)
 *   -v <XXYY>      conversion to perform (required); only 1415 is supported
 *   -h             print help and exit
 *
 * Exit status: 0 on success, 1 on an invalid option or missing required
 * arguments, 2 on an invalid version, 3 when the conversion fails.
 */
int main(int argc, char *argv[]) {
    int args;
    long unsigned version;
    char *filename, *error;

    /* set defaults */
    filename = 0;
    version = 0;
    error = 0;

    args = 1;
    while(args < argc) {
        if(strcmp(argv[args], "-t_db") == 0) {
            if(++args < argc) {
                /* a repeated -t_db replaces the earlier name */
                free(filename);
                /* spare room for the suffixes appended during conversion */
                filename = smalloc(strlen(argv[args]) + 64);
                strcpy(filename, argv[args]);
            }
        } else if(strcmp(argv[args], "-v") == 0) {
            if(++args < argc) {
                /* kept at full width so an oversized number cannot wrap onto a valid version */
                version = strtoul(argv[args], &error, 10);
                if(*error != 0) {
                    fprintf(stderr, " Invalid version specified.\n");
                    exit(2);
                }
            }
        } else if(strcmp(argv[args], "-h") == 0) {
            helpMessage(0);
        } else {
            fprintf(stderr, " Invalid option:\t%s\n", argv[args]);
            fprintf(stderr, " Printing help message:\n");
            helpMessage(1);
        }
        ++args;
    }

    if(!filename || !version) {
        fprintf(stderr, "Insuffient amount of arguments handed!!!\n");
        free(filename);
        /* nothing was converted: report failure to the caller */
        return 1;
    } else if(version == 1415) {
        if(index_014to015(filename)) {
            fprintf(stderr, "Conversion error.\n");
            exit(3);
        }
    } else {
        fprintf(stderr, "Invalid version swifting specified.\n");
        fprintf(stderr, "Valid conversions:\n");
        fprintf(stderr, "\t%d\t%.2f -> %.2f\n", 1415, 0.14, 0.15);
        free(filename);
        return 2;
    }

    free(filename);
    return 0;
}
CFLAGS = -std=c99 -w -O3
BINS = kma kma_index kma_shm
CFLAGS = -w -O3
BINS = kma kma_index kma_shm kma_update
all: $(BINS)
......@@ -7,10 +7,13 @@ kma: KMA.c
$(CC) $(CFLAGS) -o $@ $< -lm -lpthread -lz
kma_index: KMA_index.c
$(CC) $(CFLAGS) -o $@ $< -lm
$(CC) $(CFLAGS) -o $@ $< -lm -lz
kma_shm: KMA_SHM.c
$(CC) $(CFLAGS) -o $@ $<
kma_update: KMA_update.c
$(CC) $(CFLAGS) -o $@ $<
clean:
$(RM) $(BINS)
......@@ -17,7 +17,10 @@ not exist. It works for long low quality reads as well, such as those from Nanop
Non-unique matches are resolved using the "ConClave" sorting scheme, and a consensus sequence is output
in addition to other common attributes, based on our users' demands.
If you use KMA for your published research, then please cite the KMA paper.
If you use KMA for your published research, then please cite:
Philip T.L.C. Clausen, Frank M. Aarestrup & Ole Lund,
"Rapid and precise alignment of raw reads against redundant databases with KMA",
BMC Bioinformatics, 2018;19:307.
# Usage #
......@@ -65,6 +68,7 @@ Some of the most important options:
-Mt1 Match to only one template in the database.
-ID Minimum identity to output template match.
-apm Paired end method, “p”: Reward if pairing the reads, “u”: unite best template matches in each read if possible, “f” force paired reads to pair.
-1t1 One read to one template, no splicing performed. Well suited for short reads and whole genome mapping.
-bc90 Basecalls should be significantly overrepresented, and have at least 90% agreement.
-bcNano Basecalls optimized for nanopore sequencing.
-mrs minimum alignment score normalized to alignment length.
......@@ -136,7 +140,7 @@ Usage and options are available with the "-h" option on all three programs.
If in doubt, please mail any concerns or problems to: *plan@dtu.dk*.
# Citation #
1. Philip T.L.C. Clausen, Frank M. Aarestrup & Ole Lund, "Rapid and precise alignment of raw reads against redundant databases with KMA", under review.
1. Philip T.L.C. Clausen, Frank M. Aarestrup & Ole Lund, "Rapid and precise alignment of raw reads against redundant databases with KMA", BMC Bioinformatics, 2018;19:307.
# License #
Copyright (c) 2017, Philip Clausen, Technical University of Denmark
......