Skip to content
Commits on Source (7)
......@@ -22,13 +22,14 @@
#define PATHSEP "/"
#endif
static char *Usage = "[-vfd] <path:db|dam> <track:name>";
static char *Usage = "[-vfd] <path:db|dam> <track:name> ...";
int main(int argc, char *argv[])
{ char *prefix;
FILE *aout, *dout;
int nblocks;
int nfiles;
FILE *aout, *dout;
int c;
int VERBOSE;
int FORCE;
int DELETE;
......@@ -52,7 +53,7 @@ int main(int argc, char *argv[])
FORCE = flags['f'];
DELETE = flags['d'];
if (argc != 3)
if (argc < 3)
{ fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
fprintf(stderr,"\n");
fprintf(stderr," -v: verbose\n");
......@@ -62,6 +63,8 @@ int main(int argc, char *argv[])
}
}
// Open DB stub and get number of blocks
{ char *pwd, *root;
int i, plen, index, isdam;
FILE *dstub;
......@@ -103,268 +106,288 @@ int main(int argc, char *argv[])
free(dstub_name);
free(pwd);
free(root);
}
aout = fopen(Catenate(prefix,argv[2],".","anno"),"r");
if (aout != NULL && !FORCE)
{ fprintf(stderr,"%s: Track file %s%s.anno already exists!\n",Prog_Name,prefix,argv[2]);
exit (1);
}
// For each track do
dout = fopen(Catenate(prefix,argv[2],".","data"),"r");
if (dout != NULL && !FORCE)
{ fprintf(stderr,"%s: Track file %s%s.data already exists!\n",Prog_Name,prefix,argv[2]);
exit (1);
}
for (c = 2; c < argc; c++)
{ int nfiles;
int tracktot, tracksiz;
int64 trackoff;
char data[1024];
void *anno;
FILE *lfile = NULL;
DAZZ_EXTRA *extra;
int nextra;
int64 extail;
aout = Fopen(Catenate(prefix,argv[2],".","anno"),"w");
if (aout == NULL)
exit (1);
dout = NULL;
}
{ int tracktot, tracksiz;
int64 trackoff;
char data[1024];
void *anno;
FILE *lfile = NULL;
DAZZ_EXTRA *extra;
int nextra;
int64 extail;
extra = NULL;
anno = NULL;
trackoff = 0;
tracktot = tracksiz = 0;
if (fwrite(&tracktot,sizeof(int),1,aout) != 1)
SYSTEM_WRITE_ERROR
if (fwrite(&tracksiz,sizeof(int),1,aout) != 1)
SYSTEM_WRITE_ERROR
nextra = 0;
nfiles = 0;
while (1)
{ FILE *dfile, *afile;
char *dfile_name, *afile_name;
int i, apos, size, esize, tracklen;
afile_name = Strdup(Numbered_Suffix(prefix,nfiles+1,Catenate(".",argv[2],".","anno")),
"Allocating .anno file name");
dfile_name = Strdup(Numbered_Suffix(prefix,nfiles+1,Catenate(".",argv[2],".","data")),
"Allocating .data file name");
if (afile_name == NULL || dfile_name == NULL)
goto error;
afile = fopen(afile_name,"r");
if (afile == NULL)
break;
dfile = fopen(Numbered_Suffix(prefix,nfiles+1,Catenate(".",argv[2],".","data")),"r");
if (dfile == NULL && errno != ENOENT)
{ fprintf(stderr,"%s: The file %s is corrupted\n",Prog_Name,dfile_name);
goto error;
}
// Open the output .anno for writing, and output header stub
if (nfiles > 0)
fclose(lfile);
lfile = afile;
if (VERBOSE)
{ fprintf(stderr,"\nConstructing %s%s:\n",prefix,argv[c]);
fflush(stderr);
}
if (VERBOSE)
{ fprintf(stderr,"Concatenating %s%d.%s ...\n",prefix,nfiles+1,argv[2]);
fflush(stderr);
}
aout = fopen(Catenate(prefix,argv[c],".","anno"),"r");
if (aout != NULL)
{ if (!FORCE)
{ fprintf(stderr,"%s: Track file %s%s.anno already exists!\n",Prog_Name,prefix,argv[c]);
exit (1);
}
fclose(aout);
}
dout = fopen(Catenate(prefix,argv[c],".","data"),"r");
if (dout != NULL)
{ if (!FORCE)
{ fprintf(stderr,"%s: Track file %s%s.data already exists!\n",Prog_Name,prefix,argv[c]);
exit (1);
}
fclose(dout);
}
aout = Fopen(Catenate(prefix,argv[c],".","anno"),"w");
if (aout == NULL)
exit (1);
dout = NULL;
extra = NULL;
anno = NULL;
trackoff = 0;
tracktot = tracksiz = 0;
if (fwrite(&tracktot,sizeof(int),1,aout) != 1)
SYSTEM_WRITE_ERROR
if (fwrite(&tracksiz,sizeof(int),1,aout) != 1)
SYSTEM_WRITE_ERROR
// Open and catenate in each block .anno and .data file
nextra = 0;
nfiles = 0;
while (1)
{ FILE *dfile, *afile;
char *dfile_name, *afile_name;
int i, apos, size, esize, tracklen;
FREAD(&tracklen,sizeof(int),1,afile)
FREAD(&size,sizeof(int),1,afile)
if (size == 0)
esize = 8;
else
esize = size;
if (nfiles == 0)
{ tracksiz = size;
if (dfile != NULL)
{ dout = Fopen(Catenate(prefix,argv[2],".","data"),"w");
if (dout == NULL)
goto error;
}
else
{ anno = Malloc(esize,"Allocating annotation record");
if (anno == NULL)
goto error;
}
}
else
{ int escape = 1;
if (tracksiz != size)
{ fprintf(stderr,"%s: Track block %d does not have the same annotation size (%d)",
Prog_Name,nfiles+1,size);
fprintf(stderr," as previous blocks (%d)\n",tracksiz);
}
else if (dfile == NULL && dout != NULL)
fprintf(stderr,"%s: Track block %d does not have data but previous blocks do\n",
Prog_Name,nfiles+1);
else if (dfile != NULL && dout == NULL)
fprintf(stderr,"%s: Track block %d has data but previous blocks do not\n",
Prog_Name,nfiles+1);
else
escape = 0;
if (escape)
afile_name = Strdup(Numbered_Suffix(prefix,nfiles+1,Catenate(".",argv[c],".","anno")),
"Allocating .anno file name");
dfile_name = Strdup(Numbered_Suffix(prefix,nfiles+1,Catenate(".",argv[c],".","data")),
"Allocating .data file name");
if (afile_name == NULL || dfile_name == NULL)
goto error;
afile = fopen(afile_name,"r");
if (afile == NULL)
break;
dfile = fopen(Numbered_Suffix(prefix,nfiles+1,Catenate(".",argv[c],".","data")),"r");
if (dfile == NULL && errno != ENOENT)
{ fprintf(stderr,"%s: The file %s is corrupted\n",Prog_Name,dfile_name);
goto error;
}
}
if (dfile != NULL)
{ int64 dlen;
if (esize == 4)
{ int anno4;
if (nfiles > 0)
fclose(lfile);
lfile = afile;
for (i = 0; i < tracklen; i++)
{ FREAD(&anno4,sizeof(int),1,afile)
anno4 += trackoff;
FWRITE(&anno4,sizeof(int),1,aout)
}
FREAD(&anno4,sizeof(int),1,afile)
dlen = anno4;
}
else
{ int64 anno8;
if (VERBOSE)
{ fprintf(stderr," Concatenating %s%d.%s ...\n",prefix,nfiles+1,argv[c]);
fflush(stderr);
}
FFREAD(&tracklen,sizeof(int),1,afile)
FFREAD(&size,sizeof(int),1,afile)
if (size == 0)
esize = 8;
else
esize = size;
for (i = 0; i < tracklen; i++)
{ FREAD(&anno8,sizeof(int64),1,afile)
anno8 += trackoff;
FWRITE(&anno8,sizeof(int64),1,aout)
}
FREAD(&anno8,sizeof(int64),1,afile)
dlen = anno8;
}
trackoff += dlen;
for (i = 1024; i < dlen; i += 1024)
{ FREAD(data,1024,1,dfile)
FWRITE(data,1024,1,dout)
}
i -= 1024;
if (i < dlen)
{ FREAD(data,dlen-i,1,dfile)
FWRITE(data,dlen-i,1,dout)
}
}
else
{ for (i = 0; i < tracklen; i++)
{ FREAD(anno,esize,1,afile)
FWRITE(anno,esize,1,aout)
}
}
FSEEKO(afile,0,SEEK_END)
FTELLO(apos,afile)
if (dfile != NULL)
extail = apos - (esize*(tracklen+1) + 2*sizeof(int));
else
extail = apos - (esize*tracklen + 2*sizeof(int));
FSEEKO(afile,-extail,SEEK_END)
if (extail >= 20)
{ if (nfiles == 0)
{ nextra = 0;
while (1)
if (Read_Extra(afile,afile_name,NULL))
break;
else
nextra += 1;
extra = (DAZZ_EXTRA *) Malloc(sizeof(DAZZ_EXTRA)*(nextra+1),"Allocating extras");
if (extra == NULL)
goto error;
FSEEKO(afile,-extail,SEEK_END)
for (i = 0; i < nextra; i++)
{ extra[i].nelem = 0;
Read_Extra(afile,afile_name,extra+i);
}
}
else
{ for (i = 0; i < nextra; i++)
if (Read_Extra(afile,afile_name,extra+i))
{ fprintf(stderr,"%s: File %s has fewer extras than previous .anno files\n",
Prog_Name,afile_name);
goto error;
if (nfiles == 0)
{ tracksiz = size;
if (dfile != NULL)
{ dout = Fopen(Catenate(prefix,argv[c],".","data"),"w");
if (dout == NULL)
goto error;
}
else
{ anno = Malloc(esize,"Allocating annotation record");
if (anno == NULL)
goto error;
}
}
else
{ int escape = 1;
if (tracksiz != size)
{ fprintf(stderr,"%s: Track block %d does not have the same annotation size (%d)",
Prog_Name,nfiles+1,size);
fprintf(stderr," as previous blocks (%d)\n",tracksiz);
}
else if (dfile == NULL && dout != NULL)
fprintf(stderr,"%s: Track block %d does not have data but previous blocks do\n",
Prog_Name,nfiles+1);
else if (dfile != NULL && dout == NULL)
fprintf(stderr,"%s: Track block %d has data but previous blocks do not\n",
Prog_Name,nfiles+1);
else
escape = 0;
if (escape)
goto error;
}
if (dfile != NULL)
{ int64 dlen, d;
if (esize == 4)
{ int anno4;
for (i = 0; i < tracklen; i++)
{ FFREAD(&anno4,sizeof(int),1,afile)
anno4 += trackoff;
FFWRITE(&anno4,sizeof(int),1,aout)
}
FFREAD(&anno4,sizeof(int),1,afile)
dlen = anno4;
}
else
{ int64 anno8;
for (i = 0; i < tracklen; i++)
{ FFREAD(&anno8,sizeof(int64),1,afile)
anno8 += trackoff;
FFWRITE(&anno8,sizeof(int64),1,aout)
}
if (Read_Extra(afile,afile_name,extra+nextra) == 0)
{ fprintf(stderr,"%s: File %s has more extras than previous .anno files\n",
Prog_Name,afile_name);
FFREAD(&anno8,sizeof(int64),1,afile)
dlen = anno8;
}
trackoff += dlen;
for (d = 1024; d < dlen; d += 1024)
{ FFREAD(data,1024,1,dfile)
FFWRITE(data,1024,1,dout)
}
d -= 1024;
if (d < dlen)
{ FFREAD(data,dlen-d,1,dfile)
FFWRITE(data,dlen-d,1,dout)
}
}
else
{ for (i = 0; i < tracklen; i++)
{ FFREAD(anno,esize,1,afile)
FFWRITE(anno,esize,1,aout)
}
}
FSEEKO(afile,0,SEEK_END)
FTELLO(apos,afile)
if (dfile != NULL)
extail = apos - (esize*(tracklen+1) + 2*sizeof(int));
else
extail = apos - (esize*tracklen + 2*sizeof(int));
FSEEKO(afile,-extail,SEEK_END)
if (extail >= 20)
{ if (nfiles == 0)
{ nextra = 0;
while (1)
if (Read_Extra(afile,afile_name,NULL))
break;
else
nextra += 1;
extra = (DAZZ_EXTRA *) Malloc(sizeof(DAZZ_EXTRA)*(nextra+1),"Allocating extras");
if (extra == NULL)
goto error;
}
}
}
FSEEKO(afile,-extail,SEEK_END)
tracktot += tracklen;
nfiles += 1;
if (dfile != NULL)
fclose(dfile);
}
for (i = 0; i < nextra; i++)
{ extra[i].nelem = 0;
Read_Extra(afile,afile_name,extra+i);
}
}
if (nfiles == 0)
{ fprintf(stderr,"%s: Couldn't find first track block %s1.%s.anno\n",
Prog_Name,prefix,argv[2]);
goto error;
}
else
{ char *byte;
if (dout != NULL)
{ if (tracksiz == 4)
{ int anno4 = trackoff;
FWRITE(&anno4,sizeof(int),1,aout)
}
else
{ int64 anno8 = trackoff;
FWRITE(&anno8,sizeof(int64),1,aout)
}
}
else
{ for (i = 0; i < nextra; i++)
if (Read_Extra(afile,afile_name,extra+i))
{ fprintf(stderr,"%s: File %s has fewer extras than previous .anno files\n",
Prog_Name,afile_name);
goto error;
}
if (Read_Extra(afile,afile_name,extra+nextra) == 0)
{ fprintf(stderr,"%s: File %s has more extras than previous .anno files\n",
Prog_Name,afile_name);
goto error;
}
}
}
tracktot += tracklen;
nfiles += 1;
if (dfile != NULL)
fclose(dfile);
}
if (nextra == 0)
{ while (fread(&byte,1,1,lfile) == 1)
FWRITE(&byte,1,1,aout)
}
else
{ int i;
for (i = 0; i < nextra; i++)
Write_Extra(aout,extra+i);
}
fclose(lfile);
if (nfiles == 0)
{ fprintf(stderr,"%s: Couldn't find first track block %s1.%s.anno\n",
Prog_Name,prefix,argv[c]);
goto error;
}
else
{ char *byte;
FSEEKO(aout,0,SEEK_SET)
FWRITE(&tracktot,sizeof(int),1,aout)
FWRITE(&tracksiz,sizeof(int),1,aout)
}
}
if (dout != NULL)
{ if (tracksiz == 4)
{ int anno4 = trackoff;
FFWRITE(&anno4,sizeof(int),1,aout)
}
else
{ int64 anno8 = trackoff;
FFWRITE(&anno8,sizeof(int64),1,aout)
}
}
if (nextra == 0)
{ while (fread(&byte,1,1,lfile) == 1)
FFWRITE(&byte,1,1,aout)
}
else
{ int i;
for (i = 0; i < nextra; i++)
Write_Extra(aout,extra+i);
}
fclose(lfile);
if (nfiles != nblocks)
{ fprintf(stderr,"%s: Did not catenate all tracks of DB (nfiles %d != nblocks %d)\n",
Prog_Name, nfiles, nblocks);
goto error;
}
FSEEKO(aout,0,SEEK_SET)
FFWRITE(&tracktot,sizeof(int),1,aout)
FFWRITE(&tracksiz,sizeof(int),1,aout)
}
if (nfiles != nblocks)
{ fprintf(stderr,"%s: Did not catenate all tracks of DB (nfiles %d != nblocks %d)\n",
Prog_Name, nfiles, nblocks);
goto error;
}
FCLOSE(aout);
if (dout != NULL)
FCLOSE(dout);
if (DELETE)
{ int i;
char *name;
for (i = 1; i <= nblocks ;i++)
{ name = Numbered_Suffix(prefix,i,Catenate(".",argv[2],".","anno"));
if (unlink(name) != 0)
fprintf(stderr,"%s: [WARNING] Couldn't delete file %s\n",Prog_Name,name);
if (dout != NULL)
{ name = Numbered_Suffix(prefix,i,Catenate(".",argv[2],".","data"));
FCLOSE(aout);
if (dout != NULL)
FCLOSE(dout);
if (DELETE)
{ int i;
char *name;
for (i = 1; i <= nblocks ;i++)
{ name = Numbered_Suffix(prefix,i,Catenate(".",argv[c],".","anno"));
if (unlink(name) != 0)
fprintf(stderr,"%s: [WARNING] Couldn't delete file %s\n",Prog_Name,name);
if (dout != NULL)
{ name = Numbered_Suffix(prefix,i,Catenate(".",argv[c],".","data"));
if (unlink(name) != 0)
fprintf(stderr,"%s: [WARNING] Couldn't delete file %s\n",Prog_Name,name);
}
}
}
}
free(prefix);
......@@ -374,17 +397,17 @@ error:
{ char *name;
fclose(aout);
name = Catenate(prefix,argv[2],".","anno");
name = Catenate(prefix,argv[c],".","anno");
if (unlink(name) != 0)
fprintf(stderr,"%s: [WARNING] Couldn't delete file %s during abort\n",Prog_Name,name);
if (dout != NULL)
{ fclose(dout);
name = Catenate(prefix,argv[2],".","data");
name = Catenate(prefix,argv[c],".","data");
if (unlink(name) != 0)
fprintf(stderr,"%s: [WARNING] Couldn't delete file %s during abort\n",Prog_Name,name);
}
free(prefix);
}
free(prefix);
exit (1);
}
This diff is collapsed.
This diff is collapsed.
#include <stdlib.h>
#include <stdio.h>
#include "DB.h"
//  Print a diagnostic for malformed input (or a failed allocation) on stderr and abort
//    with exit status 1.

static void a2b_fail(const char *msg)
{ fprintf(stderr,"DBa2b: %s\n",msg);
  exit (1);
}

//  White space as skipped by scanf's " " and "%s" directives in the "C" locale.

static int a2b_blank(int c)
{ return (c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'); }

//  Read the white-space delimited string part of a "<len> <string>" field from stdin into
//    buf, which has room for cap characters plus a terminating '\0'.  This is a bounded
//    replacement for scanf(" %s",buf): it aborts rather than write past the buffer, and
//    it reads nothing when len is 0 because an empty string has no token on the line
//    (an unconditional %s would swallow the first token of the next line instead).

static void a2b_token(char *buf, long long cap, int len)
{ long long n;
  int       c;

  if (buf == NULL)
    a2b_fail("String field has no preceding '@' header giving its maximum length");
  if (len < 0 || len > cap)
    a2b_fail("String length is negative or exceeds the maximum given in the header");

  n = 0;
  if (len > 0)
    { do
        c = getchar();
      while (a2b_blank(c));
      while (c != EOF && !a2b_blank(c))
        { if (n >= cap)
            a2b_fail("String is longer than the maximum given in the header");
          buf[n++] = (char) c;
          c = getchar();
        }
      if (c != EOF)                //  Leave the delimiter unread, exactly as %s does
        ungetc(c,stdin);
    }
  buf[n] = '\0';
}

//  DBa2b: filter that reads the ASCII 1-code lines of a dump on stdin and writes a binary
//    encoding of them on stdout.  The leading '+' and '@' header lines are copied in
//    binary and also used to size the work buffers; each following data line is then
//    written as its code byte followed by its fields in binary (strings as a length plus
//    bytes, 'S', 'A' and 'c' strings 2-bit compressed, masks as arrays of int pairs).
//    Exits with status 1 on a usage error, malformed input, or allocation failure.

int main(int argc, char *argv[])
{ char       code, which;
  int64      total;
  int        len, rno, mno, qual;
  int        vec[4];
  char      *buffer[256] = { NULL };   //  String buffer per code, NULL until sized by '@'
  long long  bufmax[256] = { 0 };      //  Longest string each buffer can hold
  char     **mname  = NULL;            //  Per mask: name, interval buffer, and capacity
  int      **masks  = NULL;            //    in pairs (-1 until its '@ T' line is seen)
  long long *mskmax = NULL;
  long long  nmasks = 0;
  int        slot;

  (void) argv;

  if (argc > 1)
    { fprintf(stderr,"Usage: DBa2b <(ascii) >(binary)\n");
      exit (1);
    }

  while (scanf(" %c",&code) == 1)       //  Header lines
    { if (code != '@' && code != '+')
        { ungetc(code,stdin);
          break;
        }

      if (scanf(" %c",&which) != 1)
        a2b_fail("Header line is truncated");
      fwrite(&code,sizeof(char),1,stdout);
      fwrite(&which,sizeof(char),1,stdout);

      if (which == 'T')
        { if (scanf("%d %lld",&mno,&total) != 2)
            a2b_fail("Mask header line is malformed");
          fwrite(&mno,sizeof(int),1,stdout);
          fwrite(&total,sizeof(int64),1,stdout);
          if (code == '@')
            { if (mno < 0 || mno >= nmasks)
                a2b_fail("Mask number is not in the range given by the M header line");
              if (total < 0)
                a2b_fail("Mask header gives a negative size");
              if (scanf(" %d",&len) != 1 || len < 0)
                a2b_fail("Mask header has a missing or negative name length");

              //  Always allocate at least one pair so the pointer is never NULL

              masks[mno] = (int *) malloc(sizeof(int)*2*(total > 0 ? total : 1));
              mname[mno] = (char *) malloc(sizeof(char)*(len+1));
              if (masks[mno] == NULL || mname[mno] == NULL)
                a2b_fail("Out of memory");
              mskmax[mno] = total;

              a2b_token(mname[mno],len,len);
              fwrite(&len,sizeof(int),1,stdout);
              fwrite(mname[mno],sizeof(char),len,stdout);
            }
        }
      else
        { if (scanf(" %lld",&total) != 1)
            a2b_fail("Header line is malformed");
          if (total < 0)
            a2b_fail("Header line gives a negative size");
          if (which == 'M')
            { long long m;

              nmasks = total;
              if (nmasks > 0)
                { masks  = (int **) malloc(sizeof(int *)*nmasks);
                  mname  = (char **) malloc(sizeof(char *)*nmasks);
                  mskmax = (long long *) malloc(sizeof(long long)*nmasks);
                  if (masks == NULL || mname == NULL || mskmax == NULL)
                    a2b_fail("Out of memory");
                  for (m = 0; m < nmasks; m++)
                    { masks[m]  = NULL;
                      mname[m]  = NULL;
                      mskmax[m] = -1;
                    }
                }
            }
          else if (code == '@')
            { slot = (unsigned char) which;      //  Never index with a negative char
              buffer[slot] = malloc(total+1);
              if (buffer[slot] == NULL)
                a2b_fail("Out of memory");
              bufmax[slot] = total;
            }
          fwrite(&total,sizeof(int64),1,stdout);
        }
    }

  //  Arrow and Quiva strings reuse the sequence buffer: only '@ S' is consulted for them

  buffer['A'] = buffer['c'] = buffer['d'] =
  buffer['i'] = buffer['m'] = buffer['s'] = buffer['S'];
  bufmax['A'] = bufmax['c'] = bufmax['d'] =
  bufmax['i'] = bufmax['m'] = bufmax['s'] = bufmax['S'];

  while (scanf(" %c",&code) == 1)       //  For each data line do
    { fwrite(&code,sizeof(char),1,stdout);
      slot = (unsigned char) code;
      switch (code)
      { case 'R':                         //  Read
          if (scanf(" %d",&rno) != 1)
            a2b_fail("R line is malformed");
          fwrite(&rno,sizeof(int),1,stdout);
          break;
        case 'Q':                         //  Qual Value
          if (scanf(" %d",&qual) != 1)
            a2b_fail("Q line is malformed");
          fwrite(&qual,sizeof(int),1,stdout);
          break;
        case 'L':                         //  Well, Pulse range
          if (scanf(" %d %d %d",vec,vec+1,vec+2) != 3)
            a2b_fail("L line is malformed");
          fwrite(vec,sizeof(int),3,stdout);
          break;
        case 'N':                         //  SNR values
          if (scanf(" %d %d %d %d",vec,vec+1,vec+2,vec+3) != 4)
            a2b_fail("N line is malformed");
          fwrite(vec,sizeof(int),4,stdout);
          break;
        case 'S': case 'A': case 'c':     //  DNA strings (2-bit compressible)
          if (scanf(" %d",&len) != 1)
            a2b_fail("String line has no length");
          a2b_token(buffer[slot],bufmax[slot],len);
          if (code == 'A')
            Number_Arrow(buffer[slot]);
          else
            Number_Read(buffer[slot]);
          Compress_Read(len,buffer[slot]);
          fwrite(&len,sizeof(int),1,stdout);
          fwrite(buffer[slot],sizeof(char),COMPRESSED_LEN(len),stdout);
          break;
        case 'H': case 'I': case 'F':     //  All other string fields
        case 'd': case 'i':
        case 'm': case 's':
          if (scanf(" %d",&len) != 1)
            a2b_fail("String line has no length");
          a2b_token(buffer[slot],bufmax[slot],len);
          fwrite(&len,sizeof(int),1,stdout);
          fwrite(buffer[slot],sizeof(char),len,stdout);
          break;
        case 'T':                         //  Mask
          if (scanf("%d %d",&mno,&len) != 2)
            a2b_fail("T line is malformed");
          if (mno < 0 || mno >= nmasks || mskmax[mno] < 0)
            a2b_fail("T line refers to a mask with no '@ T' header line");
          if (len < 0 || len > mskmax[mno])
            a2b_fail("T line has more intervals than the maximum given in the header");
          for (int i = 0; i < len; i++)
            if (scanf(" %d %d",masks[mno]+2*i,masks[mno]+2*i+1) != 2)
              a2b_fail("T line has fewer intervals than it declares");
          fwrite(&mno,sizeof(int),1,stdout);
          fwrite(&len,sizeof(int),1,stdout);
          fwrite(masks[mno],sizeof(int),2*len,stdout);
          break;
        default:                          //  Unknown code: only its code byte is emitted
          break;
      }
    }

  exit (0);
}
#include <stdlib.h>
#include <stdio.h>
#include "DB.h"
//  Print a diagnostic for a malformed binary stream (or a failed allocation) on stderr
//    and abort with exit status 1.

static void b2a_fail(const char *msg)
{ fprintf(stderr,"DBb2a: %s\n",msg);
  exit (1);
}

//  Read exactly nelem items of the given size from stdin into ptr.  A short read means
//    the stream ends in the middle of a record, so abort instead of printing stale data.

static void b2a_read(void *ptr, size_t size, size_t nelem)
{ if (fread(ptr,size,nelem,stdin) != nelem)
    b2a_fail("Unexpected end of input in the middle of a record");
}

//  Abort unless a string of len characters fits the buffer buf, which holds up to cap
//    characters and is NULL when no '@' header line sized it.

static void b2a_check(char *buf, long long cap, int len)
{ if (buf == NULL)
    b2a_fail("String field has no preceding '@' header giving its maximum length");
  if (len < 0 || len > cap)
    b2a_fail("String length is negative or exceeds the maximum given in the header");
}

//  DBb2a: filter that reads the binary encoding produced by DBa2b on stdin and prints
//    the corresponding ASCII 1-code lines on stdout.  The leading '+' and '@' header
//    records are printed and also used to size the work buffers; each following record
//    is a code byte plus binary fields and is printed as one line.  Reading stops at
//    end of input or at a 0 code byte.  Exits with status 1 on a usage error, a
//    truncated or inconsistent stream, or allocation failure.

int main(int argc, char *argv[])
{ char       code, which;
  int64      total;
  int        len, rno, mno, qual;
  int        vec[4];
  char      *buffer[256] = { NULL };   //  String buffer per code, NULL until sized by '@'
  long long  bufmax[256] = { 0 };      //  Longest string each buffer can hold
  char     **mname  = NULL;            //  Per mask: name, interval buffer, and capacity
  int      **masks  = NULL;            //    in pairs (-1 until its '@ T' record is seen)
  long long *mskmax = NULL;
  long long  nmasks = 0;
  int        slot;

  (void) argv;

  if (argc > 1)
    { fprintf(stderr,"Usage: DBb2a <(binary) >(ascii)\n");
      exit (1);
    }

  if (fread(&code,sizeof(char),1,stdin) == 0)
    code = 0;

  while (code == '@' || code == '+')      //  Header records
    { b2a_read(&which,sizeof(char),1);
      printf("%c %c",code,which);

      if (which == 'T')
        { b2a_read(&mno,sizeof(int),1);
          b2a_read(&total,sizeof(int64),1);
          printf("%d %lld",mno,total);
          if (code == '@')
            { if (mno < 0 || mno >= nmasks)
                b2a_fail("Mask number is not in the range given by the M header record");
              if (total < 0)
                b2a_fail("Mask header gives a negative size");
              b2a_read(&len,sizeof(int),1);
              if (len < 0)
                b2a_fail("Mask header has a negative name length");

              //  Always allocate at least one pair so the pointer is never NULL

              masks[mno] = (int *) malloc(sizeof(int)*2*(total > 0 ? total : 1));
              mname[mno] = (char *) malloc(sizeof(char)*(len+1));
              if (masks[mno] == NULL || mname[mno] == NULL)
                b2a_fail("Out of memory");
              mskmax[mno] = total;

              b2a_read(mname[mno],sizeof(char),len);
              printf(" %d %.*s",len,len,mname[mno]);
            }
          printf("\n");
        }
      else
        { b2a_read(&total,sizeof(int64),1);
          if (total < 0)
            b2a_fail("Header record gives a negative size");
          if (which == 'M')
            { long long m;

              nmasks = total;
              if (nmasks > 0)
                { masks  = (int **) malloc(sizeof(int *)*nmasks);
                  mname  = (char **) malloc(sizeof(char *)*nmasks);
                  mskmax = (long long *) malloc(sizeof(long long)*nmasks);
                  if (masks == NULL || mname == NULL || mskmax == NULL)
                    b2a_fail("Out of memory");
                  for (m = 0; m < nmasks; m++)
                    { masks[m]  = NULL;
                      mname[m]  = NULL;
                      mskmax[m] = -1;
                    }
                }
            }
          else if (code == '@')
            { slot = (unsigned char) which;      //  Never index with a negative char
              buffer[slot] = malloc(total+1);
              if (buffer[slot] == NULL)
                b2a_fail("Out of memory");
              bufmax[slot] = total;
            }
          printf(" %lld\n",total);
        }

      if (fread(&code,sizeof(char),1,stdin) == 0)
        code = 0;
    }

  //  Arrow and Quiva strings reuse the sequence buffer: only '@ S' is consulted for them

  buffer['A'] = buffer['c'] = buffer['d'] =
  buffer['i'] = buffer['m'] = buffer['s'] = buffer['S'];
  bufmax['A'] = bufmax['c'] = bufmax['d'] =
  bufmax['i'] = bufmax['m'] = bufmax['s'] = bufmax['S'];

  while (code != 0)                       //  For each data line do
    { slot = (unsigned char) code;
      switch (code)
      { case 'R':                         //  Read
          b2a_read(&rno,sizeof(int),1);
          printf("R %d\n",rno);
          break;
        case 'Q':                         //  Qual Value
          b2a_read(&qual,sizeof(int),1);
          printf("Q %d\n",qual);
          break;
        case 'L':                         //  Well, Pulse range
          b2a_read(vec,sizeof(int),3);
          printf("L %d %d %d\n",vec[0],vec[1],vec[2]);
          break;
        case 'N':                         //  SNR values
          b2a_read(vec,sizeof(int),4);
          printf("N %d %d %d %d\n",vec[0],vec[1],vec[2],vec[3]);
          break;
        case 'S': case 'A': case 'c':     //  DNA strings (2-bit compressible)
          b2a_read(&len,sizeof(int),1);
          b2a_check(buffer[slot],bufmax[slot],len);
          b2a_read(buffer[slot],sizeof(char),COMPRESSED_LEN(len));
          Uncompress_Read(len,buffer[slot]);
          if (code == 'A')
            Letter_Arrow(buffer[slot]);
          else
            Lower_Read(buffer[slot]);
          printf("%c %d %.*s\n",code,len,len,buffer[slot]);
          break;
        case 'H': case 'I': case 'F':     //  All other string fields
        case 'd': case 'i':
        case 'm': case 's':
          b2a_read(&len,sizeof(int),1);
          b2a_check(buffer[slot],bufmax[slot],len);
          b2a_read(buffer[slot],sizeof(char),len);
          printf("%c %d %.*s\n",code,len,len,buffer[slot]);
          break;
        case 'T':                         //  Mask
          b2a_read(&mno,sizeof(int),1);
          b2a_read(&len,sizeof(int),1);
          if (mno < 0 || mno >= nmasks || mskmax[mno] < 0)
            b2a_fail("T record refers to a mask with no '@ T' header record");
          if (len < 0 || len > mskmax[mno])
            b2a_fail("T record has more intervals than the maximum given in the header");
          b2a_read(masks[mno],sizeof(int),2*(size_t) len);
          printf("T%d %d",mno,len);
          for (int i = 0; i < len; i++)
            printf(" %d %d",masks[mno][2*i],masks[mno][2*i+1]);
          printf("\n");
          break;
        default:                          //  Unknown code: skipped, nothing is printed
          break;
      }

      if (fread(&code,sizeof(char),1,stdin) == 0)
        code = 0;
    }

  exit (0);
}
This diff is collapsed.
......@@ -133,11 +133,11 @@ int main(int argc, char *argv[])
dfile = Fopen(Catenate(pwd,PATHSEP,root,".dust.data"),"w");
if (dfile == NULL || afile == NULL)
exit (1);
FWRITE(&(db->nreads),sizeof(int),1,afile)
FWRITE(&size,sizeof(int),1,afile)
FFWRITE(&(db->nreads),sizeof(int),1,afile)
FFWRITE(&size,sizeof(int),1,afile)
nreads = 0;
indx = 0;
FWRITE(&indx,sizeof(int64),1,afile)
FFWRITE(&indx,sizeof(int64),1,afile)
}
else
{ dfile = Fopen(Catenate(pwd,PATHSEP,root,".dust.data"),"r+");
......@@ -151,8 +151,8 @@ int main(int argc, char *argv[])
exit(0);
}
FSEEKO(afile,0,SEEK_SET)
FWRITE(&(db->nreads),sizeof(int),1,afile)
FWRITE(&size,sizeof(int),1,afile)
FFWRITE(&(db->nreads),sizeof(int),1,afile)
FFWRITE(&size,sizeof(int),1,afile)
FSEEKO(afile,0,SEEK_END)
FSEEKO(dfile,0,SEEK_END)
FTELLO(indx,dfile)
......@@ -435,8 +435,8 @@ int main(int argc, char *argv[])
}
mtop = mask + ntop;
indx += ntop*sizeof(int);
FWRITE(&indx,sizeof(int64),1,afile)
FWRITE(mask1,sizeof(int),ntop,dfile)
FFWRITE(&indx,sizeof(int64),1,afile)
FFWRITE(mask1,sizeof(int),ntop,dfile)
}
#ifdef DEBUG
......
......@@ -226,14 +226,20 @@ int main(int argc, char *argv[])
}
}
// Load QVs if requested
// Load QVs or Arrows if requested
if (DOQVS)
{ if (Load_QVs(db) < 0)
{ if (Open_QVs(db) < 0)
{ fprintf(stderr,"%s: QVs requested, but no .qvs for data base\n",Prog_Name);
exit (1);
}
}
if (DOARR)
{ if (Open_Arrow(db) < 0)
{ fprintf(stderr,"%s: Arrow requested, but no .arr for data base\n",Prog_Name);
exit (1);
}
}
// Check tracks and load tracks for untrimmed DB
......@@ -248,7 +254,7 @@ int main(int argc, char *argv[])
else if (kind != MASK_TRACK)
printf("%s: Warning: %s track is not a mask track.\n",Prog_Name,MASK[i]);
else if (status == 0)
Load_Track(db,MASK[i]);
Open_Track(db,MASK[i]);
else if (status == 1 && !TRIM)
printf("%s: Warning: %s track is for a trimmed db but -u is set.\n",Prog_Name,MASK[i]);
}
......@@ -336,10 +342,8 @@ int main(int argc, char *argv[])
for (i = 0; i < MTOP; i++)
{ status = Check_Track(db,MASK[i],&kind);
if (status < 0)
continue;
else if (status == 1 && kind == MASK_TRACK)
Load_Track(db,MASK[i]);
if (status == 1 && kind == MASK_TRACK)
Open_Track(db,MASK[i]);
}
}
......@@ -432,26 +436,35 @@ int main(int argc, char *argv[])
// range pairs in pts[0..reps) and according to the display options.
{ DAZZ_READ *reads;
DAZZ_TRACK *first;
DAZZ_TRACK *first, *track;
char *read, *arrow, **entry;
int *data[MTOP];
int c, b, e, i;
int hilight, substr;
int map;
int (*iscase)(int);
read = New_Read_Buffer(db);
read = New_Read_Buffer(db);
if (DOQVS)
{ entry = New_QV_Buffer(db);
arrow = NULL;
first = db->tracks->next;
}
else if (DOARR)
{ entry = NULL;
arrow = New_Read_Buffer(db);
first = db->tracks->next;
}
else
{ entry = NULL;
arrow = NULL;
first = db->tracks;
}
if (DOARR)
arrow = New_Read_Buffer(db);
else
arrow = NULL;
c = 0;
for (track = first; track != NULL; track = track->next)
data[c++] = (int *) New_Track_Buffer(track);
if (UPPER == 1)
{ hilight = 'A'-'a';
......@@ -491,7 +504,6 @@ int main(int argc, char *argv[])
int flags, qv;
float snr[4];
DAZZ_READ *r;
DAZZ_TRACK *track;
r = reads + i;
len = r->rlen;
......@@ -549,32 +561,30 @@ int main(int argc, char *argv[])
if (DOARR)
Load_Arrow(db,i,arrow,1);
for (track = first; track != NULL; track = track->next)
{ int64 *anno;
int *data;
int64 s, f, j;
int bd, ed, m;
anno = (int64 *) track->anno;
data = (int *) track->data;
s = (anno[i] >> 2);
f = (anno[i+1] >> 2);
if (s < f)
{ for (j = s; j < f; j += 2)
{ bd = data[j];
ed = data[j+1];
if (DOSEQ)
for (m = bd; m < ed; m++)
if (iscase(read[m]))
read[m] = (char) (read[m] + hilight);
if (j == s)
PRINTF("> %s:",track->name)
PRINTF(" [%d,%d]",bd,ed)
}
PRINTF("\n")
}
}
{ int t;
for (t = 0, track = first; track != NULL; track = track->next, t += 1)
{ int *d;
int64 j, v;
int bd, ed, m;
d = data[t];
v = (Load_Track_Data(track,i,d) >> 2);
if (v > 0)
{ PRINTF("> %s:",track->name)
for (j = 0; j < v; j += 2)
{ bd = d[j];
ed = d[j+1];
if (DOSEQ)
for (m = bd; m < ed; m++)
if (iscase(read[m]))
read[m] = (char) (read[m] + hilight);
PRINTF(" [%d,%d]",bd,ed)
}
PRINTF("\n")
}
}
}
if (QUIVA)
{ int k;
......
......@@ -158,7 +158,7 @@ int main(int argc, char *argv[])
for (i = 0; i < nfiles; i++)
FGETS(buffer,2*MAX_NAME+100,dbfile)
FREAD(&dbs,sizeof(DAZZ_DB),1,ixfile)
FFREAD(&dbs,sizeof(DAZZ_DB),1,ixfile)
if (dbs.cutoff >= 0 && !FORCE)
{ printf("You are about to overwrite the current partition settings. This\n");
......@@ -286,9 +286,9 @@ int main(int argc, char *argv[])
dbs.allarr &= ~DB_ALL;
dbs.treads = treads;
FSEEKO(ixfile,0,SEEK_SET)
FWRITE(&dbs,sizeof(DAZZ_DB),1,ixfile)
FFWRITE(&dbs,sizeof(DAZZ_DB),1,ixfile)
if (SELECT >= 0)
FWRITE(reads,sizeof(DAZZ_READ),nreads,ixfile)
FFWRITE(reads,sizeof(DAZZ_READ),nreads,ixfile)
}
FCLOSE(ixfile)
......
......@@ -105,7 +105,7 @@ int main(int argc, char *argv[])
else if (kind != MASK_TRACK)
fprintf(stderr,"%s: Warning: %s track is not a mask track.\n",Prog_Name,MASK[i]);
else if (status == 0)
Load_Track(db,MASK[i]);
Open_Track(db,MASK[i]);
else if (status == 1 && !TRIM)
fprintf(stderr,"%s: Warning: %s track is for a trimmed db but -u is set.\n",
Prog_Name,MASK[i]);
......@@ -124,7 +124,7 @@ int main(int argc, char *argv[])
if (status < 0)
continue;
else if (status == 1)
Load_Track(db,MASK[i]);
Open_Track(db,MASK[i]);
}
}
}
......@@ -267,12 +267,17 @@ int main(int argc, char *argv[])
DAZZ_TRACK *track;
for (track = db->tracks; track != NULL; track = track->next)
{ char *data = track->data;
int64 *anno = (int64 *) track->anno;
{ char *data;
int64 *anno;
int *idata, *edata;
int64 ave, dev, btot;
int k, rlen, cum;
Load_All_Track_Data(track);
data = track->data;
anno = (int64 *) track->anno;
totlen = 0;
numint = 0;
maxlen = 0;
......@@ -341,6 +346,8 @@ int main(int argc, char *argv[])
}
}
printf("\n");
Close_Track(db,track);
}
}
......
......@@ -139,7 +139,7 @@ int main(int argc, char *argv[])
for (i = 0; i < nfiles; i++)
FGETS(buffer,2*MAX_NAME+100,dbfile)
FREAD(&dbs,sizeof(DAZZ_DB),1,ixfile)
FFREAD(&dbs,sizeof(DAZZ_DB),1,ixfile)
if (dbs.cutoff >= 0)
{ if (!FORCE)
......@@ -248,9 +248,9 @@ int main(int argc, char *argv[])
dbs.allarr &= ~DB_ALL;
dbs.treads = t;
FSEEKO(ixfile,0,SEEK_SET)
FWRITE(&dbs,sizeof(DAZZ_DB),1,ixfile)
FFWRITE(&dbs,sizeof(DAZZ_DB),1,ixfile)
if (SELECT >= 0)
FWRITE(reads,sizeof(DAZZ_READ),nreads,ixfile)
FFWRITE(reads,sizeof(DAZZ_READ),nreads,ixfile)
}
FCLOSE(ixfile)
......
......@@ -89,8 +89,8 @@ int main(int argc, char *argv[])
if (index_name == NULL || index == NULL)
exit (1);
FWRITE(&db,sizeof(DAZZ_DB),1,index)
FWRITE(db.reads,sizeof(DAZZ_READ),db.nreads,index)
FFWRITE(&db,sizeof(DAZZ_DB),1,index)
FFWRITE(db.reads,sizeof(DAZZ_READ),db.nreads,index)
FCLOSE(index);
}
......
......@@ -3,7 +3,7 @@ DEST_DIR = ~/bin
CFLAGS = -O3 -Wall -Wextra -Wno-unused-result -fno-strict-aliasing
ALL = fasta2DB DB2fasta quiva2DB DB2quiva DBsplit DBdust Catrack DBshow DBstats DBrm DBmv \
simulator fasta2DAM DAM2fasta DBdump rangen arrow2DB DB2arrow DBwipe DBtrim
simulator fasta2DAM DAM2fasta DBdump rangen arrow2DB DB2arrow DBwipe DBtrim DBa2b DBb2a
all: $(ALL)
......@@ -67,6 +67,11 @@ DAM2fasta: DAM2fasta.c DB.c DB.h QV.c QV.h
DBwipe: DBwipe.c DB.c DB.h QV.c QV.h
gcc $(CFLAGS) -o DBwipe DBwipe.c DB.c QV.c -lm
DBa2b: DBa2b.c DB.c DB.h QV.c QV.h
gcc $(CFLAGS) -o DBa2b DBa2b.c DB.c QV.c -lm
DBb2a: DBb2a.c DB.c DB.h QV.c QV.h
gcc $(CFLAGS) -o DBb2a DBb2a.c DB.c QV.c -lm
clean:
rm -f $(ALL)
......
......@@ -2,6 +2,7 @@
## _Author: Gene Myers_
## _First: July 17, 2013_
## _Current: April 19, 2019_
For typeset documentation, examples of use, and design philosophy please go to
my [blog](https://dazzlerblog.wordpress.com/command-guides/dazz_db-command-guide).
......@@ -157,7 +158,9 @@ in a given named input (i.e. all sources other than -i without a name) can only
added consecutively to the DB (this is checked by the command). The .fasta headers must
be in the "Pacbio" format (i.e. the output of the Pacbio tools or our dextract program)
and the well, pulse interval, and read quality are extracted from the header and kept
with each read record. If the files are being added to an existing database, and the
with each read record. The headers may now also be those Pacbio outputs for CCS data
wherein the pulse range is replaced by the string "ccs" and in which case only the
well number is recorded. If the files are being added to an existing database, and the
partition settings of the DB have already been set (see DBsplit below), then the
partitioning of the database is updated to include the new data. A file may contain
the data from multiple SMRT cells provided the reads for each SMRT cell are consecutive
......@@ -314,11 +317,12 @@ This permits job parallelism in block-sized chunks, and the resulting sequence o
block tracks can then be merged into a track for the entire untrimmed DB with Catrack.
```
12. Catrack [-vfd] <path:db|dam> <track:name>
12. Catrack [-vfd] <path:db|dam> <track:name> ...
```
Find all block tracks of the form .\<path\>.#.\<track\>... and concatenate them into a single
track, .\<path\>.\<track\>..., for the given DB or DAM. The block track files must all
track, .\<path\>.\<track\>..., for the given DB or DAM. Do so for each track name present on
the command line. The block track files must all
encode the same kind of track data (this is checked), and the files must exist for
block 1, 2, 3, ... up to the last block number. If the -f option is set, then the
concatenation takes place regardless of whether or not the single, combined track
......@@ -365,7 +369,7 @@ fasta2DB, quiva2D, and arrow2DB, giving one a simple way to make a DB of a subse
the reads for testing purposes.
```
14. DBdump [-rhsaqip] [-uU] [-m<mask>]+
14. DBdump [-rhsaqif] [-uU] [-m<mask>]+
<path:db|dam> [ <reads:FILE> | <reads:range> ... ]
```
......@@ -390,7 +394,7 @@ the 4 SNR channel values on an N-line,
* -i requests that the intrinsic quality values be output on an I-line.
* -p requests the repeat profile be output (if available) on a P-line, on a P-line
* -f requests the source file name is output just before the first read data in the file on a F-line.
* -m\<track\> requests that mask \<track\> be output on a T-line.
......@@ -404,9 +408,8 @@ separated by a single blank space. Strings are output as first an integer givin
length of the string, a blank space, and then the string terminated by a new-line.
Intrinsic quality values are between 0 and 50, inclusive, and a vector of said are
displayed as an alphabetic string where 'a' is 0, 'b' is '1', ... 'z' is 25, 'A' is
26, 'B' is 27, ... and 'Y' is 50. Repeat profiles are also displayed as string where
'_' denotes 0 repetitions, and then 'a' through 'N' denote the values 1 through 40,
respectively. The set of all possible lines is as follows:
26, 'B' is 27, ... and 'Y' is 50.
The set of all possible lines is as follows:
```
R # - read number
......@@ -418,28 +421,38 @@ respectively. The set of all possible lines is as follows:
S # string - sequence string
A # string - arrow pulse-width string
I # string - intrinsic quality vector (as an ASCII string)
P # string - repeat profile vector (as an ASCII string)
F # string - name of source file of following data
d # string - Quiva deletion values (as an ASCII string)
c # string - Quiva deletion character string
i # string - Quiva insertion value string
m # string - Quiva merge value string
s # string - Quiva substitution value string
+ X # - Total amount of X (X = H or S or I or P or R or M or T#)
@ X # - Maximum amount of X (X = H or S or I or P or T#)
+ X # - Total amount of X (X = H or S or I or F or R or M or T#)
@ X # - Maximum amount of X (X = H or S or I or F or T#)
```
1-code lines that begin with + or @ are always the first lines in the output. They
give size information about what is contained in the output. That is '+ X #' gives
the number of reads (X=R), the number of masks (X=M), or the total number of
characters in all headers (X=H), sequences (X=S), intrinsic quality vectors (X=I),
read profile vector (X=P), or track (X=T#). And '@ X #' gives the maximum number of
characters in any header (X=H), sequence (X=S), intrincic quality vector (X=I), read
profile vector (X=P), or track (X=T#). The size numbers for the Quiva strings and
file names (X=F), or track (X=T#). And '@ X #' gives the maximum number of
characters in any header (X=H), sequence (X=S), intrinsic quality vector (X=I),
file name (X=F), or track (X=T#). The size numbers for the Quiva strings and
Arrow pulse width strings are identical to that for the sequence as they are all of
the same length for any given entry.
```
15. DBstats [-nu] [-b<int(1000)] [-m<mask>]+ <path:db|dam>
15a. DBa2b
15b. DBb2a
```
Pipes (stdin to stdout) that convert an ASCII output produced by DBdump into a compressed
binary representation (DBa2b) and vice versa (DBb2a). The idea is to save disk space by
keeping the dumps in a more compressed format.
```
16. DBstats [-nu] [-b<int(1000)>] [-m<mask>]+ <path:db|dam>
```
Show overview statistics for all the reads in the trimmed data base \<path\>.db or
......@@ -451,7 +464,7 @@ intervals along the read can be specified with the -m option in which case a sum
and a histogram of the interval lengths is displayed.
```
16. DBrm [-v] <path:db|dam> ...
17. DBrm [-v] <path:db|dam> ...
```
Delete all the files for the given data bases. Do not use rm to remove a database, as
......@@ -460,14 +473,14 @@ files, and all of these are removed by DBrm.
If the -v option is set then every file deleted is listed.
```
17. DBmv [-v] <old:db|dam> <new:db|dam>
18. DBmv [-v] <old:db|dam> <new:db|dam>
```
Rename all the files for the data base old to use the new root.
If the -v option is set then every file move is displayed.
```
18. DBwipe <path:db|dam> ...
19. DBwipe <path:db|dam>
```
Delete any Arrow or Quiver data from the given databases. This removes the .arw or
......@@ -475,7 +488,7 @@ Delete any Arrow or Quiver data from the given databases. This removes the .arw
or Quiver. Basically, converts an A-DB or Q-DB back to a simple S-DB.
```
19. simulator <genome:dam> [-CU] [-m<int(10000)>] [-s<int(2000)>] [-e<double(.15)]
20. simulator <genome:dam> [-CU] [-m<int(10000)>] [-s<int(2000)>] [-e<double(.15)>]
[-c<double(50.)>] [-f<double(.5)>] [-x<int(4000)>]
[-w<int(80)>] [-r<int>] [-M<file>]
```
......@@ -510,7 +523,7 @@ a read is say 's b e' then if b \< e the read is a perturbed copy of s[b,e] in t
forward direction, and a perturbed copy s[e,b] in the reverse direction otherwise.
```
20. rangen <genlen:double> [-U] [-b<double(.5)>] [-w<int(80)>] [-r<int>]
21. rangen <genlen:double> [-U] [-b<double(.5)>] [-w<int(80)>] [-r<int>]
```
Generate a random DNA sequence of length genlen*1Mbp that has an AT-bias of -b.
......
dazzdb (1.0+git20190616.034f1ab-1) unstable; urgency=medium
* Afif removed himself from Uploaders
* Add myself to Uploaders
* New upstream version
* debhelper-compat 12
* Standards-Version: 4.4.0
-- Andreas Tille <tille@debian.org> Tue, 20 Aug 2019 12:37:09 +0200
dazzdb (1.0+git20180908.0bd5e07-1) unstable; urgency=medium
* Team upload.
......
Source: dazzdb
Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.org>
Uploaders: Andreas Tille <tille@debian.org>
Section: science
Priority: optional
Build-Depends: debhelper (>= 11~),
Build-Depends: debhelper-compat (= 12),
zlib1g-dev
Standards-Version: 4.2.1
Standards-Version: 4.4.0
Vcs-Browser: https://salsa.debian.org/med-team/dazzdb
Vcs-Git: https://salsa.debian.org/med-team/dazzdb.git
Homepage: https://github.com/thegenemyers/DAZZ_DB
......
......@@ -2,8 +2,8 @@ Description: Make upstream build system append to CFLAGS
Author: Afif Elghraoui <afif@ghraoui.name>
Forwarded: no
Last-Update: 2015-09-13
--- dazzdb.orig/Makefile
+++ dazzdb/Makefile
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
DEST_DIR = ~/bin
......@@ -11,4 +11,4 @@ Last-Update: 2015-09-13
+CFLAGS += -O3 -Wall -Wextra -Wno-unused-result -fno-strict-aliasing
ALL = fasta2DB DB2fasta quiva2DB DB2quiva DBsplit DBdust Catrack DBshow DBstats DBrm DBmv \
simulator fasta2DAM DAM2fasta DBdump rangen arrow2DB DB2arrow DBwipe DBtrim
simulator fasta2DAM DAM2fasta DBdump rangen arrow2DB DB2arrow DBwipe DBtrim DBa2b DBb2a
......@@ -2,8 +2,8 @@ Description: Honor LDFLAGS in upstream Makefile
Author: Afif Elghraoui <afif@debian.org>
Forwarded: https://github.com/thegenemyers/DAZZ_DB/pull/26
Last-Update: 2017-01-18
--- dazzdb.orig/Makefile
+++ dazzdb/Makefile
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,7 @@
DEST_DIR = ~/bin
......@@ -11,8 +11,8 @@ Last-Update: 2017-01-18
+LDLIBS = -lm
ALL = fasta2DB DB2fasta quiva2DB DB2quiva DBsplit DBdust Catrack DBshow DBstats DBrm DBmv \
simulator fasta2DAM DAM2fasta DBdump rangen arrow2DB DB2arrow DBwipe DBtrim
@@ -8,64 +9,48 @@
simulator fasta2DAM DAM2fasta DBdump rangen arrow2DB DB2arrow DBwipe DBtrim DBa2b DBb2a
@@ -8,64 +9,48 @@ ALL = fasta2DB DB2fasta quiva2DB DB2quiv
all: $(ALL)
fasta2DB: fasta2DB.c DB.c DB.h QV.c QV.h
......@@ -79,5 +79,5 @@ Last-Update: 2017-01-18
DBwipe: DBwipe.c DB.c DB.h QV.c QV.h
- gcc $(CFLAGS) -o DBwipe DBwipe.c DB.c QV.c -lm
clean:
DBa2b: DBa2b.c DB.c DB.h QV.c QV.h
gcc $(CFLAGS) -o DBa2b DBa2b.c DB.c QV.c -lm
......@@ -3,9 +3,9 @@ Description: Set destination directory in upstream Makefile
Author: Afif Elghraoui <afif@ghraoui.name>
Forwarded: no
Last-Update: 2015-09-13
--- dazzdb.orig/Makefile
+++ dazzdb/Makefile
@@ -59,7 +59,8 @@
--- a/Makefile
+++ b/Makefile
@@ -64,7 +64,8 @@ clean:
rm -f dazz.db.tar.gz
install:
......