Skip to content
Commits on Source (9)
......@@ -131,6 +131,7 @@ int main(int argc, char *argv[])
int nextra;
int64 extail;
extra = NULL;
anno = NULL;
trackoff = 0;
tracktot = tracksiz = 0;
......@@ -144,7 +145,7 @@ int main(int argc, char *argv[])
while (1)
{ FILE *dfile, *afile;
char *dfile_name, *afile_name;
int i, size, esize, tracklen;
int i, apos, size, esize, tracklen;
afile_name = Strdup(Numbered_Suffix(prefix,nfiles+1,Catenate(".",argv[2],".","anno")),
"Allocating .anno file name");
......@@ -256,10 +257,11 @@ int main(int argc, char *argv[])
}
FSEEKO(afile,0,SEEK_END)
FTELLO(apos,afile)
if (dfile != NULL)
extail = FTELLO(afile) - (esize*(tracklen+1) + 2*sizeof(int));
extail = apos - (esize*(tracklen+1) + 2*sizeof(int));
else
extail = FTELLO(afile) - (esize*tracklen + 2*sizeof(int));
extail = apos - (esize*tracklen + 2*sizeof(int));
FSEEKO(afile,-extail,SEEK_END)
if (extail >= 20)
......@@ -339,16 +341,16 @@ int main(int argc, char *argv[])
}
}
FCLOSE(aout);
if (dout != NULL)
FCLOSE(dout);
if (nfiles != nblocks)
{ fprintf(stderr,"%s: Did not catenate all tracks of DB (nfiles %d != nblocks %d)\n",
Prog_Name, nfiles, nblocks);
goto error;
}
FCLOSE(aout);
if (dout != NULL)
FCLOSE(dout);
if (DELETE)
{ int i;
char *name;
......
......@@ -17,6 +17,8 @@
#include <ctype.h>
#include <unistd.h>
#include <dirent.h>
#include <limits.h>
#include <sys/stat.h>
#include "DB.h"
......@@ -599,8 +601,8 @@ error:
// for the retained reads.
void Trim_DB(DAZZ_DB *db)
{ int i, j, r;
int allflag, cutoff;
{ int i, j, r, f;
int allflag, cutoff, css;
int64 totlen;
int maxlen, nreads;
DAZZ_TRACK *record;
......@@ -676,14 +678,23 @@ void Trim_DB(DAZZ_DB *db)
record->anno = Realloc(record->anno,record->size*(j+1),NULL);
}
css = 0;
totlen = maxlen = 0;
for (j = i = 0; i < nreads; i++)
{ r = reads[i].rlen;
if ((reads[i].flags & DB_BEST) >= allflag && r >= cutoff)
{ f = reads[i].flags;
if ((f & DB_CSS) == 0)
css = 0;
r = reads[i].rlen;
if ((f & DB_BEST) >= allflag && r >= cutoff)
{ totlen += r;
if (r > maxlen)
maxlen = r;
reads[j++] = reads[i];
reads[j] = reads[i];
if (css)
reads[j++].flags |= DB_CSS;
else
reads[j++].flags &= ~DB_CSS;
css = 1;
}
}
......@@ -1989,3 +2000,196 @@ void Print_Read(char *s, int width)
printf("\n");
}
}
/*******************************************************************************************
*
* COMMAND LINE BLOCK PARSER
* Take a command line argument and interpret the '@' block number ranges.
* Parse_Block_Arg produces an Block_Looper iterator object that can then
* be invoked multiple times to iterate through all the files implied by
* the @ pattern/range.
*
********************************************************************************************/
typedef struct
{ int first, last, next;
char *root, *pwd, *ppnt;
char *slice;
} _Block_Looper;
// Advance the iterator e_parse to the next file, open it, and return the file pointer
// to it. Return NULL if at the end of the list of files.
int Next_Block_Exists(Block_Looper *e_parse)
{ _Block_Looper *parse = (_Block_Looper *) e_parse;
char *disp;
struct stat sts;
if (parse->next+1 > parse->last)
return (0);
if (parse->next < 0)
disp = parse->root;
else
disp = Numbered_Suffix(parse->root,parse->next+1,parse->ppnt);
if (stat(Catenate(parse->pwd,"/",disp,".las"),&sts))
return (0);
else
return (1);
}
FILE *Next_Block_Arg(Block_Looper *e_parse)
{ _Block_Looper *parse = (_Block_Looper *) e_parse;
char *disp;
FILE *input;
parse->next += 1;
if (parse->next > parse->last)
return (NULL);
if (parse->next < 0)
disp = parse->root;
else
disp = Numbered_Suffix(parse->root,parse->next,parse->ppnt);
if ((input = fopen(Catenate(parse->pwd,"/",disp,".las"),"r")) == NULL)
{ if (parse->last != INT_MAX)
{ fprintf(stderr,"%s: %s.las is not present\n",Prog_Name,disp);
exit (1);
}
return (NULL);
}
return (input);
}
// Reset the iterator e_parse to the first file
void Reset_Block_Arg(Block_Looper *e_parse)
{ _Block_Looper *parse = (_Block_Looper *) e_parse;
parse->next = parse->first - 1;
}
// Return a pointer to the path for the current file
char *Block_Arg_Path(Block_Looper *e_parse)
{ _Block_Looper *parse = (_Block_Looper *) e_parse;
return (parse->pwd);
}
// Return a pointer to the root name for the current file
char *Block_Arg_Root(Block_Looper *e_parse)
{ _Block_Looper *parse = (_Block_Looper *) e_parse;
if (parse->next < 0)
return (parse->root);
else
return (Numbered_Suffix(parse->root,parse->next,parse->ppnt));
}
// Free the iterator
void Free_Block_Arg(Block_Looper *e_parse)
{ _Block_Looper *parse = (_Block_Looper *) e_parse;
free(parse->root);
free(parse->pwd);
free(parse->slice);
free(parse);
}
char *Next_Block_Slice(Block_Looper *e_parse, int slice)
{ _Block_Looper *parse = (_Block_Looper *) e_parse;
if (parse->slice == NULL)
{ int size = strlen(parse->pwd) + strlen(Block_Arg_Root(parse)) + 30;
parse->slice = (char *) Malloc(size,"Block argument slice");
if (parse->slice == NULL)
exit (1);
}
if (parse->first < 0)
sprintf(parse->slice,"%s/%s",parse->pwd,parse->root);
else
sprintf(parse->slice,"%s/%s%c%d-%d%s",parse->pwd,parse->root,BLOCK_SYMBOL,parse->next+1,
parse->next+slice,parse->ppnt);
parse->next += slice;
return (parse->slice);
}
// Parse the command line argument and return an iterator to move through the
// file names, setting it up to report the first file.
Block_Looper *Parse_Block_Arg(char *arg)
{ _Block_Looper *parse;
char *pwd, *root;
char *ppnt, *cpnt;
int first, last;
parse = (_Block_Looper *) Malloc(sizeof(_Block_Looper),"Allocating parse node");
pwd = PathTo(arg);
root = Root(arg,".las");
if (parse == NULL || pwd == NULL || root == NULL)
exit (1);
ppnt = index(root,BLOCK_SYMBOL);
if (ppnt == NULL)
first = last = -1;
else
{ if (index(ppnt+1,BLOCK_SYMBOL) != NULL)
{ fprintf(stderr,"%s: Two or more occurences of %c-sign in source name '%s'\n",
Prog_Name,BLOCK_SYMBOL,root);
exit (1);
}
*ppnt++ = '\0';
first = strtol(ppnt,&cpnt,10);
if (cpnt == ppnt)
{ first = 1;
last = INT_MAX;
}
else
{ if (first < 0)
{ fprintf(stderr,
"%s: Integer following %c-sigan is less than 0 in source name '%s'\n",
Prog_Name,BLOCK_SYMBOL,root);
exit (1);
}
if (*cpnt == '-')
{ ppnt = cpnt+1;
last = strtol(ppnt,&cpnt,10);
if (cpnt == ppnt)
{ fprintf(stderr,"%s: Second integer must follow - in source name '%s'\n",
Prog_Name,root);
exit (1);
}
if (last < first)
{ fprintf(stderr,
"%s: 2nd integer is less than 1st integer in source name '%s'\n",
Prog_Name,root);
exit (1);
}
ppnt = cpnt;
}
else
{ last = INT_MAX;
ppnt = cpnt;
}
}
}
parse->pwd = pwd;
parse->root = root;
parse->ppnt = ppnt;
parse->first = first;
parse->last = last;
parse->next = first-1;
parse->slice = NULL;
return ((Block_Looper *) parse);
}
......@@ -59,6 +59,8 @@ typedef signed long long int64;
typedef float float32;
typedef double float64;
#define LAST_READ_SYMBOL '$'
#define BLOCK_SYMBOL '@'
/*******************************************************************************************
*
......@@ -215,12 +217,11 @@ int Count_Args(char *arg);
SYSTEM_READ_ERROR \
}
#define FTELLO(file) \
( { int x = ftello(file); \
if (x < 0) \
#define FTELLO(val,file) \
{ val = ftello(file); \
if (val < 0) \
SYSTEM_READ_ERROR \
; x; \
} )
}
/*******************************************************************************************
*
......@@ -567,4 +568,25 @@ int Read_All_Sequences(DAZZ_DB *db, int ascii);
int List_DB_Files(char *path, void actor(char *path, char *extension));
// Take a command line argument and interpret the '@' block number ranges.
// Parse_Block_Arg produces a Block_Looper iterator object that can then
// be invoked multiple times to iterate through all the files implied by
// the @ pattern/range. Next_Block_Slice returns a string encoing the next
// slice files represented by an @-notation, and advances the iterator by
// that many files.
typedef void Block_Looper;
Block_Looper *Parse_Block_Arg(char *arg);
FILE *Next_Block_Arg(Block_Looper *e_parse);
int Next_Block_Exists(Block_Looper *e_parse);
char *Next_Block_Slice(Block_Looper *e_parse,int slice);
void Reset_Block_Arg(Block_Looper *e_parse); // Reset iterator to first file
char *Block_Arg_Path(Block_Looper *e_parse); // Path of current file
char *Block_Arg_Root(Block_Looper *e_parse); // Root name of current file
void Free_Block_Arg(Block_Looper *e_parse); // Free the iterator
#endif // _DAZZ_DB
......@@ -94,6 +94,7 @@ static int prof_map[41] =
int main(int argc, char *argv[])
{ DAZZ_DB _db, *db = &_db;
int Quiva_DB, Arrow_DB;
int FirstRead;
FILE *hdrs = NULL;
char *hdrs_name = NULL;
int64 *qv_idx = NULL;
......@@ -377,7 +378,10 @@ int main(int argc, char *argv[])
else if (status == 1)
MTRACK[i] = Load_Track(db,MASK[i]);
}
FirstRead = db->tfirst;
}
else
FirstRead = db->ufirst;
if (DOIQV)
{ int status, kind;
......@@ -713,7 +717,7 @@ int main(int argc, char *argv[])
r = reads + i;
len = r->rlen;
if (DORED)
printf("R %d\n",i+1);
printf("R %d\n",FirstRead + (i+1));
flags = r->flags;
qv = (flags & DB_QV);
......
......@@ -155,7 +155,7 @@ int main(int argc, char *argv[])
FWRITE(&size,sizeof(int),1,afile)
FSEEKO(afile,0,SEEK_END)
FSEEKO(dfile,0,SEEK_END)
indx = FTELLO(dfile);
FTELLO(indx,dfile)
}
free(pwd);
......
......@@ -175,8 +175,8 @@ int main(int argc, char *argv[])
fprintf(stderr," -n: Do not show the default read DNA sequences.\n");
fprintf(stderr," -m: Show mask intervals and highlight in sequence.\n");
fprintf(stderr,"\n");
fprintf(stderr," -Q: Produce a .quiva file (ignore all other options but -uU.\n");
fprintf(stderr," -A: Produce a .arrow file (ignore all other options but -uw.\n");
fprintf(stderr," -Q: Produce a .quiva file (ignore all other options but -uU).\n");
fprintf(stderr," -A: Produce a .arrow file (ignore all other options but -uw).\n");
fprintf(stderr,"\n");
fprintf(stderr," -U: Use upper case for DNA (default is lower case).\n");
fprintf(stderr," -w: Print -w bp per line (default is 80).\n");
......@@ -349,14 +349,13 @@ int main(int argc, char *argv[])
if (argc == 3)
{ if (argv[2][0] != LAST_READ_SYMBOL || argv[2][1] != '\0')
{ char *eptr, *fptr;
int b, e;
b = strtol(argv[2],&eptr,10);
if (eptr > argv[2] && b > 0)
strtol(argv[2],&eptr,10);
if (eptr > argv[2])
{ if (*eptr == '-')
{ if (eptr[1] != LAST_READ_SYMBOL || eptr[2] != '\0')
{ e = strtol(eptr+1,&fptr,10);
input_pts = (fptr <= eptr+1 || *fptr != '\0' || e <= 0);
{ strtol(eptr+1,&fptr,10);
input_pts = (fptr <= eptr+1 || *fptr != '\0');
}
}
else
......
......@@ -32,7 +32,15 @@
#define PATHSEP "/"
#endif
static char *Usage = "[-af] [-x<int>] [-s<double(200.)>] <path:db|dam>";
static char *Usage = "[-aflm] [-x<int>] [-s<double(200.)>] <path:db|dam>";
static DAZZ_READ *reads;
static int MSORT(const void *l, const void *r)
{ int x = *((int *) l);
int y = *((int *) r);
return (reads[x].rlen - reads[y].rlen);
}
int main(int argc, char *argv[])
{ DAZZ_DB db, dbs;
......@@ -40,11 +48,16 @@ int main(int argc, char *argv[])
FILE *dbfile, *ixfile;
char *dbfile_name, *ixfile_name;
int status;
int nreads;
int medmax = 0;
int *msort = NULL;
int FORCE;
int ALL;
int CUTOFF;
int64 SIZE;
int SELECT;
{ int i, j, k;
int flags[128];
......@@ -53,6 +66,7 @@ int main(int argc, char *argv[])
ARG_INIT("DBsplit")
SELECT = -1;
CUTOFF = 0;
size = 200;
......@@ -61,7 +75,7 @@ int main(int argc, char *argv[])
if (argv[i][0] == '-')
switch (argv[i][1])
{ default:
ARG_FLAGS("af")
ARG_FLAGS("aflm")
break;
case 'x':
ARG_NON_NEGATIVE(CUTOFF,"Min read length cutoff")
......@@ -82,6 +96,16 @@ int main(int argc, char *argv[])
ALL = flags['a'];
FORCE = flags['f'];
if (flags['l'])
{ SELECT = 0;
if (flags['m'])
{ fprintf(stderr,"%s: Cannot set both -l and -m, mutually exclusive\n",Prog_Name);
exit (1);
}
}
else if (flags['m'])
SELECT = 1;
if (argc != 2)
{ fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
fprintf(stderr,"\n");
......@@ -89,6 +113,9 @@ int main(int argc, char *argv[])
fprintf(stderr," -x: Trimmed DB has reads >= this threshold.\n");
fprintf(stderr," -a: Trimmed DB contains all reads from a well (not just longest).\n");
fprintf(stderr," -f: Force the split to occur even if already split.\n");
fprintf(stderr,"\n");
fprintf(stderr," -l: Set primary read for a well to be the longest.\n");
fprintf(stderr," -m: Set primary read for a well to be the median.\n");
exit (1);
}
}
......@@ -102,6 +129,8 @@ int main(int argc, char *argv[])
{ fprintf(stderr,"%s: Cannot be called on a block: %s\n",Prog_Name,argv[1]);
exit (1);
}
reads = db.reads;
nreads = db.ureads;
{ char *pwd, *root;
char buffer[2*MAX_NAME+100];
......@@ -146,18 +175,59 @@ int main(int argc, char *argv[])
}
}
dbpos = FTELLO(dbfile);
FTELLO(dbpos,dbfile)
FSEEKO(dbfile,dbpos,SEEK_SET)
FPRINTF(dbfile,DB_NBLOCK,0)
FPRINTF(dbfile,DB_PARAMS,SIZE,CUTOFF,ALL)
}
{ DAZZ_READ *reads = db.reads;
int nreads = db.ureads;
int64 totlen;
if (SELECT >= 0)
{ int i, j, k, h;
int off;
off = (~ DB_BEST);
for (i = 0; i < nreads; i = j)
{ j = i+1;
reads[i].flags &= off;
while ((reads[j].flags & DB_CSS) != 0)
reads[j++].flags &= off;
if (j-i <= 1)
{ reads[i].flags |= DB_BEST;
continue;
}
if (SELECT == 0)
{ int mlen;
mlen = reads[h = i].rlen;
for (k = i+1; k < j; k++)
if (mlen < reads[k].rlen)
mlen = reads[h = k].rlen;
}
else
{ if (j-i > medmax)
{ medmax = 1.2*(j-i) + 1000;
msort = (int *) Realloc(msort,sizeof(int)*medmax,"Allocating Median Array");
if (msort == NULL)
exit (1);
}
for (k = i; k < j; k++)
msort[k-i] = k;
qsort(msort,j-i,sizeof(int),MSORT);
h = msort[(j-i)/2];
}
reads[h].flags |= DB_BEST;
}
}
{ int64 totlen;
int nblock, ireads, treads, rlen, fno;
int i;
int i, upos, css;
css = 0;
nblock = 0;
totlen = 0;
ireads = 0;
......@@ -166,16 +236,19 @@ int main(int argc, char *argv[])
if (ALL)
for (i = 0; i < nreads; i++)
{ rlen = reads[i].rlen;
if ((reads[i].flags & DB_CSS) == 0)
css = 0;
if (rlen >= CUTOFF)
{ ireads += 1;
treads += 1;
totlen += rlen;
if (totlen >= SIZE)
{ FPRINTF(dbfile,DB_BDATA,i+1,treads)
{ if (css == 0 && totlen >= SIZE)
{ FPRINTF(dbfile,DB_BDATA,i,treads)
totlen = 0;
ireads = 0;
nblock += 1;
}
ireads += 1;
treads += 1;
totlen += rlen;
css = 1;
}
}
else
......@@ -199,7 +272,8 @@ int main(int argc, char *argv[])
nblock += 1;
}
fno = fileno(dbfile);
if (ftruncate(fno,FTELLO(dbfile)) < 0)
FTELLO(upos,dbfile)
if (ftruncate(fno,upos) < 0)
SYSTEM_WRITE_ERROR
FSEEKO(dbfile,dbpos,SEEK_SET)
......@@ -208,9 +282,13 @@ int main(int argc, char *argv[])
dbs.cutoff = CUTOFF;
if (ALL)
dbs.allarr |= DB_ALL;
else
dbs.allarr &= ~DB_ALL;
dbs.treads = treads;
FSEEKO(ixfile,0,SEEK_SET)
FWRITE(&dbs,sizeof(DAZZ_DB),1,ixfile)
if (SELECT >= 0)
FWRITE(reads,sizeof(DAZZ_READ),nreads,ixfile)
}
FCLOSE(ixfile)
......
......@@ -22,7 +22,15 @@
#define PATHSEP "/"
#endif
static char *Usage = "[-af] [-x<int>] <path:db|dam>";
static char *Usage = "[-aflm] [-x<int>] <path:db|dam>";
static DAZZ_READ *reads;
static int MSORT(const void *l, const void *r)
{ int x = *((int *) l);
int y = *((int *) r);
return (reads[x].rlen - reads[y].rlen);
}
int main(int argc, char *argv[])
{ DAZZ_DB db, dbs;
......@@ -31,10 +39,15 @@ int main(int argc, char *argv[])
char *dbfile_name, *ixfile_name;
int nblocks;
int status;
int nreads;
int medmax = 0;
int *msort = NULL;
int FORCE;
int ALL;
int CUTOFF;
int SELECT;
{ int i, j, k;
int flags[128];
......@@ -42,6 +55,7 @@ int main(int argc, char *argv[])
ARG_INIT("DBtrim")
SELECT = -1;
CUTOFF = 0;
j = 1;
......@@ -49,7 +63,7 @@ int main(int argc, char *argv[])
if (argv[i][0] == '-')
switch (argv[i][1])
{ default:
ARG_FLAGS("af")
ARG_FLAGS("aflm")
break;
case 'x':
ARG_NON_NEGATIVE(CUTOFF,"Min read length cutoff")
......@@ -62,12 +76,25 @@ int main(int argc, char *argv[])
ALL = flags['a'];
FORCE = flags['f'];
if (flags['l'])
{ SELECT = 0;
if (flags['m'])
{ fprintf(stderr,"%s: Cannot set both -l and -m, mutually exclusive\n",Prog_Name);
exit (1);
}
}
else if (flags['m'])
SELECT = 1;
if (argc != 2)
{ fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
fprintf(stderr,"\n");
fprintf(stderr," -x: Trimmed DB has reads >= this threshold.\n");
fprintf(stderr," -a: Trimmed DB contains all reads from a well (not just longest).\n");
fprintf(stderr," -f: Force the new trim setting even if already set.\n");
fprintf(stderr,"\n");
fprintf(stderr," -l: Set primary read for a well to be the longest.\n");
fprintf(stderr," -m: Set primary read for a well to be the median.\n");
exit (1);
}
}
......@@ -81,6 +108,8 @@ int main(int argc, char *argv[])
{ fprintf(stderr,"%s: Cannot be called on a block: %s\n",Prog_Name,argv[1]);
exit (1);
}
reads = db.reads;
nreads = db.ureads;
{ char *pwd, *root;
char buffer[2*MAX_NAME+100];
......@@ -135,12 +164,53 @@ int main(int argc, char *argv[])
FSCANF(dbfile,DB_NBLOCK,&nblocks)
dbpos = FTELLO(dbfile);
FTELLO(dbpos,dbfile)
FSCANF(dbfile,DB_PARAMS,&size,&cutoff,&all)
FSEEKO(dbfile,dbpos,SEEK_SET)
FPRINTF(dbfile,DB_PARAMS,size,CUTOFF,ALL)
}
if (SELECT >= 0)
{ int i, j, k, h;
int off;
off = (~ DB_BEST);
for (i = 0; i < nreads; i = j)
{ j = i+1;
reads[i].flags &= off;
while ((reads[j].flags & DB_CSS) != 0)
reads[j++].flags &= off;
if (j-i <= 1)
{ reads[i].flags |= DB_BEST;
continue;
}
if (SELECT == 0)
{ int mlen;
mlen = reads[h = i].rlen;
for (k = i+1; k < j; k++)
if (mlen < reads[k].rlen)
mlen = reads[h = k].rlen;
}
else
{ if (j-i > medmax)
{ medmax = 1.2*(j-i) + 1000;
msort = (int *) Realloc(msort,sizeof(int)*medmax,"Allocating Median Array");
if (msort == NULL)
exit (1);
}
for (k = i; k < j; k++)
msort[k-i] = k;
qsort(msort,j-i,sizeof(int),MSORT);
h = msort[(j-i)/2];
}
reads[h].flags |= DB_BEST;
}
}
{ DAZZ_READ *reads = db.reads;
int uread, tread;
int rlen;
......@@ -150,7 +220,7 @@ int main(int argc, char *argv[])
t = 0;
fprintf(dbfile,DB_BDATA,0,0);
for (b = 0; b < nblocks; b++)
{ dbpos = FTELLO(dbfile);
{ FTELLO(dbpos,dbfile);
FSCANF(dbfile,DB_BDATA,&uread,&tread)
if (ALL)
......@@ -174,9 +244,13 @@ int main(int argc, char *argv[])
dbs.cutoff = CUTOFF;
if (ALL)
dbs.allarr |= DB_ALL;
else
dbs.allarr &= ~DB_ALL;
dbs.treads = t;
FSEEKO(ixfile,0,SEEK_SET)
FWRITE(&dbs,sizeof(DAZZ_DB),1,ixfile)
if (SELECT >= 0)
FWRITE(reads,sizeof(DAZZ_READ),nreads,ixfile)
}
FCLOSE(ixfile)
......
......@@ -116,8 +116,9 @@ refer to a database we are referring to either a DB or a DAM.
The command DBsplit sets or resets the current partition for a database which is
determined by 3 parameters: (i) the total number of basepairs to place in each block,
(ii) the minimum read length of reads to include within a block, and (iii) whether or
not to only include the longest read from a given well or all reads from a well (NB:
several reads of the same insert in a given well can be produced by the Pacbio
not to only include the longest (-l) or median (-m) read from a given well or all
reads from a well (-a) (NB: several reads of the same insert in a given well can be
produced by the Pacbio
instrument). Note that the length and uniqueness parameters effectively select a
subset of the reads that contribute to the size of a block. We call this subset the
*trimmed* data base. Some commands operate on the entire database, others on the
......@@ -259,7 +260,7 @@ should be used, and the characters per line, or line width, can be set to any po
value with the -w option.
```
9. DBsplit [-af] [-x<int>] [-s<double(200.)>] <path:db|dam>
9. DBsplit [-aflm] [-x<int>] [-s<double(200.)>] <path:db|dam>
```
Divide the database \<path\>.db or \<path\>.dam conceptually into a series of blocks
......@@ -278,6 +279,12 @@ If the -f option is set, the split is forced regardless of whether or not the DB
question has previously bin split, i.e. one is not interactively asked if they wish
to proceed.
By default, the primary read for each well consists of the longest subread of the insert
from the well. By setting the -m parameter, the subread of median length becomes the
primary read instead. One can at any later time change this back to the default longest
by splitting again with the -l parameter set. The setting of the primary reads occurs
regardless of whether the -a parameter is set or not.
```
10. DBtrim [-af] [-x<int>] <path:db|dam>
```
......
dazzdb (1.0+20180115-2) UNRELEASED; urgency=medium
dazzdb (1.0+git20180908.0bd5e07-1) unstable; urgency=medium
* Team upload.
[ Jelmer Vernooij ]
* Use secure copyright file specification URI.
-- Jelmer Vernooij <jelmer@debian.org> Mon, 08 Oct 2018 21:48:05 +0100
[ Andreas Tille ]
* d/watch: Use git mode
* New Git commit
* debhelper 11
* Point Vcs fields to salsa.debian.org
* Standards-Version: 4.2.1
* Remove trailing whitespace in debian/copyright
* hardening=+all
-- Andreas Tille <tille@debian.org> Sun, 28 Oct 2018 10:29:24 +0100
dazzdb (1.0+20180115-1) unstable; urgency=medium
......
......@@ -3,12 +3,11 @@ Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.
Uploaders: Afif Elghraoui <afif@debian.org>
Section: science
Priority: optional
Build-Depends:
debhelper (>= 10),
zlib1g-dev,
Standards-Version: 4.1.3
Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/dazzdb.git
Vcs-Git: https://anonscm.debian.org/git/debian-med/dazzdb.git
Build-Depends: debhelper (>= 11~),
zlib1g-dev
Standards-Version: 4.2.1
Vcs-Browser: https://salsa.debian.org/med-team/dazzdb
Vcs-Git: https://salsa.debian.org/med-team/dazzdb.git
Homepage: https://github.com/thegenemyers/DAZZ_DB
Package: dazzdb
......
......@@ -4,6 +4,8 @@
include /usr/share/dpkg/default.mk
export DEB_BUILD_MAINT_OPTIONS=hardening=+all
%:
dh $@
......
version=3
version=4
opts="filenamemangle=s/(?:.*)?v?(\d[\d\.]*)\.tar\.gz/dazzdb-$1.tar.gz/" \
https://github.com/thegenemyers/DAZZ_DB/tags (?:.*/)?[vV]?(\d[\d\.]*)\.tar\.gz
opts="mode=git,pretty=1.0+git%cd.%h" \
https://github.com/thegenemyers/DAZZ_DB.git HEAD
#opts="filenamemangle=s/(?:.*)?v?(\d[\d\.]*)\.tar\.gz/dazzdb-$1.tar.gz/" \
# https://github.com/thegenemyers/DAZZ_DB/tags (?:.*/)?[vV]?(\d[\d\.]*)\.tar\.gz
......@@ -407,8 +407,12 @@ int main(int argc, char *argv[])
nline = 1;
eof = (fgets(read,MAX_NAME,input) == NULL);
if (eof || strlen(read) < 1)
{ fprintf(stderr,"Skipping '%s', file is empty!\n",core);
fclose(input);
{ fclose(input);
if (PIPE != NULL)
{ fprintf(stderr,"Standard input is empty, terminating!\n");
break;
}
fprintf(stderr,"Skipping '%s', file is empty!\n",core);
free(core);
continue;
}
......
......@@ -390,17 +390,15 @@ int main(int argc, char *argv[])
eof = (fgets(read,MAX_NAME,input) == NULL);
if (eof || strlen(read) < 1)
{ free(core);
fclose(input);
{ fclose(input);
if (PIPE != NULL)
{ fprintf(stderr,"Standard input is empty, terminating!\n");
break;
}
else
{ fprintf(stderr,"Skipping '%s', file is empty!\n",core);
fprintf(stderr,"Skipping '%s', file is empty!\n",core);
free(core);
continue;
}
}
// Check that core is not too long and name is unique or last source if PIPE'd
......
......@@ -148,6 +148,11 @@ int main(int argc, char *argv[])
if (argc != 2)
{ fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
fprintf(stderr,"\n");
fprintf(stderr," -r: Random number generator seed (default is process id).\n");
fprintf(stderr," -b: AT vs GC ratio or bias.\n");
fprintf(stderr," -w: Print -w bp per line (default is 80).\n");
fprintf(stderr," -U: Output sequence in upper case (default is lower case).\n");
exit (1);
}
......
......@@ -567,6 +567,19 @@ int main(int argc, char *argv[])
{ fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage[0]);
fprintf(stderr," %*s %s\n",(int) strlen(Prog_Name),"",Usage[1]);
fprintf(stderr," %*s %s\n",(int) strlen(Prog_Name),"",Usage[2]);
fprintf(stderr,"\n");
fprintf(stderr," -m: average read length (log normal distribution).\n");
fprintf(stderr," -s: standard deviation of read lengths (log normal)\n");
fprintf(stderr," -x: ignore reads below this length\n");
fprintf(stderr," -f: forward/reverse strand sampling fraction\n");
fprintf(stderr," -e: error rate\n");
fprintf(stderr," -c: coverage of genome\n");
fprintf(stderr," -C: assume genome is circular (default is linear)\n");
fprintf(stderr,"\n");
fprintf(stderr," -r: Random number generator seed (default is process id).\n");
fprintf(stderr," -w: Print -w bp per line (default is 80).\n");
fprintf(stderr," -U: Use upper case for DNA (default is lower case).\n");
fprintf(stderr," -M: create a map file that indicates where every read was sampled\n");
exit (1);
}
}
......