Skip to content
Commits on Source (8)
......@@ -18,6 +18,7 @@
#include <unistd.h>
#include <dirent.h>
#include <limits.h>
#include <sys/stat.h>
#include "DB.h"
......@@ -169,7 +170,8 @@ char *Catenate(char *path, char *sep, char *root, char *suffix)
len += strlen(suffix);
if (len > max)
{ max = ((int) (1.2*len)) + 100;
if ((cat = (char *) realloc(cat,max+1)) == NULL)
cat = (char *) realloc(cat,max+1);
if (cat == NULL)
{ EPRINTF(EPLACE,"%s: Out of memory (Making path name for %s)\n",Prog_Name,root);
return (NULL);
}
......@@ -179,7 +181,51 @@ char *Catenate(char *path, char *sep, char *root, char *suffix)
}
char *Numbered_Suffix(char *left, int num, char *right)
{ static char *suffix = NULL;
{ static char *sfx = NULL;
static int max = -1;
int len;
if (left == NULL || right == NULL)
return (NULL);
len = strlen(left);
len += strlen(right) + 40;
if (len > max)
{ max = ((int) (1.2*len)) + 100;
sfx = (char *) realloc(sfx,max+1);
if (sfx == NULL)
{ EPRINTF(EPLACE,"%s: Out of memory (Making number suffix for %d)\n",Prog_Name,num);
return (NULL);
}
}
sprintf(sfx,"%s%d%s",left,num,right);
return (sfx);
}
static char *MyCatenate(char *path, char *sep, char *root, char *suffix)
{ static char *cat = NULL;
static int max = -1;
int len;
if (path == NULL || root == NULL || sep == NULL || suffix == NULL)
return (NULL);
len = strlen(path);
len += strlen(sep);
len += strlen(root);
len += strlen(suffix);
if (len > max)
{ max = ((int) (1.2*len)) + 100;
cat = (char *) realloc(cat,max+1);
if (cat == NULL)
{ EPRINTF(EPLACE,"%s: Out of memory (Making path name for %s)\n",Prog_Name,root);
return (NULL);
}
}
sprintf(cat,"%s%s%s%s",path,sep,root,suffix);
return (cat);
}
static char *MyNumbered_Suffix(char *left, int num, char *right)
{ static char *sfx = NULL;
static int max = -1;
int len;
......@@ -189,13 +235,14 @@ char *Numbered_Suffix(char *left, int num, char *right)
len += strlen(right) + 40;
if (len > max)
{ max = ((int) (1.2*len)) + 100;
if ((suffix = (char *) realloc(suffix,max+1)) == NULL)
sfx = (char *) realloc(sfx,max+1);
if (sfx == NULL)
{ EPRINTF(EPLACE,"%s: Out of memory (Making number suffix for %d)\n",Prog_Name,num);
return (NULL);
}
}
sprintf(suffix,"%s%d%s",left,num,right);
return (suffix);
sprintf(sfx,"%s%d%s",left,num,right);
return (sfx);
}
......@@ -393,10 +440,34 @@ void Number_Arrow(char *s)
*s = 4;
}
void Change_Read(char *s)
{ static char change[128] =
{ 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 'a', 0, 'c', 0, 0, 0, 'g',
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 't', 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 'A', 0, 'C', 0, 0, 0, 'G',
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 'T', 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
for ( ; *s != '\0'; s++)
*s = change[(int) *s];
}
/*******************************************************************************************
*
* DB OPEN, TRIM & CLOSE ROUTINES
* DB OPEN, TRIM, SIZE_OF, LIST_FILES & CLOSE ROUTINES
*
********************************************************************************************/
......@@ -409,11 +480,14 @@ void Number_Arrow(char *s)
// 0: Open of DB proceeded without mishap
// 1: Open of DAM proceeded without mishap
static char *atrack_name = ".@arw";
static char *qtrack_name = ".@qvs";
int Open_DB(char* path, DAZZ_DB *db)
{ DAZZ_DB dbcopy;
char *root, *pwd, *bptr, *fptr, *cat;
int nreads;
FILE *index, *dbvis;
FILE *index, *dbvis, *bases;
int status, plen, isdam;
int part, cutoff, all;
int ufirst, tfirst, ulast, tlast;
......@@ -423,9 +497,16 @@ int Open_DB(char* path, DAZZ_DB *db)
plen = strlen(path);
if (strcmp(path+(plen-4),".dam") == 0)
root = Root(path,".dam");
{ root = Root(path,".dam");
isdam = 1;
}
else
{ if (strcmp(path+(plen-3),".db") == 0)
isdam = -1;
else
isdam = 0;
root = Root(path,".db");
}
pwd = PathTo(path);
bptr = rindex(root,'.');
......@@ -439,22 +520,34 @@ int Open_DB(char* path, DAZZ_DB *db)
else
part = 0;
isdam = 0;
cat = Catenate(pwd,"/",root,".db");
if (isdam > 0)
cat = MyCatenate(pwd,"/",root,".dam");
else
cat = MyCatenate(pwd,"/",root,".db");
if (cat == NULL)
return (-1);
if ((dbvis = fopen(cat,"r")) == NULL)
{ cat = Catenate(pwd,"/",root,".dam");
{ if (isdam < 0)
{ EPRINTF(EPLACE,"%s: Could not open DB %s\n",Prog_Name,path);
goto error;
}
if (isdam > 0)
{ EPRINTF(EPLACE,"%s: Could not open DAM %s\n",Prog_Name,path);
goto error;
}
cat = MyCatenate(pwd,"/",root,".dam");
if (cat == NULL)
return (-1);
if ((dbvis = fopen(cat,"r")) == NULL)
{ EPRINTF(EPLACE,"%s: Could not open database %s\n",Prog_Name,path);
{ EPRINTF(EPLACE,"%s: Could not open %s as a DB or a DAM\n",Prog_Name,path);
goto error;
}
isdam = 1;
}
if (isdam < 0)
isdam = 0;
if ((index = Fopen(Catenate(pwd,PATHSEP,root,".idx"),"r")) == NULL)
if ((index = Fopen(MyCatenate(pwd,PATHSEP,root,".idx"),"r")) == NULL)
goto error1;
if (fread(db,sizeof(DAZZ_DB),1,index) != 1)
{ EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
......@@ -527,6 +620,7 @@ int Open_DB(char* path, DAZZ_DB *db)
{ db->reads = (DAZZ_READ *) Malloc(sizeof(DAZZ_READ)*(nreads+2),"Allocating Open_DB index");
if (db->reads == NULL)
goto error2;
db->reads += 1;
if (fread(db->reads,sizeof(DAZZ_READ),nreads,index) != (size_t) nreads)
{ EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
......@@ -569,10 +663,18 @@ int Open_DB(char* path, DAZZ_DB *db)
((int *) (db->reads))[-2] = tlast - tfirst;
db->nreads = nreads;
db->path = Strdup(Catenate(pwd,PATHSEP,root,""),"Allocating Open_DB path");
db->path = Strdup(MyCatenate(pwd,PATHSEP,root,""),"Allocating Open_DB path");
if (db->path == NULL)
{ free(db->reads-1);
goto error2;
}
bases = Fopen(MyCatenate(db->path,"","",".bps"),"r");
if (bases == NULL)
{ free(db->path);
free(db->reads-1);
goto error2;
db->bases = NULL;
}
db->bases = (void *) bases;
db->loaded = 0;
status = isdam;
......@@ -595,7 +697,7 @@ error:
}
// Trim the DB or part thereof and all loaded tracks according to the cuttof and all settings
// Trim the DB or part thereof and all opened tracks according to the cuttof and all settings
// of the current DB partition. Reallocate smaller memory blocks for the information kept
// for the retained reads.
......@@ -611,6 +713,24 @@ void Trim_DB(DAZZ_DB *db)
if (db->cutoff <= 0 && (db->allarr & DB_ALL) != 0) return;
{ int load_error;
load_error = db->loaded;
for (record = db->tracks; record != NULL; record = record->next)
if (record->name == atrack_name)
{ if (((DAZZ_ARROW *) record)->loaded)
load_error = 1;
}
else if (record->name != qtrack_name)
{ if (record->loaded)
load_error = 1;
}
if (load_error)
{ EPRINTF(EPLACE,"%s: Cannot load anything before trim (Trim_DB)\n",Prog_Name);
return;
}
}
cutoff = db->cutoff;
if ((db->allarr & DB_ALL) != 0)
allflag = 0;
......@@ -621,7 +741,7 @@ void Trim_DB(DAZZ_DB *db)
nreads = db->nreads;
for (record = db->tracks; record != NULL; record = record->next)
if (strcmp(record->name,".@qvs") == 0)
if (record->name == qtrack_name)
{ uint16 *table = ((DAZZ_QV *) record)->table;
j = 0;
......@@ -629,52 +749,56 @@ void Trim_DB(DAZZ_DB *db)
if ((reads[i].flags & DB_BEST) >= allflag && reads[i].rlen >= cutoff)
table[j++] = table[i];
}
else if (record->name == atrack_name)
{ DAZZ_ARROW *atrack = (DAZZ_ARROW *) record;
int64 *aoff = atrack->aoff;
for (j = i = 0; i < nreads; i++)
if ((reads[i].flags & DB_BEST) >= allflag && reads[i].rlen >= cutoff)
aoff[j++] = aoff[i];
atrack->aoff = Realloc(aoff,sizeof(int64)*j,NULL);
}
else
{ int *anno4, size;
int64 *anno8;
char *anno, *data;
{ int size;
size = record->size;
data = (char *) record->data;
if (data == NULL)
{ anno = (char *) record->anno;
if (record->data == NULL)
{ char *anno = (char *) record->anno;
j = 0;
for (i = r = 0; i < db->nreads; i++, r += size)
if ((reads[i].flags & DB_BEST) >= allflag && reads[i].rlen >= cutoff)
{ memmove(anno+j,anno+r,size);
j += size;
}
memmove(anno+j,anno+r,size);
}
else if (size == 4)
{ int ai;
{ int *anno4 = (int *) (record->anno);
int *alen = record->alen;
anno4 = (int *) (record->anno);
j = anno4[0] = 0;
j = 0;
for (i = 0; i < db->nreads; i++)
if ((reads[i].flags & DB_BEST) >= allflag && reads[i].rlen >= cutoff)
{ ai = anno4[i];
anno4[j+1] = anno4[j] + (anno4[i+1]-ai);
memmove(data+anno4[j],data+ai,anno4[i+1]-ai);
{ anno4[j] = anno4[i];
alen[j] = alen[i];
j += 1;
}
record->data = Realloc(record->data,anno4[j],NULL);
record->alen = Realloc(record->alen,sizeof(int)*j,NULL);
}
else // size == 8
{ int64 ai;
{ int64 *anno8 = (int64 *) (record->anno);
int *alen = record->alen;
anno8 = (int64 *) (record->anno);
j = anno8[0] = 0;
j = 0;
for (i = 0; i < db->nreads; i++)
if ((reads[i].flags & DB_BEST) >= allflag && reads[i].rlen >= cutoff)
{ ai = anno8[i];
anno8[j+1] = anno8[j] + (anno8[i+1]-ai);
memmove(data+anno8[j],data+ai,anno8[i+1]-ai);
{ anno8[j] = anno8[i];
alen[j] = alen[i];
j += 1;
}
record->data = Realloc(record->data,anno8[j],NULL);
record->alen = Realloc(record->alen,sizeof(int)*j,NULL);
}
record->anno = Realloc(record->anno,record->size*(j+1),NULL);
record->nreads = j;
}
css = 0;
......@@ -708,136 +832,6 @@ void Trim_DB(DAZZ_DB *db)
}
}
// The DB has already been trimmed, but a track over the untrimmed DB needs to be loaded.
// Trim the track by rereading the untrimmed DB index from the file system.
static int Late_Track_Trim(DAZZ_DB *db, DAZZ_TRACK *track, int ispart)
{ int i, j, r;
int allflag, cutoff;
int ureads;
char *root;
DAZZ_READ read;
FILE *indx;
if (!db->trimmed) return (0);
if (db->cutoff <= 0 && (db->allarr & DB_ALL) != 0) return (0);
cutoff = db->cutoff;
if ((db->allarr & DB_ALL) != 0)
allflag = 0;
else
allflag = DB_BEST;
root = rindex(db->path,'/') + 2;
indx = Fopen(Catenate(db->path,"","",".idx"),"r");
fseeko(indx,sizeof(DAZZ_DB) + sizeof(DAZZ_READ)*db->ufirst,SEEK_SET);
if (ispart)
ureads = ((int *) (db->reads))[-1];
else
ureads = db->ureads;
if (strcmp(track->name,".@qvs") == 0)
{ EPRINTF(EPLACE,"%s: Cannot load QV track after trimming\n",Prog_Name);
fclose(indx);
EXIT(1);
}
{ int *anno4, size;
int64 *anno8;
char *anno, *data;
size = track->size;
data = (char *) track->data;
if (data == NULL)
{ anno = (char *) track->anno;
j = r = 0;
for (i = r = 0; i < ureads; i++, r += size)
{ if (fread(&read,sizeof(DAZZ_READ),1,indx) != 1)
{ EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
fclose(indx);
EXIT(1);
}
if ((read.flags & DB_BEST) >= allflag && read.rlen >= cutoff)
{ memmove(anno+j,anno+r,size);
j += size;
}
r += size;
}
memmove(anno+j,anno+r,size);
}
else if (size == 4)
{ int ai;
anno4 = (int *) (track->anno);
j = anno4[0] = 0;
for (i = 0; i < ureads; i++)
{ if (fread(&read,sizeof(DAZZ_READ),1,indx) != 1)
{ EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
fclose(indx);
EXIT(1);
}
if ((read.flags & DB_BEST) >= allflag && read.rlen >= cutoff)
{ ai = anno4[i];
anno4[j+1] = anno4[j] + (anno4[i+1]-ai);
memmove(data+anno4[j],data+ai,anno4[i+1]-ai);
j += 1;
}
}
track->data = Realloc(track->data,anno4[j],NULL);
}
else // size == 8
{ int64 ai;
anno8 = (int64 *) (track->anno);
j = anno8[0] = 0;
for (i = 0; i < ureads; i++)
{ if (fread(&read,sizeof(DAZZ_READ),1,indx) != 1)
{ EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
fclose(indx);
EXIT(1);
}
if ((read.flags & DB_BEST) >= allflag && read.rlen >= cutoff)
{ ai = anno8[i];
anno8[j+1] = anno8[j] + (anno8[i+1]-ai);
memmove(data+anno8[j],data+ai,anno8[i+1]-ai);
j += 1;
}
}
track->data = Realloc(track->data,anno8[j],NULL);
}
track->anno = Realloc(track->anno,track->size*(j+1),NULL);
}
fclose(indx);
return (0);
}
// Shut down an open 'db' by freeing all associated space, including tracks and QV structures,
// and any open file pointers. The record pointed at by db however remains (the user
// supplied it and so should free it).
void Close_DB(DAZZ_DB *db)
{ DAZZ_TRACK *t, *p;
if (db->loaded)
free(((char *) (db->bases)) - 1);
else if (db->bases != NULL)
fclose((FILE *) db->bases);
if (db->reads != NULL)
free(db->reads-1);
free(db->path);
Close_QVs(db);
for (t = db->tracks; t != NULL; t = p)
{ p = t->next;
free(t->anno);
free(t->data);
free(t);
}
}
// Return the size in bytes of the memory occupied by a given DB
......@@ -876,274 +870,566 @@ int64 sizeof_DB(DAZZ_DB *db)
}
/*******************************************************************************************
*
* QV LOAD & CLOSE ROUTINES
*
********************************************************************************************/
DAZZ_DB *Active_DB = NULL; // Last db/qv used by "Load_QVentry"
DAZZ_QV *Active_QV; // Becomes invalid after closing
// For the DB or DAM "path" = "prefix/root.[db|dam]", find all the files for that DB, i.e. all
// those of the form "prefix/[.]root.part" and call actor with the complete path to each file
// pointed at by path, and the suffix of the path by extension. The . proceeds the root
// name if the defined constant HIDE_FILES is set. Always the first call is with the
// path "prefix/root.[db|dam]" and extension "db" or "dam". There will always be calls for
// "prefix/[.]root.idx" and "prefix/[.]root.bps". All other calls are for *tracks* and
// so this routine gives one a way to know all the tracks associated with a given DB.
// -1 is returned if the path could not be found, and 1 is returned if an error (reported
// to EPLACE) occured and INTERACTIVE is defined. Otherwise a 0 is returned.
int Load_QVs(DAZZ_DB *db)
{ FILE *quiva, *istub, *indx;
char *root;
uint16 *table;
DAZZ_QV *qvtrk;
QVcoding *coding, *nx;
int ncodes = 0;
int List_DB_Files(char *path, void actor(char *path, char *extension))
{ int status, plen, rlen, dlen;
char *root, *pwd, *name;
int isdam;
DIR *dirp;
struct dirent *dp;
if (db->tracks != NULL && strcmp(db->tracks->name,".@qvs") == 0)
return (0);
status = 0;
pwd = PathTo(path);
plen = strlen(path);
if (strcmp(path+(plen-4),".dam") == 0)
root = Root(path,".dam");
else
root = Root(path,".db");
rlen = strlen(root);
if (db->trimmed)
{ EPRINTF(EPLACE,"%s: Cannot load QVs after trimming the DB\n",Prog_Name);
if (root == NULL || pwd == NULL)
{ free(pwd);
free(root);
EXIT(1);
}
if (db->reads[db->nreads-1].coff < 0)
{ if (db->part > 0)
{ EPRINTF(EPLACE,"%s: All QVs for this block have not been added to the DB!\n",Prog_Name);
EXIT(1);
if ((dirp = opendir(pwd)) == NULL)
{ EPRINTF(EPLACE,"%s: Cannot open directory %s (List_DB_Files)\n",Prog_Name,pwd);
status = -1;
goto error;
}
else
{ EPRINTF(EPLACE,"%s: All QVs for this DB have not been added!\n",Prog_Name);
EXIT(1);
isdam = 0;
while ((dp = readdir(dirp)) != NULL) // Get case dependent root name (if necessary)
{ name = dp->d_name;
if (strcmp(name,MyCatenate("","",root,".db")) == 0)
break;
if (strcmp(name,MyCatenate("","",root,".dam")) == 0)
{ isdam = 1;
break;
}
}
if (dp == NULL)
{ status = -1;
closedir(dirp);
goto error;
}
// Open .qvs, .idx, and .db files
quiva = Fopen(Catenate(db->path,"","",".qvs"),"r");
if (quiva == NULL)
return (-1);
istub = NULL;
indx = NULL;
table = NULL;
coding = NULL;
qvtrk = NULL;
if (isdam)
actor(MyCatenate(pwd,"/",root,".dam"),"dam");
else
actor(MyCatenate(pwd,"/",root,".db"),"db");
root = rindex(db->path,'/');
if (root[1] == '.')
{ *root = '\0';
istub = Fopen(Catenate(db->path,"/",root+2,".db"),"r");
*root = '/';
rewinddir(dirp); // Report each auxiliary file
while ((dp = readdir(dirp)) != NULL)
{ name = dp->d_name;
dlen = strlen(name);
#ifdef HIDE_FILES
if (name[0] != '.')
continue;
dlen -= 1;
name += 1;
#endif
if (dlen < rlen+1)
continue;
if (name[rlen] != '.')
continue;
if (strncmp(name,root,rlen) != 0)
continue;
actor(MyCatenate(pwd,PATHSEP,name,""),name+(rlen+1));
}
closedir(dirp);
error:
free(pwd);
free(root);
return (status);
}
void Print_Read(char *s, int width)
{ int i;
if (s[0] < 4)
{ for (i = 0; s[i] != 4; i++)
{ if (i%width == 0 && i != 0)
printf("\n");
printf("%d",s[i]);
}
printf("\n");
}
else
istub = Fopen(Catenate(db->path,"","",".db"),"r");
if (istub == NULL)
goto error;
{ for (i = 0; s[i] != '\0'; i++)
{ if (i%width == 0 && i != 0)
printf("\n");
printf("%c",s[i]);
}
printf("\n");
}
}
{ int first, last, nfiles;
char prolog[MAX_NAME], fname[MAX_NAME];
int i, j;
if (fscanf(istub,DB_NFILE,&nfiles) != 1)
{ EPRINTF(EPLACE,"%s: Stub file (.db) of %s is junk\n",Prog_Name,root);
goto error;
// Shut down an open 'db' by freeing all associated space, including tracks and QV structures,
// and any open file pointers. The record pointed at by db however remains (the user
// supplied it and so should free it).
void Close_DB(DAZZ_DB *db)
{ if (db->loaded)
free(((char *) (db->bases)) - 1);
else if (db->bases != NULL)
fclose((FILE *) db->bases);
if (db->reads != NULL)
free(db->reads-1);
free(db->path);
Close_QVs(db);
Close_Arrow(db);
while (db->tracks != NULL)
Close_Track(db,db->tracks);
}
if (db->part > 0)
{ int pfirst, plast;
int fbeg, fend;
int n, k;
FILE *indx;
// Determine first how many and which files span the block (fbeg to fend)
/*******************************************************************************************
*
* READ AND ARROW BUFFER ALLOCATION, LOAD, & LOAD_ALL
*
********************************************************************************************/
pfirst = db->ufirst;
plast = pfirst + db->nreads;
// Allocate and return a buffer big enough for the largest read in 'db', leaving room
// for an initial delimiter character
first = 0;
for (fbeg = 0; fbeg < nfiles; fbeg++)
{ if (fscanf(istub,DB_FDATA,&last,fname,prolog) != 3)
{ EPRINTF(EPLACE,"%s: Stub file (.db) of %s is junk\n",Prog_Name,root);
goto error;
char *New_Read_Buffer(DAZZ_DB *db)
{ char *read;
read = (char *) Malloc(db->maxlen+4,"Allocating New Read Buffer");
if (read == NULL)
EXIT(NULL);
return (read+1);
}
if (last > pfirst)
break;
first = last;
// Load into 'read' the i'th read in 'db'. As an upper case ASCII string if ascii is 2, as a
// lower-case ASCII string is ascii is 1, and as a numeric string over 0(A), 1(C), 2(G), and
// 3(T) otherwise.
//
// **NB**, the byte before read will be set to a delimiter character!
int Load_Read(DAZZ_DB *db, int i, char *read, int ascii)
{ FILE *bases = (FILE *) db->bases;
int64 off;
int len, clen;
DAZZ_READ *r = db->reads;
if (i < 0 || i >= db->nreads)
{ EPRINTF(EPLACE,"%s: Index out of bounds (Load_Read)\n",Prog_Name);
EXIT(1);
}
for (fend = fbeg+1; fend <= nfiles; fend++)
{ if (last >= plast)
break;
if (fscanf(istub,DB_FDATA,&last,fname,prolog) != 3)
{ EPRINTF(EPLACE,"%s: Stub file (.db) of %s is junk\n",Prog_Name,root);
goto error;
if (db->loaded)
{ len = r[i].rlen;
strncpy(read,(char *) bases + r[i].boff,len);
if (ascii == 0)
{ if (*read < 4)
read[-1] = read[len] = 4;
else
{ read[len] = '\0';
Number_Read(read);
read[-1] = 4;
}
first = last;
}
else
{ if (*read < 4)
{ read[len] = 4;
if (ascii == 1)
Lower_Read(read);
else
Upper_Read(read);
read[-1] = '\0';
}
else
{ read[len] = '\0';
if ((ascii == 1) != islower(*read))
Change_Read(read);
}
read[-1] = '\0';
}
return (0);
}
indx = Fopen(Catenate(db->path,"","",".idx"),"r");
ncodes = fend-fbeg;
coding = (QVcoding *) Malloc(sizeof(QVcoding)*ncodes,"Allocating coding schemes");
table = (uint16 *) Malloc(sizeof(uint16)*db->nreads,"Allocating QV table indices");
if (indx == NULL || coding == NULL || table == NULL)
{ ncodes = 0;
goto error;
off = r[i].boff;
len = r[i].rlen;
if (ftello(bases) != off)
fseeko(bases,off,SEEK_SET);
clen = COMPRESSED_LEN(len);
if (clen > 0)
{ if (fread(read,clen,1,bases) != 1)
{ EPRINTF(EPLACE,"%s: Failed read of .bps file (Load_Read)\n",Prog_Name);
EXIT(1);
}
}
Uncompress_Read(len,read);
if (ascii == 1)
{ Lower_Read(read);
read[-1] = '\0';
}
else if (ascii == 2)
{ Upper_Read(read);
read[-1] = '\0';
}
else
read[-1] = 4;
return (0);
}
// Carefully get the first coding scheme (its offset is most likely in a DAZZ_RECORD
// in .idx that is *not* in memory). Get all the other coding schemes normally and
// assign the tables # for each read in the block in "tables".
rewind(istub);
(void) fscanf(istub,DB_NFILE,&nfiles);
// Load into 'read' the subread [beg,end] of the i'th read in 'db' and return a pointer to the
// the start of the subinterval (not necessarily = to read !!! ). As a lower case ascii
// string if ascii is 1, an upper case ascii string if ascii is 2, and a numeric string
// over 0(A), 1(C), 2(G), and 3(T) otherwise. A '\0' (or 4) is prepended and appended to
// the string holding the substring so it has a delimeter for traversals in either direction.
// A NULL pointer is returned if an error occured and INTERACTIVE is defined.
first = 0;
for (n = 0; n < fbeg; n++)
{ (void) fscanf(istub,DB_FDATA,&last,fname,prolog);
first = last;
char *Load_Subread(DAZZ_DB *db, int i, int beg, int end, char *read, int ascii)
{ FILE *bases = (FILE *) db->bases;
int64 off;
int len, clen;
int bbeg, bend;
DAZZ_READ *r = db->reads;
if (i < 0 || i >= db->nreads)
{ EPRINTF(EPLACE,"%s: Index out of bounds (Load_Read)\n",Prog_Name);
EXIT(NULL);
}
for (n = fbeg; n < fend; n++)
{ (void) fscanf(istub,DB_FDATA,&last,fname,prolog);
if (db->loaded)
{ len = end-beg;
strncpy(read,(char *) bases + r[i].boff + beg,len);
if (ascii == 0)
{ if (*read < 4)
read[-1] = read[len] = 4;
else
{ read[len] = '\0';
Number_Read(read);
read[-1] = 4;
}
}
else
{ if (*read < 4)
{ read[len] = 4;
if (ascii == 1)
Lower_Read(read);
else
Upper_Read(read);
read[-1] = '\0';
}
else
{ read[len] = '\0';
if ((ascii == 1) != islower(*read))
Change_Read(read);
}
read[-1] = '\0';
}
return (read);
}
i = n-fbeg;
if (first < pfirst)
{ DAZZ_READ read;
bbeg = beg/4;
bend = (end-1)/4+1;
fseeko(indx,sizeof(DAZZ_DB) + sizeof(DAZZ_READ)*first,SEEK_SET);
if (fread(&read,sizeof(DAZZ_READ),1,indx) != 1)
{ EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
ncodes = i;
goto error;
off = r[i].boff + bbeg;
len = end - beg;
if (ftello(bases) != off)
fseeko(bases,off,SEEK_SET);
clen = bend-bbeg;
if (clen > 0)
{ if (fread(read,clen,1,bases) != 1)
{ EPRINTF(EPLACE,"%s: Failed read of .bps file (Load_Read)\n",Prog_Name);
EXIT(NULL);
}
fseeko(quiva,read.coff,SEEK_SET);
nx = Read_QVcoding(quiva);
if (nx == NULL)
{ ncodes = i;
goto error;
}
coding[i] = *nx;
Uncompress_Read(4*clen,read);
read += beg%4;
read[len] = 4;
if (ascii == 1)
{ Lower_Read(read);
read[-1] = '\0';
}
else if (ascii == 2)
{ Upper_Read(read);
read[-1] = '\0';
}
else
{ fseeko(quiva,db->reads[first-pfirst].coff,SEEK_SET);
nx = Read_QVcoding(quiva);
if (nx == NULL)
{ ncodes = i;
goto error;
read[-1] = 4;
return (read);
}
// Allocate a block big enough for all the uncompressed sequences, read them into it,
// reset the 'off' in each read record to be its in-memory offset, and set the
// bases pointer to point at the block after closing the bases file. If ascii is
// non-zero then the reads are converted to ACGT ascii, otherwise the reads are left
// as numeric strings over 0(A), 1(C), 2(G), and 3(T).
int Load_All_Reads(DAZZ_DB *db, int ascii)
{ FILE *bases = (FILE *) db->bases;
int nreads = db->nreads;
DAZZ_READ *reads = db->reads;
void (*translate)(char *s);
char *seq;
int64 o, off;
int i, len, clen;
if (db->loaded)
return (0);
seq = (char *) Malloc(db->totlen+nreads+4,"Allocating All Sequence Reads");
if (seq == NULL)
EXIT(1);
*seq++ = 4;
if (ascii == 1)
translate = Lower_Read;
else
translate = Upper_Read;
o = 0;
for (i = 0; i < nreads; i++)
{ len = reads[i].rlen;
off = reads[i].boff;
if (ftello(bases) != off)
fseeko(bases,off,SEEK_SET);
clen = COMPRESSED_LEN(len);
if (clen > 0)
{ if (fread(seq+o,clen,1,bases) != 1)
{ EPRINTF(EPLACE,"%s: Read of .bps file failed (Load_All_Sequences)\n",Prog_Name);
free(seq-1);
EXIT(1);
}
coding[i] = *nx;
db->reads[first-pfirst].coff = ftello(quiva);
}
Uncompress_Read(len,seq+o);
if (ascii)
translate(seq+o);
reads[i].boff = o;
o += (len+1);
}
reads[nreads].boff = o;
j = first-pfirst;
if (j < 0)
j = 0;
k = last-pfirst;
if (k > db->nreads)
k = db->nreads;
while (j < k)
table[j++] = (uint16) i;
fclose(bases);
first = last;
db->bases = (void *) seq;
db->loaded = 1;
return (0);
}
fclose(indx);
indx = NULL;
/*******************************************************************************************
*
* ARROW OPEN, LOAD, LOAD_ALL, & CLOSE
*
********************************************************************************************/
DAZZ_DB *Arrow_DB = NULL; // Last db/arw used by "Load_Arrow"
DAZZ_ARROW *Arrow_Ptr; // Becomes invalid after closing
// If the Arrow pseudo track is not already in db's track list, then load it and set it up.
// The database reads must not have been loaded with Load_All_Reads yet.
// -1 is returned if a .arw file is not present, and 1 is returned if an error (reported
// to EPLACE) occured and INTERACTIVE is defined. Otherwise a 0 is returned.
int Open_Arrow(DAZZ_DB *db)
{ int64 *avector;
DAZZ_ARROW *atrack;
FILE *afile;
DAZZ_READ *reads;
int i, nreads;
if (db->tracks != NULL && db->tracks->name == atrack_name)
return (0);
if ((db->allarr & DB_ARROW) == 0)
{ EPRINTF(EPLACE,"%s: The DB is not an Arrow database (Open_Arrow)\n",Prog_Name);
EXIT(1);
}
if (db->loaded)
{ EPRINTF(EPLACE,"%s: Cannot open Arrow vectors after loading all reads (Open_Arrow)\n",
Prog_Name);
EXIT(1);
}
else
{ // Load in coding scheme for each file, adjust .coff of first read in the file, and
// record which table each read uses
afile = Fopen(MyCatenate(db->path,"","",".arw"),"r");
if (afile == NULL)
return (-1);
ncodes = nfiles;
coding = (QVcoding *) Malloc(sizeof(QVcoding)*nfiles,"Allocating coding schemes");
table = (uint16 *) Malloc(sizeof(uint16)*db->nreads,"Allocating QV table indices");
if (coding == NULL || table == NULL)
goto error;
nreads = db->nreads;
avector = (int64 *) Malloc(sizeof(int64)*nreads,"Allocating Arrow index");
atrack = (DAZZ_ARROW *) Malloc(sizeof(DAZZ_ARROW),"Allocating Arrow track");
if (avector == NULL || atrack == NULL)
{ fclose(afile);
if (avector != NULL)
free(avector);
EXIT(1);
}
db->tracks = (DAZZ_TRACK *) atrack;
atrack->next = NULL;
atrack->name = atrack_name;
atrack->aoff = avector;
atrack->arrow = (void *) afile;
atrack->loaded = 0;
first = 0;
for (i = 0; i < nfiles; i++)
{ if (fscanf(istub,DB_FDATA,&last,fname,prolog) != 3)
{ EPRINTF(EPLACE,"%s: Stub file (.db) of %s is junk\n",Prog_Name,root);
goto error;
reads = db->reads;
for (i = 0; i < nreads; i++)
avector[i] = reads[i].boff;
return (0);
}
fseeko(quiva,db->reads[first].coff,SEEK_SET);
nx = Read_QVcoding(quiva);
if (nx == NULL)
{ ncodes = i;
goto error;
// Load into 'read' the i'th arrow in 'db'. As an ASCII string if ascii is 1,
// and as a numeric string otherwise.
int Load_Arrow(DAZZ_DB *db, int i, char *arrow, int ascii)
{ FILE *afile;
int64 off;
int len, clen;
if (db != Arrow_DB)
{ if (db->tracks == NULL || db->tracks->name != atrack_name)
{ EPRINTF(EPLACE,"%s: Arrow data is not available (Load_Arrow)\n",Prog_Name);
EXIT(1);
}
Arrow_Ptr = (DAZZ_ARROW *) db->tracks;
Arrow_DB = db;
}
coding[i] = *nx;
db->reads[first].coff = ftello(quiva);
for (j = first; j < last; j++)
table[j] = (uint16) i;
if (i < 0 || i >= db->nreads)
{ EPRINTF(EPLACE,"%s: Index out of bounds (Load_Arrow)\n",Prog_Name);
EXIT(1);
}
first = last;
afile = (FILE *) Arrow_Ptr->arrow;
off = Arrow_Ptr->aoff[i];
len = db->reads[i].rlen;
if (ftello(afile) != off)
fseeko(afile,off,SEEK_SET);
clen = COMPRESSED_LEN(len);
if (clen > 0)
{ if (fread(arrow,clen,1,afile) != 1)
{ EPRINTF(EPLACE,"%s: Failed read of .arw file (Load_Arrow)\n",Prog_Name);
EXIT(1);
}
}
Uncompress_Read(len,arrow);
if (ascii == 1)
{ Letter_Arrow(arrow);
arrow[-1] = '\0';
}
else
arrow[-1] = 4;
return (0);
}
// Allocate and fill in the DAZZ_QV record and add it to the front of the
// track list
// Allocate a block big enough for all the uncompressed Arrow vectors, read them into it,
// reset the 'off' in each arrow record to be its in-memory offset, and set the
// arrow pointer to point at the block after closing the arrow file. If ascii is
// non-zero then the arrows are converted to 0123 ascii, otherwise the arrows are left
// as numeric strings over [0-3].
qvtrk = (DAZZ_QV *) Malloc(sizeof(DAZZ_QV),"Allocating QV pseudo-track");
if (qvtrk == NULL)
goto error;
qvtrk->name = Strdup(".@qvs","Allocating QV pseudo-track name");
if (qvtrk->name == NULL)
goto error;
qvtrk->next = db->tracks;
db->tracks = (DAZZ_TRACK *) qvtrk;
qvtrk->ncodes = ncodes;
qvtrk->table = table;
qvtrk->coding = coding;
qvtrk->quiva = quiva;
int Load_All_Arrows(DAZZ_DB *db, int ascii)
{ int nreads = db->nreads;
DAZZ_READ *reads = db->reads;
FILE *afile;
int64 *aoff;
char *seq;
int64 o, off;
int i, len, clen;
if (db != Arrow_DB)
{ if (db->tracks == NULL || db->tracks->name != atrack_name)
{ EPRINTF(EPLACE,"%s: Arrow data is not available (Load_All_Arrows)\n",Prog_Name);
EXIT(1);
}
Arrow_Ptr = (DAZZ_ARROW *) db->tracks;
Arrow_DB = db;
}
fclose(istub);
if (Arrow_Ptr->loaded)
return (0);
error:
if (qvtrk != NULL)
free(qvtrk);
if (table != NULL)
free(table);
if (coding != NULL)
{ int i;
for (i = 0; i < ncodes; i++)
Free_QVcoding(coding+i);
free(coding);
}
if (indx != NULL)
fclose(indx);
if (istub != NULL)
fclose(istub);
fclose(quiva);
afile = (FILE *) Arrow_Ptr->arrow;
aoff = Arrow_Ptr->aoff;
seq = (char *) Malloc(db->totlen+nreads+4,"Allocating All Arrows");
if (seq == NULL)
EXIT(1);
*seq++ = 4;
o = 0;
for (i = 0; i < nreads; i++)
{ len = reads[i].rlen;
off = aoff[i];
if (ftello(afile) != off)
fseeko(afile,off,SEEK_SET);
clen = COMPRESSED_LEN(len);
if (clen > 0)
{ if (fread(seq+o,clen,1,afile) != 1)
{ EPRINTF(EPLACE,"%s: Read of .bps file failed (Load_All_Sequences)\n",Prog_Name);
free(seq-1);
EXIT(1);
}
}
Uncompress_Read(len,seq+o);
if (ascii)
Letter_Arrow(seq+o);
aoff[i] = o;
o += (len+1);
}
aoff[nreads] = o;
// Close the QV stream, free the QV pseudo track and all associated memory
fclose(afile);
void Close_QVs(DAZZ_DB *db)
{ DAZZ_TRACK *track;
DAZZ_QV *qvtrk;
int i;
Arrow_Ptr->arrow = (void *) seq;
Arrow_Ptr->loaded = 1;
Active_DB = NULL;
return (0);
}
track = db->tracks;
if (track != NULL && strcmp(track->name,".@qvs") == 0)
{ qvtrk = (DAZZ_QV *) track;
for (i = 0; i < qvtrk->ncodes; i++)
Free_QVcoding(qvtrk->coding+i);
free(qvtrk->coding);
free(qvtrk->table);
fclose(qvtrk->quiva);
db->tracks = track->next;
free(track);
// Remove the Arrow pseudo track, all space associated with it, and close the .arw file.
void Close_Arrow(DAZZ_DB *db)
{ DAZZ_ARROW *atrack;
Arrow_DB = NULL;
if (db->tracks != NULL && db->tracks->name == atrack_name)
{ atrack = (DAZZ_ARROW *) db->tracks;
if (atrack->loaded)
free(atrack->arrow);
else
fclose((FILE *) atrack->arrow);
free(atrack->aoff);
db->tracks = db->tracks->next;
free(atrack);
}
return;
}
/*******************************************************************************************
*
* TRACK LOAD & CLOSE ROUTINES
* TRACK CHECK, OPEN, BUFFER ALLOCATION, LOAD, LOAD_ALL & CLOSE ROUTINES
* TRACK EXTRAS READING & WRITING
*
********************************************************************************************/
......@@ -1160,11 +1446,11 @@ int Check_Track(DAZZ_DB *db, char *track, int *kind)
afile = NULL;
if (db->part > 0)
{ afile = fopen(Catenate(db->path,Numbered_Suffix(".",db->part,"."),track,".anno"),"r");
{ afile = fopen(MyCatenate(db->path,MyNumbered_Suffix(".",db->part,"."),track,".anno"),"r");
ispart = 1;
}
if (afile == NULL)
{ afile = fopen(Catenate(db->path,".",track,".anno"),"r");
{ afile = fopen(MyCatenate(db->path,".",track,".anno"),"r");
ispart = 0;
}
if (afile == NULL)
......@@ -1207,17 +1493,111 @@ int Check_Track(DAZZ_DB *db, char *track, int *kind)
return (-1);
}
// The DB has already been trimmed, but a track over the untrimmed DB needs to be opened.
// Trim the track by rereading the untrimmed DB index from the file system.
static int Late_Track_Trim(DAZZ_DB *db, DAZZ_TRACK *track, int ispart)
{ int i, j, r;
int allflag, cutoff;
int ureads;
char *root;
DAZZ_READ read;
FILE *indx;
if (db->cutoff <= 0 && (db->allarr & DB_ALL) != 0) return (0);
cutoff = db->cutoff;
if ((db->allarr & DB_ALL) != 0)
allflag = 0;
else
allflag = DB_BEST;
root = rindex(db->path,'/') + 2;
indx = Fopen(MyCatenate(db->path,"","",".idx"),"r");
fseeko(indx,sizeof(DAZZ_DB) + sizeof(DAZZ_READ)*db->ufirst,SEEK_SET);
if (ispart)
ureads = ((int *) (db->reads))[-1];
else
ureads = db->ureads;
{ int size;
size = track->size;
if (track->data == NULL)
{ char *anno = (char *) track->anno;
j = r = 0;
for (i = r = 0; i < ureads; i++, r += size)
{ if (fread(&read,sizeof(DAZZ_READ),1,indx) != 1)
{ EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
fclose(indx);
EXIT(1);
}
if ((read.flags & DB_BEST) >= allflag && read.rlen >= cutoff)
{ memmove(anno+j,anno+r,size);
j += size;
}
r += size;
}
memmove(anno+j,anno+r,size);
}
else if (size == 4)
{ int *anno4 = (int *) (track->anno);
int *alen = track->alen;
j = 0;
for (i = 0; i < ureads; i++)
{ if (fread(&read,sizeof(DAZZ_READ),1,indx) != 1)
{ EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
fclose(indx);
EXIT(1);
}
if ((read.flags & DB_BEST) >= allflag && read.rlen >= cutoff)
{ anno4[j] = anno4[i];
alen[j] = alen[i];
j += 1;
}
}
track->data = Realloc(track->data,anno4[j],NULL);
}
else // size == 8
{ int64 *anno8 = (int64 *) (track->anno);
int *alen = track->alen;
j = 0;
for (i = 0; i < ureads; i++)
{ if (fread(&read,sizeof(DAZZ_READ),1,indx) != 1)
{ EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
fclose(indx);
EXIT(1);
}
if ((read.flags & DB_BEST) >= allflag && read.rlen >= cutoff)
{ anno8[j] = anno8[i];
alen[j] = alen[i];
j += 1;
}
}
track->data = Realloc(track->data,anno8[j],NULL);
}
track->anno = Realloc(track->anno,track->size*(j+1),NULL);
}
fclose(indx);
return (0);
}
// If track is not already in the db's track list, then allocate all the storage for it,
// read it in from the appropriate file, add it to the track list, and return a pointer
// to the newly created DAZZ_TRACK record. If the track does not exist or cannot be
// opened for some reason, then NULL is returned.
DAZZ_TRACK *Load_Track(DAZZ_DB *db, char *track)
DAZZ_TRACK *Open_Track(DAZZ_DB *db, char *track)
{ FILE *afile, *dfile;
int tracklen, size;
int nreads, ispart;
int treads, ureads;
int64 dmax;
void *anno;
int *alen;
void *data;
char *name;
DAZZ_TRACK *record;
......@@ -1233,11 +1613,11 @@ DAZZ_TRACK *Load_Track(DAZZ_DB *db, char *track)
afile = NULL;
if (db->part)
{ afile = fopen(Catenate(db->path,Numbered_Suffix(".",db->part,"."),track,".anno"),"r");
{ afile = fopen(MyCatenate(db->path,MyNumbered_Suffix(".",db->part,"."),track,".anno"),"r");
ispart = 1;
}
if (afile == NULL)
{ afile = fopen(Catenate(db->path,".",track,".anno"),"r");
{ afile = fopen(MyCatenate(db->path,".",track,".anno"),"r");
ispart = 0;
}
if (afile == NULL)
......@@ -1247,13 +1627,14 @@ DAZZ_TRACK *Load_Track(DAZZ_DB *db, char *track)
dfile = NULL;
anno = NULL;
alen = NULL;
data = NULL;
record = NULL;
if (ispart)
name = Catenate(db->path,Numbered_Suffix(".",db->part,"."),track,".data");
name = MyCatenate(db->path,MyNumbered_Suffix(".",db->part,"."),track,".data");
else
name = Catenate(db->path,".",track,".data");
name = MyCatenate(db->path,".",track,".data");
if (name == NULL)
goto error;
dfile = fopen(name,"r");
......@@ -1316,54 +1697,52 @@ DAZZ_TRACK *Load_Track(DAZZ_DB *db, char *track)
goto error;
if (dfile != NULL)
{ int64 *anno8, off8, dlen;
int *anno4, off4;
{ int64 *anno8;
int *anno4;
int64 x, y;
int i;
alen = (int *) Malloc(sizeof(int)*nreads,"Allocating Track Anno Lengths");
if (alen == NULL)
goto error;
if (fread(anno,size,nreads+1,afile) != (size_t) (nreads+1))
{ EPRINTF(EPLACE,"%s: Track '%s' annotation file is junk\n",Prog_Name,track);
goto error;
}
dmax = 0;
if (size == 4)
{ anno4 = (int *) anno;
off4 = anno4[0];
if (off4 != 0)
{ for (i = 0; i <= nreads; i++)
anno4[i] -= off4;
fseeko(dfile,off4,SEEK_SET);
y = anno4[0];
for (i = 1; i <= nreads; i++)
{ x = anno4[i];
y = x-y;
if (y > dmax)
dmax = y;
alen[i-1] = y;
y = x;
}
dlen = anno4[nreads];
data = (void *) Malloc(dlen,"Allocating Track Data Vector");
}
else
{ anno8 = (int64 *) anno;
off8 = anno8[0];
if (off8 != 0)
{ for (i = 0; i <= nreads; i++)
anno8[i] -= off8;
fseeko(dfile,off8,SEEK_SET);
y = anno8[0];
for (i = 1; i <= nreads; i++)
{ x = anno8[i];
y = x-y;
if (y > dmax)
dmax = y;
alen[i-1] = y;
y = x;
}
dlen = anno8[nreads];
data = (void *) Malloc(dlen,"Allocating Track Data Vector");
}
if (data == NULL)
goto error;
if (dlen > 0)
{ if (fread(data,dlen,1,dfile) != 1)
{ EPRINTF(EPLACE,"%s: Track '%s' data file is junk\n",Prog_Name,track);
goto error;
}
}
fclose(dfile);
dfile = NULL;
}
else
{ if (fread(anno,size,nreads,afile) != (size_t) nreads)
{ dmax = 0;
if (fread(anno,size,nreads,afile) != (size_t) nreads)
{ EPRINTF(EPLACE,"%s: Track '%s' annotation file is junk\n",Prog_Name,track);
goto error;
}
data = NULL;
}
fclose(afile);
......@@ -1374,16 +1753,23 @@ DAZZ_TRACK *Load_Track(DAZZ_DB *db, char *track)
record->name = Strdup(track,"Allocating Track Name");
if (record->name == NULL)
goto error;
record->data = data;
if (dfile == NULL)
record->data = NULL;
else
record->data = (void *) dfile;
record->anno = anno;
record->alen = alen;
record->size = size;
record->nreads = nreads;
record->loaded = 0;
record->dmax = dmax;
if (db->trimmed && tracklen != treads)
{ if (Late_Track_Trim(db,record,ispart))
goto error;
}
if (db->tracks != NULL && strcmp(db->tracks->name,".@qvs") == 0)
if (db->tracks != NULL && (db->tracks->name == qtrack_name || db->tracks->name == atrack_name))
{ record->next = db->tracks->next;
db->tracks->next = record;
}
......@@ -1399,6 +1785,8 @@ error:
free(record);
if (data != NULL)
free(data);
if (alen != NULL)
free(alen);
if (anno != NULL)
free(anno);
if (dfile != NULL)
......@@ -1407,6 +1795,134 @@ error:
EXIT (NULL);
}
// Allocate a data buffer large enough to hold the longest read data block that will occur
// in the track. If cannot allocate memory then return NULL if INTERACTIVE is defined,
// or print error to stderr and exit otherwise.
void *New_Track_Buffer(DAZZ_TRACK *track)
{ void *data;
data = (void *) Malloc(track->dmax,"Allocating New Track Data Buffer");
if (data == NULL)
EXIT(NULL);
return (data);
}
// Load into 'data' the read data block for read i's "track" data. Return the length of
// the data in bytes, unless an error occurs and INTERACTIVE is defined in which case
// return wtih -1.
int Load_Track_Data(DAZZ_TRACK *track, int i, void *data)
{ FILE *dfile;
int64 off;
int len;
if (i < 0 || i >= track->nreads)
{ EPRINTF(EPLACE,"%s: Index out of bounds (Load_Track_Data)\n",Prog_Name);
EXIT(-1);
}
if (track->size == 4)
off = ((int *) track->anno)[i];
else
off = ((int64 *) track->anno)[i];
len = track->alen[i];
if (track->loaded)
{ strncpy(data,(void *) track->data + off,len);
return (len);
}
dfile = (FILE *) track->data;
if (ftello(dfile) != off)
fseeko(dfile,off,SEEK_SET);
if (len > 0)
if (fread(data,len,1,dfile) != 1)
{ EPRINTF(EPLACE,"%s: Failed read of .data file (Load_Track_Data)\n",Prog_Name);
EXIT(-1);
}
return (len);
}
// Allocate a block big enough for all the track data and read the data into it,
// reset the 'off' in each anno pointer to be its in-memory offset, and set the
// data pointer to point at the block after closing the data file. Return with a
// zero, except when an error occurs and INTERACTIVE is defined in which
// case return wtih 1.
int Load_All_Track_Data(DAZZ_TRACK *track)
{ FILE *dfile;
void *data;
int *alen;
int64 dlen, off, o;
int i, len, nreads;
if (track->loaded || track->data == NULL)
return (0);
nreads = track->nreads;
dfile = (FILE *) track->data;
alen = track->alen;
dlen = 0;
for (i = 0; i < nreads; i++)
dlen += alen[i];
data = (void *) Malloc(dlen,"Allocating All Track Data");
if (data == NULL)
EXIT(1);
o = 0;
if (track->size == 4)
{ int *anno4 = (int *) track->anno;
for (i = 0; i < nreads; i++)
{ len = alen[i];
off = anno4[i];
if (ftello(dfile) != off)
fseeko(dfile,off,SEEK_SET);
if (len > 0)
{ if (fread(data+o,len,1,dfile) != 1)
{ EPRINTF(EPLACE,"%s: Read of .data failed (Load_All_Track_Data)\n",Prog_Name);
free(data);
EXIT(1);
}
}
anno4[i] = o;
o += len;
}
anno4[nreads] = o;
}
else
{ int64 *anno8 = (int64 *) track->anno;
for (i = 0; i < nreads; i++)
{ len = alen[i];
off = anno8[i];
if (ftello(dfile) != off)
fseeko(dfile,off,SEEK_SET);
if (len > 0)
{ if (fread(data+o,len,1,dfile) != 1)
{ EPRINTF(EPLACE,"%s: Read of .data failed (Load_All_Track_Data)\n",Prog_Name);
free(data);
EXIT(1);
}
}
anno8[i] = o;
o += len;
}
anno8[nreads] = o;
}
fclose(dfile);
track->data = (void *) data;
track->loaded = 1;
return (0);
}
// Assumming file pointer for afile is correctly positioned at the start of a extra item,
// and aname is the name of the .anno file, decode the value present and places it in
// extra if extra->nelem == 0, otherwise reduce the value just read into extra according
......@@ -1542,32 +2058,28 @@ error:
int Write_Extra(FILE *afile, DAZZ_EXTRA *extra)
{ int slen;
#define EWRITE(v,s,n,file) \
{ if (fwrite(v,s,n,file) != (size_t) n) \
{ fprintf(stderr,"%s: System error, read failed!\n",Prog_Name); \
EXIT(1); \
} \
}
EWRITE(&(extra->vtype),sizeof(int),1,afile)
FWRITE(&(extra->nelem),sizeof(int),1,afile)
FWRITE(&(extra->accum),sizeof(int),1,afile)
FFWRITE(&(extra->vtype),sizeof(int),1,afile)
FFWRITE(&(extra->nelem),sizeof(int),1,afile)
FFWRITE(&(extra->accum),sizeof(int),1,afile)
slen = strlen(extra->name);
FWRITE(&slen,sizeof(int),1,afile)
FWRITE(extra->name,1,slen,afile)
FWRITE(extra->value,8,extra->nelem,afile)
FFWRITE(&slen,sizeof(int),1,afile)
FFWRITE(extra->name,1,slen,afile)
FFWRITE(extra->value,8,extra->nelem,afile)
return (0);
}
void Close_Track(DAZZ_DB *db, char *track)
void Close_Track(DAZZ_DB *db, DAZZ_TRACK *track)
{ DAZZ_TRACK *record, *prev;
prev = NULL;
for (record = db->tracks; record != NULL; record = record->next)
{ if (strcmp(record->name,track) == 0)
{ if (track == record)
{ free(record->anno);
if (record->loaded)
free(record->data);
else
fclose((FILE *) record->data);
free(record->name);
if (prev == NULL)
db->tracks = record->next;
......@@ -1576,186 +2088,252 @@ void Close_Track(DAZZ_DB *db, char *track)
free(record);
return;
}
prev = record;
prev = record;
}
return;
}
/*******************************************************************************************
*
* QV OPEN, BUFFER ALLOCATION, LOAD, & CLOSE ROUTINES
*
********************************************************************************************/
DAZZ_DB *Active_DB = NULL; // Last db/qv used by "Load_QVentry"
DAZZ_QV *Active_QV; // Becomes invalid after closing
int Open_QVs(DAZZ_DB *db)
{ FILE *quiva, *istub, *indx;
char *root;
uint16 *table;
DAZZ_QV *qvtrk;
QVcoding *coding, *nx;
int ncodes = 0;
if (db->tracks != NULL && db->tracks->name == qtrack_name)
return (0);
if (db->trimmed)
{ EPRINTF(EPLACE,"%s: Cannot load QVs after trimming the DB\n",Prog_Name);
EXIT(1);
}
if (db->reads[db->nreads-1].coff < 0)
{ if (db->part > 0)
{ EPRINTF(EPLACE,"%s: All QVs for this block have not been added to the DB!\n",Prog_Name);
EXIT(1);
}
else
{ EPRINTF(EPLACE,"%s: All QVs for this DB have not been added!\n",Prog_Name);
EXIT(1);
}
return;
}
// Open .qvs, .idx, and .db files
/*******************************************************************************************
*
* READ BUFFER ALLOCATION AND READ ACCESS
*
********************************************************************************************/
// Allocate and return a buffer big enough for the largest read in 'db', leaving room
// for an initial delimiter character
quiva = Fopen(MyCatenate(db->path,"","",".qvs"),"r");
if (quiva == NULL)
return (-1);
char *New_Read_Buffer(DAZZ_DB *db)
{ char *read;
istub = NULL;
indx = NULL;
table = NULL;
coding = NULL;
qvtrk = NULL;
read = (char *) Malloc(db->maxlen+4,"Allocating New Read Buffer");
if (read == NULL)
EXIT(NULL);
return (read+1);
root = rindex(db->path,'/');
if (root[1] == '.')
{ *root = '\0';
istub = Fopen(MyCatenate(db->path,"/",root+2,".db"),"r");
*root = '/';
}
else
istub = Fopen(MyCatenate(db->path,"","",".db"),"r");
if (istub == NULL)
goto error;
// Load into 'read' the i'th read in 'db'. As an upper case ASCII string if ascii is 2, as a
// lower-case ASCII string is ascii is 1, and as a numeric string over 0(A), 1(C), 2(G), and
// 3(T) otherwise.
//
// **NB**, the byte before read will be set to a delimiter character!
int Load_Read(DAZZ_DB *db, int i, char *read, int ascii)
{ FILE *bases = (FILE *) db->bases;
int64 off;
int len, clen;
DAZZ_READ *r = db->reads;
{ int first, last, nfiles;
char prolog[MAX_NAME], fname[MAX_NAME];
int i, j;
if (i >= db->nreads)
{ EPRINTF(EPLACE,"%s: Index out of bounds (Load_Read)\n",Prog_Name);
EXIT(1);
}
if (bases == NULL)
{ bases = Fopen(Catenate(db->path,"","",".bps"),"r");
if (bases == NULL)
EXIT(1);
db->bases = (void *) bases;
if (fscanf(istub,DB_NFILE,&nfiles) != 1)
{ EPRINTF(EPLACE,"%s: Stub file (.db) of %s is junk\n",Prog_Name,root);
goto error;
}
off = r[i].boff;
len = r[i].rlen;
if (db->part > 0)
{ int pfirst, plast;
int fbeg, fend;
int n, k;
FILE *indx;
if (ftello(bases) != off)
fseeko(bases,off,SEEK_SET);
clen = COMPRESSED_LEN(len);
if (clen > 0)
{ if (fread(read,clen,1,bases) != 1)
{ EPRINTF(EPLACE,"%s: Failed read of .bps file (Load_Read)\n",Prog_Name);
EXIT(1);
}
// Determine first how many and which files span the block (fbeg to fend)
pfirst = db->ufirst;
plast = pfirst + db->nreads;
first = 0;
for (fbeg = 0; fbeg < nfiles; fbeg++)
{ if (fscanf(istub,DB_FDATA,&last,fname,prolog) != 3)
{ EPRINTF(EPLACE,"%s: Stub file (.db) of %s is junk\n",Prog_Name,root);
goto error;
}
Uncompress_Read(len,read);
if (ascii == 1)
{ Lower_Read(read);
read[-1] = '\0';
if (last > pfirst)
break;
first = last;
}
else if (ascii == 2)
{ Upper_Read(read);
read[-1] = '\0';
for (fend = fbeg+1; fend <= nfiles; fend++)
{ if (last >= plast)
break;
if (fscanf(istub,DB_FDATA,&last,fname,prolog) != 3)
{ EPRINTF(EPLACE,"%s: Stub file (.db) of %s is junk\n",Prog_Name,root);
goto error;
}
else
read[-1] = 4;
return (0);
first = last;
}
// Load into 'read' the i'th arrow in 'db'. As an ASCII string if ascii is 1,
// and as a numeric string otherwise.
//
indx = Fopen(MyCatenate(db->path,"","",".idx"),"r");
ncodes = fend-fbeg;
coding = (QVcoding *) Malloc(sizeof(QVcoding)*ncodes,"Allocating coding schemes");
table = (uint16 *) Malloc(sizeof(uint16)*db->nreads,"Allocating QV table indices");
if (indx == NULL || coding == NULL || table == NULL)
{ ncodes = 0;
goto error;
}
DAZZ_DB *Arrow_DB = NULL; // Last db/arw used by "Load_Arrow"
FILE *Arrow_File = NULL; // Becomes invalid after closing
// Carefully get the first coding scheme (its offset is most likely in a DAZZ_RECORD
// in .idx that is *not* in memory). Get all the other coding schemes normally and
// assign the tables # for each read in the block in "tables".
int Load_Arrow(DAZZ_DB *db, int i, char *read, int ascii)
{ FILE *arrow;
int64 off;
int len, clen;
DAZZ_READ *r = db->reads;
rewind(istub);
(void) fscanf(istub,DB_NFILE,&nfiles);
if (i >= db->nreads)
{ EPRINTF(EPLACE,"%s: Index out of bounds (Load_Arrow)\n",Prog_Name);
EXIT(1);
}
if (Arrow_DB != db)
{ if (Arrow_File != NULL)
fclose(Arrow_File);
arrow = Fopen(Catenate(db->path,"","",".arw"),"r");
if (arrow == NULL)
EXIT(1);
Arrow_File = arrow;
Arrow_DB = db;
first = 0;
for (n = 0; n < fbeg; n++)
{ (void) fscanf(istub,DB_FDATA,&last,fname,prolog);
first = last;
}
else
arrow = Arrow_File;
off = r[i].boff;
len = r[i].rlen;
for (n = fbeg; n < fend; n++)
{ (void) fscanf(istub,DB_FDATA,&last,fname,prolog);
if (ftello(arrow) != off)
fseeko(arrow,off,SEEK_SET);
clen = COMPRESSED_LEN(len);
if (clen > 0)
{ if (fread(read,clen,1,arrow) != 1)
{ EPRINTF(EPLACE,"%s: Failed read of .bps file (Load_Arrow)\n",Prog_Name);
EXIT(1);
i = n-fbeg;
if (first < pfirst)
{ DAZZ_READ read;
fseeko(indx,sizeof(DAZZ_DB) + sizeof(DAZZ_READ)*first,SEEK_SET);
if (fread(&read,sizeof(DAZZ_READ),1,indx) != 1)
{ EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
ncodes = i;
goto error;
}
fseeko(quiva,read.coff,SEEK_SET);
nx = Read_QVcoding(quiva);
if (nx == NULL)
{ ncodes = i;
goto error;
}
Uncompress_Read(len,read);
if (ascii == 1)
{ Letter_Arrow(read);
read[-1] = '\0';
coding[i] = *nx;
}
else
read[-1] = 4;
return (0);
{ fseeko(quiva,db->reads[first-pfirst].coff,SEEK_SET);
nx = Read_QVcoding(quiva);
if (nx == NULL)
{ ncodes = i;
goto error;
}
coding[i] = *nx;
db->reads[first-pfirst].coff = ftello(quiva);
}
char *Load_Subread(DAZZ_DB *db, int i, int beg, int end, char *read, int ascii)
{ FILE *bases = (FILE *) db->bases;
int64 off;
int len, clen;
int bbeg, bend;
DAZZ_READ *r = db->reads;
j = first-pfirst;
if (j < 0)
j = 0;
k = last-pfirst;
if (k > db->nreads)
k = db->nreads;
while (j < k)
table[j++] = (uint16) i;
if (i >= db->nreads)
{ EPRINTF(EPLACE,"%s: Index out of bounds (Load_Read)\n",Prog_Name);
EXIT(NULL);
first = last;
}
if (bases == NULL)
{ bases = Fopen(Catenate(db->path,"","",".bps"),"r");
if (bases == NULL)
EXIT(NULL);
db->bases = (void *) bases;
fclose(indx);
indx = NULL;
}
bbeg = beg/4;
bend = (end-1)/4+1;
else
{ // Load in coding scheme for each file, adjust .coff of first read in the file, and
// record which table each read uses
off = r[i].boff + bbeg;
len = end - beg;
ncodes = nfiles;
coding = (QVcoding *) Malloc(sizeof(QVcoding)*nfiles,"Allocating coding schemes");
table = (uint16 *) Malloc(sizeof(uint16)*db->nreads,"Allocating QV table indices");
if (coding == NULL || table == NULL)
goto error;
if (ftello(bases) != off)
fseeko(bases,off,SEEK_SET);
clen = bend-bbeg;
if (clen > 0)
{ if (fread(read,clen,1,bases) != 1)
{ EPRINTF(EPLACE,"%s: Failed read of .bps file (Load_Read)\n",Prog_Name);
EXIT(NULL);
first = 0;
for (i = 0; i < nfiles; i++)
{ if (fscanf(istub,DB_FDATA,&last,fname,prolog) != 3)
{ EPRINTF(EPLACE,"%s: Stub file (.db) of %s is junk\n",Prog_Name,root);
goto error;
}
fseeko(quiva,db->reads[first].coff,SEEK_SET);
nx = Read_QVcoding(quiva);
if (nx == NULL)
{ ncodes = i;
goto error;
}
Uncompress_Read(4*clen,read);
read += beg%4;
read[len] = 4;
if (ascii == 1)
{ Lower_Read(read);
read[-1] = '\0';
coding[i] = *nx;
db->reads[first].coff = ftello(quiva);
for (j = first; j < last; j++)
table[j] = (uint16) i;
first = last;
}
else if (ascii == 2)
{ Upper_Read(read);
read[-1] = '\0';
}
else
read[-1] = 4;
return (read);
// Allocate and fill in the DAZZ_QV record and add it to the front of the
// track list
qvtrk = (DAZZ_QV *) Malloc(sizeof(DAZZ_QV),"Allocating QV pseudo-track");
if (qvtrk == NULL)
goto error;
qvtrk->name = qtrack_name;
if (qvtrk->name == NULL)
goto error;
qvtrk->next = db->tracks;
db->tracks = (DAZZ_TRACK *) qvtrk;
qvtrk->ncodes = ncodes;
qvtrk->table = table;
qvtrk->coding = coding;
qvtrk->quiva = quiva;
}
fclose(istub);
return (0);
/*******************************************************************************************
*
* QV BUFFER ALLOCATION QV READ ACCESS
*
********************************************************************************************/
error:
if (qvtrk != NULL)
free(qvtrk);
if (table != NULL)
free(table);
if (coding != NULL)
{ int i;
for (i = 0; i < ncodes; i++)
Free_QVcoding(coding+i);
free(coding);
}
if (indx != NULL)
fclose(indx);
if (istub != NULL)
fclose(istub);
fclose(quiva);
EXIT(1);
}
// Allocate and return a buffer of 5 vectors big enough for the largest read in 'db'
......@@ -1783,13 +2361,14 @@ int Load_QVentry(DAZZ_DB *db, int i, char **entry, int ascii)
if (db != Active_DB)
{ if (db->tracks == NULL || strcmp(db->tracks->name,".@qvs") != 0)
{ EPRINTF(EPLACE,"%s: QV's are not loaded (Load_QVentry)\n",Prog_Name);
{ EPRINTF(EPLACE,"%s: QV's have not been opened (Load_QVentry)\n",Prog_Name);
EXIT(1);
}
Active_QV = (DAZZ_QV *) db->tracks;
Active_DB = db;
}
if (i >= db->nreads)
if (i < 0 || i >= db->nreads)
{ EPRINTF(EPLACE,"%s: Index out of bounds (Load_QVentry)\n",Prog_Name);
EXIT(1);
}
......@@ -1823,187 +2402,33 @@ int Load_QVentry(DAZZ_DB *db, int i, char **entry, int ascii)
return (0);
}
// Close the QV stream, free the QV pseudo track and all associated memory
/*******************************************************************************************
*
* BLOCK LOAD OF ALL READS (PRIMARILY FOR DALIGNER)
*
********************************************************************************************/
// Allocate a block big enough for all the uncompressed sequences, read them into it,
// reset the 'off' in each read record to be its in-memory offset, and set the
// bases pointer to point at the block after closing the bases file. If ascii is
// non-zero then the reads are converted to ACGT ascii, otherwise the reads are left
// as numeric strings over 0(A), 1(C), 2(G), and 3(T).
int Read_All_Sequences(DAZZ_DB *db, int ascii)
{ FILE *bases;
int nreads = db->nreads;
DAZZ_READ *reads = db->reads;
void (*translate)(char *s);
char *seq;
int64 o, off;
int i, len, clen;
bases = Fopen(Catenate(db->path,"","",".bps"),"r");
if (bases == NULL)
EXIT(1);
seq = (char *) Malloc(db->totlen+nreads+4,"Allocating All Sequence Reads");
if (seq == NULL)
{ fclose(bases);
EXIT(1);
}
*seq++ = 4;
if (ascii == 1)
translate = Lower_Read;
else
translate = Upper_Read;
o = 0;
for (i = 0; i < nreads; i++)
{ len = reads[i].rlen;
off = reads[i].boff;
if (ftello(bases) != off)
fseeko(bases,off,SEEK_SET);
clen = COMPRESSED_LEN(len);
if (clen > 0)
{ if (fread(seq+o,clen,1,bases) != 1)
{ EPRINTF(EPLACE,"%s: Read of .bps file failed (Read_All_Sequences)\n",Prog_Name);
free(seq);
fclose(bases);
EXIT(1);
}
}
Uncompress_Read(len,seq+o);
if (ascii)
translate(seq+o);
reads[i].boff = o;
o += (len+1);
}
reads[nreads].boff = o;
fclose(bases);
db->bases = (void *) seq;
db->loaded = 1;
return (0);
}
// For the DB or DAM "path" = "prefix/root.[db|dam]", find all the files for that DB, i.e. all
// those of the form "prefix/[.]root.part" and call actor with the complete path to each file
// pointed at by path, and the suffix of the path by extension. The . proceeds the root
// name if the defined constant HIDE_FILES is set. Always the first call is with the
// path "prefix/root.[db|dam]" and extension "db" or "dam". There will always be calls for
// "prefix/[.]root.idx" and "prefix/[.]root.bps". All other calls are for *tracks* and
// so this routine gives one a way to know all the tracks associated with a given DB.
// -1 is returned if the path could not be found, and 1 is returned if an error (reported
// to EPLACE) occured and INTERACTIVE is defined. Otherwise a 0 is returned.
int List_DB_Files(char *path, void actor(char *path, char *extension))
{ int status, plen, rlen, dlen;
char *root, *pwd, *name;
int isdam;
DIR *dirp;
struct dirent *dp;
status = 0;
pwd = PathTo(path);
plen = strlen(path);
if (strcmp(path+(plen-4),".dam") == 0)
root = Root(path,".dam");
else
root = Root(path,".db");
rlen = strlen(root);
if (root == NULL || pwd == NULL)
{ free(pwd);
free(root);
EXIT(1);
}
if ((dirp = opendir(pwd)) == NULL)
{ EPRINTF(EPLACE,"%s: Cannot open directory %s (List_DB_Files)\n",Prog_Name,pwd);
status = -1;
goto error;
}
isdam = 0;
while ((dp = readdir(dirp)) != NULL) // Get case dependent root name (if necessary)
{ name = dp->d_name;
if (strcmp(name,Catenate("","",root,".db")) == 0)
break;
if (strcmp(name,Catenate("","",root,".dam")) == 0)
{ isdam = 1;
break;
}
}
if (dp == NULL)
{ status = -1;
closedir(dirp);
goto error;
}
if (isdam)
actor(Catenate(pwd,"/",root,".dam"),"dam");
else
actor(Catenate(pwd,"/",root,".db"),"db");
rewinddir(dirp); // Report each auxiliary file
while ((dp = readdir(dirp)) != NULL)
{ name = dp->d_name;
dlen = strlen(name);
#ifdef HIDE_FILES
if (name[0] != '.')
continue;
dlen -= 1;
name += 1;
#endif
if (dlen < rlen+1)
continue;
if (name[rlen] != '.')
continue;
if (strncmp(name,root,rlen) != 0)
continue;
actor(Catenate(pwd,PATHSEP,name,""),name+(rlen+1));
}
closedir(dirp);
error:
free(pwd);
free(root);
return (status);
}
void Close_QVs(DAZZ_DB *db)
{ DAZZ_TRACK *track;
DAZZ_QV *qvtrk;
int i;
void Print_Read(char *s, int width)
{ int i;
Active_DB = NULL;
if (s[0] < 4)
{ for (i = 0; s[i] != 4; i++)
{ if (i%width == 0 && i != 0)
printf("\n");
printf("%d",s[i]);
}
printf("\n");
}
else
{ for (i = 0; s[i] != '\0'; i++)
{ if (i%width == 0 && i != 0)
printf("\n");
printf("%c",s[i]);
}
printf("\n");
track = db->tracks;
if (track != NULL && strcmp(track->name,".@qvs") == 0)
{ qvtrk = (DAZZ_QV *) track;
for (i = 0; i < qvtrk->ncodes; i++)
Free_QVcoding(qvtrk->coding+i);
free(qvtrk->coding);
free(qvtrk->table);
fclose(qvtrk->quiva);
db->tracks = track->next;
free(track);
}
return;
}
/*******************************************************************************************
*
* COMMAND LINE BLOCK PARSER
* COMMAND LINE @-EXPANSION PARSER
* Take a command line argument and interpret the '@' block number ranges.
* Parse_Block_Arg produces an Block_Looper iterator object that can then
* be invoked multiple times to iterate through all the files implied by
......@@ -2014,18 +2439,52 @@ void Print_Read(char *s, int width)
typedef struct
{ int first, last, next;
char *root, *pwd, *ppnt;
int isDB;
char *slice;
} _Block_Looper;
// Advance the iterator e_parse to the next file, open it, and return the file pointer
// to it. Return NULL if at the end of the list of files.
int Next_Block_Exists(Block_Looper *e_parse)
{ _Block_Looper *parse = (_Block_Looper *) e_parse;
char *disp;
struct stat sts;
if (parse->isDB)
{ if (parse->next+1 > parse->last)
return (0);
else
return (1);
}
if (parse->next+1 > parse->last)
return (0);
if (parse->next < 0)
disp = parse->root;
else
disp = MyNumbered_Suffix(parse->root,parse->next+1,parse->ppnt);
if (stat(MyCatenate(parse->pwd,"/",disp,".las"),&sts))
return (0);
else
return (1);
}
FILE *Next_Block_Arg(Block_Looper *e_parse)
{ _Block_Looper *parse = (_Block_Looper *) e_parse;
char *disp;
FILE *input;
if (parse->isDB)
{ fprintf(stderr,"%s: Cannot open a DB block as a file (Next_Block_Arg)\n",Prog_Name);
exit (1);
}
parse->next += 1;
if (parse->next > parse->last)
return (NULL);
......@@ -2033,9 +2492,9 @@ FILE *Next_Block_Arg(Block_Looper *e_parse)
if (parse->next < 0)
disp = parse->root;
else
disp = Numbered_Suffix(parse->root,parse->next,parse->ppnt);
disp = MyNumbered_Suffix(parse->root,parse->next,parse->ppnt);
if ((input = fopen(Catenate(parse->pwd,"/",disp,".las"),"r")) == NULL)
if ((input = fopen(MyCatenate(parse->pwd,"/",disp,".las"),"r")) == NULL)
{ if (parse->last != INT_MAX)
{ fprintf(stderr,"%s: %s.las is not present\n",Prog_Name,disp);
exit (1);
......@@ -2053,23 +2512,38 @@ void Reset_Block_Arg(Block_Looper *e_parse)
parse->next = parse->first - 1;
}
// Advance the iterator e_parse to the next file
int Advance_Block_Arg(Block_Looper *e_parse)
{ _Block_Looper *parse = (_Block_Looper *) e_parse;
if (Next_Block_Exists(e_parse))
{ parse->next += 1;
return (1);
}
else
return (0);
}
// Return a pointer to the path for the current file
char *Block_Arg_Path(Block_Looper *e_parse)
{ _Block_Looper *parse = (_Block_Looper *) e_parse;
return (parse->pwd);
return (Strdup(parse->pwd,"Allocating block path"));
}
// Return a pointer to the root name for the current file
char *Block_Arg_Root(Block_Looper *e_parse)
{ _Block_Looper *parse = (_Block_Looper *) e_parse;
char *name;
if (parse->next < 0)
return (parse->root);
name = parse->root;
else
return (Numbered_Suffix(parse->root,parse->next,parse->ppnt));
name = MyNumbered_Suffix(parse->root,parse->next,parse->ppnt);
return (Strdup(name,"Allocating block root"));
}
// Free the iterator
......@@ -2093,6 +2567,11 @@ char *Next_Block_Slice(Block_Looper *e_parse, int slice)
exit (1);
}
if (parse->next+1 > parse->last)
return (NULL);
if (parse->next+slice > parse->last)
slice = parse->last-parse->next;
if (parse->first < 0)
sprintf(parse->slice,"%s/%s",parse->pwd,parse->root);
else
......@@ -2105,7 +2584,7 @@ char *Next_Block_Slice(Block_Looper *e_parse, int slice)
// Parse the command line argument and return an iterator to move through the
// file names, setting it up to report the first file.
Block_Looper *Parse_Block_Arg(char *arg)
static Block_Looper *parse_block_arg(char *arg, int isDB)
{ _Block_Looper *parse;
char *pwd, *root;
char *ppnt, *cpnt;
......@@ -2113,6 +2592,16 @@ Block_Looper *Parse_Block_Arg(char *arg)
parse = (_Block_Looper *) Malloc(sizeof(_Block_Looper),"Allocating parse node");
pwd = PathTo(arg);
if (isDB)
{ int len = strlen(arg);
if (strcmp(arg+(len-4),".dam") == 0)
{ root = Root(arg,".dam");
isDB = 2;
}
else
root = Root(arg,".db");
}
else
root = Root(arg,".las");
if (parse == NULL || pwd == NULL || root == NULL)
exit (1);
......@@ -2133,9 +2622,9 @@ Block_Looper *Parse_Block_Arg(char *arg)
last = INT_MAX;
}
else
{ if (first < 0)
{ if (first < 1)
{ fprintf(stderr,
"%s: Integer following %c-sigan is less than 0 in source name '%s'\n",
"%s: Integer following %c-sigan is less than 1 in source name '%s'\n",
Prog_Name,BLOCK_SYMBOL,root);
exit (1);
}
......@@ -2169,5 +2658,42 @@ Block_Looper *Parse_Block_Arg(char *arg)
parse->last = last;
parse->next = first-1;
parse->slice = NULL;
parse->isDB = isDB;
if (isDB && first >= 0 && last == INT_MAX)
{ char buffer[2*MAX_NAME+100];
char *dbname;
FILE *dbfile;
int i, nfiles, nblocks;
dbname = MyCatenate(pwd,"/",root,"db");
dbfile = fopen(dbname,"r");
if (dbfile == NULL)
{ dbname = MyCatenate(pwd,"/",root,"dam");
dbfile = fopen(dbname,"r");
if (dbfile == NULL)
{ fprintf(stderr,"%s: Cannot open database %s[db|dam]\n",Prog_Name,root);
exit (1);
}
}
if (fscanf(dbfile,DB_NFILE,&nfiles) != 1)
SYSTEM_READ_ERROR
for (i = 0; i < nfiles; i++)
if (fgets(buffer,2*MAX_NAME+100,dbfile) == NULL)
SYSTEM_READ_ERROR
if (fscanf(dbfile,DB_NBLOCK,&nblocks) != 1)
SYSTEM_READ_ERROR
fclose(dbfile);
parse->last = nblocks;
}
return ((Block_Looper *) parse);
}
Block_Looper *Parse_Block_LAS_Arg(char *arg)
{ return (parse_block_arg(arg, 0)); }
Block_Looper *Parse_Block_DB_Arg(char *arg)
{ return (parse_block_arg(arg, 1)); }
......@@ -150,7 +150,7 @@ int Count_Args(char *arg);
// Output
#define FWRITE(v,s,n,file) \
#define FFWRITE(v,s,n,file) \
{ if (fwrite(v,s,n,file) != (size_t) n) \
SYSTEM_WRITE_ERROR \
}
......@@ -179,7 +179,7 @@ int Count_Args(char *arg);
// Input
#define FREAD(v,s,n,file) \
#define FFREAD(v,s,n,file) \
{ if (fread(v,s,n,file) != (size_t) n) \
{ if (ferror(file)) \
SYSTEM_READ_ERROR \
......@@ -261,6 +261,7 @@ void Print_Read(char *s, int width);
void Lower_Read(char *s); // Convert read from numbers to lowercase letters (0-3 to acgt)
void Upper_Read(char *s); // Convert read from numbers to uppercase letters (0-3 to ACGT)
void Number_Read(char *s); // Convert read from letters to numbers
void Change_Read(char *s); // Convert read from one case to the other
void Letter_Arrow(char *s); // Convert arrow pw's from numbers to uppercase letters (0-3 to 1234)
void Number_Arrow(char *s); // Convert arrow pw string from letters to numbers
......@@ -273,8 +274,8 @@ void Number_Arrow(char *s); // Convert arrow pw string from letters to number
********************************************************************************************/
#define DB_QV 0x03ff // Mask for 3-digit quality value
#define DB_CSS 0x0400 // This is the second or later of a group of reads from a given insert
#define DB_BEST 0x0800 // This is the longest read of a given insert (may be the only 1)
#define DB_CSS 0x0400 // This is the second or later of a group of subreads from a given insert
#define DB_BEST 0x0800 // This is the "best" subread of a given insert (may be the only 1)
#define DB_ARROW 0x2 // DB is an arrow DB
#define DB_ALL 0x1 // all wells are in the trimmed DB
......@@ -299,13 +300,19 @@ typedef struct
// contains the variable length data
// data != NULL && size == 8: anno is an array of nreads+1 int64's and data[anno[i]..anno[i+1])
// contains the variable length data
// if open is set then the data is not loaded if present, rather data is an open file pointer
// set for reading.
typedef struct _track
{ struct _track *next; // Link to next track
char *name; // Symbolic name of track
int size; // Size in bytes of anno records
int nreads; // Number of reads in track
void *anno; // over [0,nreads]: read i annotation: int, int64, or 'size' records
void *data; // data[anno[i] .. anno[i+1]-1] is data if data != NULL
int *alen; // length of track data for read i (if data != NULL)
void *data; // data[anno[i] .. anno[i]+alen[i[) is data for read i (if data != NULL)
int loaded; // Is track data loaded in memory?
int64 dmax; // Largest read data segment in bytes
} DAZZ_TRACK;
// The tailing part of a .anno track file can contain meta-information produced by the
......@@ -345,6 +352,19 @@ typedef struct
FILE *quiva; // the open file pointer to the .qvs file
} DAZZ_QV;
// The information for accessing Arrow streams is in a DAZZ_ARW record that is a "pseudo-track"
// named ".@arw" and is always the first track record in the list (if present).
// Since normal track names cannot begin with a . (this is enforced), this pseudo-track
// is never confused with a normal track.
typedef struct
{ struct _track *next;
char *name;
int64 *aoff; // offset in file or memory of arrow vector for read i
void *arrow; // FILE * to the .arw file if not loaded, memory block otherwise
int loaded; // Are arrow vectors loaded in memory?
} DAZZ_ARROW;
// The DB record holds all information about the current state of an active DB including an
// array of DAZZ_READS, one per read, and a linked list of DAZZ_TRACKs the first of which
// is always a DAZZ_QV pseudo-track (if the QVs have been loaded).
......@@ -393,7 +413,7 @@ typedef struct
#define DB_NFILE "files = %9d\n" // number of files
#define DB_FDATA " %9d %s %s\n" // last read index + 1, fasta prolog, file name
#define DB_NBLOCK "blocks = %9d\n" // number of blocks
#define DB_PARAMS "size = %10lld cutoff = %9d all = %1d\n" // block size, len cutoff, all in well
#define DB_PARAMS "size = %11lld cutoff = %9d all = %1d\n" // block size, len cutoff, all in well
#define DB_BDATA " %9d %9d\n" // First read index (untrimmed), first read index (trimmed)
......@@ -430,26 +450,104 @@ int Open_DB(char *path, DAZZ_DB *db);
void Trim_DB(DAZZ_DB *db);
// Return the size in bytes of the given DB
int64 sizeof_DB(DAZZ_DB *db);
// For the DB or DAM "path" = "prefix/root.[db|dam]", find all the files for that DB, i.e. all
// those of the form "prefix/[.]root.part" and call actor with the complete path to each file
// pointed at by path, and the suffix of the path by extension. The . proceeds the root
// name if the defined constant HIDE_FILES is set. Always the first call is with the
// path "prefix/root.[db|dam]" and extension "db" or "dam". There will always be calls for
// "prefix/[.]root.idx" and "prefix/[.]root.bps". All other calls are for *tracks* and
// so this routine gives one a way to know all the tracks associated with a given DB.
// -1 is returned if the path could not be found, and 1 is returned if an error (reported
// to EPLACE) occured and INTERACTIVE is defined. Otherwise a 0 is returned.
int List_DB_Files(char *path, void actor(char *path, char *extension));
// Shut down an open 'db' by freeing all associated space, including tracks and QV structures,
// and any open file pointers. The record pointed at by db however remains (the user
// supplied it and so should free it).
void Close_DB(DAZZ_DB *db);
// Return the size in bytes of the given DB
int64 sizeof_DB(DAZZ_DB *db);
/*******************************************************************************************
*
* READ ROUTINES
*
********************************************************************************************/
// If QV pseudo track is not already in db's track list, then load it and set it up.
// The database must not have been trimmed yet. -1 is returned if a .qvs file is not
// present, and 1 is returned if an error (reported to EPLACE) occured and INTERACTIVE
// is defined. Otherwise a 0 is returned.
// Allocate and return a buffer big enough for the largest read in 'db'.
// **NB** free(x-1) if x is the value returned as *prefix* and suffix '\0'(4)-byte
// are needed by the alignment algorithms. If cannot allocate memory then return NULL
// if INTERACTIVE is defined, or print error to stderr and exit otherwise.
int Load_QVs(DAZZ_DB *db);
char *New_Read_Buffer(DAZZ_DB *db);
// Remove the QV pseudo track, all space associated with it, and close the .qvs file.
// Load into 'read' the i'th read in 'db'. As a lower case ascii string if ascii is 1, an
// upper case ascii string if ascii is 2, and a numeric string over 0(A), 1(C), 2(G), and 3(T)
// otherwise. A '\0' (or 4) is prepended and appended to the string so it has a delimeter
// for traversals in either direction. A non-zero value is returned if an error occured
// and INTERACTIVE is defined.
void Close_QVs(DAZZ_DB *db);
int Load_Read(DAZZ_DB *db, int i, char *read, int ascii);
// Load into 'read' the subread [beg,end] of the i'th read in 'db' and return a pointer to the
// the start of the subinterval (not necessarily = to read !!! ). As a lower case ascii
// string if ascii is 1, an upper case ascii string if ascii is 2, and a numeric string
// over 0(A), 1(C), 2(G), and 3(T) otherwise. A '\0' (or 4) is prepended and appended to
// the string holding the substring so it has a delimeter for traversals in either direction.
// A NULL pointer is returned if an error occured and INTERACTIVE is defined.
char *Load_Subread(DAZZ_DB *db, int i, int beg, int end, char *read, int ascii);
// Allocate a block big enough for all the uncompressed read sequences and read and uncompress
// the reads into it, reset the 'boff' in each read record to be its in-memory offset,
// and set the bases pointer to point at the block after closing the bases file. Return
// with a zero, except when an error occurs and INTERACTIVE is defined in which
// case return wtih 1.
int Load_All_Reads(DAZZ_DB *db, int ascii);
/*******************************************************************************************
*
* ARROW ROUTINES
*
********************************************************************************************/
// If the Arrow pseudo track is not already in db's track list, then load it and set it up.
// The database reads must not have been loaded with Load_All_Reads yet.
// -1 is returned if a .arw file is not present, and 1 is returned if an error (reported
// to EPLACE) occured and INTERACTIVE is defined. Otherwise a 0 is returned.
int Open_Arrow(DAZZ_DB *db);
// Exactly the same as Load_Read, save the arrow information is loaded, not the DNA sequence,
// and there is only a choice between numeric (0) or ascii (1);
int Load_Arrow(DAZZ_DB *db, int i, char *read, int ascii);
// Allocate a block big enough for all the uncompressed Arrow vectors, read them into it,
// reset the 'off' in each arrow record to be its in-memory offset, and set the
// arrow pointer to point at the block after closing the arrow file. If ascii is
// non-zero then the arrows are converted to 0123 ascii, otherwise the arrows are left
// as numeric strings over [0-3].
int Load_All_Arrows(DAZZ_DB *db, int ascii);
// Remove the Arrow pseudo track, all space associated with it, and close the .arw file.
void Close_Arrow(DAZZ_DB *);
/*******************************************************************************************
*
* TRACK ROUTINES
*
********************************************************************************************/
// Look up the file and header in the file of the indicated track. Return:
// 1: Track is for trimmed DB
......@@ -466,30 +564,50 @@ void Close_QVs(DAZZ_DB *db);
int Check_Track(DAZZ_DB *db, char *track, int *kind);
// If track is not already in the db's track list, then allocate all the storage for it,
// read it in from the appropriate file, add it to the track list, and return a pointer
// If track is not already in the db's track list, then allocate all the storage for the anno
// index, read it in from the appropriate file, add it to the track list, and return a pointer
// to the newly created DAZZ_TRACK record. If the track does not exist or cannot be
// opened for some reason, then NULL is returned if INTERACTIVE is defined. Otherwise
// the routine prints an error message to stderr and exits if an error occurs, and returns
// with NULL only if the track does not exist.
DAZZ_TRACK *Load_Track(DAZZ_DB *db, char *track);
DAZZ_TRACK *Open_Track(DAZZ_DB *db, char *track);
// Assumming file pointer for afile is correctly positioned at the start of a extra item,
// and aname is the name of the .anno file, decode the value present and places it in
// Allocate a data buffer large enough to hold the longest read data block that will occur
// in the track. If cannot allocate memory then return NULL if INTERACTIVE is defined,
// or print error to stderr and exit otherwise.
void *New_Track_Buffer(DAZZ_TRACK *track);
// Load into 'data' the read data block for read i's "track" data. Return the length of
// the data in bytes, unless an error occurs and INTERACTIVE is defined in which case
// return wtih -1.
int Load_Track_Data(DAZZ_TRACK *track, int i, void *data);
// Allocate a block big enough for all the track data and read the data into it,
// reset the 'off' in each anno pointer to be its in-memory offset, and set the
// data pointer to point at the block after closing the data file. Return with a
// zero, except when an error occurs and INTERACTIVE is defined in which
// case return wtih 1.
int Load_All_Track_Data(DAZZ_TRACK *track);
// Assumming file pointer for afile is correctly positioned at the start of an extra item,
// and aname is the name of the .anno file, decode the value present and place it in
// extra if extra->nelem == 0, otherwise reduce the value just read into extra according
// according the to the directive given by 'accum'. Leave the read poinrt at the next
// according to the directive given by 'accum'. Leave the read pointer at the next
// extra or end-of-file.
// Returns:
// 1 if at the end of file,
// 0 if item was read and folded correctly,
// -1 if there was a system IO or allocation error (if interactive), and
// -2 if the new value could not be reduced into the currenct value of extra (interactive)
// -2 if the new value could not be reduced into the current value of extra (interactive)
int Read_Extra(FILE *afile, char *aname, DAZZ_EXTRA *extra);
// Write extra record to end of file afile and advance write pointer
// If interactive, then return non-zero on error, if bash, then print
// If interactive, then return non-zero on error, if batch, then print
// and halt if an error
int Write_Extra(FILE *afile, DAZZ_EXTRA *extra);
......@@ -497,36 +615,21 @@ int Write_Extra(FILE *afile, DAZZ_EXTRA *extra);
// If track is on the db's track list, then it is removed and all storage associated with it
// is freed.
void Close_Track(DAZZ_DB *db, char *track);
// Allocate and return a buffer big enough for the largest read in 'db'.
// **NB** free(x-1) if x is the value returned as *prefix* and suffix '\0'(4)-byte
// are needed by the alignment algorithms. If cannot allocate memory then return NULL
// if INTERACTIVE is defined, or print error to stderr and exit otherwise.
char *New_Read_Buffer(DAZZ_DB *db);
// Load into 'read' the i'th read in 'db'. As a lower case ascii string if ascii is 1, an
// upper case ascii string if ascii is 2, and a numeric string over 0(A), 1(C), 2(G), and 3(T)
// otherwise. A '\0' (or 4) is prepended and appended to the string so it has a delimeter
// for traversals in either direction. A non-zero value is returned if an error occured
// and INTERACTIVE is defined.
void Close_Track(DAZZ_DB *db, DAZZ_TRACK *track);
int Load_Read(DAZZ_DB *db, int i, char *read, int ascii);
// Exactly the same as Load_Read, save the arrow information is loaded, not the DNA sequence,
// and there is only a choice between numeric (0) or ascii (1);
int Load_Arrow(DAZZ_DB *db, int i, char *read, int ascii);
/*******************************************************************************************
*
* QV ROUTINES
*
********************************************************************************************/
// Load into 'read' the subread [beg,end] of the i'th read in 'db' and return a pointer to the
// the start of the subinterval (not necessarily = to read !!! ). As a lower case ascii
// string if ascii is 1, an upper case ascii string if ascii is 2, and a numeric string
// over 0(A), 1(C), 2(G), and 3(T) otherwise. A '\0' (or 4) is prepended and appended to
// the string holding the substring so it has a delimeter for traversals in either direction.
// A NULL pointer is returned if an error occured and INTERACTIVE is defined.
// If QV pseudo track is not already in db's track list, then load it and set it up.
// The database must not have been trimmed yet. -1 is returned if a .qvs file is not
// present, and 1 is returned if an error (reported to EPLACE) occured and INTERACTIVE
// is defined. Otherwise a 0 is returned.
char *Load_Subread(DAZZ_DB *db, int i, int beg, int end, char *read, int ascii);
int Open_QVs(DAZZ_DB *db);
// Allocate a set of 5 vectors large enough to hold the longest QV stream that will occur
// in the database. If cannot allocate memory then return NULL if INTERACTIVE is defined,
......@@ -546,46 +649,38 @@ char **New_QV_Buffer(DAZZ_DB *db);
int Load_QVentry(DAZZ_DB *db, int i, char **entry, int ascii);
// Allocate a block big enough for all the uncompressed sequences, read them into it,
// reset the 'off' in each read record to be its in-memory offset, and set the
// bases pointer to point at the block after closing the bases file. If ascii is
// 1 then the reads are converted to lowercase ascii, if 2 then uppercase ascii, and
// otherwise the reads are left as numeric strings over 0(A), 1(C), 2(G), and 3(T).
// Return with a zero, except when an error occurs and INTERACTIVE is defined in which
// case return wtih 1.
// Remove the QV pseudo track, all space associated with it, and close the .qvs file.
int Read_All_Sequences(DAZZ_DB *db, int ascii);
void Close_QVs(DAZZ_DB *db);
// For the DB or DAM "path" = "prefix/root.[db|dam]", find all the files for that DB, i.e. all
// those of the form "prefix/[.]root.part" and call actor with the complete path to each file
// pointed at by path, and the suffix of the path by extension. The . proceeds the root
// name if the defined constant HIDE_FILES is set. Always the first call is with the
// path "prefix/root.[db|dam]" and extension "db" or "dam". There will always be calls for
// "prefix/[.]root.idx" and "prefix/[.]root.bps". All other calls are for *tracks* and
// so this routine gives one a way to know all the tracks associated with a given DB.
// -1 is returned if the path could not be found, and 1 is returned if an error (reported
// to EPLACE) occured and INTERACTIVE is defined. Otherwise a 0 is returned.
int List_DB_Files(char *path, void actor(char *path, char *extension));
/*******************************************************************************************
*
* @-SIGN EXPANSION ROUTINES
*
********************************************************************************************/
// Take a command line argument and interpret the '@' block number ranges.
// Parse_Block_Arg produces a Block_Looper iterator object that can then
// be invoked multiple times to iterate through all the files implied by
// Parse_Block_[LAS,DB]_Arg produces a Block_Looper iterator object that can then
// be invoked multiple times to iterate through all the file names implied by
// the @ pattern/range. Next_Block_Slice returns a string encoing the next
// slice files represented by an @-notation, and advances the iterator by
// that many files.
typedef void Block_Looper;
Block_Looper *Parse_Block_Arg(char *arg);
Block_Looper *Parse_Block_LAS_Arg(char *arg);
Block_Looper *Parse_Block_DB_Arg(char *arg);
int Next_Block_Exists(Block_Looper *e_parse);
FILE *Next_Block_Arg(Block_Looper *e_parse);
void Reset_Block_Arg(Block_Looper *e_parse); // Reset iterator to first file
int Advance_Block_Arg(Block_Looper *e_parse); // Advance iterator to next file, 0 if none
void Free_Block_Arg(Block_Looper *e_parse); // Free the iterator
char *Next_Block_Slice(Block_Looper *e_parse,int slice);
void Reset_Block_Arg(Block_Looper *e_parse); // Reset iterator to first file
char *Block_Arg_Path(Block_Looper *e_parse); // Path of current file
char *Block_Arg_Root(Block_Looper *e_parse); // Root name of current file
void Free_Block_Arg(Block_Looper *e_parse); // Free the iterator
char *Block_Arg_Path(Block_Looper *e_parse); // Path of current file, must free
char *Block_Arg_Root(Block_Looper *e_parse); // Root name of current file, must free
#endif // _DAZZ_DB
......@@ -24,17 +24,17 @@
#undef SLURM // define if want a directly executable SLURM script
static char *Usage[] =
{ "[-vbad] [-t<int>] [-w<int(6)>] [-l<int(1000)>] [-s<int(100)] [-M<int>]",
{ "[-vad] [-l<int(1500)>] [-s<int(100)] [-t<int>] [-M<int>]",
" [-P<dir(/tmp)>] [-B<int(4)>] [-T<int(4)>] [-f<name>]",
" ( [-k<int(14)>] [-h<int(35)>] [-e<double(.70)>] [-H<int>]",
" [-k<int(20)>] [-h<int(50)>] [-e<double(.85)>] <ref:db|dam> )",
" ( [-k<int(16)>] [-h<int(50)>] [-w<int(6)>] [-e<double(.75)>] [-H<int>]",
" [-k<int(20)>] [-h<int(50)>] [-w<int(6)>] [-e<double(.85)>] <ref:db|dam> )",
" [-m<track>]+ <reads:db|dam> [<first:int>[-<last:int>]]"
};
// Command Options
static int BUNIT;
static int VON, BON, CON, DON;
static int VON, CON, DON;
static int WINT, TINT, HGAP, HINT, KINT, SINT, LINT, MINT;
static int NTHREADS;
static double EREL;
......@@ -247,15 +247,13 @@ void daligner_script(int argc, char *argv[])
fprintf(out,"daligner");
if (VON)
fprintf(out," -v");
if (BON)
fprintf(out," -b");
if (CON)
fprintf(out," -a");
if (KINT != 14)
if (KINT != 16)
fprintf(out," -k%d",KINT);
if (WINT != 6)
fprintf(out," -w%d",WINT);
if (HINT != 35)
if (HINT != 50)
fprintf(out," -h%d",HINT);
if (TINT > 0)
fprintf(out," -t%d",TINT);
......@@ -263,7 +261,7 @@ void daligner_script(int argc, char *argv[])
fprintf(out," -H%d",HGAP);
if (EREL > 0.)
fprintf(out," -e%g",EREL);
if (LINT != 1000)
if (LINT != 1500)
fprintf(out," -l%d",LINT);
if (SINT != 100)
fprintf(out," -s%d",SINT);
......@@ -286,12 +284,12 @@ void daligner_script(int argc, char *argv[])
else
fprintf(out," %s",root);
hgh = (i*j)/bits + 1;
for (k = low; k < hgh; k++)
if (useblock)
if (usepath)
fprintf(out," %s/%s.%d",pwd,root,k);
fprintf(out," %s/%s.@%d-%d",pwd,root,low,hgh-1);
else
fprintf(out," %s.%d",root,k);
fprintf(out," %s.@%d-%d",root,low,hgh-1);
else
if (usepath)
fprintf(out," %s/%s",pwd,root);
......@@ -495,8 +493,8 @@ void daligner_script(int argc, char *argv[])
for (i = 1; i <= lblock; i++)
{ if (DON)
fprintf(out,"cd work%d; ",j);
printf("rm %s.%d.%s.*.las",root,i,root);
fprintf(out,"cd work%d; ",i);
fprintf(out,"rm %s.%d.%s.*.las",root,i,root);
if (DON)
fprintf(out,"; cd ..");
fprintf(out,"\n");
......@@ -766,8 +764,6 @@ void mapper_script(int argc, char *argv[])
fprintf(out,"daligner -A");
if (VON)
fprintf(out," -v");
if (BON)
fprintf(out," -b");
if (CON)
fprintf(out," -a");
fprintf(out," -k%d",KINT);
......@@ -989,7 +985,7 @@ int main(int argc, char *argv[])
BUNIT = 4;
TINT = 0;
WINT = 6;
LINT = 1000;
LINT = 1500;
SINT = 100;
MINT = -1;
PDIR = NULL;
......@@ -1008,7 +1004,7 @@ int main(int argc, char *argv[])
if (argv[i][0] == '-')
switch (argv[i][1])
{ default:
ARG_FLAGS("vbadAI");
ARG_FLAGS("vadAI");
break;
case 'e':
ARG_REAL(EREL)
......@@ -1072,7 +1068,6 @@ int main(int argc, char *argv[])
argc = j;
VON = flags['v'];
BON = flags['b'];
CON = flags['a'];
DON = flags['d'];
......@@ -1099,14 +1094,12 @@ int main(int argc, char *argv[])
fprintf(stderr," -T: Use -T threads.\n");
fprintf(stderr," -P: Do first level sort and merge in directory -P.\n");
fprintf(stderr," -m: Soft mask the blocks with the specified mask.\n");
fprintf(stderr," -b: For AT/GC biased data, compensate k-mer counts (deprecated).\n");
fprintf(stderr,"\n");
fprintf(stderr," Script control.\n");
fprintf(stderr," -v: Run all commands in script in verbose mode.\n");
fprintf(stderr," -a: Instruct LAsort & LAmerge to sort only on (a,ab).\n");
fprintf(stderr," -d: Put .las files for each target block in a sub-directory\n");
fprintf(stderr," -B: # of block compares per daligner job\n");
fprintf(stderr," -D: # of .las's to merge per LAmerge job\n");
fprintf(stderr," -f: Place script bundles in separate files with prefix <name>\n");
exit (1);
}
......@@ -1137,9 +1130,9 @@ int main(int argc, char *argv[])
}
else
{ if (KINT <= 0)
KINT = 14;
KINT = 16;
if (HINT <= 0)
HINT = 35;
HINT = 50;
}
for (j = 1; 2*j <= NTHREADS; j *= 2)
......
#include <stdlib.h>
#include <stdio.h>
#include "DB.h"
#include "align.h"
int main(int argc, char *argv[])
{ char code, which;
int64 total;
int aread, bread;
char orient, chain;
int alen, blen;
int ab, ae, bb, be;
int diffs;
int len;
int tspace, small;
uint8 *tbuffer = NULL;
uint16 *sbuffer;
(void) argv;
// Process arguments
if (argc > 1)
{ fprintf(stderr,"Usage: LAa2b <(ascii) >(binary)\n");
exit (1);
}
while (scanf(" %c",&code) == 1) // Header lines
if (code == '@' || code == '+' || code == '%')
{ scanf(" %c %lld",&which,&total);
fwrite(&code,sizeof(char),1,stdout);
fwrite(&which,sizeof(char),1,stdout);
fwrite(&total,sizeof(int64),1,stdout);
if (code == '@')
{ tbuffer = (uint8 *) malloc(2*total*sizeof(uint16));
sbuffer = (uint16 *) tbuffer;
}
}
else
{ ungetc(code,stdin);
break;
}
if (tbuffer != NULL)
{ if (code != 'X')
{ fprintf(stderr,"LAa2b: .las dump has traces but no X-line\n");
exit (1);
}
scanf(" X %d",&tspace);
small = (tspace <= TRACE_XOVR && tspace != 0);
fwrite(&code,sizeof(char),1,stdout);
fwrite(&tspace,sizeof(int),1,stdout);
}
while (scanf(" %c",&code) == 1) // For each data line do
{ fwrite(&code,sizeof(char),1,stdout);
switch (code)
{ case 'P': // Alignment pair
scanf(" %d %d %c %c",&aread,&bread,&orient,&chain);
fwrite(&aread,sizeof(int),1,stdout);
fwrite(&bread,sizeof(int),1,stdout);
fwrite(&orient,sizeof(char),1,stdout);
fwrite(&chain,sizeof(char),1,stdout);
break;
case 'L': // Read lengths
scanf(" %d %d",&alen,&blen);
fwrite(&len,sizeof(int),1,stdout);
fwrite(&blen,sizeof(int),1,stdout);
break;
case 'C': // Coordinate intervals
scanf(" %d %d %d %d",&ab,&ae,&bb,&be);
fwrite(&ab,sizeof(int),1,stdout);
fwrite(&ae,sizeof(int),1,stdout);
fwrite(&bb,sizeof(int),1,stdout);
fwrite(&be,sizeof(int),1,stdout);
break;
case 'D': // Differences
scanf(" %d",&diffs);
fwrite(&diffs,sizeof(int),1,stdout);
break;
case 'T': // Mask
if (tbuffer == NULL)
{ fprintf(stderr,"LAa2b: .las dump has traces but no @ T-line\n");
exit (1);
}
scanf(" %d",&len);
fwrite(&len,sizeof(int),1,stdout);
len *= 2;
if (small)
{ for (int i = 0; i < len; i += 2)
scanf(" %hhd %hhd",tbuffer+i,tbuffer+(i+1));
fwrite(tbuffer,sizeof(uint8),len,stdout);
}
else
{ for (int i = 0; i < len; i += 2)
scanf(" %hd %hd",sbuffer+i,sbuffer+(i+1));
fwrite(sbuffer,sizeof(uint16),len,stdout);
}
}
}
exit (0);
}
#include <stdlib.h>
#include <stdio.h>
#include "DB.h"
#include "align.h"
int main(int argc, char *argv[])
{ char code, which;
int64 total;
int aread, bread;
char orient, chain;
int alen, blen;
int ab, ae, bb, be;
int diffs;
int len;
int tspace, small;
uint8 *tbuffer = NULL;
uint16 *sbuffer;
(void) argv;
// Process arguments
if (argc > 1)
{ fprintf(stderr,"Usage: LAa2b <(ascii) >(binary)\n");
exit (1);
}
if (fread(&code,sizeof(char),1,stdin) == 0)
code = 0;
while (code == '@' || code == '+' || code == '%')
{ fread(&which,sizeof(char),1,stdin);
fread(&total,sizeof(int64),1,stdin);
printf("%c %c %lld\n",code,which,total);
if (code == '@')
{ tbuffer = (uint8 *) malloc(2*total*sizeof(uint16));
sbuffer = (uint16 *) tbuffer;
}
if (fread(&code,sizeof(char),1,stdin) == 0)
code = 0;
}
if (tbuffer != NULL && code != 0)
{ if (code != 'X')
{ fprintf(stderr,"LAb2a: .las dump has traces but no X-info\n");
exit (1);
}
fread(&tspace,sizeof(int),1,stdin);
small = (tspace <= TRACE_XOVR && tspace != 0);
printf("X %d\n",tspace);
}
while (code != 0) // For each data item do
{ switch (code)
{ case 'P': // Alignment pair
fread(&aread,sizeof(int),1,stdin);
fread(&bread,sizeof(int),1,stdin);
fread(&orient,sizeof(char),1,stdin);
fread(&chain,sizeof(char),1,stdin);
printf("%c %d %d %c %c\n",code,aread,bread,orient,chain);
break;
case 'L': // Read lengths
scanf(" %d %d",&alen,&blen);
fread(&len,sizeof(int),1,stdin);
fread(&blen,sizeof(int),1,stdin);
printf("%c %d %d\n",code,alen,blen);
break;
case 'C': // Coordinate intervals
fread(&ab,sizeof(int),1,stdin);
fread(&ae,sizeof(int),1,stdin);
fread(&bb,sizeof(int),1,stdin);
fread(&be,sizeof(int),1,stdin);
printf("%c %d %d %d %d\n",code,ab,ae,bb,be);
break;
case 'D': // Differences
fread(&diffs,sizeof(int),1,stdin);
printf("%c %d\n",code,diffs);
break;
case 'T': // Mask
if (tbuffer == NULL)
{ fprintf(stderr,"LAb2a: .las dump has traces but no @ T-info\n");
exit (1);
}
fread(&len,sizeof(int),1,stdin);
printf("%c %d\n",code,len);
len *= 2;
if (small)
{ fread(tbuffer,sizeof(uint8),len,stdin);
for (int i = 0; i < len; i += 2)
printf(" %d %d\n",tbuffer[i],tbuffer[i+1]);
}
else
{ fread(sbuffer,sizeof(uint16),len,stdin);
for (int i = 0; i < len; i += 2)
printf(" %d %d\n",sbuffer[i],sbuffer[i+1]);
}
}
if (fread(&code,sizeof(char),1,stdin) == 0)
code = 0;
}
exit (0);
}
......@@ -74,7 +74,7 @@ int main(int argc, char *argv[])
{ Block_Looper *parse;
FILE *input;
parse = Parse_Block_Arg(argv[c]);
parse = Parse_Block_LAS_Arg(argv[c]);
while ((input = Next_Block_Arg(parse)) != NULL)
{ int64 povl;
......@@ -121,7 +121,7 @@ int main(int argc, char *argv[])
otop = oblock + bsize;
for (c = 1; c < argc; c++)
{ parse = Parse_Block_Arg(argv[c]);
{ parse = Parse_Block_LAS_Arg(argv[c]);
while ((input = Next_Block_Arg(parse)) != NULL)
{ if (fread(&povl,sizeof(int64),1,input) != 1)
......
......@@ -69,7 +69,6 @@ int main(int argc, char *argv[])
{ Block_Looper *parse;
int status;
FILE *input;
ISTWO = 0;
status = Open_DB(argv[1],db1);
......@@ -83,8 +82,8 @@ int main(int argc, char *argv[])
if (argc <= 3)
db2 = db1;
else
{ parse = Parse_Block_Arg(argv[2]);
if ((input = Next_Block_Arg(parse)) == NULL)
{ parse = Parse_Block_LAS_Arg(argv[2]);
if (! Next_Block_Exists(parse))
{ ISTWO = 1;
status = Open_DB(argv[2],db2);
if (status < 0)
......@@ -96,9 +95,7 @@ int main(int argc, char *argv[])
Trim_DB(db2);
}
else
{ db2 = db1;
fclose(input);
}
db2 = db1;
Free_Block_Arg(parse);
}
Trim_DB(db1);
......@@ -137,7 +134,7 @@ int main(int argc, char *argv[])
// Establish IO and (novl,tspace) header
parse = Parse_Block_Arg(argv[i]);
parse = Parse_Block_LAS_Arg(argv[i]);
while ((input = Next_Block_Arg(parse)) != NULL)
{ disp = Block_Arg_Root(parse);
......
......@@ -38,7 +38,7 @@ int main(int argc, char *argv[])
FILE *input;
int64 novl;
int tspace, tbytes, small;
int tmax;
int trmax;
int reps, *pts;
int input_pts;
......@@ -299,7 +299,7 @@ int main(int argc, char *argv[])
{ int j, al, tlen;
int in, npt, idx, ar;
int64 novls, odeg, omax, sdeg, smax, ttot;
int64 novls, odeg, omax, sdeg, smax, tmax, ttot;
in = 0;
npt = pts[0];
......@@ -307,6 +307,7 @@ int main(int argc, char *argv[])
// For each record do
trmax = 0;
novls = omax = smax = ttot = tmax = 0;
sdeg = odeg = 0;
......@@ -318,6 +319,8 @@ int main(int argc, char *argv[])
{ Read_Overlap(input,ovl);
tlen = ovl->path.tlen;
fseeko(input,tlen*tbytes,SEEK_CUR);
if (tlen > trmax)
trmax = tlen;
// Determine if it should be displayed
......@@ -382,7 +385,8 @@ int main(int argc, char *argv[])
if (DOTRACE)
{ printf("+ T %lld\n",ttot);
printf("%% T %lld\n",smax);
printf("@ T %d\n",tmax);
printf("@ T %lld\n",tmax);
printf("X %d\n",tspace);
}
}
......@@ -397,7 +401,7 @@ int main(int argc, char *argv[])
fread(&novl,sizeof(int64),1,input);
fread(&tspace,sizeof(int),1,input);
trace = (uint16 *) Malloc(sizeof(uint16)*tmax,"Allocating trace vector");
trace = (uint16 *) Malloc(sizeof(uint16)*trmax,"Allocating trace vector");
if (trace == NULL)
exit (1);
......@@ -488,7 +492,7 @@ int main(int argc, char *argv[])
Decompress_TraceTo16(ovl);
printf("T %d\n",tlen>>1);
for (k = 0; k < tlen; k += 2)
printf(" %3d %3d\n",trace[k],trace[k+1]);
printf(" %d %d\n",trace[k],trace[k+1]);
}
}
......
/*******************************************************************************************
*
* Create an index with extension .las.idx for a .las file.
* Utility expects the .las file to be sorted.
* Header contains total # of trace points, max # of trace points for
* a given overlap, max # of trace points in all the overlaps for a given aread, and
* max # of overlaps for a given aread. The remainder are the offsets into each pile.
*
* Author: Gene Myers
* Date : Sept 2015
*
*******************************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "DB.h"
#include "align.h"
static char *Usage = "[-v] <source:las> ...";
#define MEMORY 1000 // How many megabytes for output buffer
int main(int argc, char *argv[])
{ Block_Looper *parse;
char *iblock;
FILE *input, *output;
int64 novl, bsize, ovlsize, ptrsize;
int tspace, tbytes;
int64 tmax, ttot;
int64 omax, smax;
int64 odeg, sdeg;
int i;
int VERBOSE;
// Process options
{ int j, k;
int flags[128];
ARG_INIT("LAindex")
j = 1;
for (i = 1; i < argc; i++)
if (argv[i][0] == '-')
{ ARG_FLAGS("v") }
else
argv[j++] = argv[i];
argc = j;
VERBOSE = flags['v'];
if (argc <= 1)
{ fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
exit (1);
}
}
// For each file do
ptrsize = sizeof(void *);
ovlsize = sizeof(Overlap) - ptrsize;
bsize = MEMORY * 1000000ll;
iblock = (char *) Malloc(bsize + ptrsize,"Allocating input block");
if (iblock == NULL)
exit (1);
iblock += ptrsize;
for (i = 1; i < argc; i++)
{ parse = Parse_Block_Arg(argv[i]);
while ((input = Next_Block_Arg(parse)) != NULL)
{ if (fread(&novl,sizeof(int64),1,input) != 1)
SYSTEM_READ_ERROR
if (fread(&tspace,sizeof(int),1,input) != 1)
SYSTEM_READ_ERROR
if (tspace <= TRACE_XOVR && tspace != 0)
tbytes = sizeof(uint8);
else
tbytes = sizeof(uint16);
output = Fopen(Catenate(Block_Arg_Path(parse),"/.",Block_Arg_Root(parse),".las.idx"),"w");
if (output == NULL)
exit (1);
if (VERBOSE)
{ printf(" Indexing %s: ",Block_Arg_Root(parse));
Print_Number(novl,0,stdout);
fprintf(stdout," records ... ");
fflush(stdout);
}
fwrite(&novl,sizeof(int64),1,output);
fwrite(&novl,sizeof(int64),1,output);
fwrite(&novl,sizeof(int64),1,output);
fwrite(&novl,sizeof(int64),1,output);
{ int j, alst;
Overlap *w;
int64 tsize;
int64 optr;
char *iptr, *itop;
int64 tlen;
optr = sizeof(int64) + sizeof(int32);
iptr = iblock;
itop = iblock + fread(iblock,1,bsize,input);
alst = -1;
odeg = sdeg = 0;
omax = smax = 0;
tmax = ttot = 0;
for (j = 0; j < novl; j++)
{ if (iptr + ovlsize > itop)
{ int64 remains = itop-iptr;
if (remains > 0)
memmove(iblock,iptr,remains);
iptr = iblock;
itop = iblock + remains;
itop += fread(itop,1,bsize-remains,input);
}
w = (Overlap *) (iptr - ptrsize);
tlen = w->path.tlen;
if (alst < 0)
{ fwrite(&optr,sizeof(int64),1,output);
alst = w->aread;
}
else
while (alst < w->aread)
{ if (sdeg > smax)
smax = sdeg;
if (odeg > omax)
omax = odeg;
fwrite(&optr,sizeof(int64),1,output);
odeg = sdeg = 0;
alst += 1;
}
if (tlen > tmax)
tmax = tlen;
ttot += tlen;
odeg += 1;
sdeg += tlen;
iptr += ovlsize;
tsize = tlen*tbytes;
if (iptr + tsize > itop)
{ int64 remains = itop-iptr;
if (remains > 0)
memmove(iblock,iptr,remains);
iptr = iblock;
itop = iblock + remains;
itop += fread(itop,1,bsize-remains,input);
}
optr += ovlsize + tsize;
iptr += tsize;
}
fwrite(&optr,sizeof(int64),1,output);
}
if (sdeg > smax)
smax = sdeg;
if (odeg > omax)
omax = odeg;
rewind(output);
fwrite(&omax,sizeof(int64),1,output);
fwrite(&ttot,sizeof(int64),1,output);
fwrite(&smax,sizeof(int64),1,output);
fwrite(&tmax,sizeof(int64),1,output);
if (VERBOSE)
{ Print_Number(ttot,0,stdout);
printf(" trace points\n");
fflush(stdout);
}
fclose(input);
fclose(output);
}
Free_Block_Arg(parse);
}
free(iblock-ptrsize);
exit (0);
}
......@@ -251,7 +251,7 @@ int main(int argc, char *argv[])
{ Block_Looper *parse;
FILE *input;
parse = Parse_Block_Arg(argv[c]);
parse = Parse_Block_LAS_Arg(argv[c]);
clen += strlen(Block_Arg_Path(parse)) + strlen(Block_Arg_Root(parse)) + 30;
......@@ -305,7 +305,7 @@ int main(int argc, char *argv[])
fsum = 0;
c = 2;
parse = Parse_Block_Arg(argv[c]);
parse = Parse_Block_LAS_Arg(argv[c]);
pid = getpid();
for (i = 1; i <= dim; i++)
......@@ -328,7 +328,7 @@ int main(int argc, char *argv[])
Free_Block_Arg(parse);
parse = Parse_Block_Arg(argv[c]);
parse = Parse_Block_LAS_Arg(argv[c]);
}
if (c < argc && fsum < cut)
{ int n = cut-fsum;
......@@ -370,7 +370,7 @@ int main(int argc, char *argv[])
{ Block_Looper *parse;
FILE *input;
parse = Parse_Block_Arg(argv[c]);
parse = Parse_Block_LAS_Arg(argv[c]);
while ((input = Next_Block_Arg(parse)) != NULL)
{ int64 novl;
......
......@@ -543,14 +543,14 @@ int main(int argc, char *argv[])
((ovl->path.aepos - ovl->path.abpos) + (ovl->path.bepos - ovl->path.bbpos)) );
printf(" (");
if (FLIP)
{ Print_Number(aln->alen,ai_wide,stdout);
{ Print_Number(aln->blen,bi_wide,stdout);
printf(" x ");
Print_Number(aln->blen,bi_wide,stdout);
Print_Number(aln->alen,ai_wide,stdout);
}
else
{ Print_Number(aln->blen,bi_wide,stdout);
{ Print_Number(aln->alen,ai_wide,stdout);
printf(" x ");
Print_Number(aln->alen,ai_wide,stdout);
Print_Number(aln->blen,bi_wide,stdout);
}
printf(" bps,");
if (CARTOON)
......
......@@ -148,7 +148,7 @@ int main(int argc, char *argv[])
int64 novl, sov;
Block_Looper *parse;
parse = Parse_Block_Arg(argv[i]);
parse = Parse_Block_LAS_Arg(argv[i]);
while ((input = Next_Block_Arg(parse)) != NULL)
{
......@@ -275,10 +275,10 @@ int main(int argc, char *argv[])
SYSTEM_READ_ERROR
}
}
}
free(perm);
fclose(foutput);
}
Free_Block_Arg(parse);
}
......
......@@ -2,12 +2,12 @@ DEST_DIR = ~/bin
CFLAGS = -O3 -Wall -Wextra -Wno-unused-result -fno-strict-aliasing
ALL = daligner HPC.daligner LAsort LAmerge LAsplit LAcat LAshow LAdump LAcheck LAindex
ALL = daligner HPC.daligner LAsort LAmerge LAsplit LAcat LAshow LAdump LAcheck LAa2b LAb2a dumpLA
all: $(ALL)
daligner: daligner.c filter.c filter.h align.c align.h DB.c DB.h QV.c QV.h
gcc $(CFLAGS) -o daligner daligner.c filter.c align.c DB.c QV.c -lpthread -lm
daligner: daligner.c filter.c filter.h radix.c radix.h align.c align.h DB.c DB.h QV.c QV.h
gcc $(CFLAGS) -o daligner -DWORD_SIZE=16 daligner.c filter.c radix.c align.c DB.c QV.c -lpthread -lm
HPC.daligner: HPC.daligner.c DB.c DB.h QV.c QV.h
gcc $(CFLAGS) -o HPC.daligner HPC.daligner.c DB.c QV.c -lm
......@@ -33,16 +33,18 @@ LAsplit: LAsplit.c align.h DB.c DB.h QV.c QV.h
LAcheck: LAcheck.c align.c align.h DB.c DB.h QV.c QV.h
gcc $(CFLAGS) -o LAcheck LAcheck.c align.c DB.c QV.c -lm
LAupgrade.Dec.31.2014: LAupgrade.Dec.31.2014.c align.c align.h DB.c DB.h QV.c QV.h
gcc $(CFLAGS) -o LAupgrade.Dec.31.2014 LAupgrade.Dec.31.2014.c align.c DB.c QV.c -lm
LAa2b: LAa2b.c align.c align.h DB.c DB.h QV.c QV.h
gcc $(CFLAGS) -o LAa2b LAa2b.c align.c DB.c QV.c -lm
LAindex: LAindex.c align.c align.h DB.c DB.h QV.c QV.h
gcc $(CFLAGS) -o LAindex LAindex.c align.c DB.c QV.c -lm
LAb2a: LAb2a.c align.c align.h DB.c DB.h QV.c QV.h
gcc $(CFLAGS) -o LAb2a LAb2a.c align.c DB.c QV.c -lm
dumpLA: dumpLA.c align.c align.h DB.c DB.h QV.c QV.h
gcc $(CFLAGS) -o dumpLA dumpLA.c align.c DB.c QV.c -lm
clean:
rm -f $(ALL)
rm -fr *.dSYM
rm -f LAupgrade.Dec.31.2014
rm -f daligner.tar.gz
install:
......
......@@ -2,9 +2,19 @@
## _Author: Gene Myers_
## _First: April 10, 2016_
## _Current: April 19, 2019_
For typeset documentation, examples of use, and design philosophy please go to
my [blog](https://dazzlerblog.wordpress.com/command-guides/daligner-command-reference-guide).
+
+
+### Version Numbers
+
+v1.0 has been released, but if you need to refer to a later revision
+from the stable master branch, please use ``v1.0.yyyymmdd`` where
+``yyyy-mm-dd`` is the date of the commit used. This is important for
+method details in scientific papers, and for software packaging
+(e.g. Conda, HomeBrew, or Linux distribution packages).
The commands below permit one to find all significant local alignments between reads
encoded in Dazzler database. The assumption is that the reads are from a PACBIO RS II
......@@ -19,20 +29,21 @@ alignment but simply a set of trace points, typically every 100bp or so, that al
efficient reconstruction of alignments on demand.
All programs add suffixes (e.g. .db, .las) as needed.
For the commands that take multiple .las files as arguments, i.e. LAsort, LAmerge, LAindex, LAcat,
For the commands that take multiple .las file blocks as arguments, i.e. LAsort, LAmerge, LAcat,
and LAcheck, one can place a @-sign in the name, which is then interpreted as the sequence of files
obtained by replacing the @-sign by 1, 2, 3, ... in sequence until a number is reached for
which no file matches. One can also place a @-sign followed by an integer, say, i, in which
case the sequence starts at i. Lastly, one can also place @i-j where i and j are integers, in
which case the sequence is from i to j, inclusive.
which case the sequence is from i to j, inclusive. The same is now also true of commands such
as daligner that take multiple .db blocks.
The formal UNIX command line
descriptions and options for the DALIGNER module commands are as follows:
```
1. daligner [-vabAI]
[-k<int(14)>] [-w<int(6)>] [-h<int(35)>] [-t<int>] [-M<int>] [-P<dir(/tmp)>]
[-e<double(.70)] [-l<int(1000)] [-s<int(100)>] [-H<int>] [-T<int(4)>]
1. daligner [-vaAI]
[-k<int(16)>] [-w<int(6)>] [-h<int(50)>] [-t<int>] [-M<int>] [-P<dir(/tmp)>]
[-e<double(.75)] [-l<int(1500)] [-s<int(100)>] [-H<int>] [-T<int(4)>]
[-m<track>]+ <subject:db|dam> <target:db|dam> ...
```
......@@ -52,10 +63,6 @@ between reads. Specifically, our search code looks for a pair of diagonal bands
width 2<sup>w</sup> (default 2<sup>6</sup> = 64) that contain a collection of exact matching k-mers
(default 14) between the two reads, such that the total number of bases covered by the
k-mer hits is h (default 35). k cannot be larger than 32 in the current implementation.
If the -b option is set, then the daligner assumes the data has a strong compositional
bias (e.g. >65% AT rich), and at the cost of a bit more time, dynamically adjusts k-mer
sizes depending on compositional bias, so that the mers used have an effective
specificity of 4<sup>k</sup>.
If there are one or more interval tracks specified with the -m option, then the reads
of the DB or DB's to which the mask applies are soft masked with the union of the
......@@ -245,8 +252,10 @@ scoring chain and + indicates an alternate near optimal chain (controlled by the
-n parameter to damapper). Each additional LA of a chain is marked with a - character.
```
5. LAdump [-cdtlo] <src1:db|dam> [ <src2:db|dam> ]
5a. LAdump [-cdtlo] <src1:db|dam> [ <src2:db|dam> ]
<align:las> [ <reads:FILE> | <reads:range> ... ]
5b. dumpLA <align.las>
```
Like LAshow, LAdump allows one to display the local alignments (LAs) of a subset of the
......@@ -287,23 +296,26 @@ They give size information about what is contained in the output. Specifically,
'+ X #' gives the total number of LAs (X=P), or the total number of trace point
intervals (X=T) in the file . '% X #' gives the maximum number of LAs (X=P) or
the maximum number of trace point intervals (X=T) in a given *pile* (collection of
LAs all with the same a-read (applies only to sorted .las files). Finally @ T #
LAs all with the same a-read (applies only to sorted .las files). A final line: '@ T #',
gives the maximum # of trace point intervals in any trace within the file.
After these lines and before the start of the lines describing alignment records is a
single line of the form 'X #' where the number is the trace point spacing for all
alignments.
The command dumpLA reads a 1-code file from the standard input and if possible produces a .las
file for it. The 1-code file is any legitimate coding of alignments as might be produced by LAdump.
The 1-code file must contain the P-, C-, and T-lines as well as the X-line and the header lines
beginning with +, %, or @. So for example, a 1-code file produced by LAdump with the -c and -t
options is invertible.
```
6. LAindex -v <source:las> ...
6a. LAa2b
6b. LAb2a
```
LAindex takes a series of one or more sorted .las files and produces a "pile
index" for each one. If the input file has name "X.las", then the name of its
index file is ".X.las.idx". For each A-read pile encoded in the .las file,
the index contains the offset to the first local alignment with A in the file.
The index starts with four 64-bit integers that encode the numbers % P, + T, % T,
and @ T described for LAdump above, and then an offset for each pile beginning
with the first A-read in the file (which may not be read 0). The index is meant
to allow programs that process piles to more efficiently read just the piles
they need at any momment int time, as opposed to having to sequentially scan
through the .las file.
Pipes (stdin to stdout) that convert an ASCII output produced by LAdump into a compressed
binary representation (LAa2b) and vice verse (LAb2a). The idea is to save disk space by
keeping the dumps in a more compressed format.
```
7. LAcat [-v] <source:las> ... > <target>.las
......@@ -351,9 +363,9 @@ information, and if it does, then it checks the validity of chains and checks th
sorting order of chains as a unit according to the -a option.
```
10. HPC.daligner [-vbad] [-t<int>] [-w<int(6)>] [-l<int(1000)] [-s<int(100)] [-M<int>]
10. HPC.daligner [-vad] [-t<int>] [-w<int(6)>] [-l<int(1500)] [-s<int(100)] [-M<int>]
[-P<dir(/tmp)>] [-B<int(4)>] [-T<int(4)>] [-f<name>]
( [-k<int(14)>] [-h<int(35)>] [-e<double(.70)] [-H<int>]
( [-k<int(16)>] [-h<int(50)>] [-e<double(.75)] [-H<int>]
[-k<int(20)>] [-h<int(50)>] [-e<double(.85)] <ref:db|dam> )
[-m<track>]+ <reads:db|dam> [<first:int>[-<last:int>]]
```
......
......@@ -58,6 +58,8 @@ typedef struct // Hidden from the user, working space for each threa
void *points;
int tramax;
void *trace;
int alnmax;
void *alnpts;
} _Work_Data;
Work_Data *New_Work_Data()
......@@ -72,6 +74,8 @@ Work_Data *New_Work_Data()
work->points = NULL;
work->tramax = 0;
work->trace = NULL;
work->alnmax = 0;
work->alnpts = NULL;
work->celmax = 0;
work->cells = NULL;
return ((Work_Data *) work);
......@@ -103,6 +107,19 @@ static int enlarge_points(_Work_Data *work, int newmax)
return (0);
}
static int enlarge_alnpts(_Work_Data *work, int newmax)
{ void *vec;
int max;
max = ((int) (newmax*1.2)) + 10000;
vec = Realloc(work->alnpts,max,"Enlarging point vector");
if (vec == NULL)
EXIT(1);
work->alnmax = max;
work->alnpts = vec;
return (0);
}
static int enlarge_trace(_Work_Data *work, int newmax)
{ void *vec;
int max;
......@@ -126,6 +143,8 @@ void Free_Work_Data(Work_Data *ework)
free(work->trace);
if (work->points != NULL)
free(work->points);
if (work->alnpts != NULL)
free(work->alnpts);
free(work);
}
......@@ -4327,6 +4346,7 @@ int Compute_Alignment(Alignment *align, Work_Data *ework, int task, int tspace)
aseq = align->aseq+path->abpos;
bseq = align->bseq+path->bbpos;
L = 0;
if (task != DIFF_ONLY)
{ if (task == DIFF_TRACE || task == PLUS_TRACE)
L = 2*(((path->aepos + (tspace-1))/tspace - path->abpos/tspace) + 1)*sizeof(uint16);
......@@ -4334,13 +4354,13 @@ int Compute_Alignment(Alignment *align, Work_Data *ework, int task, int tspace)
L = bsub*sizeof(int);
else
L = asub*sizeof(int);
if (L > work->tramax)
if (enlarge_trace(work,L))
if (L > work->alnmax)
if (enlarge_alnpts(work,L))
EXIT(1);
}
trace = ((int *) work->trace);
strace = ((uint16 *) work->trace);
trace = ((int *) work->alnpts);
strace = ((uint16 *) work->alnpts);
if (asub > bsub)
D = (4*asub+6)*sizeof(int);
......
/*********************************************************************************************\
/********************************************************************************************
*
* Find all local alignment between long, noisy DNA reads:
* Compare sequences in 'subject' database against those in the list of 'target' databases
......@@ -47,16 +47,17 @@
#endif
#include "DB.h"
#include "radix.h"
#include "filter.h"
static char *Usage[] =
{ "[-vabAI] [-k<int(14)>] [-w<int(6)>] [-h<int(35)>] [-t<int>] [-M<int>] [-P<dir(/tmp)>]",
" [-e<double(.70)] [-l<int(1000)>] [-s<int(100)>] [-H<int>] [-T<int(4)>]",
" [-m<track>]+ <subject:db|dam> <target:db|dam> ...",
{ "[-vaAI] [-k<int(16)>] [-w<int(6)>] [-h<int(50)>] [-t<int>] [-M<int>]",
" [-e<double(.75)] [-l<int(1500)>] [-s<int(100)>] [-H<int>]",
" [-T<int(4)>] [-P<dir(/tmp)>] [-m<track>]+",
" <subject:db|dam> <target:db|dam> ...",
};
int VERBOSE; // Globally visible to filter.c
int BIASED;
int MINOVER;
int HGAP_MIN;
int SYMMETRIC;
......@@ -268,6 +269,7 @@ static DAZZ_TRACK *merge_tracks(DAZZ_DB *block, int mtop, int64 nsize)
ntrack->data = data = (int *) Malloc(sizeof(int)*nsize,"Allocating merged track");
ntrack->size = sizeof(int);
ntrack->next = NULL;
ntrack->loaded = 1;
if (anno == NULL || data == NULL || ntrack->name == NULL)
Clean_Exit(1);
......@@ -361,7 +363,7 @@ static int read_DB(DAZZ_DB *block, char *name, char **mask, int *mstat, int mtop
mstat[i] = status;
}
if (status == 0 && kind == MASK_TRACK)
Load_Track(block,mask[i]);
Open_Track(block,mask[i]);
}
Trim_DB(block);
......@@ -376,7 +378,8 @@ static int read_DB(DAZZ_DB *block, char *name, char **mask, int *mstat, int mtop
if (status < 0 || kind != MASK_TRACK)
continue;
stop += 1;
track = Load_Track(block,mask[i]);
track = Open_Track(block,mask[i]);
Load_All_Track_Data(track);
anno = (int64 *) (track->anno);
for (j = 0; j <= block->nreads; j++)
......@@ -391,7 +394,7 @@ static int read_DB(DAZZ_DB *block, char *name, char **mask, int *mstat, int mtop
track = merge_tracks(block,stop,nsize);
while (block->tracks != NULL)
Close_Track(block,block->tracks->name);
Close_Track(block,block->tracks);
block->tracks = track;
}
......@@ -405,121 +408,11 @@ static int read_DB(DAZZ_DB *block, char *name, char **mask, int *mstat, int mtop
}
}
Read_All_Sequences(block,0);
Load_All_Reads(block,0);
return (isdam);
}
static void complement(char *s, int len)
{ char *t;
int c;
t = s + (len-1);
while (s < t)
{ c = *s;
*s = (char) (3-*t);
*t = (char) (3-c);
s += 1;
t -= 1;
}
if (s == t)
*s = (char) (3-*s);
}
static DAZZ_DB *complement_DB(DAZZ_DB *block, int inplace)
{ static DAZZ_DB _cblock, *cblock = &_cblock;
int nreads;
DAZZ_READ *reads;
char *seq;
nreads = block->nreads;
reads = block->reads;
if (inplace)
{ seq = (char *) block->bases;
cblock = block;
}
else
{ seq = (char *) Malloc(block->reads[nreads].boff+1,"Allocating dazzler sequence block");
if (seq == NULL)
Clean_Exit(1);
*seq++ = 4;
memmove(seq,block->bases,block->reads[nreads].boff);
*cblock = *block;
cblock->bases = (void *) seq;
cblock->tracks = NULL;
}
{ int i;
float x;
x = cblock->freq[0];
cblock->freq[0] = cblock->freq[3];
cblock->freq[3] = x;
x = cblock->freq[1];
cblock->freq[1] = cblock->freq[2];
cblock->freq[2] = x;
for (i = 0; i < nreads; i++)
complement(seq+reads[i].boff,reads[i].rlen);
}
{ DAZZ_TRACK *src, *trg;
int *data, *tata;
int i, x, rlen;
int64 *tano, *anno;
int64 j, k;
for (src = block->tracks; src != NULL; src = src->next)
{ tano = (int64 *) src->anno;
tata = (int *) src->data;
if (inplace)
{ data = tata;
anno = tano;
trg = src;
}
else
{ data = (int *) Malloc(sizeof(int)*tano[nreads],
"Allocating dazzler interval track data");
anno = (int64 *) Malloc(sizeof(int64)*(nreads+1),
"Allocating dazzler interval track index");
trg = (DAZZ_TRACK *) Malloc(sizeof(DAZZ_TRACK),
"Allocating dazzler interval track header");
if (data == NULL || trg == NULL || anno == NULL)
Clean_Exit(1);
trg->name = Strdup(src->name,"Copying track name");
if (trg->name == NULL)
Clean_Exit(1);
trg->size = 4;
trg->anno = (void *) anno;
trg->data = (void *) data;
trg->next = cblock->tracks;
cblock->tracks = trg;
}
for (i = 0; i < nreads; i++)
{ rlen = reads[i].rlen;
anno[i] = tano[i];
j = tano[i+1]-1;
k = tano[i];
while (k < j)
{ x = tata[j];
data[j--] = rlen - tata[k];
data[k++] = rlen - x;
}
if (k == j)
data[k] = rlen - tata[k];
}
anno[nreads] = tano[nreads];
}
}
return (cblock);
}
static char *CommandBuffer(char *aname, char *bname, char *spath)
{ static char *cat = NULL;
static int max = -1;
......@@ -553,6 +446,7 @@ int main(int argc, char *argv[])
{ DAZZ_DB _ablock, _bblock;
DAZZ_DB *ablock = &_ablock, *bblock = &_bblock;
char *afile, *bfile;
char *apath, *bpath;
char *aroot, *broot;
void *aindex, *bindex;
int alen, blen;
......@@ -575,16 +469,16 @@ int main(int argc, char *argv[])
char *eptr;
DIR *dirp;
ARG_INIT("daligner")
ARG_INIT("daligner2.0")
KMER_LEN = 14;
HIT_MIN = 35;
KMER_LEN = 16;
HIT_MIN = 50;
BIN_SHIFT = 6;
MAX_REPS = 0;
HGAP_MIN = 0;
AVE_ERROR = .70;
AVE_ERROR = .75;
SPACING = 100;
MINOVER = 1000; // Globally visible to filter.c
MINOVER = 1500; // Globally visible to filter.c
NTHREADS = 4;
SORT_PATH = "/tmp";
......@@ -607,7 +501,7 @@ int main(int argc, char *argv[])
if (argv[i][0] == '-')
switch (argv[i][1])
{ default:
ARG_FLAGS("vabAI")
ARG_FLAGS("vaAI")
break;
case 'k':
ARG_POSITIVE(KMER_LEN,"K-mer length")
......@@ -676,7 +570,6 @@ int main(int argc, char *argv[])
argc = j;
VERBOSE = flags['v']; // Globally declared in filter.h
BIASED = flags['b']; // Globally declared in filter.h
SYMMETRIC = 1-flags['A'];
IDENTITY = flags['I'];
MAP_ORDER = flags['a'];
......@@ -685,6 +578,7 @@ int main(int argc, char *argv[])
{ fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage[0]);
fprintf(stderr," %*s %s\n",(int) strlen(Prog_Name),"",Usage[1]);
fprintf(stderr," %*s %s\n",(int) strlen(Prog_Name),"",Usage[2]);
fprintf(stderr," %*s %s\n",(int) strlen(Prog_Name),"",Usage[3]);
fprintf(stderr,"\n");
fprintf(stderr," -k: k-mer size (must be <= 32).\n");
fprintf(stderr," -w: Look for k-mers in averlapping bands of size 2^-w.\n");
......@@ -701,7 +595,6 @@ int main(int argc, char *argv[])
fprintf(stderr," -T: Use -T threads.\n");
fprintf(stderr," -P: Do block level sorts and merges in directory -P.\n");
fprintf(stderr," -m: Soft mask the blocks with the specified mask.\n");
fprintf(stderr," -b: For AT/GC biased data, compensate k-mer counts (deprecated).\n");
fprintf(stderr,"\n");
fprintf(stderr," -v: Verbose mode, output statistics as proceed.\n");
fprintf(stderr," -a: sort .las by A-read,A-position pairs for map usecase\n");
......@@ -716,10 +609,8 @@ int main(int argc, char *argv[])
}
MINOVER *= 2;
if (Set_Filter_Params(KMER_LEN,BIN_SHIFT,MAX_REPS,HIT_MIN,NTHREADS))
{ fprintf(stderr,"Illegal combination of filter parameters\n");
exit (1);
}
Set_Filter_Params(KMER_LEN,BIN_SHIFT,MAX_REPS,HIT_MIN,NTHREADS);
Set_Radix_Params(NTHREADS,VERBOSE);
// Create directory in SORT_PATH for file operations
......@@ -744,34 +635,44 @@ int main(int argc, char *argv[])
aroot = Root(afile,".dam");
else
aroot = Root(afile,".db");
apath = PathTo(afile);
asettings = New_Align_Spec( AVE_ERROR, SPACING, ablock->freq, 1);
// Compare against reads in B in both orientations
{ int i, j;
Block_Looper *parse;
char *command;
aindex = NULL;
broot = NULL;
for (i = 2; i < argc; i++)
{ bfile = argv[i];
if (strcmp(afile,bfile) != 0)
{ isdam = read_DB(bblock,bfile,MASK,MSTAT,MTOP,KMER_LEN);
if (isdam)
broot = Root(bfile,".dam");
else
broot = Root(bfile,".db");
{ parse = Parse_Block_DB_Arg(argv[i]);
while (Advance_Block_Arg(parse))
{ broot = Block_Arg_Root(parse);
bpath = Block_Arg_Path(parse);
if (strcmp(bpath,apath) != 0 || strcmp(broot,aroot) != 0)
{ bfile = Strdup(Catenate(bpath,"/",broot,""),"Allocating path");
read_DB(bblock,bfile,MASK,MSTAT,MTOP,KMER_LEN);
free(bfile);
}
else
{ free(broot);
broot = aroot;
}
free(bpath);
if (i == 2)
{ for (j = 0; j < MTOP; j++)
{ if (MSTAT[j] == -2)
printf("%s: Warning: -m%s option given but no track found.\n",Prog_Name,MASK[j]);
printf("%s: Warning: -m%s option given but no track found.\n",
Prog_Name,MASK[j]);
else if (MSTAT[j] == -1)
printf("%s: Warning: %s track not sync'd with relevant db.\n",Prog_Name,MASK[j]);
printf("%s: Warning: %s track not sync'd with relevant db.\n",
Prog_Name,MASK[j]);
else if (MSTAT[j] == -3)
printf("%s: Warning: %s track is not a mask track.\n",Prog_Name,MASK[j]);
}
......@@ -785,66 +686,50 @@ int main(int argc, char *argv[])
{ if (VERBOSE)
printf("\nBuilding index for %s\n",broot);
bindex = Sort_Kmers(bblock,&blen);
Match_Filter(aroot,ablock,broot,bblock,aindex,alen,bindex,blen,0,asettings);
bblock = complement_DB(bblock,1);
if (VERBOSE)
printf("\nBuilding index for c(%s)\n",broot);
bindex = Sort_Kmers(bblock,&blen);
Match_Filter(aroot,ablock,broot,bblock,aindex,alen,bindex,blen,1,asettings);
Match_Filter(aroot,ablock,broot,bblock,aindex,alen,bindex,blen,asettings);
Close_DB(bblock);
}
else
{ Match_Filter(aroot,ablock,aroot,ablock,aindex,alen,aindex,alen,0,asettings);
bblock = complement_DB(ablock,0);
if (VERBOSE)
printf("\nBuilding index for c(%s)\n",aroot);
bindex = Sort_Kmers(bblock,&blen);
Match_Filter(aroot,ablock,aroot,bblock,aindex,alen,bindex,blen,1,asettings);
bblock->reads = NULL; // ablock & bblock share "reads" vector, don't let Close_DB
// free it !
}
Close_DB(bblock);
Match_Filter(aroot,ablock,aroot,ablock,aindex,alen,aindex,alen,asettings);
command = CommandBuffer(aroot,broot,SORT_PATH);
#define SYSTEM_CHECK(command) \
if (VERBOSE) \
printf("%s\n",command); \
printf("\n%s\n",command); \
if (system(command) != 0) \
{ fprintf(stderr,"\n%s: Command Failed:\n%*s %s\n", \
Prog_Name,(int) strlen(Prog_Name),"",command); \
Clean_Exit(1); \
}
sprintf(command,"LAsort %s %s %s/%s.%s.N%c %s/%s.%s.C%c",VERBOSE?"-v":"",
MAP_ORDER?"-a":"",SORT_PATH,aroot,broot,BLOCK_SYMBOL,
SORT_PATH,aroot,broot,BLOCK_SYMBOL);
sprintf(command,"LAsort %s %s %s/%s.%s.N%c",VERBOSE?"-v":"",
MAP_ORDER?"-a":"",SORT_PATH,aroot,broot,BLOCK_SYMBOL);
SYSTEM_CHECK(command)
sprintf(command,"LAmerge %s %s %s.%s.las %s/%s.%s.N%c.S %s/%s.%s.C%c.S",VERBOSE?"-v":"",
MAP_ORDER?"-a":"",aroot,broot,SORT_PATH,aroot,broot,BLOCK_SYMBOL,
SORT_PATH,aroot,broot,BLOCK_SYMBOL);
sprintf(command,"LAmerge %s %s %s.%s.las %s/%s.%s.N%c.S",VERBOSE?"-v":"",
MAP_ORDER?"-a":"",aroot,broot,SORT_PATH,aroot,broot,BLOCK_SYMBOL);
SYSTEM_CHECK(command)
if (aroot != broot && SYMMETRIC)
{ sprintf(command,"LAsort %s %s %s/%s.%s.N%c %s/%s.%s.C%c",VERBOSE?"-v":"",
MAP_ORDER?"-a":"",SORT_PATH,broot,aroot,BLOCK_SYMBOL,
SORT_PATH,broot,aroot,BLOCK_SYMBOL);
{ sprintf(command,"LAsort %s %s %s/%s.%s.N%c",VERBOSE?"-v":"",
MAP_ORDER?"-a":"",SORT_PATH,broot,aroot,BLOCK_SYMBOL);
SYSTEM_CHECK(command)
sprintf(command,"LAmerge %s %s %s.%s.las %s/%s.%s.N%c.S %s/%s.%s.C%c.S",VERBOSE?"-v":"",
MAP_ORDER?"-a":"",broot,aroot,SORT_PATH,broot,aroot,BLOCK_SYMBOL,
SORT_PATH,broot,aroot,BLOCK_SYMBOL);
sprintf(command,"LAmerge %s %s %s.%s.las %s/%s.%s.N%c.S",VERBOSE?"-v":"",
MAP_ORDER?"-a":"",broot,aroot,SORT_PATH,broot,aroot,BLOCK_SYMBOL);
SYSTEM_CHECK(command)
}
if (aroot != broot)
free(broot);
}
Free_Block_Arg(parse);
}
}
free(aindex);
free(apath);
free(aroot);
Clean_Exit(0);
}
daligner (1.0+git20190721.efb48c3-1) UNRELEASED; urgency=medium
* Afif removed himself from Uploaders
* Add myself to Uploaders
* New upstream version
* debhelper-compat 12
* Standards-Version: 4.4.0
TODO: radix.c:39:18: error: 'WORD_SIZE' undeclared here (not in a function)
-- Andreas Tille <tille@debian.org> Fri, 02 Aug 2019 23:50:11 +0200
daligner (1.0+git20180524.fd21879-1) unstable; urgency=medium
* Team upload.
......
Source: daligner
Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.org>
Uploaders: Andreas Tille <tille@debian.org>
Section: science
Priority: optional
Build-Depends: debhelper (>= 11~)
Standards-Version: 4.2.1
Build-Depends: debhelper-compat (= 12)
Standards-Version: 4.4.0
Vcs-Browser: https://salsa.debian.org/med-team/daligner
Vcs-Git: https://salsa.debian.org/med-team/daligner.git
Homepage: https://dazzlerblog.wordpress.com
......
......@@ -2,13 +2,13 @@ Description: Append to CFLAGS
Author: Afif Elghraoui <afif@ghraoui.name>
Forwarded: not-needed
Last-Update: 2016-01-08
--- daligner.orig/Makefile
+++ daligner/Makefile
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
DEST_DIR = ~/bin
-CFLAGS = -O3 -Wall -Wextra -Wno-unused-result -fno-strict-aliasing
+CFLAGS += -O3 -Wall -Wextra -Wno-unused-result -fno-strict-aliasing
ALL = daligner HPC.daligner LAsort LAmerge LAsplit LAcat LAshow LAdump LAcheck LAindex
ALL = daligner HPC.daligner LAsort LAmerge LAsplit LAcat LAshow LAdump LAcheck LAa2b LAb2a dumpLA