Andreas Tille · Andreas Tille · Andreas Tille · Andreas Tille · Andreas Tille · Andreas Tille
--- a/DAScover.c
+++ b/DAScover.c
+/*******************************************************************************************
+ *
+ *  Using overlap pile for each read compute estimated coverage of the underlying genome
+ *    generating a .covr track containing a histogram of the coverage of every unmasked
+ *    trace-point tile.
+ *
+ *  Author:  Gene Myers
+ *  Date  :  January 2018
+ *
+ *******************************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+
+#include "DB.h"
+#include "align.h"
+
+#ifdef HIDE_FILES
+#define PATHSEP "/."
+#else
+#define PATHSEP "/"
+#endif
+
+#undef COVER_DEBUG
+
+static char *Usage = "[-v] [-H<int>] [-m<track>]+ <source:db> <overlaps:las> ...";
+
+#define  MAX_COVER  1000
+
+static int    VERBOSE;
+static int    HGAP_MIN;    //  Under this length do not process for HGAP
+
+typedef struct
+  { char  *name;
+    int64 *idx;
+    int   *data;
+  } Mask;
+
+static int   NUM_MASK;
+static Mask *MASKS;
+
+static int64  Cov_Hist[MAX_COVER+1];   //  [0..MAX_COVER] counts of trace-interval coverages
+static char  *Cov_Name  = "Coverage Histogram";
+static char  *Hgap_Name = "Hgap threshold";
+
+static int     TRACE_SPACING;  //  Trace spacing (from .las file)
+static int     TBYTES;         //  Bytes per trace segment (from .las file)
+
+static DAZZ_DB   _DB, *DB  = &_DB;    //  Data base
+static int        DB_FIRST;           //  First read of DB to process
+static int        DB_LAST;            //  Last read of DB to process (+1)
+static int        DB_PART;            //  0 if all, otherwise block #
+static DAZZ_READ *Reads;	      //  Data base reads array
+
+static FILE   *CV_AFILE;   //  .covr.anno
+static char   *CV_ANAME;   //  ".covr.anno"
+
+
+//  Statistics
+
+static int64 nreads, totlen;
+
+//  For each pile, calculate QV scores of the aread at tick spacing TRACE_SPACING
+
+static void HISTOGRAM_COVER(int aread, Overlap *ovls, int novl)
+{ static int    nmax = 0;
+  static int   *local;
+  static int   *cover;
+  static int   *maskd;
+
+  int  alen, atick;
+  int  bread, cssr;
+  int  t2, t1;
+  int  i, j, a, e;
+
+  alen  = DB->reads[aread].rlen;
+  atick = (alen + (TRACE_SPACING-1))/TRACE_SPACING;
+
+  if (alen < HGAP_MIN)
+    return;
+
+#if defined(COVER_DEBUG)
+  printf("AREAD %d",aread);
+  if (novl == 0)
+    printf(" EMPTY");
+  printf("\n");
+#endif
+
+  //  COVERAGE
+  //       Allocate or expand data structures for cover calculation as needed
+
+  if (nmax == 0)
+    { nmax = (DB->maxlen + (TRACE_SPACING-1))/TRACE_SPACING;
+
+      local = (int *) Malloc(nmax*sizeof(int),"Allocating bread cover");
+      cover = (int *) Malloc(nmax*sizeof(int),"Allocating aread cover");
+      maskd = (int *) Malloc(nmax*sizeof(int),"Allocating aread cover");
+
+      if (local == NULL || cover == NULL || maskd == NULL)
+        exit (1);
+
+      for (i = nmax-1; i >= 0; i--)
+        local[i] = cover[i] = maskd[i] = 0;
+    }
+
+  //  For every segment, fill histogram of match diffs for every one of the
+  //   atick intervals, building separate histograms, hist & cist, for forward
+  //   and reverse B-hits
+
+  t2 = TRACE_SPACING/2;
+  t1 = t2-1;
+
+  for (i = 0; i < novl; i = j)
+    { bread = ovls[i].bread;
+
+      for (j = bread+1; j < DB_LAST; j++)
+        if ((Reads[j].flags & DB_CSS) == 0)
+          break;
+      cssr = j;
+
+      for (j = i; j < novl; j++)
+        if (ovls[j].bread < cssr)
+          { e = (ovls[j].path.aepos + t2) / TRACE_SPACING;
+            for (a = (ovls[j].path.abpos + t1) / TRACE_SPACING; a < e; a++)
+              local[a] = 1;
+          }
+        else
+          break;
+
+      for (j = i; j < novl; j++)
+        if (ovls[j].bread < cssr)
+          { e = (ovls[j].path.aepos + t2) / TRACE_SPACING;
+            for (a = (ovls[j].path.abpos + t1) / TRACE_SPACING; a < e; a++)
+              if (local[a] > 0)
+                { local[a] = 0;
+                  cover[a] += 1;
+                }
+          }
+        else
+          break;
+    }
+
+  for (i = 0; i < NUM_MASK; i++)
+    { Mask *m = (Mask *) (MASKS+i);
+      int   k, u, e;
+
+#ifdef COVER_DEBUG
+      printf("  %s:",m->name);
+#endif
+      for (k = m->idx[aread]; k < m->idx[aread+1]; k += 2)
+        { e = m->data[k+1];
+#ifdef COVER_DEBUG
+          printf(" [%d..%d]",m->data[k],e);
+#endif
+          e = (e + (TRACE_SPACING-1))/TRACE_SPACING;
+          for (u = m->data[k]/TRACE_SPACING; u < e; u++) 
+            maskd[u] = 1;
+        }
+#ifdef COVER_DEBUG
+      printf("\n");
+#endif
+    }
+
+#ifdef COVER_DEBUG
+  printf("Mask: ");
+  for (a = 0; a < atick; a++)
+    printf("%d",maskd[a]);
+  printf("\n");
+#endif
+
+  for (a = 0; a < atick; a++)
+    { if (maskd[a] == 0)
+        { e = cover[a];
+          if (e >= MAX_COVER)
+            Cov_Hist[MAX_COVER] += 1;
+          else
+            Cov_Hist[e] += 1;
+        }
+      cover[a] = 0;
+    }
+
+  for (i = 0; i < NUM_MASK; i++)
+    { Mask *m = (Mask *) (MASKS+i);
+      int   k, u, e;
+
+      for (k = m->idx[aread]; k < m->idx[aread+1]; k += 2)
+        { e = m->data[k+1];
+          e = (e + (TRACE_SPACING-1))/TRACE_SPACING;
+          for (u = m->data[k]/TRACE_SPACING; u < e; u++) 
+            maskd[u] = 0;
+        }
+    }
+
+  if (VERBOSE)
+    { nreads += 1;
+      totlen += alen;
+    }
+}
+
+  //  Read in each successive pile and call ACTION on it.  Read in the traces only if
+  //   "trace" is nonzero
+
+static int make_a_pass(FILE *input, void (*ACTION)(int, Overlap *, int), int trace)
+{ static Overlap *ovls = NULL;
+  static int      omax = 500;
+  static uint16  *paths = NULL;
+  static int      pmax = 100000;
+
+  int64 i, j, novl;
+  int   n, a;
+  int   pcur;
+  int   max;
+
+  if (ovls == NULL)
+    { ovls = (Overlap *) Malloc(sizeof(Overlap)*omax,"Allocating overlap buffer");
+      if (ovls == NULL)
+        exit (1);
+    }
+  if (trace && paths == NULL)
+    { paths = (uint16 *) Malloc(sizeof(uint16)*pmax,"Allocating path buffer");
+      if (paths == NULL)
+        exit (1);
+    }
+
+  rewind(input);
+  fread(&novl,sizeof(int64),1,input);
+  fread(&TRACE_SPACING,sizeof(int),1,input);
+  if (TRACE_SPACING <= TRACE_XOVR)
+    TBYTES = sizeof(uint8);
+  else
+    TBYTES = sizeof(uint16);
+
+  if (Read_Overlap(input,ovls) != 0)
+    ovls[0].aread = INT32_MAX;
+  else if (trace)
+    { if (ovls[0].path.tlen > pmax)
+        { pmax  = 1.2*(ovls[0].path.tlen)+10000;
+          paths = (uint16 *) Realloc(paths,sizeof(uint16)*pmax,"Expanding path buffer");
+          if (paths == NULL) exit (1);
+        }
+      fread(paths,TBYTES,ovls[0].path.tlen,input);
+      if (TBYTES == 1)
+        { ovls[0].path.trace = paths;
+          Decompress_TraceTo16(ovls);
+        }
+    }
+  else
+    fseek(input,TBYTES*ovls[0].path.tlen,SEEK_CUR);
+
+  if (ovls[0].aread < DB_FIRST)
+    { fprintf(stderr,"%s: .las file overlaps don't correspond to reads in block %d of DB\n",
+                     Prog_Name,DB_PART);
+      exit (1);
+    }
+
+  pcur = 0;
+  n = max = 0;
+  for (j = DB_FIRST; j < DB_LAST; j++)
+    { ovls[0] = ovls[n];
+      a = ovls[0].aread;
+      if (a != j)
+        n = 0;
+      else
+        { if (trace)
+            memmove(paths,paths+pcur,sizeof(uint16)*ovls[0].path.tlen);
+          n = 1;
+          pcur = ovls[0].path.tlen;
+          while (1)
+            { if (Read_Overlap(input,ovls+n) != 0)
+                { ovls[n].aread = INT32_MAX;
+                  break;
+                }
+              if (trace)
+                { if (pcur + ovls[n].path.tlen > pmax)
+                    { pmax = 1.2*(pcur+ovls[n].path.tlen)+10000;
+                      paths = (uint16 *) Realloc(paths,sizeof(uint16)*pmax,"Expanding path buffer");
+                      if (paths == NULL) exit (1);
+                    }
+                  fread(paths+pcur,TBYTES,ovls[n].path.tlen,input);
+                  if (TBYTES == 1)
+                    { ovls[n].path.trace = paths+pcur;
+                      Decompress_TraceTo16(ovls+n);
+                    }
+                }
+              else
+                fseek(input,TBYTES*ovls[n].path.tlen,SEEK_CUR);
+              if (ovls[n].aread != a)
+                break;
+              pcur += ovls[n].path.tlen;
+              n    += 1;
+              if (n >= omax)
+                { omax = 1.2*n + 100;
+                  ovls = (Overlap *) Realloc(ovls,sizeof(Overlap)*omax,"Expanding overlap buffer");
+                  if (ovls == NULL) exit (1);
+                }
+            }
+
+          if (n >= max)
+            max = n;
+          pcur = 0;
+          for (i = 0; i < n; i++)
+            { ovls[i].path.trace = paths+pcur;
+              pcur += ovls[i].path.tlen;
+            }
+        }
+      ACTION(j,ovls,n);
+    }
+
+  if (ovls[n].aread < INT32_MAX)
+    { fprintf(stderr,"%s: .las file overlaps don't correspond to reads in block %d of DB\n",
+                     Prog_Name,DB_PART);
+      exit (1);
+    }
+
+  return (max);
+}
+
+int main(int argc, char *argv[])
+{ char  *root, *dpwd;
+  int64  novl, hgap64;
+  int    c;
+
+  DAZZ_EXTRA ex_covr, ex_hgap;
+
+  //  Process arguments
+
+  { int  i, j, k;
+    int  flags[128];
+    char *eptr;
+    int   mmax;
+
+    ARG_INIT("DAScover")
+
+    HGAP_MIN = 0;
+    NUM_MASK = 0;
+
+    mmax  = 10;
+    MASKS = (Mask *) Malloc(mmax*sizeof(Mask),"Allocating mask array");
+    if (MASKS == NULL)
+      exit (1);
+
+    j = 1;
+    for (i = 1; i < argc; i++)
+      if (argv[i][0] == '-')
+        switch (argv[i][1])
+        { default:
+            ARG_FLAGS("v")
+            break;
+          case 'm':
+            if (NUM_MASK >= mmax)
+              { mmax  = 1.2*NUM_MASK + 10;
+                MASKS = (Mask *) Realloc(MASKS,mmax*sizeof(Mask),"Reallocating mask array");
+                if (MASKS == NULL)
+                  exit (1);
+              }
+            MASKS[NUM_MASK++].name = argv[i]+2;
+            break;
+          case 'H':
+            ARG_POSITIVE(HGAP_MIN,"HGAP threshold (in bp.s)")
+            break;
+        }
+      else
+        argv[j++] = argv[i];
+    argc = j;
+
+    VERBOSE = flags['v'];
+
+    if (argc < 3)
+      { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
+        fprintf(stderr,"\n");
+        fprintf(stderr,"      -v: Verbose mode, output statistics as proceed.\n");
+        fprintf(stderr,"      -H: HGAP minimum length threshold.\n");
+        fprintf(stderr,"      -m: repeat masks, stats not collected over these intervals\n");
+        exit (1);
+      }
+  }
+
+  //  Open trimmed DB and any mask tracks
+
+  { DAZZ_TRACK *track;
+    int64      *anno;
+    int         status, kind;
+    int         i, j, k;
+
+    status = Open_DB(argv[1],DB);
+    if (status < 0)
+      exit (1);
+    if (status == 1)
+      { fprintf(stderr,"%s: Cannot be called on a .dam index: %s\n",Prog_Name,argv[1]);
+        exit (1);
+      }
+    if (DB->part)
+      { fprintf(stderr,"%s: Cannot be called on a block: %s\n",Prog_Name,argv[1]);
+        exit (1);
+      }
+    Trim_DB(DB);
+    Reads = DB->reads;
+
+    k = 0;
+    for (i = 0; i < NUM_MASK; i++)
+      { status = Check_Track(DB,MASKS[i].name,&kind);
+        switch (status)
+        { case 0:
+            fprintf(stderr,"%s: [WARNING] %s track is for the *un*trimmed DB?\n",
+                           Prog_Name,MASKS[i].name);
+            continue;
+          case -1:
+            fprintf(stderr,"%s: [WARNING] %s track size not correct for trimmed DB.\n",
+                           Prog_Name,MASKS[i].name);
+            continue;
+          case -2:
+            fprintf(stderr,"%s: [WARNING] -m%s option given but no track found.\n",
+                           Prog_Name,MASKS[i].name);
+            continue;
+          default:
+            if (kind != MASK_TRACK)
+              { fprintf(stderr,"%s: [WARNING] %s track is not a mask track.\n",
+                               Prog_Name,MASKS[i].name);
+                continue;
+              }
+            break;
+        }
+
+        track = Load_Track(DB,MASKS[i].name);
+
+        anno = (int64 *) (track->anno);
+        for (j = 0; j <= DB->nreads; j++)
+          anno[j] /= sizeof(int);
+
+        MASKS[k].name = MASKS[i].name;
+        MASKS[k].idx  = anno;
+        MASKS[k].data = (int *) (track->data);
+        k += 1;
+      }
+    NUM_MASK = k;
+  }
+
+  //  Determine if overlap block is being processed and if so get first and last read
+  //    from .db file
+
+  ex_covr.vtype = DB_INT;
+  ex_covr.nelem = MAX_COVER+1;
+  ex_covr.accum = DB_SUM;
+  ex_covr.name  = Cov_Name;
+  ex_covr.value = Cov_Hist; 
+
+  ex_hgap.vtype = DB_INT;
+  ex_hgap.nelem = 1;
+  ex_hgap.accum = DB_EXACT;
+  ex_hgap.name  = Hgap_Name;
+  hgap64 = HGAP_MIN;
+  ex_hgap.value = &hgap64;
+
+  dpwd = PathTo(argv[1]);
+  root = Root(argv[1],".db");
+
+  for (c = 2; c < argc; c++)
+    { Block_Looper *parse;
+      FILE         *input;
+
+      parse = Parse_Block_Arg(argv[c]);
+
+      while ((input = Next_Block_Arg(parse)) != NULL)
+        { DB_PART  = 0;
+          DB_FIRST = 0;
+          DB_LAST  = DB->nreads;
+
+          { FILE *dbfile;
+            char  buffer[2*MAX_NAME+100];
+            char *p, *eptr;
+            int   i, part, nfiles, nblocks, cutoff, all, oindx;
+            int64 size;
+
+            p = rindex(Block_Arg_Root(parse),'.');
+            if (p != NULL)
+              { part = strtol(p+1,&eptr,10);
+                if (*eptr == '\0' && eptr != p+1)
+                  { dbfile = Fopen(Catenate(dpwd,"/",root,".db"),"r");
+                    if (dbfile == NULL)
+                      exit (1);
+                    if (fscanf(dbfile,DB_NFILE,&nfiles) != 1)
+                      SYSTEM_READ_ERROR
+                    for (i = 0; i < nfiles; i++)
+                      if (fgets(buffer,2*MAX_NAME+100,dbfile) == NULL)
+                        SYSTEM_READ_ERROR
+                    if (fscanf(dbfile,DB_NBLOCK,&nblocks) != 1)
+                      SYSTEM_READ_ERROR
+                    if (fscanf(dbfile,DB_PARAMS,&size,&cutoff,&all) != 3)
+                      SYSTEM_READ_ERROR
+                    for (i = 1; i <= part; i++)
+                      if (fscanf(dbfile,DB_BDATA,&oindx,&DB_FIRST) != 2)
+                        SYSTEM_READ_ERROR
+                    if (fscanf(dbfile,DB_BDATA,&oindx,&DB_LAST) != 2)
+                      SYSTEM_READ_ERROR
+                    fclose(dbfile);
+                    DB_PART = part;
+                  }
+              }
+          }
+
+          //  Set up cover extra's track
+
+          if (DB_PART > 0)
+           CV_ANAME = Strdup(Catenate(dpwd,PATHSEP,root,
+                           Numbered_Suffix(".",DB_PART,".covr.anno")),"Allocating cover anno name");
+          else
+           CV_ANAME = Strdup(Catenate(dpwd,PATHSEP,root,".covr.anno"),"Allocating cover anno name");
+          CV_AFILE = Fopen(CV_ANAME,"w");
+          if (CV_ANAME == NULL || CV_AFILE == NULL)
+            exit (1);
+
+          { int size, length;
+
+            length = 0;
+            size   = 1;
+            fwrite(&length,sizeof(int),1,CV_AFILE);
+            fwrite(&size,sizeof(int),1,CV_AFILE);
+          }
+
+          //  Get trace point spacing information
+
+          fread(&novl,sizeof(int64),1,input);
+          fread(&TRACE_SPACING,sizeof(int),1,input);
+
+          //  Initialize statistics gathering
+
+          if (VERBOSE)
+            { nreads = 0;
+              totlen = 0;
+              printf("\nDAScover %s %s\n",argv[1],argv[c]);
+            }
+
+          { int i;
+
+            for (i = 0; i <= MAX_COVER; i++)
+              Cov_Hist[i] = 0;
+          }
+
+          //  Process each read pile
+
+          make_a_pass(input,HISTOGRAM_COVER,0);
+
+          //  If verbose output statistics summary to stdout
+
+          if (VERBOSE)
+            { int   i, cover;
+              int64 ssum, stotal;
+
+              printf("\nInput:  ");
+              Print_Number(nreads,7,stdout);
+              printf("reads,  ");
+              Print_Number(totlen,12,stdout);
+              printf(" bases");
+              if (HGAP_MIN > 0)
+                { printf(" (another ");
+                  Print_Number((DB_LAST-DB_FIRST) - nreads,0,stdout);
+                  printf(" were < H-length)");
+                }
+              printf("\n");
+ 
+              stotal = 0;
+              for (i = 0; i <= MAX_COVER; i++)
+                stotal += Cov_Hist[i];
+
+              printf("\nCoverage Histogram\n\n");
+              ssum = Cov_Hist[MAX_COVER];
+              if (ssum > 0)
+                printf("    %4d:  %9lld  %5.1f%%\n\n",
+                       MAX_COVER,Cov_Hist[MAX_COVER],(100.*ssum)/stotal);
+              stotal -= ssum;
+              ssum    = 0;
+              for (i = MAX_COVER-1; i >= 0; i--) 
+                if (Cov_Hist[i] > 0)
+                  { ssum += Cov_Hist[i];
+                    printf("    %4d:  %9lld  %5.1f%%\n",
+                           i,Cov_Hist[i],(100.*ssum)/stotal);
+                  }
+
+              i = 0;
+              while (Cov_Hist[i+1] < Cov_Hist[i])
+                i += 1;
+              for (cover = i++; i < MAX_COVER; i++)
+                if (Cov_Hist[cover] < Cov_Hist[i])
+                  cover = i;
+
+              printf("\n  Coverage is estimated at %d\n\n",cover);
+            }
+
+          //  Output coverage histogram
+
+          Write_Extra(CV_AFILE,&ex_covr);
+          Write_Extra(CV_AFILE,&ex_hgap);
+
+          fclose(CV_AFILE);
+          free(CV_ANAME);
+          fclose(input);
+        }
+
+      Free_Block_Arg(parse);
+    }
+
+  free(dpwd);
+  free(root);
+
+  Close_DB(DB);
+  free(Prog_Name);
+
+  exit (0);
+}
--- a/DASedit.c
+++ b/DASedit.c
@@ -75,8 +75,6 @@ static char *Usage = "[-v] [-x<int>] <source:db> <target:db>";
 #define LOWQ  0   //  Gap is spanned by many LAs and patchable
 #define SPAN  1   //  Gap has many paired LAs and patchable
 #define SPLIT 2   //  Gap is a chimer or an unpatchable gap
-#define ADPRE 3   //  Gap is an adatper break and prefix should be removed
-#define ADSUF 4   //  Gap is an adatper break and suffix should be removed

 //  Global Variables (must exist across the processing of each pile)

@@ -205,9 +203,6 @@ int main(int argc, char *argv[])
  char       *pwd2, *root2;
  int         VERBOSE;
  int         CUTOFF;
-  int         GOOD_QV;
-  int         BAD_QV;
-  int         COVERAGE;
  int         HGAP_MIN;

  int         nfiles;        //  contents of source .db file
@@ -253,6 +248,9 @@ int main(int argc, char *argv[])

    if (argc != 3)
      { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
+        fprintf(stderr,"\n");
+        fprintf(stderr,"      -v: Verbose mode, output statistics as proceed.\n");
+        fprintf(stderr,"      -x: minimum length for edited reads\n");
        exit (1);
      }
  }
@@ -374,64 +372,35 @@ int main(int argc, char *argv[])
    track = Load_Track(DB,"trim");
    if (track != NULL)
      { FILE *afile;
-        int   size, tracklen, extra;
+        char *aname;
+        int   extra, tracklen, size;
+        DAZZ_EXTRA  ex_hgap;

        TRIM_IDX = (int64 *) track->anno;
        TRIM     = (int *) track->data;
        for (i = 0; i <= nreads; i++)
          TRIM_IDX[i] /= sizeof(int);

-        //  if newer .trim tracks with -g, -b, -c, -H meta data, grab it
+        //  Get HGAP minimum from .trim extras
+
+        aname = Strdup(Catenate(DB->path,".","trim",".anno"),"Allocating anno file");
+        if (aname == NULL)
+          exit (1);
+        afile  = fopen(aname,"r");

-        afile = fopen(Catenate(DB->path,".","trim",".anno"),"r");
        fread(&tracklen,sizeof(int),1,afile);
        fread(&size,sizeof(int),1,afile);
        fseeko(afile,0,SEEK_END);
        extra = ftell(afile) - (size*(tracklen+1) + 2*sizeof(int));
        fseeko(afile,-extra,SEEK_END);
-        if (extra == 4*sizeof(int))
-          { fread(&GOOD_QV,sizeof(int),1,afile);
-            fread(&BAD_QV,sizeof(int),1,afile);
-            fread(&COVERAGE,sizeof(int),1,afile);
-            fread(&HGAP_MIN,sizeof(int),1,afile);
-          }
-        else if (extra == 3*sizeof(int))
-          { fread(&GOOD_QV,sizeof(int),1,afile);
-            fread(&BAD_QV,sizeof(int),1,afile);
-            fread(&COVERAGE,sizeof(int),1,afile);
-            HGAP_MIN = 0;
-          }
-        else if (extra == 2*sizeof(int))
-          { fread(&GOOD_QV,sizeof(int),1,afile);
-            fread(&BAD_QV,sizeof(int),1,afile);
-            COVERAGE = -1;
-            HGAP_MIN = 0;
-          }
-        else
-          { fprintf(stderr,"%s: trim annotation is out of date, rerun DAStrim\n",Prog_Name);
+        ex_hgap.nelem = 0;
+        if (Read_Extra(afile,aname,&ex_hgap) != 0)
+          { fprintf(stderr,"%s: Hgap threshold extra missing from .trim track?\n",Prog_Name);
            exit (1);
          }
        fclose(afile);

-        { int a, t, x;
-          int tb, te;
-
-          x = 0;
-          for (a = 0; a < DB->nreads; a++)
-            { tb = TRIM_IDX[a];
-              te = TRIM_IDX[a+1];
-              if (tb+2 < te)
-                { if (TRIM[tb+2] == ADPRE)
-                    tb += 3;
-                  if (TRIM[te-3] == ADSUF)
-                    te -= 3;
-                }
-              TRIM_IDX[a] = x;
-              for (t = tb; t < te; t++)
-                TRIM[x++] = TRIM[t];
-            }
-          TRIM_IDX[DB->nreads] = x;
-        }
+        HGAP_MIN = (int) ((int64 *) (ex_hgap.value))[0];
      }
    else
      { fprintf(stderr,"%s: Must have a 'trim' track, run DAStrim\n",Prog_Name);

--- a/DASmap.c
+++ b/DASmap.c
@@ -106,6 +106,8 @@ int main(int argc, char *argv[])

    if (argc <= 1)
      { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
+        fprintf(stderr,"\n");
+        fprintf(stderr,"      -p: Pretty print (vs easy to parse).\n");
        exit (1);
      }
  }

--- a/DASpatch.c
+++ b/DASpatch.c
@@ -13,7 +13,6 @@
 #include <stdlib.h>
 #include <string.h>
 #include <stdint.h>
-#include <math.h>

 #include "DB.h"
 #include "align.h"
@@ -42,8 +41,6 @@ static int     VERBOSE;
 #define LOWQ  0   //  Gap is spanned by many LAs and patchable
 #define SPAN  1   //  Gap has many paired LAs and patchable
 #define SPLIT 2   //  Gap is a chimer or an unpatchable gap
-#define ADPRE 3   //  Gap is an adatper break and prefix should be removed
-#define ADSUF 4   //  Gap is an adatper break and suffix should be removed
 #define NOPAT 3   //  Gap could not be patched (internal only)

 #define  COVER_LEN     400  //  An overlap covers a point if it extends COVER_LEN to either side.
@@ -635,9 +632,7 @@ static int make_a_pass(FILE *input, void (*ACTION)(int, Overlap *, int), int tra
 }

 int main(int argc, char *argv[])
-{ FILE       *input;
-  char       *root, *dpwd;
-  char       *las, *lpwd;
+{ char       *root, *dpwd;
  int64       novl;
  DAZZ_TRACK *track;
  int         c;
@@ -665,6 +660,8 @@ int main(int argc, char *argv[])

    if (argc < 3)
      { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
+        fprintf(stderr,"\n");
+        fprintf(stderr,"      -v: Verbose mode, output statistics as proceed.\n");
        exit (1);
      }
  }
@@ -699,57 +696,52 @@ int main(int argc, char *argv[])
    track = Load_Track(DB,"trim");
    if (track != NULL)
      { FILE *afile;
-        int   size, tracklen, extra;
+        char *aname;
+        int   extra, tracklen, size;
+        DAZZ_EXTRA  ex_hgap, ex_cest, ex_good, ex_bad;

        TRIM_IDX = (int64 *) track->anno;
        TRIM     = (int *) track->data;
        for (i = 0; i <= DB->nreads; i++)
          TRIM_IDX[i] /= sizeof(int);

-        //  if newer .trim tracks with -g, -b, -c, -H meta data, grab it
+        //  Get HGAP minimum, and good and bad qv thresholds from .trim extras
+
+        aname = Strdup(Catenate(DB->path,".","trim",".anno"),"Allocating anno file");
+        if (aname == NULL)
+          exit (1);
+        afile  = fopen(aname,"r");

-        afile = fopen(Catenate(DB->path,".","trim",".anno"),"r");
        fread(&tracklen,sizeof(int),1,afile);
        fread(&size,sizeof(int),1,afile);
        fseeko(afile,0,SEEK_END);
        extra = ftell(afile) - (size*(tracklen+1) + 2*sizeof(int));
        fseeko(afile,-extra,SEEK_END);
-        if (extra == 4*sizeof(int))
-          { fread(&GOOD_QV,sizeof(int),1,afile);
-            fread(&BAD_QV,sizeof(int),1,afile);
-            fread(&HGAP_MIN,sizeof(int),1,afile);
-            fread(&HGAP_MIN,sizeof(int),1,afile);
+        ex_hgap.nelem = 0;
+        if (Read_Extra(afile,aname,&ex_hgap) != 0)
+          { fprintf(stderr,"%s: Hgap threshold extra missing from .trim track?\n",Prog_Name);
+            exit (1);
          }
-        else if (extra == 2*sizeof(int) || extra == 3*sizeof(int))
-          { fread(&GOOD_QV,sizeof(int),1,afile);
-            fread(&BAD_QV,sizeof(int),1,afile);
-            HGAP_MIN = 0;
+        ex_cest.nelem = 0;
+        if (Read_Extra(afile,aname,&ex_cest) != 0)
+          { fprintf(stderr,"%s: Coverage estimate extra missing from .trim track?\n",Prog_Name);
+            exit (1);
          }
-        else
-          { fprintf(stderr,"%s: trim annotation is out of date, rerun DAStrim\n",Prog_Name);
+        ex_good.nelem = 0;
+        if (Read_Extra(afile,aname,&ex_good) != 0)
+          { fprintf(stderr,"%s: Good QV threshold extra missing from .trim track?\n",Prog_Name);
+            exit (1);
+          }
+        ex_bad.nelem = 0;
+        if (Read_Extra(afile,aname,&ex_bad) != 0)
+          { fprintf(stderr,"%s: Bad QV threshold extra missing from .trim track?\n",Prog_Name);
            exit (1);
          }
        fclose(afile);

-        { int a, t, x;
-          int tb, te;
-
-          x = 0;
-          for (a = 0; a < DB->nreads; a++)
-            { tb = TRIM_IDX[a];
-              te = TRIM_IDX[a+1];
-              if (tb+2 < te)
-                { if (TRIM[tb+2] == ADPRE)
-                    tb += 3;
-                  if (TRIM[te-3] == ADSUF)
-                    te -= 3; 
-                }
-              TRIM_IDX[a] = x;
-              for (t = tb; t < te; t++)
-                TRIM[x++] = TRIM[t]; 
-            }
-          TRIM_IDX[DB->nreads] = x;
-        }
+        HGAP_MIN = (int) ((int64 *) (ex_hgap.value))[0];
+        GOOD_QV  = (int) ((int64 *) (ex_good.value))[0];
+        BAD_QV   = (int) ((int64 *) (ex_bad.value))[0];
      }
    else
      { fprintf(stderr,"%s: Must have a 'trim' track, run DAStrim\n",Prog_Name);
@@ -757,25 +749,21 @@ int main(int argc, char *argv[])
      }
  }

-  //  Initialize statistics gathering
-
-  if (VERBOSE)
-    { npatch = 0;
-      fpatch = 0;
-
-      printf("\nDASpatch -g%d -b%d %s",GOOD_QV,BAD_QV,argv[1]);
-      for (c = 2; c < argc; c++)
-        printf(" %s",argv[c]);
-      printf("\n");
-    }
-
  //  For each .las block/file

  dpwd = PathTo(argv[1]);
  root = Root(argv[1],".db");

  for (c = 2; c < argc; c++)
-    { las  = Root(argv[c],".las");
+    { Block_Looper *parse;
+      FILE         *input;
+
+      parse = Parse_Block_Arg(argv[c]);
+
+      while ((input = Next_Block_Arg(parse)) != NULL)
+        { DB_PART  = 0;
+          DB_FIRST = 0;
+          DB_LAST  = DB->nreads;

          //  Determine if a .las block is being processed and if so get first and last read
          //    from .db file
@@ -786,11 +774,7 @@ int main(int argc, char *argv[])
            int   i, part, nfiles, nblocks, cutoff, all, oindx;
            int64 size;

-        DB_PART  = 0;
-        DB_FIRST = 0;
-        DB_LAST  = DB->nreads;
-
-        p = rindex(las,'.');
+            p = rindex(Block_Arg_Root(parse),'.');
            if (p != NULL)
              { part = strtol(p+1,&eptr,10);
                if (*eptr == '\0' && eptr != p+1)
@@ -813,12 +797,11 @@ int main(int argc, char *argv[])
                      SYSTEM_READ_ERROR
                    fclose(dbfile);
                    DB_PART = part;
-                *p = '\0';
                  }
              }
          }

-      //  Set up QV trimming track
+          //  Set up patch track

          { int len, size;

@@ -843,40 +826,39 @@ int main(int argc, char *argv[])
 	    fwrite(&PR_INDEX,sizeof(int64),1,PR_AFILE);
          }

-      //  Open overlap file
-
-      lpwd = PathTo(argv[c]);
-      if (DB_PART)
-        input = Fopen(Catenate(lpwd,"/",las,Numbered_Suffix(".",DB_PART,".las")),"r");
-      else
-        input = Fopen(Catenate(lpwd,"/",las,".las"),"r");
-      if (input == NULL)
-        exit (1);
-
-      free(lpwd);
-      free(las);
-
          //  Get trace point spacing information

          fread(&novl,sizeof(int64),1,input);
          fread(&TRACE_SPACING,sizeof(int),1,input);
          ANCHOR_THRESH = ANCHOR_MATCH * TRACE_SPACING;

-      make_a_pass(input,PATCH_GAPS,1);
-
-      //  Clean up
+          //  Initialize statistics gathering

-      fclose(PR_AFILE);
-      fclose(PR_DFILE);
+          if (VERBOSE)
+            { npatch = 0;
+              fpatch = 0;
+              printf("\nDASpatch -g%d -b%d %s %s\n",GOOD_QV,BAD_QV,argv[1],argv[c]);
            }

+          //  Process each read pile
+
+          make_a_pass(input,PATCH_GAPS,1);
+
          //  If verbose output statistics summary to stdout

          if (VERBOSE)
            { if (fpatch == 0)
-        printf("%s: All %d patches were successful\n",Prog_Name,npatch);
+                printf("  All %d patches were successful\n",npatch);
              else
-        printf("%s: %d out of %d total patches failed\n",Prog_Name,fpatch,npatch);
+                printf("  %d out of %d total patches failed\n",fpatch,npatch);
+            }
+    
+          fclose(PR_AFILE);
+          fclose(PR_DFILE);
+          fclose(input);
+        }
+
+      Free_Block_Arg(parse);
    }

  free(dpwd);

--- a/DASqv.c
+++ b/DASqv.c
@@ -23,7 +23,7 @@

 #undef  QV_DEBUG

-static char *Usage = "[-v] [-H<int>] -c<int> <source:db> <overlaps:las> ...";
+static char *Usage = "[-v] [-c<int>] <source:db> <overlaps:las> ...";

 #define  MAXQV   50     //  Max QV score is 50
 #define  MAXQV1  51
@@ -32,6 +32,7 @@ static char *Usage = "[-v] [-H<int>] -c<int> <source:db> <overlaps:las> ...";
 #define  PARTIAL .20    //  Partial terminal segments covering this percentage are scored

 static int     VERBOSE;
+static int     COVERAGE;  //  Estimated coverage of genome
 static int       QV_DEEP;   //  # of best diffs to average for QV score
 static int     HGAP_MIN;  //  Under this length do not process for HGAP

@@ -52,6 +53,7 @@ static int64   QV_INDEX;   //  Current index into .qual.data file
 static int64 nreads, totlen;
 static int64 qgram[MAXQV1], sgram[MAXQV1];

+
 //  For each pile, calculate QV scores of the aread at tick spacing TRACE_SPACING

 static void CALCULATE_QVS(int aread, Overlap *ovls, int novl)
@@ -202,7 +204,6 @@ static void CALCULATE_QVS(int aread, Overlap *ovls, int novl)
       }
 #endif

-      if (VERBOSE)
      for (v = 0; v <= MAXQV; v++)
        sgram[v] += tick[v] + cick[v];
     
@@ -259,14 +260,12 @@ static void CALCULATE_QVS(int aread, Overlap *ovls, int novl)
 #endif
    }

-  //  Accumulate qv histogram (if VERBOSE) and append qv's to .qual file
+  //  Accumulate qv histogram and append qv's to .qual file

-  if (VERBOSE)
-    { for (i = 0; i < atick; i++)
+  for (i = 0; i < atick; i++)
    qgram[qvec[i]] += 1;
  nreads += 1;
  totlen += alen;
-    }

  fwrite(qvec,sizeof(uint8),atick,QV_DFILE);
  QV_INDEX += atick;
@@ -392,11 +391,17 @@ static int make_a_pass(FILE *input, void (*ACTION)(int, Overlap *, int), int tra
 }

 int main(int argc, char *argv[])
-{ FILE  *input;
-  char  *root, *dpwd;
-  char  *las, *lpwd;
+{ char  *root, *dpwd;
  int64  novl;
-  int    c, COVERAGE;
+  int    c;
+
+  DAZZ_EXTRA ex_hgap, ex_covr;
+  DAZZ_EXTRA ex_cest, ex_qvs, ex_dif;
+
+  char *cest_name = "Coverage Estimate";
+  char *qvs_name  = "Histogram of QVs";
+  char *dif_name  = "Histogram of Tile Differences";
+  int64 cover64;

  //  Process arguments

@@ -407,7 +412,6 @@ int main(int argc, char *argv[])
    ARG_INIT("DASqv")

    COVERAGE = -1;
-    HGAP_MIN = 0;

    j = 1;
    for (i = 1; i < argc; i++)
@@ -419,9 +423,6 @@ int main(int argc, char *argv[])
          case 'c':
            ARG_POSITIVE(COVERAGE,"Voting depth")
            break;
-          case 'H':
-            ARG_POSITIVE(HGAP_MIN,"HGAP threshold (in bp.s)")
-            break;
        }
      else
        argv[j++] = argv[i];
@@ -431,25 +432,11 @@ int main(int argc, char *argv[])

    if (argc < 3)
      { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
+        fprintf(stderr,"\n");
+        fprintf(stderr,"      -v: Verbose mode, output statistics as proceed.\n");
+        fprintf(stderr,"      -c: Use this as the average coverage (not DAScover estimate).\n");
        exit (1);
      }
-
-    if (COVERAGE < 0)
-      { fprintf(stderr,"%s: Must supply -c parameter\n",Prog_Name);
-        exit (1);
-      }
-    else
-      { if (COVERAGE >= 40)
-          QV_DEEP = COVERAGE/8;
-        else if (COVERAGE >= 20)
-          QV_DEEP = 5;
-        else if (COVERAGE >= 4)
-          QV_DEEP = COVERAGE/4;
-        else
-          { fprintf(stderr,"%s: Average coverage is too low (< 4X), cannot infer qv's\n",Prog_Name);
-            exit (1);
-          }
-      }
  }

  //  Open trimmed DB
@@ -470,30 +457,103 @@ int main(int argc, char *argv[])
    Trim_DB(DB);
  }

-  //  Initialize statistics gathering
+  //  Get .covr track information

-  if (VERBOSE)
+  { FILE      *afile;
+    char      *aname;
+    int        extra, cmax;
+    int64     *cgram;
+
+    aname = Strdup(Catenate(DB->path,".","covr",".anno"),"Allocating anno file");
+    if (aname == NULL)
+      exit (1);
+    afile  = fopen(aname,"r");
+    if (afile == NULL)
+      { fprintf(stderr,"%s: Must have a 'covr' track, run DAScover\n",Prog_Name);
+        exit (1);
+      }
+
+    fseeko(afile,0,SEEK_END);
+    extra = ftell(afile) - sizeof(int)*2;
+    fseeko(afile,-extra,SEEK_END);
+    ex_covr.nelem = 0;
+    if (Read_Extra(afile,aname,&ex_covr) != 0)
+      { fprintf(stderr,"%s: Histogram extra missing from .covr track?\n",Prog_Name);
+        exit (1);
+      }
+    ex_hgap.nelem = 0;
+    if (Read_Extra(afile,aname,&ex_hgap) != 0)
+      { fprintf(stderr,"%s: Hgap threshold extra missing from .covr track?\n",Prog_Name);
+        exit (1);
+      }
+    fclose(afile);
+
+    HGAP_MIN = (int) ((int64 *) (ex_hgap.value))[0];
+    cgram    = (int64 *) (ex_covr.value);
+    cmax     = ex_covr.nelem - 1;
+
+    if (COVERAGE < 0)
      { int i;

-      nreads = 0;
-      totlen = 0;
-      for (i = 0; i <= MAXQV; i++)
-        qgram[i] = sgram[i] = 0;
+        i = 0;
+        while (cgram[i+1] < cgram[i])
+          i += 1;
+        for (COVERAGE = i++; i < cmax; i++)
+          if (cgram[COVERAGE] < cgram[i])
+            COVERAGE = i;
+      }

-      printf("\nDASqv -c%d %s",COVERAGE,argv[1]);
-      for (i = 2; i < argc; i++)
-        printf(" %s",argv[i]);
-      printf("\n");
+    if (COVERAGE >= 40)
+      QV_DEEP = COVERAGE/8;
+     else if (COVERAGE >= 20)
+       QV_DEEP = 5;
+     else if (COVERAGE >= 4)
+       QV_DEEP = COVERAGE/4;
+     else
+       { fprintf(stderr,"%s: Average coverage is too low (< 4X), cannot infer qv's\n",Prog_Name);
+         exit (1);
+       }
  }

-  //  Determine if overlap block is being processed and if so get first and last read
-  //    from .db file
+  //  Setup extras
+
+  ex_cest.vtype = DB_INT;      // Estimated coverage (same for every .las)
+  ex_cest.nelem = 1;
+  ex_cest.accum = DB_EXACT;
+  ex_cest.name  = cest_name;
+  cover64 = COVERAGE;
+  ex_cest.value = &cover64;
+
+  ex_qvs.vtype = DB_INT;      //  Histogram of MAXQV1 trace-point diff counts
+  ex_qvs.nelem = MAXQV1;
+  ex_qvs.accum = DB_SUM;
+  ex_qvs.name  = qvs_name;
+  ex_qvs.value = &qgram;
+
+  ex_dif.vtype = DB_INT;     //  Histogram of MAXQV1 intrinisic qv counts
+  ex_dif.nelem = MAXQV1;
+  ex_dif.accum = DB_SUM;
+  ex_dif.name  = dif_name;
+  ex_dif.value = &sgram;
+
+  //  For each .las file do

  dpwd = PathTo(argv[1]);
  root = Root(argv[1],".db");

  for (c = 2; c < argc; c++)
-    { las  = Root(argv[c],".las");
+    { Block_Looper *parse;
+      FILE         *input;
+
+      parse = Parse_Block_Arg(argv[c]);
+
+      while ((input = Next_Block_Arg(parse)) != NULL)
+        { DB_PART  = 0;
+          DB_FIRST = 0;
+          DB_LAST  = DB->nreads;
+
+          //  Determine if overlap block is being processed and if so get first and last read
+          //    from .db file

          { FILE *dbfile;
            char  buffer[2*MAX_NAME+100];
@@ -501,11 +561,7 @@ int main(int argc, char *argv[])
            int   i, part, nfiles, nblocks, cutoff, all, oindx;
            int64 size;

-        DB_PART  = 0;
-        DB_FIRST = 0;
-        DB_LAST  = DB->nreads;
-
-        p = rindex(las,'.');
+            p = rindex(Block_Arg_Root(parse),'.');
            if (p != NULL)
              { part = strtol(p+1,&eptr,10);
                if (*eptr == '\0' && eptr != p+1)
@@ -528,7 +584,6 @@ int main(int argc, char *argv[])
                      SYSTEM_READ_ERROR
                    fclose(dbfile);
                    DB_PART = part;
-                *p = '\0';
                  }
              }
          }
@@ -558,34 +613,43 @@ int main(int argc, char *argv[])
            fwrite(&QV_INDEX,sizeof(int64),1,QV_AFILE);
          }

-      //  Open overlap file
-
-      lpwd = PathTo(argv[c]);
-      if (DB_PART > 0)
-        input = Fopen(Catenate(lpwd,"/",las,Numbered_Suffix(".",DB_PART,".las")),"r");
-      else
-        input = Fopen(Catenate(lpwd,"/",las,".las"),"r");
-      if (input == NULL)
-        exit (1);
-
-      free(lpwd);
-      free(las);
-
          //  Get trace point spacing information

          fread(&novl,sizeof(int64),1,input);
          fread(&TRACE_SPACING,sizeof(int),1,input);

+          //  Initialize statistics gathering
+
+          { int i;
+
+            nreads = 0;
+            totlen = 0;
+            for (i = 0; i <= MAXQV; i++)
+              qgram[i] = sgram[i] = 0;
+          }
+
+          if (VERBOSE)
+            { printf("\n\nDASqv");
+              if (HGAP_MIN > 0)
+                printf(" -H%d",HGAP_MIN);
+              printf(" -c%d %s %s\n\n",COVERAGE,argv[1],argv[c]);
+              fflush(stdout);
+            }
+
          //  Process each read pile

          make_a_pass(input,CALCULATE_QVS,1);

-      fwrite(&COVERAGE,sizeof(int),1,QV_AFILE);
-      fwrite(&HGAP_MIN,sizeof(int),1,QV_AFILE);
+          //  Write out extras and close .qual track
+
+          Write_Extra(QV_AFILE,&ex_hgap);
+          Write_Extra(QV_AFILE,&ex_cest);
+          Write_Extra(QV_AFILE,&ex_qvs);
+          Write_Extra(QV_AFILE,&ex_dif);

          fclose(QV_AFILE);
          fclose(QV_DFILE);
-    }
+          fclose(input);

          //  If verbose output statistics summary to stdout

@@ -639,6 +703,10 @@ int main(int argc, char *argv[])

              printf("\n    Recommend \'DAStrim -g%d -b%d'\n\n",gval,bval);
            }
+        }
+
+      Free_Block_Arg(parse);
+    }

  //  Clean up


--- a/DASrealign.c
+++ b/DASrealign.c
@@ -1011,6 +1011,9 @@ int main(int argc, char *argv[])

    if (argc != 5)
      { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
+        fprintf(stderr,"\n");
+        fprintf(stderr,"      -v: Verbose mode, output statistics as proceed.\n");
+        fprintf(stderr,"      -l: minimum length alignment length.\n");
        exit (1);
      }
  }

--- a/DAStrim.c
+++ b/DAStrim.c
@@ -14,7 +14,6 @@
 #include <stdlib.h>
 #include <string.h>
 #include <stdint.h>
-#include <math.h>

 #include "DB.h"
 #include "align.h"
@@ -40,7 +39,7 @@

 //  Command format and global parameter variables

-static char *Usage = " [-v] -g<int> -b<int> <source:db> <overlaps:las> ...";
+static char *Usage = " [-v] [-g<int>] [-b<int>] <source:db> <overlaps:las> ...";

 static int     COVERAGE;   //  estimated coverage
 static int     BAD_QV;     //  qv >= and you are "bad"
@@ -53,13 +52,8 @@ static int     VERBOSE;
 #define LOWQ  0   //  Gap is spanned by many LAs and patchable
 #define SPAN  1   //  Gap has many paired LAs and patchable
 #define SPLIT 2   //  Gap is a chimer or an unpatchable gap
-#define ADPRE 3   //  Gap is due to adaptemer, trim prefix interval to left
-#define ADSUF 4   //  Gap is due to adaptemer, trim suffix interval to right
 #define ADAPT 3   //  Gap is due to adaptemer (internal only)

-static int AdPre = ADPRE;
-static int AdSuf = ADSUF;
-
 //  Good patch constants

 #define MIN_BLOCK  500    //  Minimum length of a good patch
@@ -1970,10 +1964,8 @@ static void GAPS(int aread, Overlap *ovls, int novl)
      return;
    }

-  if (VERBOSE)
-    { nreads += 1;
+  nreads += 1;
  totlen += alen;
-    }

  //  Partition into HQ-blocks

@@ -1992,10 +1984,8 @@ static void GAPS(int aread, Overlap *ovls, int novl)
  //  No blocks? ==> nothing to do

  if (nblk <= 0)
-    { if (VERBOSE)
    { nelim += 1;
      nelimbp += alen;
-        }
 #ifdef ANNOTATE
      fwrite(&HQ_INDEX,sizeof(int64),1,HQ_AFILE);
      fwrite(&SN_INDEX,sizeof(int64),1,SN_AFILE);
@@ -2084,8 +2074,7 @@ static void GAPS(int aread, Overlap *ovls, int novl)

  //  Accummulate statistics

-    if (VERBOSE)
-      { if (block[0].beg > 0)
+    if (block[0].beg > 0)
      { n5trm += 1;
        n5trmbp += block[0].beg;
      }
@@ -2117,7 +2106,6 @@ static void GAPS(int aread, Overlap *ovls, int novl)
            nchimbp += block[i].beg - block[i-1].end;
          }
      }
-      }

 #ifdef ANNOTATE
    fwrite(&(block[abeg].beg),sizeof(int),1,KP_DFILE);
@@ -2134,12 +2122,6 @@ static void GAPS(int aread, Overlap *ovls, int novl)

  //  Output .trim track for this read

-    if (abeg > 0)
-      { fwrite(&(block[0].beg),sizeof(int),1,TR_DFILE);
-        fwrite(&(block[abeg-1].end),sizeof(int),1,TR_DFILE);
-        fwrite(&AdPre,sizeof(int),1,TR_DFILE);
-        TR_INDEX += 3*sizeof(int);
-      }
    fwrite(&(block[abeg].beg),sizeof(int),1,TR_DFILE);
    fwrite(&(block[abeg].end),sizeof(int),1,TR_DFILE);
    TR_INDEX += 2*sizeof(int);
@@ -2149,12 +2131,6 @@ static void GAPS(int aread, Overlap *ovls, int novl)
        fwrite(&(block[i].end),sizeof(int),1,TR_DFILE);
        TR_INDEX += 3*sizeof(int);
      }
-    if (aend < nblk)
-      { fwrite(&AdSuf,sizeof(int),1,TR_DFILE);
-        fwrite(&(block[aend].beg),sizeof(int),1,TR_DFILE);
-        fwrite(&(block[nblk-1].end),sizeof(int),1,TR_DFILE);
-        TR_INDEX += 3*sizeof(int);
-      }
    fwrite(&TR_INDEX,sizeof(int64),1,TR_AFILE);
  }
 }
@@ -2280,13 +2256,19 @@ static int make_a_pass(FILE *input, void (*ACTION)(int, Overlap *, int), int tra
 }

 int main(int argc, char *argv[])
-{ FILE       *input;
-  char       *root, *dpwd;
-  char       *las, *lpwd;
+{ char       *root, *dpwd;
  int64       novl;
  DAZZ_TRACK *track;
  int         c;

+  DAZZ_EXTRA  ex_hgap, ex_cest;
+  DAZZ_EXTRA  ex_good, ex_bad, ex_trim;
+
+  char *good_name = "Good QV threshold";
+  char *bad_name  = "Bad QV threshold";
+  char *trim_name = "Trimming statistics";
+  int64 good64, bad64, tstats[18];
+
  //  Process arguments

  { int  i, j, k;
@@ -2320,20 +2302,10 @@ int main(int argc, char *argv[])

    if (argc < 3)
      { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
-        exit (1);
-      }
-
-    if (GOOD_QV < 0)
-      { fprintf(stderr,"%s: Must supply -g parameter\n",Prog_Name);
-        exit (1);
-      }
-    if (BAD_QV < 0)
-      { fprintf(stderr,"%s: Must supply -b parameter\n",Prog_Name);
-        exit (1);
-      }
-    if (GOOD_QV > BAD_QV)
-      { fprintf(stderr,"%s: Good QV threshold (%d) > Bad QV threshold (%d) ?\n",
-                       Prog_Name,GOOD_QV,BAD_QV);
+        fprintf(stderr,"\n");
+        fprintf(stderr,"      -v: Verbose mode, output statistics as proceed.\n");
+        fprintf(stderr,"      -g: Use as good qv threshold (and not auto-estimate).\n");
+        fprintf(stderr,"      -b: Use as bad qv threshold (and not auto-estimate).\n");
        exit (1);
      }
  }
@@ -2354,80 +2326,137 @@ int main(int argc, char *argv[])
        exit (1);
      }
    Trim_DB(DB);
+  }
+
+  //  Get .qual track and extras

  track = Load_Track(DB,"qual");
  if (track != NULL)
    { FILE       *afile;
-        int   size, tracklen, extra;
+      char       *aname;
+      int         extra, tracklen, size;
+      DAZZ_EXTRA  ex_qvs, ex_dif;

      QV_IDX = (int64 *) track->anno;
      QV     = (uint8 *) track->data;

-        //  if newer .qual tracks with -c meta data, grab it
+      aname = Strdup(Catenate(DB->path,".","qual",".anno"),"Allocating anno file");
+      if (aname == NULL)
+        exit (1);
+      afile  = fopen(aname,"r");

-        afile = fopen(Catenate(DB->path,".","qual",".anno"),"r");
      fread(&tracklen,sizeof(int),1,afile);
      fread(&size,sizeof(int),1,afile);
      fseeko(afile,0,SEEK_END);
      extra = ftell(afile) - (size*(tracklen+1) + 2*sizeof(int));
      fseeko(afile,-extra,SEEK_END);
-        if (extra == 2*sizeof(int))
-          { fread(&COVERAGE,sizeof(int),1,afile);
-            fread(&HGAP_MIN,sizeof(int),1,afile);
+      ex_hgap.nelem = 0;
+      if (Read_Extra(afile,aname,&ex_hgap) != 0)
+        { fprintf(stderr,"%s: Hgap threshold extra missing from .qual track?\n",Prog_Name);
+          exit (1);
        }
-        else if (extra == sizeof(int))
-          { fread(&COVERAGE,sizeof(int),1,afile);
-            HGAP_MIN = 0;
+      ex_cest.nelem = 0;
+      if (Read_Extra(afile,aname,&ex_cest) != 0)
+        { fprintf(stderr,"%s: Coverage estimate extra missing from .qual track?\n",Prog_Name);
+          exit (1);
        }
-        else
-          { COVERAGE = -1;
-            HGAP_MIN = 0;
+      ex_qvs.nelem = 0;
+      if (Read_Extra(afile,aname,&ex_qvs) != 0)
+        { fprintf(stderr,"%s: QV histogram extra missing from .qual track?\n",Prog_Name);
+          exit (1);
+        }
+      ex_dif.nelem = 0;
+      if (Read_Extra(afile,aname,&ex_dif) != 0)
+        { fprintf(stderr,"%s: Differences histogram extra missing from .qual track?\n",Prog_Name);
+          exit (1);
        }
      fclose(afile);
+
+      COVERAGE = (int) ((int64 *) (ex_cest.value))[0];
+      HGAP_MIN = (int) ((int64 *) (ex_hgap.value))[0];
+
+      //  Compute -g and -b parameters
+
+      { int64  qsum, qtotal;
+        int64 *qgram;
+        int    i, maxqv;
+        int    gv, bv;
+
+        qgram = (int64 *) (ex_qvs.value);
+        maxqv = ex_qvs.nelem - 1;
+
+        qtotal = 0;
+        for (i = 0; i < maxqv; i++)
+          qtotal += qgram[i];
+
+        bv = gv = -1;
+        qsum = 0;
+        for (i = maxqv-1; i >= 0; i--)
+          if (qgram[i] > 0)
+            { qsum += qgram[i];
+              if ((100.*qsum)/qtotal > 7. && bv < 0)
+                bv = i+1;
+              if ((100.*qsum)/qtotal > 20. && gv < 0)
+                gv = i+1;
+            }
+
+        if (GOOD_QV < 0)
+          GOOD_QV = gv;
+        if (BAD_QV < 0)
+          BAD_QV = bv;
+      }
    }
  else
    { fprintf(stderr,"%s: Must have a 'qual' track, run DASqv\n",Prog_Name);
      exit (1);
    }
+
+  if (GOOD_QV > BAD_QV)
+    { fprintf(stderr,"%s: Good QV threshold (%d) > Bad QV threshold (%d) ?\n",
+                     Prog_Name,GOOD_QV,BAD_QV);
+      exit (1);
    }

-  //  Initialize statistics gathering
+  //  Setup extras

-  if (VERBOSE)
-    { nreads  = 0;
-      totlen  = 0;
-      nelim   = 0;
-      n5trm   = 0;
-      n3trm   = 0;
-      natrm   = 0;
-      nelimbp = 0;
-      n5trmbp = 0;
-      n3trmbp = 0;
-      natrmbp = 0;
+  ex_good.vtype = DB_INT;     // Good QV threshold
+  ex_good.nelem = 1;
+  ex_good.accum = DB_EXACT;
+  ex_good.name  = good_name;
+  good64 = GOOD_QV;
+  ex_good.value = &good64;

-      ngaps   = 0;
-      nlowq   = 0;
-      nspan   = 0;
-      nchim   = 0;
-      ngapsbp = 0;
-      nlowqbp = 0;
-      nspanbp = 0;
-      nchimbp = 0;
+  ex_bad.vtype = DB_INT;      // Bad QV threshold
+  ex_bad.nelem = 1;
+  ex_bad.accum = DB_EXACT;
+  ex_bad.name  = bad_name;
+  bad64 = BAD_QV;
+  ex_bad.value = &bad64;

-      printf("\nDAStrim -g%d -b%d %s", GOOD_QV,BAD_QV,argv[1]);
-      for (c = 2; c < argc; c++)
-        printf(" %s",argv[c]);
-      printf("\n");
-    }
+  ex_trim.vtype = DB_INT;     // Trim statistics
+  ex_trim.nelem = 16;
+  ex_trim.accum = DB_SUM;
+  ex_trim.name  = trim_name;
+  ex_trim.value = &tstats;

-  //  Determine if overlap block is being processed and if so get first and last read
-  //    from .db file
+  //  For each .las file do

  dpwd = PathTo(argv[1]);
  root = Root(argv[1],".db");

  for (c = 2; c < argc; c++)
-    { las  = Root(argv[c],".las");
+    { Block_Looper *parse;
+      FILE         *input;
+
+      parse = Parse_Block_Arg(argv[c]);
+
+      while ((input = Next_Block_Arg(parse)) != NULL)
+        { DB_PART  = 0;
+          DB_FIRST = 0;
+          DB_LAST  = DB->nreads;
+
+          //  Determine if overlap block is being processed and if so get first and last read
+          //    from .db file

          { FILE *dbfile;
            char  buffer[2*MAX_NAME+100];
@@ -2435,11 +2464,7 @@ int main(int argc, char *argv[])
            int   i, part, nfiles, nblocks, cutoff, all, oindx;
            int64 size;

-        DB_PART  = 0;
-        DB_FIRST = 0;
-        DB_LAST  = DB->nreads;
-
-        p = rindex(las,'.');
+            p = rindex(Block_Arg_Root(parse),'.');
            if (p != NULL)
              { part = strtol(p+1,&eptr,10);
                if (*eptr == '\0' && eptr != p+1)
@@ -2462,7 +2487,6 @@ int main(int argc, char *argv[])
                      SYSTEM_READ_ERROR
                    fclose(dbfile);
                    DB_PART = part;
-                *p = '\0';
                  }
              }
          }
@@ -2506,32 +2530,67 @@ int main(int argc, char *argv[])
          SETUP(KP_AFILE,KP_DFILE,KP_INDEX,".keep.anno",".keep.data",0)
 #endif

-      //  Open overlap file
-
-      lpwd = PathTo(argv[c]);
-      if (DB_PART)
-        input = Fopen(Catenate(lpwd,"/",las,Numbered_Suffix(".",DB_PART,".las")),"r");
-      else
-        input = Fopen(Catenate(lpwd,"/",las,".las"),"r");
-      if (input == NULL)
-        exit (1);
-
-      free(lpwd);
-      free(las);
-
          //  Get trace point spacing information

          fread(&novl,sizeof(int64),1,input);
          fread(&TRACE_SPACING,sizeof(int),1,input);

-      make_a_pass(input,GAPS,1);
+          //  Initialize statistics gathering
+
+          nreads  = 0;
+          totlen  = 0;
+          nelim   = 0;
+          n5trm   = 0;
+          n3trm   = 0;
+          natrm   = 0;
+          nelimbp = 0;
+          n5trmbp = 0;
+          n3trmbp = 0;
+          natrmbp = 0;
+          ngaps   = 0;
+          nlowq   = 0;
+          nspan   = 0;
+          nchim   = 0;
+          ngapsbp = 0;
+          nlowqbp = 0;
+          nspanbp = 0;
+          nchimbp = 0;

-      //  Clean up
+          if (VERBOSE)
+            { printf("\nDAStrim");
+              if (HGAP_MIN  > 0)
+                printf(" -H%d",HGAP_MIN);
+              printf(" -c%d -g%d -b%d %s %s\n",COVERAGE,GOOD_QV,BAD_QV,argv[1],argv[c]);
+            }

-      fwrite(&GOOD_QV,sizeof(int),1,TR_AFILE);
-      fwrite(&BAD_QV,sizeof(int),1,TR_AFILE);
-      fwrite(&COVERAGE,sizeof(int),1,TR_AFILE);
-      fwrite(&HGAP_MIN,sizeof(int),1,TR_AFILE);
+          //  Process each read pile
+
+          make_a_pass(input,GAPS,1);
+
+          //  Write out extras and close .trim track
+
+          tstats[ 0] = nelim;
+          tstats[ 1] = n5trm;
+          tstats[ 2] = n3trm;
+          tstats[ 3] = natrm;
+          tstats[ 4] = nelimbp;
+          tstats[ 5] = n5trmbp;
+          tstats[ 6] = n3trmbp;
+          tstats[ 7] = natrmbp;
+          tstats[ 8] = ngaps;
+          tstats[ 9] = nlowq;
+          tstats[10] = nspan;
+          tstats[11] = nchim;
+          tstats[12] = ngapsbp;
+          tstats[13] = nlowqbp;
+          tstats[14] = nspanbp;
+          tstats[15] = nchimbp;
+
+          Write_Extra(TR_AFILE,&ex_hgap);
+          Write_Extra(TR_AFILE,&ex_cest);
+          Write_Extra(TR_AFILE,&ex_good);
+          Write_Extra(TR_AFILE,&ex_bad);
+          Write_Extra(TR_AFILE,&ex_trim);

          fclose(TR_AFILE);
          fclose(TR_DFILE);
@@ -2555,7 +2614,8 @@ int main(int argc, char *argv[])
          fclose(KP_AFILE);
          fclose(KP_DFILE);
 #endif
-    }
+
+          fclose(input);

          //  If verbose output statistics summary to stdout

@@ -2636,6 +2696,10 @@ int main(int argc, char *argv[])
              Print_Number(nlowqbp+nspanbp,12,stdout);
              printf(" (%5.1f%%) bases\n",(100.*(nlowqbp+nspanbp))/totlen);
            }
+        }
+
+      Free_Block_Arg(parse);
+    }

  free(dpwd);
  free(root);

--- a/DB.c
+++ b/DB.c
@@ -17,6 +17,7 @@
 #include <ctype.h>
 #include <unistd.h>
 #include <dirent.h>
+#include <limits.h>

 #include "DB.h"

@@ -599,8 +600,8 @@ error:
 //   for the retained reads.

 void Trim_DB(DAZZ_DB *db)
-{ int         i, j, r;
-  int         allflag, cutoff;
+{ int         i, j, r, f;
+  int         allflag, cutoff, css;
  int64       totlen;
  int         maxlen, nreads;
  DAZZ_TRACK *record;
@@ -678,12 +679,20 @@ void Trim_DB(DAZZ_DB *db)

  totlen = maxlen = 0;
  for (j = i = 0; i < nreads; i++)
-    { r = reads[i].rlen;
-      if ((reads[i].flags & DB_BEST) >= allflag && r >= cutoff)
+    { f = reads[i].flags;
+      if ((f & DB_CSS) == 0)
+        css = 0;
+      r = reads[i].rlen;
+      if ((f & DB_BEST) >= allflag && r >= cutoff)
        { totlen += r;
          if (r > maxlen)
            maxlen = r;
-          reads[j++] = reads[i];
+          reads[j] = reads[i];
+          if (css)
+            reads[j++].flags |= DB_CSS;
+          else
+            reads[j++].flags &= ~DB_CSS;
+          css = 1;
        }
    }
  
@@ -1989,3 +1998,175 @@ void Print_Read(char *s, int width)
      printf("\n");
    }
 }
+
+
+/*******************************************************************************************
+ *
+ *  COMMAND LINE BLOCK PARSER
+ *    Take a command line argument and interpret the '@' block number ranges.
+ *    Parse_Block_Arg produces an Block_Looper iterator object that can then
+ *    be invoked multiple times to iterate through all the files implied by
+ *    the @ pattern/range.
+ *
+ ********************************************************************************************/
+
+typedef struct
+  { int first, last, next;
+    char *root, *pwd, *ppnt;
+    char *slice;
+  } _Block_Looper;
+
+  //  Advance the iterator e_parse to the next file, open it, and return the file pointer
+  //   to it.  Return NULL if at the end of the list of files.
+
+FILE *Next_Block_Arg(Block_Looper *e_parse)
+{ _Block_Looper *parse = (_Block_Looper *) e_parse;
+
+  char *disp;
+  FILE *input;
+
+  parse->next += 1;
+  if (parse->next > parse->last)
+    return (NULL);
+
+  if (parse->next < 0)
+    disp  = parse->root;
+  else
+    disp = Numbered_Suffix(parse->root,parse->next,parse->ppnt);
+
+  if ((input = fopen(Catenate(parse->pwd,"/",disp,".las"),"r")) == NULL)
+    { if (parse->last != INT_MAX)
+        { fprintf(stderr,"%s: %s.las is not present\n",Prog_Name,disp);
+           exit (1);
+        }
+      return (NULL);
+    }
+  return (input);
+}
+
+  //  Reset the iterator e_parse to the first file
+
+void Reset_Block_Arg(Block_Looper *e_parse)
+{ _Block_Looper *parse = (_Block_Looper *) e_parse;
+
+  parse->next = parse->first - 1;
+}
+
+  //  Return a pointer to the path for the current file
+
+char *Block_Arg_Path(Block_Looper *e_parse)
+{ _Block_Looper *parse = (_Block_Looper *) e_parse;
+
+  return (parse->pwd);
+}
+
+  //  Return a pointer to the root name for the current file
+
+char *Block_Arg_Root(Block_Looper *e_parse)
+{ _Block_Looper *parse = (_Block_Looper *) e_parse;
+
+  if (parse->next < 0)
+    return (parse->root);
+  else
+    return (Numbered_Suffix(parse->root,parse->next,parse->ppnt));
+}
+
+  //  Free the iterator
+
+void Free_Block_Arg(Block_Looper *e_parse)
+{ _Block_Looper *parse = (_Block_Looper *) e_parse;
+
+  free(parse->root);
+  free(parse->pwd);
+  free(parse->slice);
+  free(parse);
+}
+
+char *Next_Block_Slice(Block_Looper *e_parse, int slice)
+{ _Block_Looper *parse = (_Block_Looper *) e_parse;
+
+  if (parse->slice == NULL)
+    { int size = strlen(parse->pwd) + strlen(Block_Arg_Root(parse)) + 30;
+      parse->slice =  (char *)  Malloc(size,"Block argument slice");
+      if (parse->slice == NULL)
+        exit (1);
+    }
+
+  if (parse->first < 0)
+    sprintf(parse->slice,"%s/%s",parse->pwd,parse->root);
+  else
+    sprintf(parse->slice,"%s/%s%c%d-%d%s",parse->pwd,parse->root,BLOCK_SYMBOL,parse->next+1,
+                                          parse->next+slice,parse->ppnt);
+  parse->next += slice;
+  return (parse->slice);
+}
+
+  //  Parse the command line argument and return an iterator to move through the
+  //    file names, setting it up to report the first file.
+
+Block_Looper *Parse_Block_Arg(char *arg)
+{ _Block_Looper *parse;
+  char *pwd, *root;
+  char *ppnt, *cpnt;
+  int   first, last;
+
+  parse = (_Block_Looper *) Malloc(sizeof(_Block_Looper),"Allocating parse node");
+  pwd   = PathTo(arg);
+  root  = Root(arg,".las");
+  if (parse == NULL || pwd == NULL || root == NULL)
+    exit (1);
+
+  ppnt = index(root,BLOCK_SYMBOL);
+  if (ppnt == NULL)
+    first = last = -1;
+  else
+    { if (index(ppnt+1,BLOCK_SYMBOL) != NULL)
+        { fprintf(stderr,"%s: Two or more occurences of %c-sign in source name '%s'\n",
+                         Prog_Name,BLOCK_SYMBOL,root);
+          exit (1);
+        }
+      *ppnt++ = '\0';
+      first = strtol(ppnt,&cpnt,10);
+      if (cpnt == ppnt)
+        { first = 1;
+          last  = INT_MAX;
+        }
+      else
+        { if (first < 0)
+            { fprintf(stderr,
+                      "%s: Integer following %c-sigan is less than 0 in source name '%s'\n",
+                      Prog_Name,BLOCK_SYMBOL,root);
+              exit (1);
+            }
+          if (*cpnt == '-')
+            { ppnt = cpnt+1;
+              last = strtol(ppnt,&cpnt,10);
+              if (cpnt == ppnt)
+                { fprintf(stderr,"%s: Second integer must follow - in source name '%s'\n",
+                                 Prog_Name,root);
+                  exit (1);
+                }
+              if (last < first)
+                { fprintf(stderr,
+                          "%s: 2nd integer is less than 1st integer in source name '%s'\n",
+                          Prog_Name,root);
+                  exit (1);
+                }
+              ppnt = cpnt;
+            }
+          else
+            { last = INT_MAX;
+              ppnt = cpnt;
+            }
+        }
+    }
+
+  parse->pwd   = pwd;
+  parse->root  = root;
+  parse->ppnt  = ppnt;
+  parse->first = first;
+  parse->last  = last;
+  parse->next  = first-1;
+  parse->slice = NULL;
+  return ((Block_Looper *) parse);
+}
--- a/DB.h
+++ b/DB.h
@@ -59,6 +59,8 @@ typedef signed long long   int64;
 typedef float              float32;
 typedef double             float64;

+#define LAST_READ_SYMBOL '$'
+#define BLOCK_SYMBOL     '@'

 /*******************************************************************************************
 *
@@ -567,4 +569,24 @@ int Read_All_Sequences(DAZZ_DB *db, int ascii);

 int List_DB_Files(char *path, void actor(char *path, char *extension));

+  //   Take a command line argument and interpret the '@' block number ranges.
+  //   Parse_Block_Arg produces a Block_Looper iterator object that can then
+  //   be invoked multiple times to iterate through all the files implied by
+  //   the @ pattern/range.  Next_Block_Slice returns a string encoing the next
+  //   slice files represented by an @-notation, and advances the iterator by
+  //   that many files.
+
+typedef void Block_Looper;
+
+Block_Looper *Parse_Block_Arg(char *arg);
+
+FILE *Next_Block_Arg(Block_Looper *e_parse);
+
+char *Next_Block_Slice(Block_Looper *e_parse,int slice);
+
+void  Reset_Block_Arg(Block_Looper *e_parse);  //  Reset iterator to first file
+char *Block_Arg_Path(Block_Looper *e_parse);   //  Path of current file
+char *Block_Arg_Root(Block_Looper *e_parse);   //  Root name of current file
+void  Free_Block_Arg(Block_Looper *e_parse);   //  Free the iterator
+
 #endif // _DAZZ_DB
--- a/Makefile
+++ b/Makefile
@@ -2,10 +2,16 @@ DEST_DIR = ~/bin

 CFLAGS = -O3 -Wall -Wextra -Wno-unused-result -fno-strict-aliasing

-ALL = DASqv DAStrim DASpatch DASedit DASmap DASrealign REPqv REPtrim
+ALL = DAScover DASqv DAStrim DASpatch DASedit DASmap DASrealign REPcover REPqv REPtrim

 all: $(ALL)

+DAScover: DAScover.c align.c align.h DB.c DB.h QV.c QV.h
+	gcc $(CFLAGS) -o DAScover DAScover.c align.c DB.c QV.c -lm
+
+REPcover: REPcover.c DB.c DB.h QV.c QV.h
+	gcc $(CFLAGS) -o REPcover REPcover.c DB.c QV.c -lm
+
 DASqv: DASqv.c align.c align.h DB.c DB.h QV.c QV.h
 	gcc $(CFLAGS) -o DASqv DASqv.c align.c DB.c QV.c -lm


--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@
 For typeset documentation, examples of use, and design philosophy please go to
 my [blog](https://dazzlerblog.wordpress.com/command-guides/dascrubber-command-guide).

-This is still an incomplete release.
+This is still a preliminary release.
 The current set of commands provide a pipeline that one can use to scrub reads and if desired
 to scrub the alignment piles (with DASrealign).  Ultimately DASpatch/DASedit and DASrealign will
 be replaced with more powerful programs that correct reads and not only scrub alignment piles,
@@ -19,27 +19,69 @@ The goal of scrubbing is to produce a set of edited reads that are guaranteed to
 and not chimers), and (b) have no very low quality stretches (i\.e\. the error rate
 never exceeds some reasonable maximum, 20% or so in the case of Pacbio data).  The
 secondary goal of scrubbing is to do so with the minimum removal of data and splitting
-of reads.  The \"DAS\" suite consists of a pipeline of several programs that in sequence accomplish
-the task of scrubbing.
+of reads.
+
+Note carefully that the current scrubbing pipeline requires that one has employed
+repeat-masking in the daligner run as per the DAMASKER module described in this
+[post](https://dazzlerblog.wordpress.com/2016/04/01/detecting-and-soft-masking-repeats).
+
+The current \"DAS\" suite consists of a pipeline of several programs that in sequence accomplish
+the task of scrubbing: DAScover &rarr; DASqv &rarr; DAStrim &rarr; DASpatch &rarr; DASedit.
+For the commands, the \<source\> argument must always refer to the entire DB, and only
+the \<overlaps\> arguments can involve a block number.  If \<overlaps\> involves a block
+number, e\.g\. Ecoli.2.las, then the .las file is expected to contain all the overlaps
+where the A-read is in block 2 of the underlying database.  The HPC.daligner scripts in the
+DALIGNER module produce such .las files as their final result.  Parameters are propoagated
+down the pipeline to subsequent phases via the annotation  tracks so one need not specify
+the same parameter over and over again, i.e, the parameters/flags -H, -c, -g, and -b.
+
+All programs add suffixes (e.g. .db, .las) as needed.
+For the commands that take multiple .las block files as arguments, e.g. DAScover, DASqv, ...,
+one can place a @-sign in the name, which is then interpreted as the sequence of files
+obtained by replacing the @-sign by 1, 2, 3, ... in sequence until a number is reached for
+which no file matches.  One can also place a @-sign followed by an integer, say, i, in which
+case the sequence starts at i.  Lastly, one can also place @i-j where i and j are integers, in
+which case the sequence is from i to j, inclusive.

 ```
-1. DASqv [-v] [-H<int>] -c<int> <subject:db> <overlaps:las> ...
+1. DAScover [-v] [-H<int>] [-m<track>]+ <subject:db> <overlaps:las> ...
 ```

 This command takes as input a database \<source\> and a sequence of sorted local alignments
-blocks, \<overlaps\>, produced by an overlap/daligner run for said database.  It is recommended
-that one employ repeat-masking in the overlap run as per the new DAMASKER module described in
-this post.  Note carefully that \<source\> must always refer to the entire DB, only \<overlaps\>
-can involve a block number.  If \<overlaps\> involves a block number, e\.g\. Ecoli.2.las, then
-the .las file is expected to contain all the overlaps where the A-read is in block 2 of the
-underlying database.  The HPC.daligner scripts in the DALIGNER module produce such .las files
-as their final result.
+blocks, \<overlaps\>, produced by an overlap/daligner run for said database.
+Note carefully that \<source\> must always refer to the entire DB, only the \<overlaps\> can
+involve block numbers.
+
+Using the local
+alignment-pile for each A-read, DAScover produces a histogram of the depth of coverage of
+each trace point tile that is not within one of the intervals of the optionally specified tracks.
+It places this histogram in a .covr track for the bock and these block tracks are merged
+later with Catrack.  If the -v option is set, the histogram for each block is displayed and an
+estimate of the coverage of the underlying target genome is output.
+
+If the overlap file contains a block number
+then the track files also contain a block number, e\.g\. \"DAScovr DB OVL.2\" will result in
+the track files DB.2.covr.[anno,data].  Furthermore, if DAScovr is run on .las blocks, then
+once it has been run on all the blocks of the DB, the block tracks must be concatenated
+into a single track for the entire database with Catrack in preparation for the next phase
+of scrubbing by DAStrim.
+
+```
+2. DASqv [-v] [-c<int>] <subject:db> <overlaps:las> ...
+```
+
+This command takes as input a database \<source\> and a sequence of sorted local alignments
+blocks, \<overlaps\>, produced by an overlap/daligner run for said database.
+A .covr track obtained by running DASqv and Catrack must be present for the entire data base.
+Note carefully that \<source\> must always refer to the entire DB, only the \<overlaps\> can
+involve a block number.

 Using the local alignment-pile for each A-read, DASqv produces a QV value for each complete
 segment of TRACE_SPACING bases (e\.g\. 100bp, the -s parameter to daligner). The quality value
-of the average percentile of the best 25-50% alignment matches covering it depending on the
-coverage estimate -c.  One must supply the -c parameter to the expected coverage of the genome
-in question.  All quality values over 50 are clipped to 50.
+of ecah trace tile is the average of the best 25-50% of the estimated coverage alignment matches,
+where the estimated coverage is computed from the histogram of the .covr track.
+If one supplies the -c parameter, than this value explicitly overrides the estimated coverage
+produced by default.  All quality values over 50 are clipped to 50.

 The -v option prints out a histogram of the segment align matches, and the quality values
 produced.  This histogram is useful in assessing, for a given data set, what constitutes the
@@ -52,21 +94,22 @@ reads are used in the overlap piles for H-reads to help assess and scrub the H-r
 are themselves not scrubbed.

 The quality values are written to a .qual track, that can be viewed by calling DBdump with
-the -i option set (\"i\" for \"intrinsic QV\").  If the overlap file contains a block number
-then the track files also contain a block number, e\.g\. \"DASqv -c50 DB OVL.2\" will result in
-the track files DB.2.qual.[anno,data].  Furthermore, if DASqv is run on .las blocks, then
-once it has been run on all the blocks of the DB, the block tracks must be concatenated
-into a single track for the entire database with Catrack in preparation for the next phase
-of scrubbing by DAStrim.
+the -i option set (\"i\" for \"intrinsic QV\").
+Like DAScovr and all other
+scrubber modules, block tracks are produced in response to block .las files and these must be
+concatenated with Catrack into a single .qual track for the entire DB in preparation for the
+next phase of scrubbing by DAStrim.

 ```
-2. DAStrim [-v] -g<int> -b<int> <subject:db> <overlaps:las> ...
+3. DAStrim [-v] [-g<int>] [-b<int>] <subject:db> <overlaps:las> ...
 ```

-This command takes as input a database \<source\> and a sequence of sorted local alignments
-blocks, \<overlaps\>, produced by an overlap/daligner run for said database.  A .qual track
+A DB-wide .qual track produced by DASqv and Catrack are required as input to this command.
+This command further takes as input a database \<source\> and a sequence of sorted local alignments
+blocks, \<overlaps\>, produced by an overlap/daligner run for said database.
+ A .qual track
 obtained by running DASqv must be present for the entire data base.  Note carefully that
-\<source\> must always refer to the entire DB, only \<overlaps\> can involve a block number.
+\<source\> must always refer to the entire DB, only \<overlaps\> can involve block numbers.

 Using the local alignment-pile for each A-read and the QV\'s for all the reads in the pile,
 DAStrim (1) finds and breaks all chimeric reads, (2) finds all missed adaptamers and retains
@@ -76,11 +119,9 @@ decisions conservatively so that what remains is very highly likely to be free o
 adaptamers, and undetected low-quality sequence segments.  Some of these artifacts may still
 get through, but at very low odds, less than 1 in 10,000 in our experience.  The decision
 process is guided by the parameters -g and -b which indicate the thresholds for considering
-intrinsic QV values good, bad, or unknown.  In our experience, -g should be set to the value
-for which 80% or more of the QV\'s produced by DASqv are better than this value, and -b should
-be set to the value for which 5-7% or more of the QV\'s are worse than this value.  Consult
-the histogram produced by DASqv to set these parameter.  We further recommend doing this
-for each project on a case-by-case basis.
+intrinsic QV values good, bad, or unknown.  By default these parameters are automatically set
+to be the 80'th and 93'rd percentiles of the qv-histograms hidden in the .qual track.  They
+may however be explicitly set at the command line to over-rule this default choice.

 The -v option prints out a report of how many chimer and adaptamer breaks were detected, how
 much sequence was trimmed, how many low-quality segments were spanned by alignments, and how
@@ -89,20 +130,21 @@ low-quality region, and so on.

 The retained high-quality intervals for each read are written to a .trim track, in
 left-to-right order with an indicator of whether the gap between two such intervals is spanned
-by local alignments or by span-consistent pairs of local alignments.  Like DASqv and all other
+by local alignments or by span-consistent pairs of local alignments.
+Like DAScovr and all other
 scrubber modules, block tracks are produced in response to block .las files and these must be
 concatenated with Catrack into a single .trim track for the entire DB in preparation for the
 next phase of scrubbing by DASpatch.

 ```
-3. DASpatch [-v] <subject:db> <overlaps:las> ...
+4. DASpatch [-v] <subject:db> <overlaps:las> ...
 ```

 This command takes as input a database \<source\> and a sequence of sorted local alignments
 blocks, \<overlaps\>, produced by an overlap/daligner run for said database.  A .qual track
 and a .trim track obtained by running DASqv and DAStrim must be present for the entire data
 base.  Note carefully that \<source\> must always refer to the entire DB, only \<overlaps\> can
-involve a block number.
+involve block numbers.

 Using the local alignment-pile for each A-read, the QV\'s for all the reads in the pile, and
 the hiqh-quality segments annotated by DAStrim, DASpatch selects a high-quality B-read segment
@@ -113,16 +155,17 @@ span/patch can fail.  This is very rare but does occur and is the number of such
 reported by DASpatch when the -v option is set.

 The B-read segments for each patch (or a special \"failure patch\") are written to a .patch
-track, in left-to-right order.  Like DASqv and all other scrubber modules, block tracks are
+track, in left-to-right order.  Like DAScovr and all other scrubber modules, block tracks are
 produced in response to block .las files and these must be concatenated with Catrack into a
 single .patch track for the entire DB in preparation for the next phase of scrubbing by DASedit.

 ```
-4. DASedit [-v] [-x<int>] <source:db> <target:db>
+5. DASedit [-v] [-x<int>] <source:db> <target:db>
 ```

 This command takes as input a database \<source\> for which a .trim, and .patch tracks have
-produced by DAStrim and DASpatch in sequence (and perforce DASqv initially).  Using the
+produced by DAStrim and DASpatch in sequence (and perforce DAScover and DASqv before them).
+Using the
 information in the two tracks, DASedit produces a new database \<target\> whose reads are the
 patched, high-quality sub-reads.  That is, every low quality segment is patched with the relevant
 sequence of another read, and some reads give rise to two or more reads if deemed chimers, or
@@ -130,7 +173,7 @@ no reads if the entire read was deemed junk.  This command can take considerable
 access pattern to read sequences (for the patching) is not sequential or localized, implying
 poor cache performance.

-The new database does not have a .qvs or .arr component, that is it is a a sequence or
+The new database does not have a .qvs or .arr component, that is, it is a a sequence or
 S-database (see the original Dazzler DB post).  Very importantly, the new database has exactly
 the same block divisions as the original.  That is, all patched subreads in a block of the new
 database have been derived from reads of the same block in the original database, and only
@@ -140,7 +183,7 @@ high-quality (i\.e\. not patched).  The program DASmap below can be used to outp
 information in either an easy-to-read or an easy-to-parse format.

 ```
-5. DASmap [-p] <path:db> [ <reads:FILE> |<reads:range> ... ]
+6. DASmap [-p] <path:db> [ <reads:FILE> |<reads:range> ... ]
 ```

 This command takes as input a database of patched reads \<source\> produced by DASedit, and for
@@ -173,7 +216,7 @@ some n):
 ```

 ```
-6. DASrealign [-v] [-l<int:(800)>] <block1> <block2> <source:las> <target:las>
+7. DASrealign [-v] [-l<int:(800)>] <block1> <block2> <source:las> <target:las>
 ```

 This command takes as input two blocks a patched database \<source\> created by DASedit, and the
@@ -193,7 +236,18 @@ is roughly 40X faster than the original daligner run and the collection of overl
 output can be feed directly into a string graph construction process.

 ```
-7. REPqv <subject:db> ...
+8. REPcover <subject:db> ...
+```
+
+This command takes as input a sequence of databases or blocks \<source\> and for each outputs
+a histogram of the coverage of the unmasked portions of the reads in the source along with
+a recommendation of the -c value with which to run DASqv.  The .covr track produced by
+DAScover must be present for all sources referred to.  The command is a quick way get the
+-v output of DAScover at any time after producing the coverage histograms and to get the
+histogram and coverage estimate for the entire data base (as opposed to a block of the database.
+
+```
+9. REPqv <subject:db> ...
 ```

 This command takes as input a sequence of databases or blocks \<source\> and for each outputs
@@ -201,10 +255,10 @@ a histogram of the intrinsic quality values of the reads in the source along wit
 recommendation of the -g and -b values with which to run DAStrim.  The .qual track produced
 by DASqv must be present for all sources referred to.  The command is a quick way to get
 the -v output of DASqv at any time after producing the intrinsic quality values and to get
-the histogram for the entire data base (as opposed to a block of the database).
+the histograms for the entire data base (as opposed to a block of the database).

 ```
-8. REPtrim <subject:db> ...
+10. REPtrim <subject:db> ...
 ```

 This command takes as input a sequence of databases or blocks \<source\> and for each outputs

--- a/REPcover.c
+++ b/REPcover.c
+/*******************************************************************************************
+ *
+ *  Read the .covr track of each db or db block on the command line and output a histogram
+ *    of the coverage of the unmasked portions and an estimate of the coverage of the
+ *    underlying genome
+ *
+ *  Author:  Gene Myers
+ *  Date  :  January 2017
+ *
+ *******************************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <math.h>
+
+#include "DB.h"
+
+//  Command format and global parameter variables
+
+static char *Usage = "<source:db> ...";
+
+int main(int argc, char *argv[])
+{ int c;
+
+  //  Process arguments
+
+  Prog_Name = Strdup("REPcover","");
+
+  if (argc < 2)
+    { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
+      exit (1);
+    }
+
+  //  Open trimmed DB and the qual-track
+
+  for (c = 1; c < argc; c++)
+    { DAZZ_DB    _DB, *DB  = &_DB;
+      DAZZ_EXTRA  ex_hgap, ex_covr;
+
+      //  Load DB
+
+      { int status;
+
+        status = Open_DB(argv[c],DB);
+        if (status < 0)
+          exit (1);
+        if (status == 1)
+          { fprintf(stderr,"%s: Cannot be called on a .dam index: %s\n",Prog_Name,argv[1]);
+            exit (1);
+          }
+        Trim_DB(DB);
+      }
+
+      //  Get .covr track extras
+
+      { FILE *afile;
+        char *aname;
+        int   extra;
+
+        afile = NULL;
+        if (DB->part)
+          { aname = Strdup(Catenate(DB->path,Numbered_Suffix(".",DB->part,"."),"covr",".anno"),
+                           "Allocating anno file");
+            if (aname == NULL)
+              exit (1);
+            afile  = fopen(aname,"r");
+            if (afile == NULL)
+             { fprintf(stderr,"%s: Must have a 'covr.%d' track, run DAScover\n",Prog_Name,DB->part);
+               exit (1);
+             }
+          }
+        else
+          { aname = Strdup(Catenate(DB->path,".","covr",".anno"),"Allocating anno file");
+            if (aname == NULL)
+              exit (1);
+            afile  = fopen(aname,"r");
+            if (afile == NULL)
+              { fprintf(stderr,"%s: Must have a 'covr' track, run DAScover\n",Prog_Name);
+                exit (1);
+              }
+          }
+
+        fseeko(afile,0,SEEK_END);
+        extra = ftell(afile) - sizeof(int)*2;
+        fseeko(afile,-extra,SEEK_END);
+        ex_covr.nelem = 0;
+        if (Read_Extra(afile,aname,&ex_covr) != 0)
+          { fprintf(stderr,"%s: Histogram extra missing from .covr track?\n",Prog_Name);
+            exit (1);
+          }
+        ex_hgap.nelem = 0;
+        if (Read_Extra(afile,aname,&ex_hgap) != 0)
+          { fprintf(stderr,"%s: Hgap threshold extra missing from .covr track?\n",Prog_Name);
+            exit (1);
+          }
+        fclose(afile);
+      }
+
+      //  Generate display
+
+      { char       *root;
+        int         i, cmax, hgap_min, cover;
+        int64       nreads, totlen;
+        int64      *cgram;
+        int64       ssum, stotal;
+
+        root   = Root(argv[c],".db");
+        nreads = DB->nreads;
+        totlen = DB->totlen;
+
+        hgap_min = (int) ((int64 *) (ex_hgap.value))[0];
+        cgram    = (int64 *) (ex_covr.value);
+        cmax     = ex_covr.nelem - 1;
+
+        printf("\nDAScover");
+        if (hgap_min > 0)
+          printf(" -H%d",hgap_min);
+        printf(" %s\n\n",root);
+
+        if (hgap_min > 0) 
+          { for (i = 0; i < DB->nreads; i++)
+              if (DB->reads[i].rlen < hgap_min)
+                { nreads -= 1;
+                  totlen -= DB->reads[i].rlen;
+                }
+          }
+
+        // Display histogram
+
+        printf("\nInput:  ");
+        Print_Number(nreads,7,stdout);
+        printf("reads,  ");
+        Print_Number(totlen,12,stdout);
+        printf(" bases");
+        if (hgap_min > 0)
+          { printf(" (another ");
+            Print_Number(DB->nreads-nreads,0,stdout);
+            printf(" were < H-length)");
+          }
+        printf("\n");
+
+        stotal = 0;
+        for (i = 0; i <= cmax; i++)
+          stotal += cgram[i];
+
+        printf("\nCoverage Histogram\n\n");
+        ssum = cgram[cmax];
+        if (ssum > 0)
+          printf("    %4d:  %9lld  %5.1f%%\n\n",
+                 cmax,cgram[cmax],(100.*ssum)/stotal);
+        stotal -= ssum;
+        ssum    = 0;
+        for (i = cmax-1; i >= 0; i--)
+          if (cgram[i] > 0)
+            { ssum += cgram[i];
+              printf("    %4d:  %9lld  %5.1f%%\n",
+                     i,cgram[i],(100.*ssum)/stotal);
+            }
+
+        i = 0;
+        while (cgram[i+1] < cgram[i])
+          i += 1;
+        for (cover = i++; i < cmax; i++)
+          if (cgram[cover] < cgram[i])
+            cover = i;
+
+        printf("\n  Coverage is estimated at %d\n\n",cover);
+
+        free(root);
+        Close_DB(DB);
+      }
+    }
+
+  free(Prog_Name);
+  exit (0);
+}
--- a/REPqv.c
+++ b/REPqv.c
@@ -20,16 +20,6 @@

 static char *Usage = "<source:db> ...";

-//  Global Variables
-
-#define  MAXQV   50     //  Max QV score is 50
-#define  MAXQV1  51
-
-static DAZZ_DB _DB, *DB  = &_DB;   //  Data base
-
-static int64  *QV_IDX;     //  qual track index
-static uint8  *QV;         //  qual track values
-
 int main(int argc, char *argv[])
 { int c;

@@ -45,13 +35,13 @@ int main(int argc, char *argv[])
  //  Open trimmed DB and the qual-track

  for (c = 1; c < argc; c++)
+
+    { DAZZ_DB    _DB, *DB  = &_DB;
+      DAZZ_EXTRA ex_hgap, ex_cest, ex_qvs, ex_dif;
+
+      //  Load DB
+
      { int status;
-      char       *root;
-      int         i, bval, gval, cover, hgap_min;
-      DAZZ_TRACK *track;
-      int64       nreads, totlen;
-      int64       qgram[MAXQV1];
-      int64       qsum, qtotal;

        status = Open_DB(argv[c],DB);
        if (status < 0)
@@ -61,76 +51,103 @@ int main(int argc, char *argv[])
            exit (1);
          }
        Trim_DB(DB);
+      }

-      track = Load_Track(DB,"qual");
-      if (track != NULL)
-        { FILE *afile;
-          int   size, tracklen, extra;
-
-          QV_IDX = (int64 *) track->anno;
-          QV     = (uint8 *) track->data;
+      //  Get .qual track extras

-          //  if newer .qual tracks with -c meta data, grab it
+      { FILE *afile;
+        char *aname;
+        int   extra, tracklen, size;

+        afile = NULL;
        if (DB->part)
-            { afile = fopen(Catenate(DB->path,
-                                  Numbered_Suffix(".",DB->part,"."),"qual",".anno"),"r");
+          { aname = Strdup(Catenate(DB->path,Numbered_Suffix(".",DB->part,"."),"qual",".anno"),
+                           "Allocating anno file");
+            if (aname == NULL)
+              exit (1);
+            afile = fopen(aname,"r");
            if (afile == NULL)
-                afile = fopen(Catenate(DB->path,".","qual",".anno"),"r");
+              { fprintf(stderr,"%s: Must have a 'qual.%d' track, run DASqv\n",Prog_Name,DB->part);
+                exit (1);
+              }
          }
        else
-            afile = fopen(Catenate(DB->path,".","qual",".anno"),"r");
+          { aname = Strdup(Catenate(DB->path,".","qual",".anno"),"Allocating anno file");
+            if (aname == NULL)
+              exit (1);
+            afile  = fopen(aname,"r");
+            if (afile == NULL)
+              { fprintf(stderr,"%s: Must have a 'qual' track, run DASqv\n",Prog_Name);
+                exit (1);
+              }
+          }
+
        fread(&tracklen,sizeof(int),1,afile);
        fread(&size,sizeof(int),1,afile);
        fseeko(afile,0,SEEK_END);
        extra = ftell(afile) - (size*(tracklen+1) + 2*sizeof(int));
        fseeko(afile,-extra,SEEK_END);
-          if (extra == 2*sizeof(int))
-            { fread(&cover,sizeof(int),1,afile);
-              fread(&hgap_min,sizeof(int),1,afile);
-            }
-          else if (extra == sizeof(int))
-            { fread(&cover,sizeof(int),1,afile);
-              hgap_min = 0;
+        ex_hgap.nelem = 0;
+        if (Read_Extra(afile,aname,&ex_hgap) != 0)
+          { fprintf(stderr,"%s: Hgap threshold extra missing from .qual track?\n",Prog_Name);
+            exit (1);
          }
-          else
-            { cover = -1;
-              hgap_min = 0;
+        ex_cest.nelem = 0;
+        if (Read_Extra(afile,aname,&ex_cest) != 0)
+          { fprintf(stderr,"%s: Coverage estimate extra missing from .qual track?\n",Prog_Name);
+            exit (1);
          }
-          fclose(afile);
+        ex_qvs.nelem = 0;
+        if (Read_Extra(afile,aname,&ex_qvs) != 0)
+          { fprintf(stderr,"%s: QV histogram extra missing from .qual track?\n",Prog_Name);
+            exit (1);
          }
-      else
-        { fprintf(stderr,"%s: Must have a 'qual' track, run DASqv\n",Prog_Name);
+        ex_dif.nelem = 0;
+        if (Read_Extra(afile,aname,&ex_dif) != 0)
+          { fprintf(stderr,"%s: Differences histogram extra missing from .qual track?\n",Prog_Name);
            exit (1);
          }
+        fclose(afile);
+      }
+
+      //  Generate display
+
+      { char       *root;
+        int64       nreads, totlen;
+        int         hgap_min, cover;
+        int64      *qgram, *sgram;
+        int         maxqv;
+
+        //  Get relevant variables

        root   = Root(argv[c],".db");
        nreads = DB->nreads;
        totlen = DB->totlen;

-      for (i = 0; i <= MAXQV; i++)
-        qgram[i] = 0;
-
-      for (i = 0; i < QV_IDX[nreads]; i++)
-        qgram[QV[i]] += 1;
+        hgap_min = (int) ((int64 *) (ex_hgap.value))[0];
+        cover    = (int) ((int64 *) (ex_cest.value))[0];
+        qgram    = (int64 *) (ex_qvs.value);
+        maxqv    = ex_qvs.nelem - 1;
+        sgram    = (int64 *) (ex_dif.value);

        printf("\nDASqv");
-      if (cover >= 0)
-        printf(" -c%d",cover);
-      else
-        printf(" -c??");
        if (hgap_min > 0)
          printf(" -H%d",hgap_min);
-      printf(" %s\n\n",root);
+        printf(" -c%d %s\n\n",cover,root);

        if (hgap_min > 0) 
-        { for (i = 0; i < DB->nreads; i++)
+          { int i;
+
+            for (i = 0; i < DB->nreads; i++)
              if (DB->reads[i].rlen < hgap_min)
                { nreads -= 1;
                  totlen -= DB->reads[i].rlen;
                }
          }
-      printf("  Input:  ");
+
+        //  Display histograms
+
+        printf("\n  Input:  ");
        Print_Number(nreads,7,stdout);
        printf("reads,  ");
        Print_Number(totlen,12,stdout);
@@ -142,8 +159,17 @@ int main(int argc, char *argv[])
          }
        printf("\n");

-      if (cover >= 0)
-        { int qv_deep;
+        { int64 ssum, qsum;
+          int64 stotal, qtotal;
+          int   qv_deep;
+          int   gval, bval;
+          int   i;
+
+          stotal = qtotal = 0;
+          for (i = 0; i <= maxqv; i++)
+            { stotal += sgram[i];
+              qtotal += qgram[i];
+            }

          if (cover >= 40)
            qv_deep = cover/8;
@@ -152,36 +178,45 @@ int main(int argc, char *argv[])
          else
            qv_deep = cover/4;

-          printf("\n  Histogram of q-values (average %d best)\n",2*qv_deep);
-        }
-      else
-        printf("\n  Histogram of q-values\n");
+          printf("\n  Histogram of q-values (average %d best)\n",qv_deep);
+          printf("\n                 Input                 QV\n");
+          qsum = qgram[maxqv];
+          ssum = sgram[maxqv];
+          printf("\n    %2d:  %9lld  %5.1f%%    %9lld  %5.1f%%\n\n",
+                 maxqv,sgram[maxqv],(100.*ssum)/stotal,qgram[maxqv],(100.*qsum)/qtotal);

-      qtotal = 0;
-      for (i = 0; i <= MAXQV; i++)
-        qtotal += qgram[i];
+          qtotal -= qsum;
+          stotal -= ssum;
+          ssum = qsum = 0;
+          for (i = maxqv-1; i >= 0; i--)
+            if (qgram[i] > 0)
+              { ssum += sgram[i];
+                qsum += qgram[i];
+                printf("    %2d:  %9lld  %5.1f%%    %9lld  %5.1f%%\n",
+                       i,sgram[i],(100.*ssum)/stotal,
+                         qgram[i],(100.*qsum)/qtotal);
+              }

-      qsum = qgram[MAXQV];
-      printf("\n      %2d:  %9lld  %5.1f%%\n\n",MAXQV,qgram[MAXQV],(100.*qsum)/qtotal);
+          //  Estimate -g and -b parameters

          bval = gval = -1;
-      qtotal -= qsum;
          qsum = 0;
-      for (i = MAXQV-1; i >= 0; i--)
+          for (i = maxqv-1; i >= 0; i--)
            if (qgram[i] > 0)
              { qsum += qgram[i];
-            printf("      %2d:  %9lld  %5.1f%%\n",i,qgram[i],(100.*qsum)/qtotal);
                if ((100.*qsum)/qtotal > 7. && bval < 0)
-              bval = i;
+                  bval = i+1;
                if ((100.*qsum)/qtotal > 20. && gval < 0)
-              gval = i;
+                  gval = i+1;
              }
  
          printf("\n  Recommend \'DAStrim -g%d -b%d'\n\n",gval,bval);
+        }

        free(root);
        Close_DB(DB);
      }
+    }

  free(Prog_Name);
  exit (0);

--- a/REPtrim.c
+++ b/REPtrim.c
@@ -21,22 +21,6 @@

 static char *Usage = "<source:db> ...";

-//  Gap states
-
-#define LOWQ  0   //  Gap is spanned by many LAs and patchable
-#define SPAN  1   //  Gap has many paired LAs and patchable
-#define SPLIT 2   //  Gap is a chimer or an unpatchable gap
-#define ADPRE 3   //  Gap is due to adaptemer, trim prefix interval to left
-#define ADSUF 4   //  Gap is due to adaptemer, trim suffix interval to right
-
-
-//  Global Variables (must exist across the processing of each pile)
-
-static DAZZ_DB _DB, *DB  = &_DB;   //  Data base
-
-static int64  *TRIM_IDX;   //  trim track index
-static int    *TRIM;       //  trim track values
-
 int main(int argc, char *argv[])
 { int c;

@@ -52,22 +36,13 @@ int main(int argc, char *argv[])
  //  Open trimmed DB and .qual and .trim tracks

  for (c = 1; c < argc; c++)
+
+    { DAZZ_DB    _DB, *DB  = &_DB;
+      DAZZ_EXTRA ex_hgap, ex_cest, ex_good, ex_bad, ex_trim;
+
+      //  Load DB
+
      { int status;
-      char       *root;
-      int         i, a, tb, te;
-      int         alen;
-      DAZZ_TRACK *track;
-      int64       nreads, totlen;
-      int64       nelim, nelimbp;
-      int64       n5trm, n5trmbp;
-      int64       n3trm, n3trmbp;
-      int64       natrm, natrmbp;
-      int64       ngaps, ngapsbp;
-      int64       nlowq, nlowqbp;
-      int64       nspan, nspanbp;
-      int64       nchim, nchimbp;
-      int         rlog, blog;
-      int         BAD_QV, GOOD_QV, HGAP_MIN;

        status = Open_DB(argv[c],DB);
        if (status < 0)
@@ -77,136 +52,135 @@ int main(int argc, char *argv[])
            exit (1);
          }
        Trim_DB(DB);
+      }

-      track = Load_Track(DB,"trim");
-      if (track != NULL)
-        { FILE *afile;
-          int   size, tracklen, extra;
+      //  Get .trim track extras

-          TRIM_IDX = (int64 *) track->anno;
-          TRIM     = (int *) track->data;
-          for (i = 0; i <= DB->nreads; i++)
-            TRIM_IDX[i] /= sizeof(int);
+      { FILE *afile;
+        char *aname;
+        int   extra, tracklen, size;

+        afile = NULL;
        if (DB->part)
-            { afile = fopen(Catenate(DB->path,
-                                  Numbered_Suffix(".",DB->part,"."),"trim",".anno"),"r");
+          { aname = Strdup(Catenate(DB->path,Numbered_Suffix(".",DB->part,"."),"trim",".anno"),
+                           "Allocating anno file");
+            if (aname == NULL)
+              exit (1);
+            afile  = fopen(aname,"r");
            if (afile == NULL)
-                afile = fopen(Catenate(DB->path,".","trim",".anno"),"r");
+              { fprintf(stderr,"%s: Must have a 'trim.%d' track, run DAStrim\n",Prog_Name,DB->part);
+                exit (1);
+              }
          }
        else
-            afile = fopen(Catenate(DB->path,".","trim",".anno"),"r");
+          { aname = Strdup(Catenate(DB->path,".","trim",".anno"),"Allocating anno file");
+            if (aname == NULL)
+              exit (1);
+            afile = fopen(aname,"r");
+            if (afile == NULL)
+              { fprintf(stderr,"%s: Must have a 'trim' track, run DAStrim\n",Prog_Name);
+                exit (1);
+              }
+          }
+
        fread(&tracklen,sizeof(int),1,afile);
        fread(&size,sizeof(int),1,afile);
        fseeko(afile,0,SEEK_END);
        extra = ftell(afile) - (size*(tracklen+1) + 2*sizeof(int));
        fseeko(afile,-extra,SEEK_END);
-          if (extra == 4*sizeof(int))
-            { fread(&GOOD_QV,sizeof(int),1,afile);
-              fread(&BAD_QV,sizeof(int),1,afile);
-              fread(&HGAP_MIN,sizeof(int),1,afile);
-              fread(&HGAP_MIN,sizeof(int),1,afile);
+        ex_hgap.nelem = 0;
+        if (Read_Extra(afile,aname,&ex_hgap) != 0)
+          { fprintf(stderr,"%s: Hgap threshold extra missing from .trim track?\n",Prog_Name);
+            exit (1);
          }
-          else if (extra == 3*sizeof(int) || extra == 2*sizeof(int))
-            { fread(&GOOD_QV,sizeof(int),1,afile);
-              fread(&BAD_QV,sizeof(int),1,afile);
-              HGAP_MIN = 0;
+        ex_cest.nelem = 0;
+        if (Read_Extra(afile,aname,&ex_cest) != 0)
+          { fprintf(stderr,"%s: Coverage estimate extra missing from .trim track?\n",Prog_Name);
+            exit (1);
          }
-          else
-            { GOOD_QV  = -1;
-              BAD_QV   = -1;
-              HGAP_MIN = 0;
+        ex_good.nelem = 0;
+        if (Read_Extra(afile,aname,&ex_good) != 0)
+          { fprintf(stderr,"%s: Good QV threshold extra missing from .trim track?\n",Prog_Name);
+            exit (1);
          }
-          fclose(afile);
+        ex_bad.nelem = 0;
+        if (Read_Extra(afile,aname,&ex_bad) != 0)
+          { fprintf(stderr,"%s: Bad QV threshdold extra missing from .trim track?\n",Prog_Name);
+            exit (1);
          }
-      else
-        { fprintf(stderr,"%s: Must have a 'trim' track, run DAStrim\n",Prog_Name);
+        ex_trim.nelem = 0;
+        if (Read_Extra(afile,aname,&ex_trim) != 0)
+          { fprintf(stderr,"%s: Trimming statistics extra missing from .trim track?\n",Prog_Name);
            exit (1);
          }
+        fclose(afile);
+      }
+
+      //  Generate Display
+
+      { char       *root;
+        int64       nreads, totlen;
+        int64       nelim, nelimbp;
+        int64       n5trm, n5trmbp;
+        int64       n3trm, n3trmbp;
+        int64       natrm, natrmbp;
+        int64       ngaps, ngapsbp;
+        int64       nlowq, nlowqbp;
+        int64       nspan, nspanbp;
+        int64       nchim, nchimbp;
+        int         rlog, blog;
+        int         cover, hgap_min;
+        int         bad_qv, good_qv;
+        int64      *tstats;
+
+        //  Get relevant variables

        root   = Root(argv[c],".db");
        nreads = DB->nreads;
        totlen = DB->totlen;

-      nelim   = 0;
-      n5trm   = 0;
-      n3trm   = 0;
-      natrm   = 0;
-      nelimbp = 0;
-      n5trmbp = 0;
-      n3trmbp = 0;
-      natrmbp = 0;
-
-      ngaps   = 0;
-      nlowq   = 0;
-      nspan   = 0;
-      nchim   = 0;
-      ngapsbp = 0;
-      nlowqbp = 0;
-      nspanbp = 0;
-      nchimbp = 0;
-
-      for (a = 0; a < DB->nreads; a++)
-        { tb = TRIM_IDX[a];
-          te = TRIM_IDX[a+1];
-          alen  = DB->reads[a].rlen;
-          if (alen < HGAP_MIN)
+        hgap_min = (int) ((int64 *) (ex_hgap.value))[0];
+        cover    = (int) ((int64 *) (ex_cest.value))[0];
+        good_qv  = (int) ((int64 *) (ex_good.value))[0];
+        bad_qv   = (int) ((int64 *) (ex_bad.value))[0];
+        tstats   = (int64 *) (ex_trim.value);
+
+        nelim   = tstats[0];
+        n5trm   = tstats[1];
+        n3trm   = tstats[2];
+        natrm   = tstats[3];
+        nelimbp = tstats[4];
+        n5trmbp = tstats[5];
+        n3trmbp = tstats[6];
+        natrmbp = tstats[7];
+
+        ngaps   = tstats[8];
+        nlowq   = tstats[9];
+        nspan   = tstats[10];
+        nchim   = tstats[11];
+        ngapsbp = tstats[12];
+        nlowqbp = tstats[13];
+        nspanbp = tstats[14];
+        nchimbp = tstats[15];
+
+        printf("\nDAStrim");
+        if (hgap_min > 0)
+          printf(" [-H%d]",hgap_min);
+        printf(" -c%d -g%d -b%d %s\n\n",cover,good_qv,bad_qv,root);
+
+        //  Compensate for HGAP
+
+        if (hgap_min > 0)
+          { int i;
+
+            for (i = 0; i < DB->nreads; i++)
+              if (DB->reads[i].rlen < hgap_min)
                { nreads -= 1;
-              totlen -= alen;
-            }
-          else if (tb >= te)
-            { nelim += 1;
-              nelimbp += alen;
-            }
-          else
-            { if (TRIM[tb] > 0)
-                { n5trm += 1;
-                  n5trmbp += TRIM[tb];
-                } 
-              if (TRIM[te-1] < alen)
-                { n3trm += 1;
-                  n3trmbp += alen - TRIM[te-1];
-                } 
-              while (tb + 3 < te)
-                { ngaps += 1;
-                  ngapsbp += TRIM[tb+3] - TRIM[tb+1];
-                  if (TRIM[tb+2] == LOWQ)
-                    { nlowq += 1;
-                      nlowqbp += TRIM[tb+3] - TRIM[tb+1];
-                    }
-                  else if (TRIM[tb+2] == SPAN)
-                    { nspan += 1;
-                      nspanbp += TRIM[tb+3] - TRIM[tb+1];
-                    }
-                  else if (TRIM[tb+2] == ADPRE)
-                    { natrm += 1;
-                      natrmbp += TRIM[tb+3] - TRIM[tb];
-                    }
-                  else if (TRIM[tb+2] == ADSUF)
-                    { natrm += 1;
-                      natrmbp += TRIM[tb+4] - TRIM[tb+1];
-                    }
-                  else
-                    { nchim += 1;
-                      nchimbp += TRIM[tb+3] - TRIM[tb+1];
-                    }
-                  tb += 3;
-                }
+                  totlen -= DB->reads[i].rlen;
                }
          }

-      printf("\nStatistics for DAStrim");
-      if (GOOD_QV >= 0)
-        printf(" -g%d",GOOD_QV);
-      else
-        printf(" -g??");
-      if (BAD_QV >= 0)
-        printf(" -b%d",BAD_QV);
-      else
-        printf(" -b??");
-      if (HGAP_MIN > 0)
-        printf(" [-H%d]",HGAP_MIN);
-      printf(" %s\n\n",root);
+        //  Compute maximum field widths of statistics

        { int64 mult;

@@ -233,12 +207,14 @@ int main(int argc, char *argv[])
            blog += (blog-1)/3;
        }

+        //  Display the statistices
+
        printf("  Input:    ");
        Print_Number((int64) nreads,rlog,stdout);
        printf(" (100.0%%) reads     ");
        Print_Number(totlen,blog,stdout);
        printf(" (100.0%%) bases");
-      if (HGAP_MIN > 0)
+        if (hgap_min > 0)
          { printf(" (another ");
            Print_Number((int64) (DB->nreads-nreads),0,stdout);
            printf(" were < H-length)");
@@ -263,13 +239,11 @@ int main(int argc, char *argv[])
        Print_Number(n3trmbp,blog,stdout);
        printf(" (%5.1f%%) bases\n",(100.*n3trmbp)/totlen);

-      if (natrm > 0)
-        { printf("  Adapter:  ");
+        printf("  Adapter:  ");
        Print_Number(natrm,rlog,stdout);
        printf(" (%5.1f%%) reads     ",(100.*natrm)/nreads);
        Print_Number(natrmbp,blog,stdout);
        printf(" (%5.1f%%) bases\n",(100.*natrmbp)/totlen);
-        }

        printf("\n");

@@ -314,6 +288,7 @@ int main(int argc, char *argv[])
        free(root);
        Close_DB(DB);
      }
+    }

  free(Prog_Name);
  exit (0);

--- a/align.c
+++ b/align.c
@@ -34,7 +34,8 @@
 #undef  DEBUG_EXTEND       //  Show waves of Extend_Until_Overlap

 #undef  DEBUG_ALIGN        //  Show division points of Compute_Trace
-#undef  DEBUG_SCRIPT       //  Show trace additions for Compute_Trace
+#undef  DEBUG_TRACE        //  Show trace additions for Compute_Trace
+#undef  DEBUG_SCRIPT       //  Show script additions for Compute_Trace
 #undef  DEBUG_AWAVE        //  Show F/R waves of Compute_Trace

 #undef  SHOW_TRACE         //  Show full trace for Print_Alignment
@@ -3060,13 +3061,24 @@ int Write_Overlap(FILE *output, Overlap *ovl, int tbytes)
  return (0);
 }

-void Compress_TraceTo8(Overlap *ovl)
+int Compress_TraceTo8(Overlap *ovl, int check)
 { uint16 *t16 = (uint16 *) ovl->path.trace;
  uint8  *t8  = (uint8  *) ovl->path.trace;
-  int     j;
+  int     j, x;

+  if (check)
+    for (j = 0; j < ovl->path.tlen; j++)
+      { x = t16[j];
+        if (x > 255)
+          { fprintf(stderr,"%s: Compression of trace to bytes fails, value too big\n",Prog_Name);
+            EXIT(1);
+          }
+        t8[j] = (uint8) x;
+      }
+  else
    for (j = 0; j < ovl->path.tlen; j++)
      t8[j] = (uint8) (t16[j]);
+  return (0);
 }

 void Decompress_TraceTo16(Overlap *ovl)
@@ -3909,45 +3921,20 @@ static int depth = 0;

 typedef struct
  { int    *Stop;        //  Ongoing stack of alignment indels
+    uint16 *Trace;       //  Base of Trace Vector
    char *Aabs, *Babs;   //  Absolute base of A and B sequences

    int  **PVF, **PHF;   //  List of waves for iterative np algorithms
    int   mida,  midb;   //  mid point division for mid-point algorithms

    int   *VF,   *VB;    //  Forward/Reverse waves for nd algorithms
-                         //  (defunct: were used for O(nd) algorithms)
  } Trace_Waves;

-static int dandc_nd(char *A, int M, char *B, int N, Trace_Waves *wave)
+static int split_nd(char *A, int M, char *B, int N, Trace_Waves *wave, int *px, int *py)
 { int x, y;
  int D;

-#ifdef DEBUG_ALIGN
-  printf("%*s %ld,%ld: %d vs %d\n",depth,"",A-wave->Aabs,B-wave->Babs,M,N);
-#endif
-
-  if (M <= 0)
-    { x = (wave->Aabs-A)-1;
-      for (y = 1; y <= N; y++)
-        { *wave->Stop++ = x;
-#ifdef DEBUG_SCRIPT
-          printf("%*s *I %ld(%ld)\n",depth,"",y+(B-wave->Babs),(A-wave->Aabs)+1);
-#endif
-        }
-      return (N);
-    }
-  if (N <= 0)
-    { y = (B-wave->Babs)+1;
-      for (x = 1; x <= M; x++)
-        { *wave->Stop++ = y;
-#ifdef DEBUG_SCRIPT
-          printf("%*s *D %ld(%ld)\n",depth,"",x+(A-wave->Aabs),(B-wave->Babs)+1);
-#endif
-        }
-      return (M);
-    }
-
-  { int  *VF = wave->VF;
+  int  *VF = wave->VF;
  int  *VB = wave->VB;
  int   flow;  //  fhgh == D !
  int   blow, bhgh;
@@ -3961,8 +3948,10 @@ static int dandc_nd(char *A, int M, char *B, int N, Trace_Waves *wave)
    { while (y < M && B[y] == A[y])
        y += 1;
      if (y >= M && N == M)
+        { *px = *py = M;
          return (0);
        }
+    }

  flow   = 0;
  VF[0]  = y;
@@ -4021,7 +4010,9 @@ static int dandc_nd(char *A, int M, char *B, int N, Trace_Waves *wave)
                  else
                    y = r+1;
                  x = k+y;
-                    goto OVERLAP2;
+                  *px = x;
+                  *py = y;
+                  return (D);
                }
            }

@@ -4076,7 +4067,9 @@ static int dandc_nd(char *A, int M, char *B, int N, Trace_Waves *wave)
                  else
                    y = r;
                  x = k+y;
-                    goto OVERLAP2;
+                  *px = x;
+                  *py = y;
+                  return (D);
                }
            }

@@ -4099,36 +4092,215 @@ static int dandc_nd(char *A, int M, char *B, int N, Trace_Waves *wave)
    }
 }

-OVERLAP2:
+static int trace_nd(char *A, int M, char *B, int N, Trace_Waves *wave, int tspace)
+{ int x, y;
+  int D, s;

+#ifdef DEBUG_ALIGN
+  printf("%*s %ld,%ld: %d vs %d\n",depth,"",A-wave->Aabs,B-wave->Babs,M,N);
+  fflush(stdout);
+#endif
+
+  if (M <= 0)
+    { y = (((A-wave->Aabs)/tspace) << 1);
+      wave->Trace[y] += N;
+      wave->Trace[y+1] += N;
+#ifdef DEBUG_TRACE
+      printf("%*s Adding1 (%d,%d) to tp %d(%d,%d)\n",depth,"",N,N,y>>1,
+                                                     wave->Trace[y+1],wave->Trace[y]);
+      fflush(stdout);
+#endif
+      return (N);
+    }
+
+  if (N <= 0)
+    { x = A - wave->Aabs;
+      y = x/tspace;
+      x = (y+1)*tspace - x;
+      y <<= 1;
+      for (s = M; s > 0; s -= x, x = tspace)
+        { if (x > s)
+            x = s;
+          wave->Trace[y] += x;
+#ifdef DEBUG_TRACE
+          printf("%*s Adding2 (0,%d) to tp %d(%d,%d)\n",depth,"",x,y>>1,
+                                                        wave->Trace[y+1],wave->Trace[y]);
+          fflush(stdout);
+#endif
+          y += 2;
+        }
+      return (M);
+    }
+
+  D = split_nd(A,M,B,N,wave,&x,&y);
+
+  if (D > 1)
+    {
 #ifdef DEBUG_ALIGN
      printf("%*s (%d,%d) @ %d\n",depth,"",x,y,D);
      fflush(stdout);
+      depth += 2;
+#endif
+
+      s = A-wave->Aabs;
+      if ((s/tspace+1)*tspace - s >= x)
+        { s = ((s/tspace)<<1);
+          wave->Trace[s] += (D+1)/2;
+          wave->Trace[s+1] += y;
+#ifdef DEBUG_TRACE
+          printf("%*s Adding3 (%d,%d) to tp %d(%d,%d)\n",depth,"",y,(D+1)/2,s>>1,
+                                                         wave->Trace[s+1],wave->Trace[s]);
+          fflush(stdout);
+#endif
+        }
+      else
+        trace_nd(A,x,B,y,wave,tspace);
+
+      s = (A+x)-wave->Aabs;
+      if ((s/tspace+1)*tspace - s >= M-x)
+        { s = ((s/tspace)<<1);
+          wave->Trace[s] += D/2;
+          wave->Trace[s+1] += N-y;
+#ifdef DEBUG_TRACE
+          printf("%*s Adding4 (%d,%d)) to tp %d(%d,%d)\n",depth,"",N-y,D/2,s>>1,
+                                                          wave->Trace[s+1],wave->Trace[s]);
+          fflush(stdout);
+#endif
+        }
+      else
+        trace_nd(A+x,M-x,B+y,N-y,wave,tspace);
+
+#ifdef DEBUG_ALIGN
+      depth -= 2;
+#endif
+    }
+
+  else
+    { int u, v;
+
+      if (D == 0 || M < N)
+        s = x;
+      else
+        s = x-1;
+      if (s > 0)
+        { u = A - wave->Aabs;
+          v = u/tspace;
+          u = (v+1)*tspace - u;
+          for (v <<= 1; s > 0; s -= u, u = tspace)
+            { if (u > s)
+                u = s;
+              wave->Trace[v+1] += u;
+#ifdef DEBUG_TRACE
+              printf("%*s Adding5 (%d,0)) to tp %d(%d,%d)\n",depth,"",u,v>>1,
+                                                             wave->Trace[v+1],wave->Trace[v]);
+              fflush(stdout);
 #endif
+              v += 2;
+            }
+        }
+
+      if (D == 0)
+        return (D);
+
+      if (M < N)
+        y = ((((A+x)-wave->Aabs)/tspace)<<1);
+      else
+        y = ((((A+(x-1))-wave->Aabs)/tspace)<<1);
+      wave->Trace[y] += 1;
+      if (M <= N)
+        wave->Trace[y+1] += 1;
+#ifdef DEBUG_TRACE
+      printf("%*s Adding5 (%d,1)) to tp %d(%d,%d)\n",depth,"",N>=M,y>>1,
+                                                     wave->Trace[y+1],wave->Trace[y]);
+      fflush(stdout);
+#endif
+
+      s = M-x;
+      if (s > 0)
+        { u = (A+x) - wave->Aabs;
+          v = u/tspace;
+          u = (v+1)*tspace - u;
+          for (v <<= 1; s > 0; s -= u, u = tspace)
+            { if (u > s)
+                u = s;
+              wave->Trace[v+1] += u;
+#ifdef DEBUG_TRACE
+              printf("%*s Adding5 (%d,0)) to tp %d(%d,%d)\n",depth,"",u,v>>1,
+                                                             wave->Trace[v+1],wave->Trace[v]);
+              fflush(stdout);
+#endif
+              v += 2;
+            }
+        }
+    }
+
+  return (D);
+}
+
+static int dandc_nd(char *A, int M, char *B, int N, Trace_Waves *wave)
+{ int x, y;
+  int D;
+
+#ifdef DEBUG_ALIGN
+  printf("%*s %ld,%ld: %d vs %d\n",depth,"",A-wave->Aabs,B-wave->Babs,M,N);
+#endif
+
+  if (M <= 0)
+    { x = (wave->Aabs-A)-1;
+      for (y = 1; y <= N; y++)
+        { *wave->Stop++ = x;
+#ifdef DEBUG_SCRIPT
+          printf("%*s *I %ld(%ld)\n",depth,"",y+(B-wave->Babs),(A-wave->Aabs)+1);
+#endif
+        }
+      return (N);
+    }
+
+  if (N <= 0)
+    { y = (B-wave->Babs)+1;
+      for (x = 1; x <= M; x++)
+        { *wave->Stop++ = y;
+#ifdef DEBUG_SCRIPT
+          printf("%*s *D %ld(%ld)\n",depth,"",x+(A-wave->Aabs),(B-wave->Babs)+1);
+#endif
+        }
+      return (M);
+    }
+
+  D = split_nd(A,M,B,N,wave,&x,&y);
+
  if (D > 1)
    { 
 #ifdef DEBUG_ALIGN
+      printf("%*s (%d,%d) @ %d\n",depth,"",x,y,D);
+      fflush(stdout);
      depth += 2;
 #endif
+
      dandc_nd(A,x,B,y,wave);
      dandc_nd(A+x,M-x,B+y,N-y,wave);
+
 #ifdef DEBUG_ALIGN
      depth -= 2;
 #endif
    }
+
  else if (D == 1)
+
    { if (M > N)
        { *wave->Stop++ = (B-wave->Babs)+y+1;
 #ifdef DEBUG_SCRIPT
          printf("%*s  D %ld(%ld)\n",depth,"",(A-wave->Aabs)+x,(B-wave->Babs)+y+1);
 #endif
        }
+
      else if (M < N)
        { *wave->Stop++ = (wave->Aabs-A)-x-1;
 #ifdef DEBUG_SCRIPT
          printf("%*s  I %ld(%ld)\n",depth,"",(B-wave->Babs)+y,(A-wave->Aabs)+x+1);
 #endif
        }
+
 #ifdef DEBUG_SCRIPT
      else
        printf("%*s  %ld S %ld\n",depth,"",(wave->Aabs-A)+x,(B-wave->Babs)+y);
@@ -4138,47 +4310,138 @@ OVERLAP2:
  return (D);
 }

-
-static int Compute_Trace_ND_ALL(Alignment *align, Work_Data *ework)
+int Compute_Alignment(Alignment *align, Work_Data *ework, int task, int tspace)
 { _Work_Data *work = (_Work_Data *) ework;
  Trace_Waves wave;

  int     L, D;
  int     asub, bsub;
+  char   *aseq, *bseq;
  Path   *path;
  int    *trace;
+  uint16 *strace;

  path = align->path;
  asub = path->aepos-path->abpos;
  bsub = path->bepos-path->bbpos;
+  aseq = align->aseq+path->abpos;
+  bseq = align->bseq+path->bbpos;

-  if (asub < bsub)
-    L = bsub;
+  if (task != DIFF_ONLY)
+    { if (task == DIFF_TRACE || task == PLUS_TRACE)
+        L = 2*(((path->aepos + (tspace-1))/tspace - path->abpos/tspace) + 1)*sizeof(uint16);
+      else if (asub < bsub)
+        L = bsub*sizeof(int);
      else
-    L = asub;
-  L *= sizeof(int);
+        L = asub*sizeof(int);
      if (L > work->tramax)
        if (enlarge_trace(work,L))
          EXIT(1);
+    }

-  trace = wave.Stop = ((int *) work->trace);
+  trace  = ((int *) work->trace);
+  strace = ((uint16 *) work->trace);

-  D = 2*(path->diffs + 4)*sizeof(int);
+  if (asub > bsub)
+    D = (4*asub+6)*sizeof(int);
+  else
+    D = (4*bsub+6)*sizeof(int);
  if (D > work->vecmax)
    if (enlarge_vector(work,D))
      EXIT(1);
  
-  D = (path->diffs+3)/2;
-  wave.VF = ((int *) work->vector) + (D+1);
-  wave.VB = wave.VF + (2*D+1);
+  if (asub > bsub)
+    { wave.VF = ((int *) work->vector) + (asub+1);
+      wave.VB = wave.VF + (2*asub+3);
+    }
+  else
+    { wave.VF = ((int *) work->vector) + (bsub+1);
+      wave.VB = wave.VF + (2*bsub+3);
+    }

  wave.Aabs = align->aseq;
  wave.Babs = align->bseq;

-  path->diffs = dandc_nd(align->aseq+path->abpos,path->aepos-path->abpos,
-                         align->bseq+path->bbpos,path->bepos-path->bbpos,&wave);
-  path->trace = trace;
+  if (task == DIFF_ONLY)
+    { wave.mida = -1;
+      if (asub <= 0)
+        path->diffs = bsub;
+      else if (bsub <= 0)
+        path->diffs = asub;
+      else
+        path->diffs = split_nd(aseq,asub,bseq,bsub,&wave,&wave.mida,&wave.midb);
+      path->trace = NULL;
+      path->tlen  = -1;
+      return (0);
+    }
+
+  else if (task < DIFF_ONLY && wave.mida >= 0)
+    { int x = wave.mida;
+      int y = wave.midb;
+
+      if (task == PLUS_ALIGN)
+        { wave.Stop = trace;
+          dandc_nd(aseq,x,bseq,y,&wave);
+          dandc_nd(aseq+x,asub-x,bseq+y,bsub-y,&wave);
          path->tlen = wave.Stop - trace;
+        }
+      else
+        { int i, n;
+
+          wave.Trace = strace - 2*(path->abpos/tspace);
+          n  = L/sizeof(uint16);
+          for (i = 0; i < n; i++)
+            strace[i] = 0;
+
+          trace_nd(aseq,x,bseq,y,&wave,tspace);
+          trace_nd(aseq+x,asub-x,bseq+y,bsub-y,&wave,tspace);
+
+          if (strace[n-1] != 0)   //  Last element is to capture all inserts on TP boundary
+            { strace[n-3] += strace[n-1];
+              strace[n-4] += strace[n-2];
+            }
+          path->tlen = n-2;
+
+#ifdef DEBUG_SCRIPT
+          printf("  Trace:\n");
+          for (i = 0; i < path->tlen; i += 2)
+            printf("    %3d  %3d\n",strace[i],strace[i+1]);
+          fflush(stdout);
+#endif
+        }
+    }
+
+  else 
+    { if (task == DIFF_ALIGN)
+        { wave.Stop   = trace;
+          path->diffs = dandc_nd(aseq,asub,bseq,bsub,&wave);
+          path->tlen  = wave.Stop - trace;
+        }
+      else
+        { int i, n;
+
+          wave.Trace  = strace - 2*(path->abpos/tspace);
+          n  = L/sizeof(uint16);
+          for (i = 0; i < n; i++)
+            strace[i] = 0;
+          path->diffs = trace_nd(aseq,asub,bseq,bsub,&wave,tspace);
+
+          if (strace[n-1] != 0)   //  Last element is to capture all inserts on TP boundary
+            { strace[n-3] += strace[n-1];
+              strace[n-4] += strace[n-2];
+            }
+          path->tlen = n-2;
+
+#ifdef DEBUG_SCRIPT
+          printf("  Trace:\n");
+          for (i = 0; i < path->tlen; i += 2)
+            printf("    %3d  %3d\n",strace[i],strace[i+1]);
+          fflush(stdout);
+#endif
+        }
+    }
+
+  path->trace = trace;
  return (0);
 }

@@ -4825,79 +5088,6 @@ static int middle_np(char *A, int M, char *B, int N, Trace_Waves *wave, int mode

 static char *TP_Error = "Trace point out of bounds (Compute_Trace), source DB likely incorrect";

-int Compute_Trace_ALL(Alignment *align, Work_Data *ework)
-{ _Work_Data *work = (_Work_Data *) ework;
-  Trace_Waves wave;
-
-  Path *path;
-  char *aseq, *bseq;
-  int   alen, blen;
-  int   M, N, D;
-  int   dmax;
-
-  alen   = align->alen;
-  blen   = align->blen;
-  path = align->path;
-  aseq = align->aseq;
-  bseq = align->bseq;
-
-  M = path->aepos-path->abpos;
-  N = path->bepos-path->bbpos;
-  
-  { int64 s;
-    int   d;
-    int   **PVF, **PHF;
-
-    if (M < N)
-      s = N;
-    else
-      s = M;
-    s *= sizeof(int);
-    if (s > work->tramax)
-      if (enlarge_trace(work,s))
-        EXIT(1);
-
-    dmax = path->diffs - abs(M-N);
-
-    s = (dmax+3)*2*((M+N+3)*sizeof(int) + sizeof(int *));
-
-    if (s > 256000000)
-      return (Compute_Trace_ND_ALL(align,ework));
-
-    if (s > work->vecmax)
-      if (enlarge_vector(work,s))
-        EXIT(1);
-
-    wave.PVF = PVF = ((int **) (work->vector)) + 2;
-    wave.PHF = PHF = PVF + (dmax+3);
-
-    s = M+N+3;
-    PVF[-2] = ((int *) (PHF + (dmax+1))) + (N+1);
-    for (d = -1; d <= dmax; d++)
-      PVF[d] = PVF[d-1] + s;
-    PHF[-2] = PVF[dmax] + s;
-    for (d = -1; d <= dmax; d++)
-      PHF[d] = PHF[d-1] + s;
-  }
-
-  wave.Stop = ((int *) work->trace);
-  wave.Aabs = aseq;
-  wave.Babs = bseq;
-
-  if (path->aepos > alen || path->bepos > blen)
-    { EPRINTF(EPLACE,"%s: %s\n",Prog_Name,TP_Error);
-      EXIT(1);
-    }
-  D = iter_np(aseq+path->abpos,M,bseq+path->bbpos,N,&wave,GREEDIEST,dmax);
-  if (D < 0)
-    EXIT(1);
-  path->diffs = D;
-  path->trace = work->trace;
-  path->tlen  = wave.Stop - ((int *) path->trace);
-
-  return (0);
-}
-
 int Compute_Trace_PTS(Alignment *align, Work_Data *ework, int trace_spacing, int mode)
 { _Work_Data *work = (_Work_Data *) ework;
  Trace_Waves wave;

--- a/align.h
+++ b/align.h
@@ -235,30 +235,25 @@ void Complement_Seq(char *a, int n);
  int   Find_Extension(Alignment *align, Work_Data *work, Align_Spec *spec,    //  experimental !!
                       int diag, int anti, int lbord, int hbord, int prefix);

-  /* Given a legitimate Alignment object, Compute_Trace_X computes an exact trace for the alignment.
-     If 'path.trace' is non-NULL, then it is assumed to be a sequence of pass-through points
-     and diff levels computed by Local_Alignment.  In either case 'path.trace' is set
+  /* Given a legitimate Alignment object and associated trace point vector in 'align->path.trace',
+     Compute_Trace_X, computes an exact trace for the alignment and resets 'align->path.trace'
     to point at an integer array within the storage of the Work_Data packet encoding an
     exact optimal trace from the start to end points.  If the trace is needed beyond the
     next call to a routine that sets it, then it should be copied to an array allocated
     and managed by the caller.

-     Compute_Trace_ALL does not require a sequence of pass-through points, as it computes the
-     best alignment between (path->abpos,path->bbpos) and (path->aepos,path->bepos) in the
-     edit graph between the sequences.  Compute_Trace_PTS computes a trace by computing the
-     trace between successive pass through points.  It is much, much faster than Compute_Trace_ALL
-     but at the tradeoff of not necessarily being optimal as pass-through points are not all
-     perfect.  Compute_Trace_MID computes a trace by computing the trace between the mid-points
-     of alignments between two adjacent pairs of pass through points.  It is generally twice as
-     slow as Compute_Trace_PTS, but it produces nearer optimal alignments.  All these routines
-     return 1 if an error occurred and 0 otherwise.
+     Compute_Trace_PTS computes a trace by computing the trace between successive trace points.
+     It is much, much faster than Compute_Alignment below but at the tradeoff of not necessarily
+     being optimal as pass-through points are not all perfect.  Compute_Trace_MID computes a trace
+     by computing the trace between the mid-points of alignments between two adjacent pairs of trace
+     points.  It is generally twice as slow as Compute_Trace_PTS, but it produces nearer optimal
+     alignments.  Both these routines return 1 if an error occurred and 0 otherwise.
  */

 #define LOWERMOST -1   //   Possible modes for "mode" parameter below)
 #define GREEDIEST  0
 #define UPPERMOST  1

-  int Compute_Trace_ALL(Alignment *align, Work_Data *work);
  int Compute_Trace_PTS(Alignment *align, Work_Data *work, int trace_spacing, int mode);
  int Compute_Trace_MID(Alignment *align, Work_Data *work, int trace_spacing, int mode);

@@ -271,6 +266,25 @@ void Complement_Seq(char *a, int n);

  int Compute_Trace_IRR(Alignment *align, Work_Data *work, int mode);   //  experimental !!

+  /* Compute Alignment determines the best alignment between the substrings specified by align.
+     If the task is DIFF_ONLY, then only the difference of this alignment is computed and placed
+     in the "diffs" field of align's path.  If the task is PLUS_TRACE or DIFF_TRACE, then
+     'path.trace' is set to point at an integer array within the storage of the Work_Data packet
+     encoding a trace point sequence for an optimal alignment, whereas if the task is PLUS_ALIGN
+     or DIFF_ALIGN, then it points to an optimal trace of an optimatl alignment.  The PLUS
+     tasks can only be called if the immmediately proceeding call was a DIFF_ONLY on the same
+     alignment record and sequences, in which case a little efficiency is gained by avoiding
+     the repetition of the top level search for an optimal mid-point.
+  */
+
+#define PLUS_ALIGN   0
+#define PLUS_TRACE   1
+#define DIFF_ONLY    2
+#define DIFF_ALIGN   3
+#define DIFF_TRACE   4
+
+  int Compute_Alignment(Alignment *align, Work_Data *work, int task, int trace_spacing);
+
  /* Alignment_Cartoon prints an ASCII representation of the overlap relationhip between the
     two reads of 'align' to the given 'file' indented by 'indent' space.  Coord controls
     the display width of numbers, it must be not less than the width of any number to be
@@ -339,7 +353,9 @@ typedef struct {
     margin by 'indent' spaces.

     Compress_TraceTo8 converts a trace fo 16-bit values to 8-bit values in place, and
-     Decompress_TraceTo16 does the reverse conversion.
+     Decompress_TraceTo16 does the reverse conversion.  If check is set in a call to Compress
+     then it checks whether the values fit in 8-bits, and if not returns a non-zero result
+     in interactive mode, or exits with an error message in batch mode.

     Check_Trace_Points checks that the number of trace points is correct and that the sum
     of the b-read displacements equals the b-read alignment interval, assuming the trace
@@ -353,7 +369,7 @@ typedef struct {
  int  Write_Overlap(FILE *output, Overlap *ovl, int tbytes);
  void Print_Overlap(FILE *output, Overlap *ovl, int tbytes, int indent);

-  void Compress_TraceTo8(Overlap *ovl);
+  int  Compress_TraceTo8(Overlap *ovl, int check);
  void Decompress_TraceTo16(Overlap *ovl);

  int  Check_Trace_Points(Overlap *ovl, int tspace, int verbose, char *fname);

--- a/debian/changelog
+++ b/debian/changelog
+dascrubber (1.1-1) unstable; urgency=medium
+
+  * Team upload.
+  * New upstream version
+  * Add watch file
+  * debhelper 11
+  * Point Vcs fields to salsa.debian.org
+  * Standards-Version: 4.2.1
+  * Remove trailing whitespace in debian/copyright
+  * hardening=+all
+
+ -- Andreas Tille <tille@debian.org>  Sun, 28 Oct 2018 08:56:42 +0100
+
 dascrubber (0~20180108-1) unstable; urgency=medium

  * New upstream snapshot (git 3491b14)

--- a/debian/compat
+++ b/debian/compat
-10
+11
--- a/debian/control
+++ b/debian/control
 Source: dascrubber
-Section: science
-Priority: optional
 Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.org>
 Uploaders: Afif Elghraoui <afif@debian.org>
-Build-Depends: debhelper (>=10)
-Standards-Version: 4.1.3
+Section: science
+Priority: optional
+Build-Depends: debhelper (>= 11~)
+Standards-Version: 4.2.1
+Vcs-Browser: https://salsa.debian.org/med-team/dascrubber
+Vcs-Git: https://salsa.debian.org/med-team/dascrubber.git
 Homepage: https://dazzlerblog.wordpress.com/
-Vcs-Git: https://anonscm.debian.org/git/debian-med/dascrubber.git
-Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/dascrubber.git

 Package: dascrubber
 Architecture: any
-Depends:
-	${shlibs:Depends},
-	${misc:Depends},
-Suggests:
-	daligner,
-	dazzdb,
+Depends: ${shlibs:Depends},
+         ${misc:Depends}
+Suggests: daligner,
+          dazzdb
 Description: alignment-based scrubbing pipeline for DNA sequencing reads
 The Dazzler Scrubbing Suite produces a set of edited reads that are guaranteed
 to

--- a/debian/copyright
+++ b/debian/copyright