Andreas Tille · Andreas Tille · Andreas Tille · Andreas Tille · Andreas Tille · Andreas Tille
--- a/Catrack.c
+++ b/Catrack.c
@@ -131,6 +131,7 @@ int main(int argc, char *argv[])
    int         nextra;
    int64       extail;

+    extra    = NULL;
    anno     = NULL;
    trackoff = 0;
    tracktot = tracksiz = 0;
@@ -144,7 +145,7 @@ int main(int argc, char *argv[])
    while (1)
      { FILE *dfile, *afile;
        char *dfile_name, *afile_name;
-        int   i, size, esize, tracklen;
+        int   i, apos, size, esize, tracklen;

        afile_name = Strdup(Numbered_Suffix(prefix,nfiles+1,Catenate(".",argv[2],".","anno")),
                            "Allocating .anno file name");
@@ -256,10 +257,11 @@ int main(int argc, char *argv[])
          }

        FSEEKO(afile,0,SEEK_END)
+        FTELLO(apos,afile)
        if (dfile != NULL)
-          extail = FTELLO(afile) - (esize*(tracklen+1) + 2*sizeof(int)); 
+          extail = apos - (esize*(tracklen+1) + 2*sizeof(int)); 
        else
-          extail = FTELLO(afile) - (esize*tracklen + 2*sizeof(int)); 
+          extail = apos - (esize*tracklen + 2*sizeof(int)); 
        FSEEKO(afile,-extail,SEEK_END)

        if (extail >= 20)
@@ -339,16 +341,16 @@ int main(int argc, char *argv[])
      }
  }

-  FCLOSE(aout);
-  if (dout != NULL)
-    FCLOSE(dout);
-
  if (nfiles != nblocks)
    { fprintf(stderr,"%s: Did not catenate all tracks of DB (nfiles %d != nblocks %d)\n",
 	             Prog_Name, nfiles, nblocks);
      goto error;
    }
  
+  FCLOSE(aout);
+  if (dout != NULL)
+    FCLOSE(dout);
+
  if (DELETE)
    { int   i;
      char *name;

--- a/DB.c
+++ b/DB.c
@@ -17,6 +17,8 @@
 #include <ctype.h>
 #include <unistd.h>
 #include <dirent.h>
+#include <limits.h>
+#include <sys/stat.h>

 #include "DB.h"

@@ -599,8 +601,8 @@ error:
 //   for the retained reads.

 void Trim_DB(DAZZ_DB *db)
-{ int         i, j, r;
-  int         allflag, cutoff;
+{ int         i, j, r, f;
+  int         allflag, cutoff, css;
  int64       totlen;
  int         maxlen, nreads;
  DAZZ_TRACK *record;
@@ -676,14 +678,23 @@ void Trim_DB(DAZZ_DB *db)
        record->anno = Realloc(record->anno,record->size*(j+1),NULL);
      }

+  css    = 0;
  totlen = maxlen = 0;
  for (j = i = 0; i < nreads; i++)
-    { r = reads[i].rlen;
-      if ((reads[i].flags & DB_BEST) >= allflag && r >= cutoff)
+    { f = reads[i].flags;
+      if ((f & DB_CSS) == 0)
+        css = 0;
+      r = reads[i].rlen;
+      if ((f & DB_BEST) >= allflag && r >= cutoff)
        { totlen += r;
          if (r > maxlen)
            maxlen = r;
-          reads[j++] = reads[i];
+          reads[j] = reads[i];
+          if (css)
+            reads[j++].flags |= DB_CSS;
+          else
+            reads[j++].flags &= ~DB_CSS;
+          css = 1;
        }
    }
  
@@ -1989,3 +2000,196 @@ void Print_Read(char *s, int width)
      printf("\n");
    }
 }
+
+
+/*******************************************************************************************
+ *
+ *  COMMAND LINE BLOCK PARSER
+ *    Take a command line argument and interpret the '@' block number ranges.
+ *    Parse_Block_Arg produces an Block_Looper iterator object that can then
+ *    be invoked multiple times to iterate through all the files implied by
+ *    the @ pattern/range.
+ *
+ ********************************************************************************************/
+
+typedef struct
+  { int first, last, next;
+    char *root, *pwd, *ppnt;
+    char *slice;
+  } _Block_Looper;
+
+  //  Advance the iterator e_parse to the next file, open it, and return the file pointer
+  //   to it.  Return NULL if at the end of the list of files.
+
+int Next_Block_Exists(Block_Looper *e_parse)
+{ _Block_Looper *parse = (_Block_Looper *) e_parse;
+
+  char       *disp;
+  struct stat sts;
+
+  if (parse->next+1 > parse->last)
+    return (0);
+
+  if (parse->next < 0)
+    disp  = parse->root;
+  else
+    disp = Numbered_Suffix(parse->root,parse->next+1,parse->ppnt);
+
+  if (stat(Catenate(parse->pwd,"/",disp,".las"),&sts))
+    return (0);
+  else
+    return (1);
+}
+
+
+FILE *Next_Block_Arg(Block_Looper *e_parse)
+{ _Block_Looper *parse = (_Block_Looper *) e_parse;
+
+  char *disp;
+  FILE *input;
+
+  parse->next += 1;
+  if (parse->next > parse->last)
+    return (NULL);
+
+  if (parse->next < 0)
+    disp  = parse->root;
+  else
+    disp = Numbered_Suffix(parse->root,parse->next,parse->ppnt);
+
+  if ((input = fopen(Catenate(parse->pwd,"/",disp,".las"),"r")) == NULL)
+    { if (parse->last != INT_MAX)
+        { fprintf(stderr,"%s: %s.las is not present\n",Prog_Name,disp);
+          exit (1);
+        }
+      return (NULL);
+    }
+  return (input);
+}
+
+  //  Reset the iterator e_parse to the first file
+
+void Reset_Block_Arg(Block_Looper *e_parse)
+{ _Block_Looper *parse = (_Block_Looper *) e_parse;
+
+  parse->next = parse->first - 1;
+}
+
+  //  Return a pointer to the path for the current file
+
+char *Block_Arg_Path(Block_Looper *e_parse)
+{ _Block_Looper *parse = (_Block_Looper *) e_parse;
+
+  return (parse->pwd);
+}
+
+  //  Return a pointer to the root name for the current file
+
+char *Block_Arg_Root(Block_Looper *e_parse)
+{ _Block_Looper *parse = (_Block_Looper *) e_parse;
+
+  if (parse->next < 0)
+    return (parse->root);
+  else
+    return (Numbered_Suffix(parse->root,parse->next,parse->ppnt));
+}
+
+  //  Free the iterator
+
+void Free_Block_Arg(Block_Looper *e_parse)
+{ _Block_Looper *parse = (_Block_Looper *) e_parse;
+
+  free(parse->root);
+  free(parse->pwd);
+  free(parse->slice);
+  free(parse);
+}
+
+char *Next_Block_Slice(Block_Looper *e_parse, int slice)
+{ _Block_Looper *parse = (_Block_Looper *) e_parse;
+
+  if (parse->slice == NULL)
+    { int size = strlen(parse->pwd) + strlen(Block_Arg_Root(parse)) + 30;
+      parse->slice =  (char *)  Malloc(size,"Block argument slice");
+      if (parse->slice == NULL)
+        exit (1);
+    }
+
+  if (parse->first < 0)
+    sprintf(parse->slice,"%s/%s",parse->pwd,parse->root);
+  else
+    sprintf(parse->slice,"%s/%s%c%d-%d%s",parse->pwd,parse->root,BLOCK_SYMBOL,parse->next+1,
+                                          parse->next+slice,parse->ppnt);
+  parse->next += slice;
+  return (parse->slice);
+}
+
+  //  Parse the command line argument and return an iterator to move through the
+  //    file names, setting it up to report the first file.
+
+Block_Looper *Parse_Block_Arg(char *arg)
+{ _Block_Looper *parse;
+  char *pwd, *root;
+  char *ppnt, *cpnt;
+  int   first, last;
+
+  parse = (_Block_Looper *) Malloc(sizeof(_Block_Looper),"Allocating parse node");
+  pwd   = PathTo(arg);
+  root  = Root(arg,".las");
+  if (parse == NULL || pwd == NULL || root == NULL)
+    exit (1);
+
+  ppnt = index(root,BLOCK_SYMBOL);
+  if (ppnt == NULL)
+    first = last = -1;
+  else
+    { if (index(ppnt+1,BLOCK_SYMBOL) != NULL)
+        { fprintf(stderr,"%s: Two or more occurences of %c-sign in source name '%s'\n",
+                         Prog_Name,BLOCK_SYMBOL,root);
+          exit (1);
+        }
+      *ppnt++ = '\0';
+      first = strtol(ppnt,&cpnt,10);
+      if (cpnt == ppnt)
+        { first = 1;
+          last  = INT_MAX;
+        }
+      else
+        { if (first < 0)
+            { fprintf(stderr,
+                      "%s: Integer following %c-sigan is less than 0 in source name '%s'\n",
+                      Prog_Name,BLOCK_SYMBOL,root);
+              exit (1);
+            }
+          if (*cpnt == '-')
+            { ppnt = cpnt+1;
+              last = strtol(ppnt,&cpnt,10);
+              if (cpnt == ppnt)
+                { fprintf(stderr,"%s: Second integer must follow - in source name '%s'\n",
+                                 Prog_Name,root);
+                  exit (1);
+                }
+              if (last < first)
+                { fprintf(stderr,
+                          "%s: 2nd integer is less than 1st integer in source name '%s'\n",
+                          Prog_Name,root);
+                  exit (1);
+                }
+              ppnt = cpnt;
+            }
+          else
+            { last = INT_MAX;
+              ppnt = cpnt;
+            }
+        }
+    }
+
+  parse->pwd   = pwd;
+  parse->root  = root;
+  parse->ppnt  = ppnt;
+  parse->first = first;
+  parse->last  = last;
+  parse->next  = first-1;
+  parse->slice = NULL;
+  return ((Block_Looper *) parse);
+}
--- a/DB.h
+++ b/DB.h
@@ -59,6 +59,8 @@ typedef signed long long   int64;
 typedef float              float32;
 typedef double             float64;

+#define LAST_READ_SYMBOL '$'
+#define BLOCK_SYMBOL     '@'

 /*******************************************************************************************
 *
@@ -215,12 +217,11 @@ int Count_Args(char *arg);
      SYSTEM_READ_ERROR		\
  }

-#define FTELLO(file)		\
-  ( { int x = ftello(file);	\
-      if (x < 0)		\
+#define FTELLO(val,file)	\
+  { val = ftello(file);		\
+    if (val < 0)		\
      SYSTEM_READ_ERROR		\
-      ; x; 			\
-  } )
+  }

 /*******************************************************************************************
 *
@@ -567,4 +568,25 @@ int Read_All_Sequences(DAZZ_DB *db, int ascii);

 int List_DB_Files(char *path, void actor(char *path, char *extension));

+  //   Take a command line argument and interpret the '@' block number ranges.
+  //   Parse_Block_Arg produces a Block_Looper iterator object that can then
+  //   be invoked multiple times to iterate through all the files implied by
+  //   the @ pattern/range.  Next_Block_Slice returns a string encoing the next
+  //   slice files represented by an @-notation, and advances the iterator by
+  //   that many files.
+
+typedef void Block_Looper;
+
+Block_Looper *Parse_Block_Arg(char *arg);
+
+FILE *Next_Block_Arg(Block_Looper *e_parse);
+int   Next_Block_Exists(Block_Looper *e_parse);
+
+char *Next_Block_Slice(Block_Looper *e_parse,int slice);
+
+void  Reset_Block_Arg(Block_Looper *e_parse);  //  Reset iterator to first file
+char *Block_Arg_Path(Block_Looper *e_parse);   //  Path of current file
+char *Block_Arg_Root(Block_Looper *e_parse);   //  Root name of current file
+void  Free_Block_Arg(Block_Looper *e_parse);   //  Free the iterator
+
 #endif // _DAZZ_DB
--- a/DBdump.c
+++ b/DBdump.c
@@ -94,6 +94,7 @@ static int prof_map[41] =
 int main(int argc, char *argv[])
 { DAZZ_DB    _db, *db = &_db;
  int         Quiva_DB, Arrow_DB;
+  int         FirstRead;
  FILE       *hdrs      = NULL;
  char       *hdrs_name = NULL;
  int64      *qv_idx    = NULL;
@@ -377,7 +378,10 @@ int main(int argc, char *argv[])
          else if (status == 1)
            MTRACK[i] = Load_Track(db,MASK[i]);
        }
+      FirstRead = db->tfirst;
    }
+  else
+    FirstRead = db->ufirst;

  if (DOIQV)
    { int         status, kind;
@@ -713,7 +717,7 @@ int main(int argc, char *argv[])
            r   = reads + i;
            len = r->rlen;
            if (DORED)
-              printf("R %d\n",i+1);
+              printf("R %d\n",FirstRead + (i+1));

            flags = r->flags;
            qv    = (flags & DB_QV);

--- a/DBdust.c
+++ b/DBdust.c
@@ -155,7 +155,7 @@ int main(int argc, char *argv[])
        FWRITE(&size,sizeof(int),1,afile)
        FSEEKO(afile,0,SEEK_END)
        FSEEKO(dfile,0,SEEK_END)
-        indx = FTELLO(dfile);
+        FTELLO(indx,dfile)
      }

    free(pwd);

--- a/DBshow.c
+++ b/DBshow.c
@@ -175,8 +175,8 @@ int main(int argc, char *argv[])
        fprintf(stderr,"      -n: Do not show the default read DNA sequences.\n");
        fprintf(stderr,"      -m: Show mask intervals and highlight in sequence.\n");
        fprintf(stderr,"\n");
-        fprintf(stderr,"      -Q: Produce a .quiva file (ignore all other options but -uU.\n");
-        fprintf(stderr,"      -A: Produce a .arrow file (ignore all other options but -uw.\n");
+        fprintf(stderr,"      -Q: Produce a .quiva file (ignore all other options but -uU).\n");
+        fprintf(stderr,"      -A: Produce a .arrow file (ignore all other options but -uw).\n");
        fprintf(stderr,"\n");
        fprintf(stderr,"      -U: Use upper case for DNA (default is lower case).\n");
        fprintf(stderr,"      -w: Print -w bp per line (default is 80).\n");
@@ -349,14 +349,13 @@ int main(int argc, char *argv[])
  if (argc == 3)
    { if (argv[2][0] != LAST_READ_SYMBOL || argv[2][1] != '\0')
        { char *eptr, *fptr;
-          int   b, e;

-          b = strtol(argv[2],&eptr,10);
-          if (eptr > argv[2] && b > 0)
+          strtol(argv[2],&eptr,10);
+          if (eptr > argv[2])
            { if (*eptr == '-')
                { if (eptr[1] != LAST_READ_SYMBOL || eptr[2] != '\0')
-                    { e = strtol(eptr+1,&fptr,10);
-                      input_pts = (fptr <= eptr+1 || *fptr != '\0' || e <= 0);
+                    { strtol(eptr+1,&fptr,10);
+                      input_pts = (fptr <= eptr+1 || *fptr != '\0');
                    }
                }
              else

--- a/DBsplit.c
+++ b/DBsplit.c
@@ -32,7 +32,15 @@
 #define PATHSEP "/"
 #endif

-static char *Usage = "[-af] [-x<int>] [-s<double(200.)>] <path:db|dam>";
+static char *Usage = "[-aflm] [-x<int>] [-s<double(200.)>] <path:db|dam>";
+
+static DAZZ_READ *reads;
+
+static int MSORT(const void *l, const void *r)
+{ int x = *((int *) l);
+  int y = *((int *) r);
+  return (reads[x].rlen - reads[y].rlen);
+}

 int main(int argc, char *argv[])
 { DAZZ_DB    db, dbs;
@@ -40,11 +48,16 @@ int main(int argc, char *argv[])
  FILE      *dbfile, *ixfile;
  char      *dbfile_name, *ixfile_name;
  int        status;
+  int        nreads;
+
+  int        medmax = 0;
+  int       *msort = NULL;

  int        FORCE;
  int        ALL;
  int        CUTOFF;
  int64      SIZE;
+  int        SELECT;

  { int   i, j, k;
    int   flags[128];
@@ -53,6 +66,7 @@ int main(int argc, char *argv[])

    ARG_INIT("DBsplit")

+    SELECT = -1;
    CUTOFF = 0;
    size   = 200;

@@ -61,7 +75,7 @@ int main(int argc, char *argv[])
      if (argv[i][0] == '-')
        switch (argv[i][1])
        { default:
-            ARG_FLAGS("af")
+            ARG_FLAGS("aflm")
            break;
          case 'x':
            ARG_NON_NEGATIVE(CUTOFF,"Min read length cutoff")
@@ -82,6 +96,16 @@ int main(int argc, char *argv[])
    ALL   = flags['a'];
    FORCE = flags['f'];

+    if (flags['l'])
+      { SELECT = 0;
+        if (flags['m'])
+          { fprintf(stderr,"%s: Cannot set both -l and -m, mutually exclusive\n",Prog_Name);
+            exit (1);
+          }
+      }
+    else if (flags['m'])
+      SELECT = 1;
+
    if (argc != 2)
      { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
        fprintf(stderr,"\n");
@@ -89,6 +113,9 @@ int main(int argc, char *argv[])
        fprintf(stderr,"      -x: Trimmed DB has reads >= this threshold.\n");
        fprintf(stderr,"      -a: Trimmed DB contains all reads from a well (not just longest).\n");
        fprintf(stderr,"      -f: Force the split to occur even if already split.\n");
+        fprintf(stderr,"\n");
+        fprintf(stderr,"      -l: Set primary read for a well to be the longest.\n");
+        fprintf(stderr,"      -m: Set primary read for a well to be the median.\n");
        exit (1);
      }
  }
@@ -102,6 +129,8 @@ int main(int argc, char *argv[])
    { fprintf(stderr,"%s: Cannot be called on a block: %s\n",Prog_Name,argv[1]);
      exit (1);
    }
+  reads  = db.reads;
+  nreads = db.ureads;

  { char    *pwd, *root;
    char     buffer[2*MAX_NAME+100];
@@ -146,18 +175,59 @@ int main(int argc, char *argv[])
          }
      }

-    dbpos = FTELLO(dbfile);
+    FTELLO(dbpos,dbfile)
    FSEEKO(dbfile,dbpos,SEEK_SET)
    FPRINTF(dbfile,DB_NBLOCK,0)
    FPRINTF(dbfile,DB_PARAMS,SIZE,CUTOFF,ALL)
  }

-  { DAZZ_READ *reads  = db.reads;
-    int        nreads = db.ureads;
-    int64      totlen;
+  if (SELECT >= 0)
+    { int        i, j, k, h;
+      int        off;
+
+      off = (~ DB_BEST);
+
+      for (i = 0; i < nreads; i = j)
+        { j = i+1;
+          reads[i].flags &= off;
+          while ((reads[j].flags & DB_CSS) != 0)
+            reads[j++].flags &= off;
+  
+          if (j-i <= 1)
+            { reads[i].flags |= DB_BEST;
+              continue;
+            }
+  
+          if (SELECT == 0)
+            { int mlen;
+
+              mlen = reads[h = i].rlen;
+              for (k = i+1; k < j; k++)
+                if (mlen < reads[k].rlen)
+                  mlen = reads[h = k].rlen;
+            }
+          else
+            { if (j-i > medmax)
+                { medmax = 1.2*(j-i) + 1000;
+                  msort  = (int *) Realloc(msort,sizeof(int)*medmax,"Allocating Median Array");
+                  if (msort == NULL)
+                    exit (1);
+                }
+              for (k = i; k < j; k++)
+                msort[k-i] = k;
+              qsort(msort,j-i,sizeof(int),MSORT);
+              h = msort[(j-i)/2];
+            }
+          reads[h].flags |= DB_BEST;
+        } 
+    }
+
+
+  { int64      totlen;
    int        nblock, ireads, treads, rlen, fno;
-    int        i;
+    int        i, upos, css;

+    css    = 0;
    nblock = 0;
    totlen = 0;
    ireads = 0;
@@ -166,16 +236,19 @@ int main(int argc, char *argv[])
    if (ALL)
      for (i = 0; i < nreads; i++)
        { rlen = reads[i].rlen;
+          if ((reads[i].flags & DB_CSS) == 0)
+            css = 0;
          if (rlen >= CUTOFF)
-            { ireads += 1;
-              treads += 1;
-              totlen += rlen;
-              if (totlen >= SIZE)
-                { FPRINTF(dbfile,DB_BDATA,i+1,treads)
+            { if (css == 0 && totlen >= SIZE)
+                { FPRINTF(dbfile,DB_BDATA,i,treads)
                  totlen = 0;
                  ireads = 0;
                  nblock += 1;
                }
+              ireads += 1;
+              treads += 1;
+              totlen += rlen;
+              css = 1;
            }
        }
    else
@@ -199,7 +272,8 @@ int main(int argc, char *argv[])
        nblock += 1;
      }
    fno = fileno(dbfile);
-    if (ftruncate(fno,FTELLO(dbfile)) < 0)
+    FTELLO(upos,dbfile)
+    if (ftruncate(fno,upos) < 0)
      SYSTEM_WRITE_ERROR

    FSEEKO(dbfile,dbpos,SEEK_SET)
@@ -208,9 +282,13 @@ int main(int argc, char *argv[])
    dbs.cutoff = CUTOFF;
    if (ALL)
      dbs.allarr |= DB_ALL;
+    else
+      dbs.allarr &= ~DB_ALL;
    dbs.treads = treads;
    FSEEKO(ixfile,0,SEEK_SET)
    FWRITE(&dbs,sizeof(DAZZ_DB),1,ixfile)
+    if (SELECT >= 0)
+      FWRITE(reads,sizeof(DAZZ_READ),nreads,ixfile)
  }

  FCLOSE(ixfile)

--- a/DBtrim.c
+++ b/DBtrim.c
@@ -22,7 +22,15 @@
 #define PATHSEP "/"
 #endif

-static char *Usage = "[-af] [-x<int>] <path:db|dam>";
+static char *Usage = "[-aflm] [-x<int>] <path:db|dam>";
+
+static DAZZ_READ *reads;
+
+static int MSORT(const void *l, const void *r)
+{ int x = *((int *) l);
+  int y = *((int *) r);
+  return (reads[x].rlen - reads[y].rlen);
+}

 int main(int argc, char *argv[])
 { DAZZ_DB    db, dbs;
@@ -31,10 +39,15 @@ int main(int argc, char *argv[])
  char      *dbfile_name, *ixfile_name;
  int        nblocks;
  int        status;
+  int        nreads;
+
+  int        medmax = 0;
+  int       *msort = NULL;

  int        FORCE;
  int        ALL;
  int        CUTOFF;
+  int        SELECT;

  { int   i, j, k;
    int   flags[128];
@@ -42,6 +55,7 @@ int main(int argc, char *argv[])

    ARG_INIT("DBtrim")

+    SELECT = -1;
    CUTOFF = 0;

    j = 1;
@@ -49,7 +63,7 @@ int main(int argc, char *argv[])
      if (argv[i][0] == '-')
        switch (argv[i][1])
        { default:
-            ARG_FLAGS("af")
+            ARG_FLAGS("aflm")
            break;
          case 'x':
            ARG_NON_NEGATIVE(CUTOFF,"Min read length cutoff")
@@ -62,12 +76,25 @@ int main(int argc, char *argv[])
    ALL   = flags['a'];
    FORCE = flags['f'];

+    if (flags['l'])
+      { SELECT = 0;
+        if (flags['m'])
+          { fprintf(stderr,"%s: Cannot set both -l and -m, mutually exclusive\n",Prog_Name);
+            exit (1);
+          }
+      }
+    else if (flags['m'])
+      SELECT = 1;
+
    if (argc != 2)
      { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
        fprintf(stderr,"\n");
        fprintf(stderr,"      -x: Trimmed DB has reads >= this threshold.\n");
        fprintf(stderr,"      -a: Trimmed DB contains all reads from a well (not just longest).\n");
        fprintf(stderr,"      -f: Force the new trim setting even if already set.\n");
+        fprintf(stderr,"\n");
+        fprintf(stderr,"      -l: Set primary read for a well to be the longest.\n");
+        fprintf(stderr,"      -m: Set primary read for a well to be the median.\n");
        exit (1);
      }
  }
@@ -81,6 +108,8 @@ int main(int argc, char *argv[])
    { fprintf(stderr,"%s: Cannot be called on a block: %s\n",Prog_Name,argv[1]);
      exit (1);
    }
+  reads  = db.reads;
+  nreads = db.ureads;

  { char    *pwd, *root;
    char     buffer[2*MAX_NAME+100];
@@ -135,12 +164,53 @@ int main(int argc, char *argv[])

    FSCANF(dbfile,DB_NBLOCK,&nblocks)

-    dbpos = FTELLO(dbfile);
+    FTELLO(dbpos,dbfile)
    FSCANF(dbfile,DB_PARAMS,&size,&cutoff,&all)
    FSEEKO(dbfile,dbpos,SEEK_SET)
    FPRINTF(dbfile,DB_PARAMS,size,CUTOFF,ALL)
  }

+  if (SELECT >= 0)
+    { int        i, j, k, h;
+      int        off;
+
+      off = (~ DB_BEST);
+
+      for (i = 0; i < nreads; i = j)
+        { j = i+1;
+          reads[i].flags &= off;
+          while ((reads[j].flags & DB_CSS) != 0)
+            reads[j++].flags &= off;
+
+          if (j-i <= 1)
+            { reads[i].flags |= DB_BEST;
+              continue;
+            }
+
+          if (SELECT == 0)
+            { int mlen;
+
+              mlen = reads[h = i].rlen;
+              for (k = i+1; k < j; k++)
+                if (mlen < reads[k].rlen)
+                  mlen = reads[h = k].rlen;
+            }
+          else
+            { if (j-i > medmax)
+                { medmax = 1.2*(j-i) + 1000;
+                  msort  = (int *) Realloc(msort,sizeof(int)*medmax,"Allocating Median Array");
+                  if (msort == NULL)
+                    exit (1);
+                }
+              for (k = i; k < j; k++)
+                msort[k-i] = k;
+              qsort(msort,j-i,sizeof(int),MSORT);
+              h = msort[(j-i)/2];
+            }
+          reads[h].flags |= DB_BEST;
+        }
+    }
+
  { DAZZ_READ *reads  = db.reads;
    int        uread, tread;
    int        rlen;
@@ -150,7 +220,7 @@ int main(int argc, char *argv[])
    t = 0;
    fprintf(dbfile,DB_BDATA,0,0);
    for (b = 0; b < nblocks; b++)
-      { dbpos = FTELLO(dbfile);
+      { FTELLO(dbpos,dbfile);
        FSCANF(dbfile,DB_BDATA,&uread,&tread)

        if (ALL)
@@ -174,9 +244,13 @@ int main(int argc, char *argv[])
    dbs.cutoff = CUTOFF;
    if (ALL)
      dbs.allarr |= DB_ALL;
+    else
+      dbs.allarr &= ~DB_ALL;
    dbs.treads = t;
    FSEEKO(ixfile,0,SEEK_SET)
    FWRITE(&dbs,sizeof(DAZZ_DB),1,ixfile)
+    if (SELECT >= 0)
+      FWRITE(reads,sizeof(DAZZ_READ),nreads,ixfile)
  }

  FCLOSE(ixfile)

--- a/README.md
+++ b/README.md
@@ -116,8 +116,9 @@ refer to a database we are referring to either a DB or a DAM.
 The command DBsplit sets or resets the current partition for a database which is
 determined by 3 parameters: (i) the total number of basepairs to place in each block,
 (ii) the minimum read length of reads to include within a block, and (iii) whether or
-not to only include the longest read from a given well or all reads from a well (NB:
-several reads of the same insert in a given well can be produced by the Pacbio
+not to only include the longest (-l) or median (-m) read from a given well or all
+reads from a well (-a) (NB: several reads of the same insert in a given well can be
+produced by the Pacbio
 instrument).  Note that the length and uniqueness parameters effectively select a
 subset of the reads that contribute to the size of a block.  We call this subset the
 *trimmed* data base.  Some commands operate on the entire database, others on the
@@ -259,7 +260,7 @@ should be used, and the characters per line, or line width, can be set to any po
 value with the -w option.

 ```
-9. DBsplit [-af] [-x<int>] [-s<double(200.)>] <path:db|dam>
+9. DBsplit [-aflm] [-x<int>] [-s<double(200.)>] <path:db|dam>
 ```

 Divide the database \<path\>.db or \<path\>.dam conceptually into a series of blocks
@@ -278,6 +279,12 @@ If the -f option is set, the split is forced regardless of whether or not the DB
 question has previously bin split, i.e. one is not interactively asked if they wish
 to proceed.

+By default, the primary read for each well consists of the longest subread of the insert
+from the well.  By setting the -m parameter, the subread of median length becomes the
+primary read instead.  One can at any later time change this back to the default longest
+by splitting again with the -l parameter set.  The setting of the primary reads occurs
+regardless of whether the -a parameter is set or not.
+
 ```
 10. DBtrim [-af] [-x<int>] <path:db|dam>
 ```

--- a/debian/changelog
+++ b/debian/changelog
-dazzdb (1.0+20180115-2) UNRELEASED; urgency=medium
+dazzdb (1.0+git20180908.0bd5e07-1) unstable; urgency=medium

+  * Team upload.
+
+  [ Jelmer Vernooĳ ]
  * Use secure copyright file specification URI.

- -- Jelmer Vernooĳ <jelmer@debian.org>  Mon, 08 Oct 2018 21:48:05 +0100
+  [ Andreas Tille ]
+  * d/watch: Use git mode
+  * New Git commit
+  * debhelper 11
+  * Point Vcs fields to salsa.debian.org
+  * Standards-Version: 4.2.1
+  * Remove trailing whitespace in debian/copyright
+  * hardening=+all
+
+ -- Andreas Tille <tille@debian.org>  Sun, 28 Oct 2018 10:29:24 +0100

 dazzdb (1.0+20180115-1) unstable; urgency=medium


--- a/debian/compat
+++ b/debian/compat
-10
+11
--- a/debian/control
+++ b/debian/control
@@ -3,12 +3,11 @@ Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.
 Uploaders: Afif Elghraoui <afif@debian.org>
 Section: science
 Priority: optional
-Build-Depends:
-	debhelper (>= 10),
-	zlib1g-dev,
-Standards-Version: 4.1.3
-Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/dazzdb.git
-Vcs-Git: https://anonscm.debian.org/git/debian-med/dazzdb.git
+Build-Depends: debhelper (>= 11~),
+               zlib1g-dev
+Standards-Version: 4.2.1
+Vcs-Browser: https://salsa.debian.org/med-team/dazzdb
+Vcs-Git: https://salsa.debian.org/med-team/dazzdb.git
 Homepage: https://github.com/thegenemyers/DAZZ_DB

 Package: dazzdb

--- a/debian/copyright
+++ b/debian/copyright
--- a/debian/rules
+++ b/debian/rules
@@ -4,6 +4,8 @@

 include /usr/share/dpkg/default.mk

+export DEB_BUILD_MAINT_OPTIONS=hardening=+all
+
 %:
 	dh $@


--- a/debian/watch
+++ b/debian/watch
-version=3
+version=4

-opts="filenamemangle=s/(?:.*)?v?(\d[\d\.]*)\.tar\.gz/dazzdb-$1.tar.gz/" \
-    https://github.com/thegenemyers/DAZZ_DB/tags (?:.*/)?[vV]?(\d[\d\.]*)\.tar\.gz
+opts="mode=git,pretty=1.0+git%cd.%h" \
+    https://github.com/thegenemyers/DAZZ_DB.git HEAD
+
+#opts="filenamemangle=s/(?:.*)?v?(\d[\d\.]*)\.tar\.gz/dazzdb-$1.tar.gz/" \
+#    https://github.com/thegenemyers/DAZZ_DB/tags (?:.*/)?[vV]?(\d[\d\.]*)\.tar\.gz
--- a/fasta2DAM.c
+++ b/fasta2DAM.c
@@ -407,8 +407,12 @@ int main(int argc, char *argv[])
        nline = 1;
        eof   = (fgets(read,MAX_NAME,input) == NULL);
        if (eof || strlen(read) < 1)
-          { fprintf(stderr,"Skipping '%s', file is empty!\n",core);
-            fclose(input);
+          { fclose(input);
+            if (PIPE != NULL)
+              { fprintf(stderr,"Standard input is empty, terminating!\n");
+                break;
+              }
+            fprintf(stderr,"Skipping '%s', file is empty!\n",core);
            free(core);
            continue;
          }

--- a/fasta2DB.c
+++ b/fasta2DB.c
@@ -390,17 +390,15 @@ int main(int argc, char *argv[])

        eof = (fgets(read,MAX_NAME,input) == NULL);
        if (eof || strlen(read) < 1)
-          { free(core);
-            fclose(input);
+          { fclose(input);
            if (PIPE != NULL)
              { fprintf(stderr,"Standard input is empty, terminating!\n");
                break;
              }
-            else
-              { fprintf(stderr,"Skipping '%s', file is empty!\n",core);
+            fprintf(stderr,"Skipping '%s', file is empty!\n",core);
+            free(core);
            continue;
          }
-          }

        //  Check that core is not too long and name is unique or last source if PIPE'd


--- a/rangen.c
+++ b/rangen.c
@@ -148,6 +148,11 @@ int main(int argc, char *argv[])

  if (argc != 2)
    { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
+      fprintf(stderr,"\n");
+      fprintf(stderr,"      -r: Random number generator seed (default is process id).\n");
+      fprintf(stderr,"      -b: AT vs GC ratio or bias.\n");
+      fprintf(stderr,"      -w: Print -w bp per line (default is 80).\n");
+      fprintf(stderr,"      -U: Output sequence in upper case (default is lower case).\n");
      exit (1);
    }


--- a/simulator.c
+++ b/simulator.c
@@ -567,6 +567,19 @@ int main(int argc, char *argv[])
      { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage[0]);
        fprintf(stderr,"       %*s %s\n",(int) strlen(Prog_Name),"",Usage[1]);
        fprintf(stderr,"       %*s %s\n",(int) strlen(Prog_Name),"",Usage[2]);
+        fprintf(stderr,"\n");
+        fprintf(stderr,"      -m: average read length (log normal distribution).\n");
+        fprintf(stderr,"      -s: standard deviation of read lengths (log normal)\n");
+        fprintf(stderr,"      -x: ignore reads below this length\n");
+        fprintf(stderr,"      -f: forward/reverse strand sampling fraction\n");
+        fprintf(stderr,"      -e: error rate\n");
+        fprintf(stderr,"      -c: coverage of genome\n");
+        fprintf(stderr,"      -C: assume genome is circular (default is linear)\n");
+        fprintf(stderr,"\n");
+        fprintf(stderr,"      -r: Random number generator seed (default is process id).\n");
+        fprintf(stderr,"      -w: Print -w bp per line (default is 80).\n");
+        fprintf(stderr,"      -U: Use upper case for DNA (default is lower case).\n");
+        fprintf(stderr,"      -M: create a map file that indicates where every read was sampled\n");
        exit (1);
      }
  }