Commit f9830f55 authored by Andreas Tille's avatar Andreas Tille

New upstream version 2.8.2-5+dfsg

parent 03cd16f2
#!/bin/bash
# Regression test for sra-sort: sorts a test run and verifies that the
# "md" entry is present in the sorted output directory.
#
# Exit codes:
#   0   success, or test skipped because the scratch directory is missing
#   1   PANFS_PAN1 is not set
#   10  sra-sort is not on PATH
#   20  sra-sort succeeded but did not create "md"
#   any other non-zero value is the exit code of sra-sort itself

TMP=/export/home/TMP
if [ ! -d "$TMP" ] ; then
    echo "$TMP is not found: skipping the test"
    exit 0
fi

if [ -z "$PANFS_PAN1" ] ; then
    echo "PANFS_PAN1 is not set: exiting"
    exit 1
fi

I=$(whoami)
DSTDIR="$TMP/$I/sra-sort-md"
DST="$DSTDIR/sorted"

# All expansions are quoted so that an unexpected value (spaces, glob
# characters) can never make rm operate on an unintended path.
rm -fr -- "$DSTDIR"

SORT=sra-sort
if ! command -v "$SORT" > /dev/null 2>&1 ; then
    echo "sra-sort not found: add it to your PATH"
    exit 10
fi

# The command line is kept in arrays so every argument stays one word
# regardless of its content.
OPT=( --tempdir "$DSTDIR" --mmapdir "$DSTDIR"
      --map-file-bsize 80000000000
      --max-ref-idx-ids 4000000000
      --max-idx-ids 4000000000
      --max-large-idx-ids 800000000 )
SRC="$PANFS_PAN1/sra-test/TEST-DATA/SRR5318091-sra-sort-md"
CMD=( "$SORT" -f -v -L6 "${OPT[@]}" "$SRC" "$DST" )

echo "$ ${CMD[*]}"
"${CMD[@]}"
EXIT=$?
if [ "$EXIT" -ne 0 ] ; then
    echo "sra-sort failed with $EXIT"
    rm -r -- "$DSTDIR"
    exit "$EXIT"
fi

if [ ! -d "$DST/md" ] ; then
    echo "Failure: md was not created in $DST:"
    echo "$ ls $DST"
    ls "$DST"
    rm -r -- "$DSTDIR"
    exit 20
else
    echo "Success: md was created in $DST:"
    echo "$ ls $DST"
    ls "$DST"
fi

rm -r -- "$DSTDIR"
...@@ -446,6 +446,10 @@ void cSRATblPairPostCopyAlign ( cSRATblPair *self, const ctx_t *ctx ) ...@@ -446,6 +446,10 @@ void cSRATblPairPostCopyAlign ( cSRATblPair *self, const ctx_t *ctx )
cSRAPair *csra = self -> csra; cSRAPair *csra = self -> csra;
struct KThread ** pt = NULL;
assert ( self );
pt = & self -> dad . thread;
RowSetIteratorRelease ( self -> rsi, ctx ); RowSetIteratorRelease ( self -> rsi, ctx );
self -> rsi = NULL; self -> rsi = NULL;
...@@ -458,10 +462,10 @@ void cSRATblPairPostCopyAlign ( cSRATblPair *self, const ctx_t *ctx ) ...@@ -458,10 +462,10 @@ void cSRATblPairPostCopyAlign ( cSRATblPair *self, const ctx_t *ctx )
switch ( self -> align_idx ) switch ( self -> align_idx )
{ {
case 1: case 1:
CrossCheckRefAlignTbl ( ctx, csra -> reference -> dtbl, csra -> prim_align -> dtbl, "PRIMARY_ALIGNMENT" ); CrossCheckRefAlignTbl ( ctx, csra -> reference -> dtbl, csra -> prim_align -> dtbl, "PRIMARY_ALIGNMENT", pt );
break; break;
case 2: case 2:
CrossCheckRefAlignTbl ( ctx, csra -> reference -> dtbl, csra -> sec_align -> dtbl, "SECONDARY_ALIGNMENT" ); CrossCheckRefAlignTbl ( ctx, csra -> reference -> dtbl, csra -> sec_align -> dtbl, "SECONDARY_ALIGNMENT", pt );
#if SEQUENCE_BEFORE_SECONDARY #if SEQUENCE_BEFORE_SECONDARY
cSRATblPairWhackMappingIdx ( self, ctx ); cSRATblPairWhackMappingIdx ( self, ctx );
......
...@@ -47,6 +47,7 @@ typedef struct TablePair StdTblPair; ...@@ -47,6 +47,7 @@ typedef struct TablePair StdTblPair;
#include <klib/text.h> #include <klib/text.h>
#include <klib/namelist.h> #include <klib/namelist.h>
#include <klib/rc.h> #include <klib/rc.h>
#include <kproc/thread.h> /* KThreadWait */
#include <string.h> #include <string.h>
...@@ -1050,6 +1051,43 @@ void TablePairDestroy ( TablePair *self, const ctx_t *ctx ) ...@@ -1050,6 +1051,43 @@ void TablePairDestroy ( TablePair *self, const ctx_t *ctx )
VTableRelease ( self -> stbl ); VTableRelease ( self -> stbl );
MemFree ( ctx, ( void* ) self -> full_spec, self -> full_spec_size + 1 ); MemFree ( ctx, ( void* ) self -> full_spec, self -> full_spec_size + 1 );
if ( self -> thread != NULL ) {
rc_t rc = 0;
rc_t status = 0;
STATUS ( 2, "waiting for background thread 0x%p to finish...",
self -> thread );
rc = KThreadWait ( self -> thread, & status );
if ( rc != 0 )
ERROR ( rc, "failed to wait for background thread 0x%p",
self -> thread );
else if ( status == 0 )
STATUS ( 2, "...background thread 0x%p succeed", self -> thread );
else {
ERROR ( status, "background thread 0x%p failed", self -> thread );
rc = status;
}
{
rc_t r2 = KThreadRelease ( self -> thread );
if ( r2 != 0 ) {
ERROR ( r2, "failed to release background thread 0x%p",
self -> thread );
if ( rc == 0 )
rc = r2;
}
}
if ( rc != 0 && ctx != NULL ) {
ctx_t * mctx = NULL;
for ( mctx = ( ctx_t * ) ctx; mctx != NULL && mctx -> rc == 0;
mctx = ( ctx_t* ) mctx -> caller )
{
mctx -> rc = rc;
}
}
}
memset ( self, 0, sizeof * self ); memset ( self, 0, sizeof * self );
} }
...@@ -45,6 +45,7 @@ ...@@ -45,6 +45,7 @@
*/ */
struct VTable; struct VTable;
struct DbPair; struct DbPair;
struct KThread;
struct ColumnReader; struct ColumnReader;
struct ColumnWriter; struct ColumnWriter;
struct ColumnPair; struct ColumnPair;
...@@ -109,6 +110,9 @@ struct TablePair ...@@ -109,6 +110,9 @@ struct TablePair
/* true if already exploded */ /* true if already exploded */
bool exploded; bool exploded;
/* Thread launched by TablePairPostCopy [ to do consistency-check ] */
struct KThread * thread;
uint8_t align [ 2 ]; uint8_t align [ 2 ];
}; };
......
...@@ -341,11 +341,12 @@ rc_t CC CrossCheckRefAlignTblRun ( const KThread *self, void *data ) ...@@ -341,11 +341,12 @@ rc_t CC CrossCheckRefAlignTblRun ( const KThread *self, void *data )
ctx_t thread_ctx = { & pb -> caps, NULL, & ctx_info }; ctx_t thread_ctx = { & pb -> caps, NULL, & ctx_info };
const ctx_t *ctx = & thread_ctx; const ctx_t *ctx = & thread_ctx;
STATUS ( 2, "running consistency-check on background thread" ); STATUS ( 2, "running consistency-check on background thread 0x%p", self );
CrossCheckRefAlignTblInt ( ctx, pb -> ref_tbl, pb -> align_tbl, pb -> align_name ); CrossCheckRefAlignTblInt ( ctx, pb -> ref_tbl, pb -> align_tbl, pb -> align_name );
STATUS ( 2, "finished consistency-check on background thread" ); STATUS ( 2, "finished consistency-check on background thread 0x%p: %s",
self, ctx -> rc ? "failure" : "success ");
VTableRelease ( pb -> align_tbl ); VTableRelease ( pb -> align_tbl );
VTableRelease ( pb -> ref_tbl ); VTableRelease ( pb -> ref_tbl );
...@@ -357,7 +358,8 @@ rc_t CC CrossCheckRefAlignTblRun ( const KThread *self, void *data ) ...@@ -357,7 +358,8 @@ rc_t CC CrossCheckRefAlignTblRun ( const KThread *self, void *data )
#endif #endif
void CrossCheckRefAlignTbl ( const ctx_t *ctx, void CrossCheckRefAlignTbl ( const ctx_t *ctx,
const VTable *ref_tbl, const VTable *align_tbl, const char *align_name ) const VTable *ref_tbl, const VTable *align_tbl, const char *align_name,
KThread ** pt )
{ {
FUNC_ENTRY ( ctx ); FUNC_ENTRY ( ctx );
...@@ -368,6 +370,9 @@ void CrossCheckRefAlignTbl ( const ctx_t *ctx, ...@@ -368,6 +370,9 @@ void CrossCheckRefAlignTbl ( const ctx_t *ctx,
STATUS ( 2, "consistency-check on join indices between REFERENCE and %s tables", align_name ); STATUS ( 2, "consistency-check on join indices between REFERENCE and %s tables", align_name );
assert ( pt );
* pt = NULL;
#if USE_BGTHREAD #if USE_BGTHREAD
name_len = strlen ( align_name ); name_len = strlen ( align_name );
TRY ( pb = MemAlloc ( ctx, sizeof * pb + name_len, false ) ) TRY ( pb = MemAlloc ( ctx, sizeof * pb + name_len, false ) )
...@@ -391,6 +396,7 @@ void CrossCheckRefAlignTbl ( const ctx_t *ctx, ...@@ -391,6 +396,7 @@ void CrossCheckRefAlignTbl ( const ctx_t *ctx,
rc = KThreadMake ( & t, CrossCheckRefAlignTblRun, pb ); rc = KThreadMake ( & t, CrossCheckRefAlignTblRun, pb );
if ( rc == 0 ) if ( rc == 0 )
{ {
* pt = t;
return; return;
} }
......
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
/*-------------------------------------------------------------------------- /*--------------------------------------------------------------------------
* forwards * forwards
*/ */
struct KThread;
struct VTable; struct VTable;
...@@ -44,6 +45,6 @@ struct VTable; ...@@ -44,6 +45,6 @@ struct VTable;
* runs a cross-check of REFERENCE.<name>_IDS against <name>.REF_ID * runs a cross-check of REFERENCE.<name>_IDS against <name>.REF_ID
*/ */
void CrossCheckRefAlignTbl ( const ctx_t *ctx, void CrossCheckRefAlignTbl ( const ctx_t *ctx,
struct VTable const *ref_tbl, struct VTable const *align_tbl, const char *align_name ); struct VTable const *ref_tbl, struct VTable const *align_tbl, const char *align_name, struct KThread ** pt );
#endif #endif
...@@ -62,6 +62,7 @@ clean: stdclean ...@@ -62,6 +62,7 @@ clean: stdclean
# sra statistics # sra statistics
# #
SRASTAT_SRC = \ SRASTAT_SRC = \
assembly-statistics \
sra \ sra \
sra-stat \ sra-stat \
......
/*===========================================================================
*
* PUBLIC DOMAIN NOTICE
* National Center for Biotechnology Information
*
* This software/database is a "United States Government Work" under the
* terms of the United States Copyright Act. It was written as part of
* the author's official duties as a United States Government employee and
* thus cannot be copyrighted. This software/database is freely available
* to the public for use. The National Library of Medicine and the U.S.
* Government have not placed any restriction on its use or reproduction.
*
* Although all reasonable efforts have been taken to ensure the accuracy
* and reliability of the software and data, the NLM and the U.S.
* Government do not and cannot warrant the performance or results that
* may be obtained by using this software or data. The NLM and the U.S.
* Government disclaim all warranties, express or implied, including
* warranties of performance, merchantability or fitness for any particular
* purpose.
*
* Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
*/
#include "sra-stat.h" /* Ctx */
#include <insdc/sra.h> /* INSDC_coord_len */
#include <kdb/table.h> /* KTable */
#include <klib/container.h> /* BSTNode */
#include <klib/debug.h> /* DBGMSG */
#include <klib/log.h> /* LOGERR */
#include <klib/out.h> /* OUTMSG */
#include <klib/rc.h>
#include <vdb/blob.h> /* VBlob */
#include <vdb/cursor.h> /* VCursor */
#include <vdb/database.h> /* VDatabase */
#include <vdb/table.h> /* VTable */
#include <vdb/vdb-priv.h> /* VTableOpenKTableRead */
/* Contig
 *  One contig stored as a node of a BSTree.
 *  The BSTNode is the first member, so a Contig pointer and a pointer
 *  to its BSTNode can be cast to each other (as the callbacks below do).
 */
typedef struct {
BSTNode n;
uint64_t length; /* length of this contig */
} Contig;
/* ContigWhack
 *  BSTreeWhack callback: frees the memory of a single Contig node.
 *  "data" is unused.
 */
static void CC ContigWhack ( BSTNode * n, void * data ) {
    Contig * contig = ( Contig * ) n;

    free ( contig );
}
/* ContigSort
 *  BSTree ordering callback: orders contigs by DESCENDING length.
 *
 *  Returns a three-way result:
 *    > 0  when "item" is shorter than "n"  (item sorts after n)
 *    < 0  when "item" is longer than "n"   (item sorts before n)
 *      0  when both have the same length
 *
 *  The previous implementation returned only 0 or 1, so it never produced
 *  a negative value. The positive case is unchanged, so the descending
 *  order is kept.
 */
static
int64_t CC ContigSort ( const BSTNode * item, const BSTNode * n )
{
    const Contig * contigl = ( const Contig * ) item;
    const Contig * contigr = ( const Contig * ) n;

    assert ( contigl && contigr);

    if ( contigl -> length < contigr -> length )
        return 1;

    if ( contigl -> length > contigr -> length )
        return -1;

    return 0;
}
/* Contigs
 *  Collection of contigs plus the running state and results of the
 *  N50/L50/N90/L90 calculation.
 */
typedef struct {
uint64_t assemblyLength; /* sum of lengths of all contigs added by ContigsAdd */
uint64_t contigLength; /* number of contigs added by ContigsAdd */
uint64_t count; /* number of contigs visited so far by ContigNext */
uint64_t length; /* sum of lengths of contigs visited so far by ContigNext */
BSTree bt; /* tree of Contig nodes, ordered by ContigSort */
uint64_t l50; /* value of "count" when "length" first reached half of assemblyLength */
uint64_t n50; /* length of the contig at which l50 was set */
uint64_t l90; /* value of "count" when "length" first reached 90% of assemblyLength */
uint64_t n90; /* length of the contig at which l90 was set */
} Contigs;
/* ContigNext
 *  BSTreeForEach callback: accounts for one more contig.
 *
 *  "n"    [ IN ]     - the Contig node being visited
 *  "data" [ IN/OUT ] - the Contigs accumulator
 *
 *  Adds the contig to the running count/length and records L50/N50 and
 *  L90/N90 the first time the running length reaches 50% and 90% of the
 *  total assembly length.
 */
static void CC ContigNext ( BSTNode * n, void * data ) {
    Contigs * stats = ( Contigs * ) data;
    const Contig * node = ( const Contig * ) n;

    assert ( node && stats );

    stats -> count += 1;
    stats -> length += node -> length;

    DBGMSG ( DBG_APP, DBG_COND_1, ( "Contig %lu/%lu: %lu. Total: %lu/%lu\n",
        stats -> count, stats -> contigLength, node -> length,
        stats -> length, stats -> assemblyLength ) );

    /* l50 == 0 means the 50% threshold has not been crossed yet */
    if ( stats -> l50 == 0 ) {
        if ( stats -> length * 2 >= stats -> assemblyLength ) {
            stats -> n50 = node -> length;
            stats -> l50 = stats -> count;
            DBGMSG ( DBG_APP, DBG_COND_1, ( "L50: %lu, N50: %lu (%lu>=%lu/2)\n",
                stats -> l50, stats -> n50,
                stats -> length, stats -> assemblyLength ) );
        }
    }

    /* l90 == 0 means the 90% threshold has not been crossed yet */
    if ( stats -> l90 == 0 ) {
        if ( .9 * stats -> assemblyLength <= stats -> length ) {
            stats -> n90 = node -> length;
            stats -> l90 = stats -> count;
            DBGMSG ( DBG_APP, DBG_COND_1, ( "L90: %lu, N90: %lu (%lu*.9>=%lu)\n",
                stats -> l90, stats -> n90,
                stats -> length, stats -> assemblyLength ) );
        }
    }
}
/* ContigsInit
 *  Resets every member of "self" (counters, results and the tree) to zero.
 */
static void ContigsInit ( Contigs * self ) {
    assert ( self );

    memset ( self, 0, sizeof ( Contigs ) );
}
/* ContigsAdd
 *  Allocates a Contig of the given length and inserts it into the tree.
 *
 *  "length" [ IN ] - contig length. It is a 64-bit value, matching
 *   Contig.length and the 64-bit value computed by the caller; a 32-bit
 *   parameter would silently truncate contigs of 4G bases or more.
 *
 *  Returns 0 on success, an "rcMemory, rcExhausted" rc when allocation
 *  fails, or the rc of BSTreeInsert. The totals are updated only when
 *  the node was really inserted; on insertion failure the node is freed.
 */
static rc_t ContigsAdd ( Contigs * self, uint64_t length ) {
    rc_t rc = 0;
    Contig * contig = NULL;

    assert ( self );

    contig = ( Contig * ) calloc ( 1, sizeof * contig );
    if ( contig == NULL )
        return RC ( rcExe, rcStorage, rcAllocating, rcMemory, rcExhausted );

    contig -> length = length;

    rc = BSTreeInsert ( & self -> bt, ( BSTNode * ) contig, ContigSort );
    if ( rc != 0 ) {
        /* the tree did not take ownership: do not leak the node */
        free ( contig );
        return rc;
    }

    self -> assemblyLength += length;
    ++ self -> contigLength;

    return 0;
}
/* ContigsCalculateStatistics
 *  Visits every contig in the tree with ContigNext, which fills in
 *  count/length and the L50/N50/L90/N90 members of "self".
 *  NOTE(review): the "false" argument presumably selects forward
 *  (non-reversed) traversal - confirm against BSTreeForEach.
 */
static void ContigsCalculateStatistics ( Contigs * self ) {
    const BSTree * tree = NULL;

    assert ( self );

    tree = & self -> bt;
    BSTreeForEach ( tree, false, ContigNext, self );
}
/* ContigsFini
 *  Releases all Contig nodes held by the tree of "self"
 *  (each node is destroyed by ContigWhack).
 */
static void ContigsFini ( Contigs * self ) {
    BSTree * tree = NULL;

    assert ( self );

    tree = & self -> bt;
    BSTreeWhack ( tree, ContigWhack, NULL );
}
/* CalculateNL
 *  Calculate N50, L50 (and N90, L90) statistics:
 *  see https://en.wikipedia.org/wiki/N50,_L50,_and_related_statistics
 *
 *  "db"  [ IN ]  - database to examine; NULL means "not a database",
 *   in which case nothing is calculated and 0 is returned.
 *
 *  "ctx" [ OUT ] - receives l, n, l50, n50, l90, n90 when the calculation
 *   succeeded and at least one local reference was found (n90 > 0).
 *
 *  Only databases having a REFERENCE table are processed; a missing table
 *  is not an error. Each name projected from the "i_name" index is treated
 *  as one contig; it is counted only when its CMP_READ cell is not empty
 *  (local reference).
 *
 *  Returns 0 on success or when the calculation was skipped,
 *  otherwise the rc of the first failed call.
 */
rc_t CC CalculateNL ( const VDatabase * db, Ctx * ctx ) {
    rc_t rc = 0;

    const VTable * tbl = NULL;
    const KTable * ktbl = NULL;
    const KIndex * idx = NULL;
    const VCursor * cursor = NULL;

    uint32_t CMP_READ = 0;
    uint32_t READ_LEN = 0;

    Contigs contigs;

    int64_t start = 1;
    uint64_t count = 0;

    /* Statistics are calculated just for VDatabase-s */
    if ( db == NULL ) {
        DBGMSG ( DBG_APP, DBG_COND_1,
            ( "CalculateAssemblyStatistics skipped: not a database\n" ) );
        return 0;
    }

    rc = VDatabaseOpenTableRead ( db, & tbl, "REFERENCE" );
    /* Statistics are calculated just for VDatabase-s with REFERENCE */
    if ( rc != 0 && GetRCState ( rc ) == rcNotFound ) {
        DBGMSG ( DBG_APP, DBG_COND_1,
            ( "CalculateAssemblyStatistics skipped: no REFERENCE table\n" ) );
        return 0;
    }
    /* any other failure is a real error: report it and keep its rc
       instead of letting the next call on a NULL table overwrite it */
    DISP_RC ( rc, "while calling VDatabaseOpenTableRead(REFERENCE)" );

    ContigsInit ( & contigs );

    if ( rc == 0 ) {
        rc = VTableOpenKTableRead ( tbl, & ktbl );
        DISP_RC ( rc, "while calling "
            "VTableOpenKTableRead(VDatabaseOpenTableRead(REFERENCE))");
    }
    if ( rc == 0 ) {
        rc = KTableOpenIndexRead ( ktbl, & idx, "i_name" );
        DISP_RC ( rc, "while calling KTableOpenIndexRead"
            "(VTableOpenKTableRead(VDatabaseOpenTableRead(REFERENCE),i_name)");
    }
    if ( rc == 0 ) {
        rc = VTableCreateCursorRead ( tbl, & cursor );
        DISP_RC ( rc, "while calling VTableCreateCursorRead(REFERENCE)");
    }
    if ( rc == 0 ) {
        rc = VCursorAddColumn ( cursor, & CMP_READ, "CMP_READ" );
        DISP_RC ( rc, "while calling VCursorAddColumn(REFERENCE,CMP_READ)");
    }
    if ( rc == 0 ) {
        rc = VCursorAddColumn ( cursor, & READ_LEN, "READ_LEN" );
        DISP_RC ( rc, "while calling VCursorAddColumn(REFERENCE,READ_LEN)");
    }
    if ( rc == 0 ) {
        rc = VCursorOpen ( cursor );
        DISP_RC ( rc, "while calling VCursorOpen(REFERENCE)");
    }

    /* walk the index: every iteration handles the rows
       [ start, start + count ) that belong to one reference name */
    for ( start = 1; rc == 0; start += count ) {
        uint32_t row_len = 0;
        const VBlob * blob = NULL;
        size_t key_size = 0;
        char key [ 4096 ];

        rc = KIndexProjectText ( idx, start, & start, & count,
                                 key, sizeof key, & key_size );
        if ( rc == SILENT_RC
            ( rcDB, rcIndex, rcProjecting, rcId, rcNotFound ) )
        {
            rc = 0;
            break; /* no more references */
        }
        DISP_RC ( rc, "while calling KIndexProjectText(KTableOpenIndexRead"
            "(VTableOpenKTableRead(VDatabaseOpenTableRead(REFERENCE),i_name)");

        if ( rc == 0 ) {
            rc = VCursorGetBlobDirect ( cursor, & blob, start, CMP_READ );
            DISP_RC ( rc, "while calling VCursorGetBlobDirect(CMP_READ)" );
        }

        if ( rc == 0 ) {
            uint32_t elem_bits = 0;
            const void * base = NULL;
            uint32_t boff = 0;
            rc = VBlobCellData ( blob, start,
                                 & elem_bits, & base, & boff, & row_len );
            DISP_RC ( rc, "while calling VBlobCellData(CMP_READ)" );
        }

        if ( rc == 0 ) {
            if ( row_len == 0 ) {
             /* When CMP_READ is not empty - local reference.
                We calculate statistics just for local references */
                DBGMSG ( DBG_APP, DBG_COND_1, ( "CalculateAssemblyStatistics: "
                    "%s skipped: not a local reference\n", key ) );
            }
            else {
                uint64_t length = 0;
                INSDC_coord_len first = 0;
                uint32_t first_read = 0;

                /* READ_LEN of the first row of this reference */
                rc = VCursorReadDirect ( cursor, start,
                    READ_LEN, 8, & first, sizeof first, & first_read );
                DISP_RC ( rc, "while calling VCursorReadDirect(READ_LEN,id)" );
                if ( rc == 0 )
                    length = first;

                if ( rc == 0 && count > 1 ) {
                    INSDC_coord_len last = 0;
                    uint32_t last_read = 0;

                    /* all rows but the last one are taken to have the
                       length of the first row; the last one is read */
                    rc = VCursorReadDirect ( cursor, start + count - 1,
                        READ_LEN, 8, & last, sizeof last, & last_read );
                    DISP_RC ( rc,
                        "while calling VCursorReadDirect(READ_LEN,id+count)" );
                    if ( rc == 0 )
                        length = length * ( count - 1) + last;
                }

                if ( rc == 0 )
                    rc = ContigsAdd ( & contigs, length );
            }
        }

        RELEASE ( VBlob, blob );
    }

    if ( rc == 0 )
        ContigsCalculateStatistics ( & contigs );

    RELEASE ( VCursor, cursor );
    RELEASE ( KIndex, idx );
    RELEASE ( KTable, ktbl );
    RELEASE ( VTable, tbl );

    if ( rc == 0 ) {
        assert ( ctx );
        assert ( contigs . assemblyLength == contigs . length );
        assert ( contigs . contigLength == contigs . count );

        /* n90 stays 0 when no local reference was found */
        if ( contigs . n90 > 0 ) {
            ctx -> l50 = contigs . l50;
            ctx -> n50 = contigs . n50;
            ctx -> l90 = contigs . l90;
            ctx -> n90 = contigs . n90;
            ctx -> l = contigs . contigLength;
            ctx -> n = contigs . assemblyLength;
        }
    }

    ContigsFini ( & contigs );

    return rc;
}
This diff is collapsed.
...@@ -33,10 +33,61 @@ struct KFile; ...@@ -33,10 +33,61 @@ struct KFile;
struct VCursor; struct VCursor;
struct VTable; struct VTable;
/* EMetaState
 *  Two-state found / not-found flag; used as the "state" member of TableCounts.
 */
typedef enum EMetaState {
eMSNotFound,
eMSFound
} EMetaState;
/* QualityStats
 *  Array of struct Quality elements with its bookkeeping.
 *  NOTE(review): "allocated"/"used" presumably are the capacity and the
 *  number of occupied elements of QUALITY - confirm against the users.
 */
typedef struct QualityStats {
struct Quality* QUALITY;
size_t allocated;
size_t used;
} QualityStats;
/* TableCounts
 *  Array of struct Counts elements with its bookkeeping and a
 *  found / not-found state.
 *  NOTE(review): "allocated"/"used" presumably are the capacity and the
 *  number of occupied elements of "count" - confirm against the users.
 */
typedef struct TableCounts {
EMetaState state;
struct Counts* count;
size_t allocated;
size_t used;
} TableCounts;
/* Ctx
 *  Context shared by the sra-stat routines.
 *  The members n, l, n50, l50, n90, l90 are filled in by CalculateNL.
 */
typedef struct Ctx {
const struct BSTree* tr;
const struct MetaDataStats* meta_stats;
const struct SraMeta* info;
const struct SraSizeStats* sizes;
const struct ArcInfo* arc_info;
struct srastat_parms* pb;
struct SraStatsTotal* total;
const struct VDatabase * db; /* sra-stat argument is a DB */
const struct VTable * tbl; /* sra-stat argument is a table */
const struct KMetadata* meta; /* from Table (when running on table) */
QualityStats quality;
TableCounts tables;
uint64_t n; /* total assembly length: sum of lengths of all counted contigs */
uint64_t l; /* number of counted contigs */
uint64_t n50; /* N50 assembly statistic */
uint64_t l50; /* L50 assembly statistic */
uint64_t n90; /* N90 assembly statistic */
uint64_t l90; /* L90 assembly statistic */
} Ctx;
rc_t CC VCursorColumnRead(const struct VCursor *self, int64_t id, rc_t CC VCursorColumnRead(const struct VCursor *self, int64_t id,
uint32_t idx, const void **base, bitsz_t *offset, bitsz_t *size); uint32_t idx, const void **base, bitsz_t *offset, bitsz_t *size);
rc_t CC VTableMakeSingleFileArchive(const struct VTable *self, rc_t CC VTableMakeSingleFileArchive(const struct VTable *self,
const struct KFile **sfa, bool lightweight); const struct KFile **sfa, bool lightweight);
rc_t CC CalculateNL ( const struct VDatabase * db, Ctx * ctx );
/* RELEASE
 *  Calls <type>Release ( obj ) and sets obj to NULL.
 *  Requires a variable "rc_t rc" in the calling scope: a release failure
 *  is stored into it only when rc does not already hold an error.
 */
#define RELEASE(type, obj) do { rc_t rc2 = type##Release(obj); \
if (rc2 && !rc) { rc = rc2; } obj = NULL; } while (false)
/* DISP_RC
 *  Logs "msg" together with "rc" via LOGERR ( klogInt, ... ) when rc is
 *  not 0; does nothing otherwise. The value of rc is not modified.
 */
#define DISP_RC(rc, msg) (void)((rc == 0) ? 0 : LOGERR(klogInt, rc, msg))
#endif /* _h_sra_stat_tools_ */ #endif /* _h_sra_stat_tools_ */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment