Skip to content
Commits on Source (6)
......@@ -27,7 +27,7 @@ else
STDF = -std=c99
endif
CFLAGS := $(ENABLE_MULTITHREAD) $(ENABLE_ATOMIC) $(STDF) $(CFLAGS) -I../core -MMD -MP
MYCFLAGS := $(ENABLE_MULTITHREAD) $(ENABLE_ATOMIC) $(STDF) $(CFLAGS) -I../core -MMD -MP
OBJDIR := ./obj
ifeq "$(strip $(OBJDIR))" ""
......@@ -71,7 +71,7 @@ mpiscript : mpiscript.tmpl
$(OBJDIR)/%.o : %.c
-@mkdir -p $(OBJDIR)
@[ -d $(OBJDIR) ]
$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -o $@ -c $<
$(CC) $(MYCFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -o $@ -c $<
clean :
rm -f $(OBJNODPAIR) $(DEPSNODPAIR) $(PROGS) $(SCRIPTS) *.o *.a *.exe *~
......
......@@ -12006,87 +12006,6 @@ void commongappick_record( int nseq, char **seq, int *map )
}
void commongappick( int nseq, char **seq )
{
int i, j, count;
int len = strlen( seq[0] );
#if 1
int *mapfromnewtoold;
mapfromnewtoold = calloc( len+1, sizeof( int ) );
for( i=0, count=0; i<=len; i++ )
{
for( j=0; j<nseq; j++ )
if( seq[j][i] != '-' ) break;
if( j != nseq )
{
mapfromnewtoold[count++] = i;
}
}
// mapfromnewtoold[count] = -1; // iranai
for( j=0; j<nseq; j++ )
{
for( i=0; i<count; i++ )
{
seq[j][i] = seq[j][mapfromnewtoold[i]];
}
}
free( mapfromnewtoold );
#else
--------------------------
int *mapfromoldtonew;
int pos;
mapfromoldtonew = calloc( len+1, sizeof( int ) );
for( i=0; i<=len; i++ ) mapfromoldtonew[i] = -1;
for( i=0, count=0; i<=len; i++ )
{
for( j=0; j<nseq; j++ )
if( seq[j][i] != '-' ) break;
if( j != nseq )
{
mapfromoldtonew[i] = count;
count++;
}
}
for( j=0; j<nseq; j++ )
{
for( i=0; i<=len; i++ )
{
if( (pos=mapfromoldtonew[i]) != -1 )
seq[j][pos] = seq[j][i];
}
}
free( mapfromoldtonew );
--------------------------
for( i=0, count=0; i<=len; i++ )
{
/*
allgap = 1;
for( j=0; j<nseq; j++ )
allgap *= ( seq[j][i] == '-' );
if( !allgap )
*/
for( j=0; j<nseq; j++ )
if( seq[j][i] != '-' ) break;
if( j != nseq )
{
for( j=0; j<nseq; j++ )
{
seq[j][count] = seq[j][i];
}
count++;
}
}
#endif
}
#if 0
void commongaprecord( int nseq, char **seq, char *originallygapped )
{
......
......@@ -4280,7 +4280,7 @@ fprintf( stderr, "\n" );
reporterr( "diff = %f\n\n", (pairscore - wm + *impmatch ) / fpenalty );
#if 1
if( ( !trywarp && abs( pairscore - wm +*impmatch ) > 0.01 ) || PFACERROR )
if( ( !trywarp && fabs( pairscore - wm +*impmatch ) > 0.01 ) || PFACERROR ) // abs() -> fabs(), 2019/Jan/25
// if( abs( pairscore - wm +*impmatch ) > 0.01 )
#else
if( abs( pairscore - wm +*impmatch ) > 0.01 )
......
......@@ -14,6 +14,10 @@ ENABLE_MULTITHREAD = -Denablemultithread
# Comment out the above line if your compiler
# does not support "atomic_int".
#DASH_CLIENT = dash_client
# Uncomment the above line to use protein 3D
# structural information. Go language is required.
CC = gcc
#CC = icc
CFLAGS = -O3
......@@ -50,7 +54,8 @@ INSTALL = install
PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance pairlocalalign \
multi2hat3s pairash addsingle maffttext2hex hex2maffttext \
splittbfast disttbfast tbfast nodepair mafft-profile f2cl mccaskillwrap contrafoldwrap countlen \
seq2regtable regtable2seq score getlag dndpre setcore replaceu restoreu setdirection makedirectionlist version
seq2regtable regtable2seq score getlag dndpre setcore replaceu restoreu setdirection makedirectionlist version \
$(DASH_CLIENT)
SOS = libdisttbfast.so
DLLS = libdisttbfast.dll
DYLIBS = libdisttbfast.dylib
......@@ -152,6 +157,9 @@ sos : $(SOS)
dylibs : $(DYLIBS)
dlls : $(DLLS)
$(DASH_CLIENT): dash_client.go
go build dash_client.go
univscript: univscript.tmpl Makefile
sed "s:_PROGS:$(PROGS):" univscript.tmpl > univscript
......@@ -537,7 +545,7 @@ score.o : score.c $(HEADER)
$(CC) $(MYCFLAGS) -c score.c
clean :
rm -f *.o *.a *.exe *~ $(PERLPROGS) $(PROGS) $(SCRIPTS) $(SOS) $(DYLIBS) $(DLLS) *.gcda *.gcno
rm -f *.o *.a *.exe *~ $(PERLPROGS) $(PROGS) $(SCRIPTS) $(SOS) $(DYLIBS) $(DLLS) *.gcda *.gcno $(DASH_CLIENT)
# rm -f ../binaries/* ../scripts/*
install : all
......
SUPPORT environments without Pthreads
......@@ -2931,7 +2931,9 @@ int main( int argc, char *argv[] )
fprintf( stderr, "njobc = %d\n", njobc );
if( norg > 1000 || nadd > 1000 ) use_fft = 0;
fullseqlen = alloclen = nlenmax*4+1; //chuui!
// fullseqlen = alloclen = nlenmax*2+1; //chuui!
seq = AllocateCharMtx( njob, alloclen );
name = AllocateCharMtx( njob, B+1 );
......
......@@ -3,7 +3,7 @@
#define DEFAULTOFS_B -123 /* +10 -- -50 teido ka ? */
void BLOSUMmtx( int n, double **matrix, double *freq, unsigned char *amino, char *amino_grp )
void BLOSUMmtx( int n, double **matrix, double *freq, unsigned char *amino, char *amino_grp, int *rescalept )
{
/*
char locaminod[26] = "GASTPLIMVDNEQFYWKRHCXXX.-U";
......@@ -112,6 +112,7 @@ void BLOSUMmtx( int n, double **matrix, double *freq, unsigned char *amino, char
-2, -1, -2, -3, -3, -1, -2, -3, 2, -1, -1, -2, 0, 4, -3, -2, -2, 2, 8,
0, -3, -3, -4, -1, -3, -3, -4, -4, 4, 1, -3, 1, -1, -3, -2, 0, -3, -1, 5,
};
#if 0
double tmpmtx62[] =
{
6,
......@@ -135,6 +136,30 @@ void BLOSUMmtx( int n, double **matrix, double *freq, unsigned char *amino, char
-3, -3, -3, -5, -4, -2, -3, -5, 3, -2, -2, -3, -1, 4, -4, -3, -2, 3, 10,
0, -4, -4, -5, -1, -3, -4, -5, -5, 4, 1, -3, 1, -1, -4, -2, 0, -4, -2, 6,
};
#else
double tmpmtx62[] =
{ 5.893685,
-2.120252, 8.210189,
-2.296072, -0.659672, 8.479856,
-2.630151, -2.408668, 1.907550, 8.661363,
-0.612761, -5.083814, -3.989626, -5.189966, 12.873172,
-1.206025, 1.474162, 0.002529, -0.470069, -4.352838, 7.927704,
-1.295821, -0.173087, -0.402015, 2.265459, -5.418729, 2.781955, 7.354247,
0.239392, -3.456163, -0.634136, -1.970281, -3.750621, -2.677743, -3.165266, 8.344902,
-2.437724, -0.374792, 0.867735, -1.678363, -4.481724, 0.672051, -0.176497, -3.061315, 11.266586,
-1.982718, -4.485360, -4.825558, -4.681732, -1.841495, -4.154454, -4.791538, -5.587336, -4.847345, 5.997760,
-2.196882, -3.231860, -5.068375, -5.408471, -1.916207, -3.200863, -4.269723, -5.440437, -4.180099, 2.282412, 5.774148,
-1.101017, 3.163105, -0.268534, -1.052724, -4.554510, 1.908859, 1.163010, -2.291924, -1.081539, -4.005209, -3.670219, 6.756827,
-1.402897, -2.050705, -3.226290, -4.587785, -2.129758, -0.631437, -2.997038, -4.014898, -2.326896, 1.690191, 2.987638, -2.032119, 8.088951,
-3.315080, -4.179521, -4.491005, -5.225795, -3.563219, -4.746598, -4.788639, -4.661029, -1.851231, -0.241317, 0.622170, -4.618016, 0.018880, 9.069126,
-1.221394, -3.162863, -3.000581, -2.220163, -4.192770, -1.922917, -1.674258, -3.200320, -3.241363, -4.135001, -4.290107, -1.520445, -3.714633, -5.395930, 11.046892,
1.673639, -1.147170, 0.901353, -0.391548, -1.312485, -0.151708, -0.220375, -0.438748, -1.322366, -3.522266, -3.663923, -0.305170, -2.221304, -3.553533, -1.213470, 5.826527,
-0.068042, -1.683495, -0.069138, -1.576054, -1.299983, -1.012997, -1.294878, -2.363065, -2.528844, -1.076382, -1.796229, -1.004336, -0.999449, -3.161436, -1.612919, 2.071710, 6.817956,
-3.790328, -4.019108, -5.543911, -6.321502, -3.456164, -2.919725, -4.253197, -3.737232, -3.513238, -3.870811, -2.447829, -4.434676, -2.137255, 1.376341, -5.481260, -4.127804, -3.643382, 15.756041,
-2.646022, -2.540799, -3.122641, -4.597428, -3.610671, -2.131601, -3.030688, -4.559647, 2.538948, -1.997058, -1.593097, -2.730047, -1.492308, 4.408690, -4.379667, -2.528713, -2.408996, 3.231335, 9.892544,
-0.284140, -3.753871, -4.314525, -4.713963, -1.211518, -3.297575, -3.663425, -4.708118, -4.676220, 3.820569, 1.182672, -3.393535, 1.030861, -1.273542, -3.523054, -2.469318, -0.083276, -4.251392, -1.811267, 5.653391,
};
#endif
double tmpmtx80[] =
{
7,
......@@ -240,7 +265,7 @@ void BLOSUMmtx( int n, double **matrix, double *freq, unsigned char *amino, char
else if( n == 90 ) tmpmtx = tmpmtx90;
else if( n == 100 ) tmpmtx = tmpmtx100;
else if( n == 0 ) tmpmtx = tmpmtx0;
else if( n == -1 ) tmpmtx = loadaamtx();
else if( n == -1 ) tmpmtx = loadaamtx( rescalept );
else
{
fprintf( stderr, "blosum %d ?\n", n );
......@@ -333,8 +358,10 @@ static void overridematrix( double **matrix )
fp = fopen( "_aamtx", "r" );
if( fp == NULL )
{
fprintf( stderr, "Cannot open scorematrix\n" );
exit( 1 );
fprintf( stderr, "warning: cannot open scorematrix. Use the default one.\n" );
// f2cl.c de tomaranai youni
// exit( 1 );
return;
}
while( 1 )
......
......@@ -688,8 +688,8 @@ void constants( int nseq, char **seq )
for( i=0; i<nalphabets; i++ ) for( j=0; j<nalphabets; j++ )
average += n_distmp[i][j] * freq1[i] * freq1[j];
}
#if TEST
fprintf( stdout, "####### average2 = %f\n", average );
#if 1
if( disp ) fprintf( stdout, "####### average2 = %f\n", average );
#endif
for( i=0; i<nalphabets; i++ ) for( j=0; j<nalphabets; j++ )
......@@ -710,8 +710,8 @@ void constants( int nseq, char **seq )
average = 0.0;
for( i=0; i<nalphabets; i++ )
average += n_distmp[i][i] * freq1[i];
#if TEST
fprintf( stdout, "####### average1 = %f\n", average );
#if 1
if( disp ) fprintf( stdout, "####### average1 = %f\n", average );
#endif
if( average < 0.0 )
......@@ -807,7 +807,7 @@ void constants( int nseq, char **seq )
double iaverage;
// double tmp;
double **n_distmp;
int makeaverage0;
int rescale = 1;
if( nblosum == 0 )
......@@ -820,10 +820,6 @@ void constants( int nseq, char **seq )
// nblosum *= -1;
// makeaverage0 = 0;
// }
else
{
makeaverage0 = 1;
}
nalphabets = 26;
nscoredalphabets = 20;
......@@ -855,7 +851,9 @@ void constants( int nseq, char **seq )
penaltyLN = (int)( 600.0 / 1000.0 * -2000 + 0.5);
penalty_exLN = (int)( 600.0 / 1000.0 * -100 + 0.5);
BLOSUMmtx( nblosum, n_distmp, freq, amino, amino_grp );
BLOSUMmtx( nblosum, n_distmp, freq, amino, amino_grp, &rescale );
reporterr( "rescale = %d\n", rescale );
if( trywarp ) sprintf( shiftmodel, "%4.2f", -(double)penalty_shift/600 );
else sprintf( shiftmodel, "noshift" );
......@@ -894,26 +892,26 @@ void constants( int nseq, char **seq )
average = 0.0;
else
{
for( i=0; i<20; i++ )
#if TEST
for( i=0; i<20; i++ )
fprintf( stdout, "freq[%c] = %f, datafreq[%c] = %f, freq1[] = %f\n", amino[i], freq[i], amino[i], datafreq[i], freq1[i] );
#endif
average = 0.0;
for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
average += n_distmp[i][j] * freq1[i] * freq1[j];
}
#if TEST
fprintf( stdout, "####### average2 = %f\n", average );
#if 1
if( disp ) fprintf( stdout, "####### average2 = %f\n", average );
#endif
if( makeaverage0 )
if( rescale )
{
for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
n_distmp[i][j] -= average;
}
#if TEST
fprintf( stdout, "average2 = %f\n", average );
fprintf( stdout, "after average substruction : \n" );
fprintf( stdout, "after average subtraction : \n" );
for( i=0; i<20; i++ )
{
for( j=0; j<20; j++ )
......@@ -927,12 +925,20 @@ void constants( int nseq, char **seq )
average = 0.0;
for( i=0; i<20; i++ )
average += n_distmp[i][i] * freq1[i];
#if TEST
fprintf( stdout, "####### average1 = %f\n", average );
#if 1
if( disp ) fprintf( stdout, "####### average1 = %f\n", average );
#endif
if( rescale )
{
for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
n_distmp[i][j] *= 600.0 / average;
}
else
{
for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
n_distmp[i][j] *= 600.0;
}
#if TEST
fprintf( stdout, "after average division : \n" );
for( i=0; i<20; i++ )
......@@ -969,27 +975,27 @@ void constants( int nseq, char **seq )
if( disp )
{
fprintf( stdout, " scoring matrix \n" );
fprintf( stderr, " scoring matrix \n" );
for( i=0; i<20; i++ )
{
fprintf( stdout, "%c ", amino[i] );
fprintf( stderr, "%c ", amino[i] );
for( j=0; j<20; j++ )
fprintf( stdout, "%5.0f", n_distmp[i][j] );
fprintf( stdout, "\n" );
fprintf( stderr, "%5.0f", n_distmp[i][j] );
fprintf( stderr, "\n" );
}
fprintf( stdout, " " );
fprintf( stderr, " " );
for( i=0; i<20; i++ )
fprintf( stdout, " %c", amino[i] );
fprintf( stderr, " %c", amino[i] );
average = 0.0;
for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
average += n_distmp[i][j] * freq1[i] * freq1[j];
fprintf( stdout, "\naverage = %f\n", average );
fprintf( stderr, "\naverage = %f\n", average );
iaverage = 0.0;
for( i=0; i<20; i++ )
iaverage += n_distmp[i][i] * freq1[i];
fprintf( stdout, "itch average = %f, E=%f\n", iaverage, average/iaverage );
fprintf( stderr, "itch average = %f, E=%f\n", iaverage, average/iaverage );
reporterr( "parameters: %d, %d, %d\n", penalty, penalty_ex, offset );
......
This diff is collapsed.
......@@ -2653,7 +2653,7 @@ int disttbfast( int ngui, int lgui, char **namegui, char **seqgui, int argc, cha
}
#ifndef mingw
#if !defined(mingw) && !defined(_MSC_VER)
setstacksize( 200 * njob ); // topolorder() de ookime no stack wo shiyou.
#endif
......
#include "mltaln.h"
#include <sys/types.h>
#include <unistd.h>
#define DEBUG 0
#define TEST 0
......
#include "mltaln.h"
#include <sys/types.h>
#include <unistd.h>
#define DEBUG 0
#define TEST 0
......
......@@ -7,6 +7,7 @@ static char *comment;
static char *orderfile;
static int format;
static int namelen;
static int excludedashseq;
static int extendedalphabet;
static void fillspace( char *seq, int lenmax )
......@@ -166,6 +167,7 @@ void setmark( int nlen, int nseq, char **seq, char *mark )
void arguments( int argc, char *argv[] )
{
int c;
excludedashseq = 0;
namelen = -1;
scoremtx = 1;
nblosum = 62;
......@@ -207,6 +209,9 @@ void arguments( int argc, char *argv[] )
case 'f':
format = 'f';
break;
case 'd':
excludedashseq = 1;
break;
case 'y':
format = 'y';
break;
......@@ -233,17 +238,17 @@ void arguments( int argc, char *argv[] )
}
}
int main( int argc, char *argv[] )
{
static int *nlen;
static char **name, **seq, *mark;
static int *order;
int i;
static int *nlen, *onlen;
static char **name, **oname, **seq, **oseq, *mark;
static int *order, *oorder;
int i, j;
FILE *infp;
FILE *orderfp;
char gett[B];
int nlenmin;
int nout;
arguments( argc, argv );
......@@ -269,7 +274,6 @@ int main( int argc, char *argv[] )
name = AllocateCharMtx( njob, B+1 );
nlen = AllocateIntVec( njob );
if( orderfile )
{
orderfp = fopen( orderfile, "r" );
......@@ -290,6 +294,7 @@ int main( int argc, char *argv[] )
for( i=0; i<njob; i++ ) order[i] = i;
}
readData_pointer_casepreserve( infp, name, nlen, seq );
fclose( infp );
......@@ -300,24 +305,79 @@ int main( int argc, char *argv[] )
// initFiles();
if( excludedashseq )
{
nout = 0;
for( i=0; i<njob; i++ )
{
if( !strncmp( name[order[i]]+1, "DASH|", 5 ) || strstr( name[order[i]]+1, "_numo_e_DASH|" ) ) continue;
nout++;
}
reporterr( "nout=%d\n", nout );
oseq = calloc( sizeof( char * ), nout );
oname = calloc( sizeof( char * ), nout );
onlen = AllocateIntVec( nout );
oorder = AllocateIntVec( nout );
for( i=0,j=0; i<njob; i++ )
{
if( !strncmp( name[order[i]]+1, "DASH|", 5 ) || strstr( name[order[i]]+1, "_numo_e_DASH|" ) ) continue;
oname[j] = name[order[i]];
oseq[j] = seq[order[i]];
onlen[j] = nlen[order[i]];
oorder[j] = j;
j++;
}
commongappick( nout, oseq );
}
else
{
oseq = seq;
oname = name;
onlen = nlen;
oorder = order;
nout = njob;
}
reporterr( "seq = %p, oseq=%p\n", seq, oseq );
// setmark( nlenmax, njob, seq, mark );
setmark_clustal( nlenmax, njob, seq, mark );
setmark_clustal( nlenmax, nout, oseq, mark );
#if mingw
setmode( fileno( stdout ), O_TEXT ); // windows deha saishuu tekina output nomi text mode
#endif
if( format == 'f' )
writeData_reorder_pointer( stdout, njob, name, nlen, seq, order );
writeData_reorder_pointer( stdout, nout, oname, onlen, oseq, oorder );
else if( format == 'c' )
clustalout_pointer( stdout, njob, nlenmax, seq, name, mark, comment, order, namelen );
clustalout_pointer( stdout, nout, nlenmax, oseq, oname, mark, comment, oorder, namelen );
else if( format == 'y' )
phylipout_pointer( stdout, njob, nlenmax, seq, name, order, namelen );
phylipout_pointer( stdout, nout, nlenmax, oseq, oname, oorder, namelen );
else
fprintf( stderr, "Unknown format\n" );
FreeCharMtx( seq ); seq = NULL;
FreeCharMtx( name ); name = NULL;
free( nlen ); nlen = NULL;
free( order ); order = NULL;
if( excludedashseq )
{
free( oseq ); oseq = NULL;
free( oname ); oname = NULL;
free( onlen ); onlen = NULL;
free( oorder ); oorder = NULL;
}
free( mark ); mark = NULL;
freeconstants();
// SHOWVERSION;
return( 0 );
}
......@@ -177,7 +177,7 @@ extern void topolswap( int s1[], int s2[], int *mpt1, int *mpt2 );
extern void reduc( double **mtx, int nseq, int im, int jm );
extern void nj( int nseq, double **omtx, int ***topol, double **dis );
extern void JTTmtx( double **rsr, double *freq, unsigned char locamino[0x80], char locgrp[0x80], int isTM );
extern void BLOSUMmtx( int n, double **matrix, double *freq, unsigned char *amino, char *amino_grp );
extern void BLOSUMmtx( int n, double **matrix, double *freq, unsigned char *amino, char *amino_grp, int *rescale );
extern void extendedmtx( double **matrix, double *freq, unsigned char *amino, char *amino_grp );
extern void putlocalhom2( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa, char korh );
extern void putlocalhom_str( char *al1, char *al2, double *equiv, double scale, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa, char korh );
......@@ -300,7 +300,7 @@ extern void getdiaminofreq_st( double *freq, int clus, char **seq, double *eff,
extern void getdigapfreq_st( double *freq, int clus, char **seq, double *eff, int len );
extern void st_getGapPattern( Gappat **gpat, int clus, char **seq, double *eff, int len );
extern void getkyokaigap( char *g, char **s, int pos, int n );
extern double *loadaamtx( void );
extern double *loadaamtx( int *rescalept );
extern double naivepairscore( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, int penal );
extern double naivepairscore11( char *seq1, char *seq2, int penal );
extern double naivepairscore11_dynmtx( double **, char *seq1, char *seq2, int penal );
......
......@@ -81,12 +81,13 @@ int main( int ac, char **av )
buf[0] = (unsigned char)c;
fgetstilspace( buf+1, fp );
//fprintf( stderr, "buf=%s\n", buf );
if( strchr( buf, '-' ) )
if( strchr( (const char *)buf, '-' ) ) // added cast, 2019/Jan/25
{
printf( "-" );
continue;
}
res = sscanf( buf, " %x ", &c );
//res = sscanf( buf, " %x ", &c );
res = sscanf( (const char *)buf, " %x ", &c ); // added cast, 2019/Jan/25
if( res == EOF )
{
//fprintf( stderr, "%s was ignored.\n", buf );
......
......@@ -5391,7 +5391,7 @@ static void showaamtxexample()
exit( 1 );
}
double *loadaamtx( void )
double *loadaamtx( int *rescalept )
{
int i, j, k, ii, jj;
double *val;
......@@ -5404,6 +5404,8 @@ double *loadaamtx( void )
char *ptr2;
char *mtxfname = "_aamtx";
FILE *mf;
char key[1000];
raw = AllocateDoubleMtx( 21, 20 );
val = AllocateDoubleVec( 420 );
......@@ -5479,13 +5481,24 @@ double *loadaamtx( void )
if( i > 19 ) break;
}
*rescalept = 1;
for( i=0; i<20; i++ ) raw[20][i] = -1.0;
while( !feof( mf ) )
{
fgets( line, 999, mf );
if( line[0] == 'f' )
sscanf( line, "%s", key );
if( !strcmp( key, "norescale" ) )
{
// fprintf( stderr, "line = %s\n", line );
reporterr( "no rescale\n" );
*rescalept = 0;
break;
}
// else if( line[0] == 'f' )
else if( !strcmp( key, "frequency" ) )
{
// fprintf( stderr, "found! line = %s\n", line );
ptr1 = line;
for( j=0; j<20; j++ )
{
......@@ -5959,7 +5972,7 @@ void reporterr( const char *str, ... )
}
#ifndef mingw
#if !defined(mingw) && !defined(_MSC_VER)
void setstacksize(rlim_t kStackSize )
{
// const rlim_t kStackSize = 100 * 1024 * 1024; // min stack size = 10MB
......@@ -6148,3 +6161,85 @@ void use_getrusage(void)
}
#endif
void commongappick( int nseq, char **seq )
{
int i, j, count;
int len = strlen( seq[0] );
#if 1
int *mapfromnewtoold;
mapfromnewtoold = calloc( len+1, sizeof( int ) );
for( i=0, count=0; i<=len; i++ )
{
for( j=0; j<nseq; j++ )
if( seq[j][i] != '-' ) break;
if( j != nseq )
{
mapfromnewtoold[count++] = i;
}
}
// mapfromnewtoold[count] = -1; // iranai
for( j=0; j<nseq; j++ )
{
for( i=0; i<count; i++ )
{
seq[j][i] = seq[j][mapfromnewtoold[i]];
}
}
free( mapfromnewtoold );
#else
--------------------------
int *mapfromoldtonew;
int pos;
mapfromoldtonew = calloc( len+1, sizeof( int ) );
for( i=0; i<=len; i++ ) mapfromoldtonew[i] = -1;
for( i=0, count=0; i<=len; i++ )
{
for( j=0; j<nseq; j++ )
if( seq[j][i] != '-' ) break;
if( j != nseq )
{
mapfromoldtonew[i] = count;
count++;
}
}
for( j=0; j<nseq; j++ )
{
for( i=0; i<=len; i++ )
{
if( (pos=mapfromoldtonew[i]) != -1 )
seq[j][pos] = seq[j][i];
}
}
free( mapfromoldtonew );
--------------------------
for( i=0, count=0; i<=len; i++ )
{
/*
allgap = 1;
for( j=0; j<nseq; j++ )
allgap *= ( seq[j][i] == '-' );
if( !allgap )
*/
for( j=0; j<nseq; j++ )
if( seq[j][i] != '-' ) break;
if( j != nseq )
{
for( j=0; j<nseq; j++ )
{
seq[j][count] = seq[j][i];
}
count++;
}
}
#endif
}
......@@ -9,7 +9,7 @@ mafftpath = "_BINDIR/mafft"
# path of mafft. "/usr/local/bin/mafft"
# if mafft is in your command path, "mafft" is ok.
blastpath = "blastall"
blastpath = "psiblast"
# path of blastall.
# if blastall is in your command path, "blastall" is ok.
......@@ -39,6 +39,10 @@ if ENV["MAFFT_BLAST"] && ENV["MAFFT_BLAST"] != "" then
blastpath = ENV["MAFFT_BLAST"]
end
if ENV["MAFFT_HOMOLOGS_MAFFT"] && ENV["MAFFT_HOMOLOGS_MAFFT"] != "" then
mafftpath = ENV["MAFFT_HOMOLOGS_MAFFT"]
end
# mktemp
GC.disable
temp_vf = Tempfile.new("_vf").path
......@@ -91,21 +95,24 @@ def readfasta( fp, name, seq )
return nseq
end
nadd = 50
eval = 1e-10
nadd = 600
num_alignments = 600
num_threads_blast = 4
eval = 1e-1
local = 0
fullout = 0
entiresearch = 0
entiresearch = 1
corewin = 50
corethr = 0.3
mafftopt = " --op 1.53 --ep 0.123 --localpair --maxiterate 1000 --reorder "
#mafftopt = " --op 1.53 --ep 0.123 --localpair --maxiterate 1000 --reorder "
mafftopt = " --op 1.53 --ep 0.0 --globalpair --maxiterate 1000 --reorder "
#if getopts( "s", "f", "w", "l", "h", "e:", "a:", "o:", "c:", "d:" ) == nil || ARGV.length == 0 || $OPT_h then
# puts "Usage: #{$0} [-h -l -e# -a# -o\"[options for mafft]\"] input_file"
# exit
#end
params = ARGV.getopts( "sfwlhe:a:o:c:d:" )
params = ARGV.getopts( "sfwlhe:a:o:c:d:n:N:" )
#if $OPT_c then
......@@ -114,8 +121,20 @@ if params["c"] != nil then
end
#if $OPT_d then
#if params["d"] != nil then
# corethr = params["d"].to_f
#end
#
if params["d"] != nil then
corethr = params["d"].to_f
localdb = params["d"].to_s
end
if params["n"] != nil then
num_alignments = params["n"].to_s
end
if params["N"] != nil then
num_threads_blast = params["N"].to_s
end
#if $OPT_w
......@@ -165,6 +184,10 @@ for i in 0..(nar-1)
end
end
if fullout == 0 then
mafftopt += " --excludehomologs "
end
nseq = 0
ifp = File.open( "#{temp_if}", 'r' )
while ifp.gets
......@@ -172,8 +195,8 @@ ifp = File.open( "#{temp_if}", 'r' )
end
ifp.close
if nseq >= 100 then
STDERR.puts "The number of input sequences must be <100."
if nseq >= 10000 then
STDERR.puts "The number of input sequences must be <10000."
exit
elsif nseq == 1 then
system( "cp #{temp_if}" + " #{temp_pf}" )
......@@ -239,7 +262,11 @@ STDERR.puts "Searching .. \n"
ids = []
add = []
sco = []
nblast = 0 # ato de tsukau kamo
for i in 0..(nin-1)
singleids = []
singleadd = []
inseq[i].gsub!(/-/,"")
afp.puts ">" + orname[i]
afp.puts orseq[i]
......@@ -296,13 +323,23 @@ for i in 0..(nin-1)
qfp.puts "> "
qfp.puts inseq[i]
qfp.close
command = blastpath + " -p blastp -e #{eval} -b 1000 -m 7 -i #{temp_qf} -d #{localdb} > #{temp_res}"
command = blastpath + " -num_iterations 2 -num_threads #{num_threads_blast} -evalue #{eval} -num_alignments #{num_alignments} -outfmt 5 -query #{temp_qf} -db #{localdb} > #{temp_res}"
system command
resp = File.open( "#{temp_res}", 'r' )
# system "cp #{temp_res} _res"
end
STDERR.puts " Done.\n\n"
resp = File.open( "#{temp_res}", 'r' )
hitnum = 0
lasteval = "nohit"
while resp.gets
break if $_ =~ /<Iteration_iter-num>2<\/Iteration_iter-num>/
end
if $_ == nil then
STDERR.puts "no hit"
else
while 1
while resp.gets
break if $_ =~ /<Hit_id>(.*)<\/Hit_id>/ || $_ =~ /(<Iteration_stat>)/
......@@ -310,11 +347,62 @@ for i in 0..(nin-1)
id = $1
break if $_ =~ /<Iteration_stat>/
# p id
while resp.gets
break if $_ =~ /<Hsp_bit-score>(.*)<\/Hsp_bit-score>/
starthit = 9999999
endhit = -1
startquery = 9999999
endquery = -1
target = ""
score = 0.0
while line = resp.gets
if line =~ /<Hsp_hit-from>(.*)<\/Hsp_hit-from>/
starthitcand=$1.to_i
elsif line =~ /<Hsp_hit-to>(.*)<\/Hsp_hit-to>/
endhitcand=$1.to_i
elsif line =~ /<Hsp_query-from>(.*)<\/Hsp_query-from>/
startquerycand=$1.to_i
elsif line =~ /<Hsp_query-to>(.*)<\/Hsp_query-to>/
endquerycand=$1.to_i
elsif $_ =~ /<Hsp_hseq>(.*)<\/Hsp_hseq>/
targetcand = $1.sub( /-/, "" ).sub( /U/, "X" )
elsif line =~ /<Hsp_bit-score>(.*)<\/Hsp_bit-score>/
scorecand=$1.to_f
elsif line =~ /<Hsp_evalue>(.*)<\/Hsp_evalue>/
evalcand=$1.to_s
elsif line =~ /<\/Hsp>/
if endhit == -1 then
starthit = starthitcand
endhit= endhitcand
startquery = startquerycand
endquery= endquerycand
target = targetcand
score = scorecand
lasteval = evalcand
else
# if endhit <= endhitcand && endquery <= endquerycand then
if endhit <= starthitcand && endquery <= startquerycand then
endhit = endhitcand
endquery = endquerycand
target = target + "XX" + targetcand
score = score + scorecand
end
# if starthitcand <= starthit && startquerycand <= startquery then
if endhitcand <= starthit && endquerycand <= startquery then
starthit = starthitcand
startquery = startquerycand
target = targetcand + "XX" + target
score = score + scorecand
end
end
elsif line =~ /<\/Hit>/
hitnum = hitnum + 1
break;
end
score = $1.to_f
# p score
end
singleids.push( id )
singleadd.push( target )
known = ids.index( id )
if known != nil then
......@@ -326,35 +414,43 @@ for i in 0..(nin-1)
sco.delete_at( known )
end
end
while resp.gets
break if $_ =~ /<Hsp_hseq>(.*)<\/Hsp_hseq>/
end
# break if $1 == nil
target = $1.sub( /-/, "" ).sub( /U/, "X" )
# p target
# STDERR.puts "adding 1 seq"
ids.push( id )
sco.push( score )
add.push( target )
end
resp.close
end
n = ids.length
n = singleids.length
outnum = 0
while n > 0 && outnum < nadd
m = rand( n )
afp.puts ">_addedbymaffte_" + ids[m]
afp.puts add[m]
ids.delete_at( m )
add.delete_at( m )
n -= 1
outnum += 1
totalprob = 0
prob = []
for m in 0..(n-1)
# prob[m] = 1.0 / population[eclass[m]]
prob[m] = 1.0
totalprob += prob[m]
end
# puts ""
for m in 0..(n-1)
prob[m] /= (totalprob)
prob[m] *= (nadd.to_f / nin.to_f)
prob[m] = 1 if prob[m] > 1
end
for m in 0..(n-1)
if rand( 1000000 ).to_f/1000000 < prob[m] then
# STDERR.puts "hit in " + m.to_s
afp.puts ">_addedbymaffte_" + singleids[m]
afp.puts singleadd[m]
end
end
end
afp.close
STDERR.puts "Performing alignment .. "
STDERR.puts "Aligning .. "
system( mafftpath + mafftopt + "#{temp_af} > #{temp_bf}" )
STDERR.puts "done."
......@@ -402,6 +498,7 @@ end
system( "rm -rf #{temp_if} #{temp_vf} #{temp_af} #{temp_bf} #{temp_pf} #{temp_qf} #{temp_res} #{temp_rid}" )
#system( "cp #{temp_if} #{temp_vf} #{temp_af} #{temp_bf} #{temp_pf} #{temp_qf} #{temp_res} #{temp_rid} ." )
if File.exist?( "#{temp_af}.tree" ) then
system( "sed 's/_addedbymaffte_/_ho_/' #{temp_af}.tree > #{ARGV[0].to_s}.tree" )
system( "rm #{temp_af}.tree" )
......
......@@ -149,8 +149,8 @@ infp.close
lenhash = {}
if outnum then
for i in 0..(tin-1)
tname[i] = "_numo_s_#{i}_numo_e_" + tname[i]
for i in 1..(tin)
tname[i-1] = "_numo_s_0#{i}_numo_e_" + tname[i-1]
end
end
......
@echo off
setlocal enabledelayedexpansion
cls
cls; 1>&2
chcp 65001 1>&2
for /f "usebackq tokens=*" %%i IN (`cd`) DO @set current_dir=%%i
if /i "%current_dir%" == "%windir%" (
if /i "%current_dir%" == "%systemroot%" (
set mafft_working_dir="%~dp0"
) else (
set mafft_working_dir="%current_dir%"
......@@ -14,7 +15,6 @@ echo Preparing environment to run MAFFT on Windows. 1>&2
echo This may take a while, if real-time scanning by anti-virus software is on. 1>&2
set ROOTDIR=%~d0%~p0
set ROOTDIRWBS=!ROOTDIR:^\\=\\\!
set PATH=/usr/bin/:%PATH%
set MAFFT_BINARIES=/usr/lib/mafft
set TMPDIR=%TMP%
......@@ -27,7 +27,7 @@ REM (typically C:\Users\username\AppData\Local\Temp\), then
REM uncomment (remove REM) the above two lines to use an alternative
REM temporary folder.
"%ROOTDIR%\usr\bin\bash" "%ROOTDIRWBS%\usr\bin\mafft" %*
"%ROOTDIR%\usr\bin\bash" "/usr/bin/mafft" %*
popd
exit /b
......@@ -6,6 +6,7 @@ Set-Item Env:Path "/usr/bin;$Env:Path"
Set-Item Env:MAFFT_BINARIES "/usr/lib/mafft"
Set-Item Env:TMPDIR "$Env:TMP"
Set-Item Env:MAFFT_TMPDIR "$Env:TMP"
Set-Item Env:mafft_working_dir "$PWD"
#Set-Item Env:TMPDIR "/tmp"
#Set-Item Env:MAFFT_TMPDIR "/tmp"
......@@ -16,6 +17,5 @@ Set-Item Env:MAFFT_TMPDIR "$Env:TMP"
#$ROOTDIR=$PSScriptRoot # not supported by powershell versions <= 2
$ROOTDIR=Split-Path -Parent $MyInvocation.MyCommand.Path
$ROOTDIRWBS=$ROOTDIR.Replace('\\', '\\\')
$proc = Start-Process -Wait -NoNewWindow -PassThru -FilePath "$ROOTDIR\usr\bin\bash.exe" -ArgumentList "'$ROOTDIRWBS\usr\bin\mafft' $args"
$proc = Start-Process -Wait -NoNewWindow -PassThru -FilePath "$ROOTDIR\usr\bin\bash.exe" -ArgumentList "'/usr/bin/mafft' $args"
exit $proc.ExitCode