/* gbnlprobit (ver. 5.6) -- Non linear probit regression Copyright (C) 2009-2014 Giulio Bottazzi This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License (version 2) as published by the Free Software Foundation; This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "tools.h" #include "matheval.h" #include "assert.h" #include "multimin.h" #include #include /* #include */ #include #include #include #include #include #include #include #include /* handy structure used in computation */ typedef struct function { void *f; size_t argnum; char **argname; void **df; void ***ddf; } Function; struct objdata { Function * F; size_t rows0; size_t rows1; size_t columns; double **data0; double **data1; double *prob0; double *prob1; double weight0; double weight1; }; /* Maximum likelihood estimation */ /* ============================= */ /* the objective function is the negative log-likelihood. It can be written as: - \sum_{y=0} log ( 1-F(g(x)) ) - \sum_{y=1} log ( F(g(x)) ) where F is the Normal distribution function with mean zero and variance 1 F(x) = (1/\sqrt{2\pi}) \int_{-\infty}^x dt \exp(-t^2/2). and where g(x) is the function to be estimated. Notice that F(0)=1/2, F(+\infty)=1 and F(-x)=1-F(x) and Q(x)=1-F(x). In GSL: F(x) = 1-gsl_sf_erf_Q(x) f(x) = F'(x) = gsl_sf_erf_Z(x) or one can use instead: F(x) = ( 1+erf(x/sqrt{2}) )/2 = (2-erfc(x/sqrt{2})/2 = erfc(-x/sqrt{2})/2 Q(x) = ( 1-erf(x/sqrt{2}) )/2 = erfc(x/sqrt{2})/2 */ void obj_f (const size_t pnum, const double *x,void *params,double *fval){ const Function *F = ((struct objdata *)params)->F; const size_t rows0=((struct objdata *)params)->rows0; const size_t rows1=((struct objdata *)params)->rows1; const size_t columns=((struct objdata *)params)->columns; double **data0=((struct objdata *)params)->data0; double **data1=((struct objdata *)params)->data1; double values[pnum+columns]; size_t i,j; double res=0; /* set the parameter values */ for(i=columns;if,columns+pnum,F->argname,values); /* cdfQ(x) = erfc(x/\sqrt(2))/2 */ /* res += -log( gsl_sf_erf_Q (tempg) ); */ res += log(2)-gsl_sf_log_erfc(tmpg/sqrt(2)); } /* cycle on one occurrences */ for(i=0;if,columns+pnum,F->argname,values); /* cdfP(x) = erfc(-x/\sqrt(2))/2 */ /* res += -log( 1-gsl_sf_erf_Q (tempg) ); */ res += log(2)-gsl_sf_log_erfc(-tmpg/sqrt(2)); } *fval=res; /* rescale log-likelihood */ /* *fval=res/(rows0+rows1); */ /* fprintf(stderr," f: %+12.6e\n",res); */ /* fprintf(stderr," x:"); */ /* for(i=0;iF; const size_t rows0=((struct objdata *)params)->rows0; const size_t rows1=((struct objdata *)params)->rows1; const size_t columns=((struct objdata *)params)->columns; double **data0=((struct objdata *)params)->data0; double **data1=((struct objdata *)params)->data1; double values[pnum+columns]; size_t i,j; /* variables used in computation */ double tmpg; /* the value of the internal function */ /* initialize gradient */ for(i=0;if,columns+pnum,F->argname,values); for(j=0;jdf)[j],columns+pnum,F->argname,values)*gsl_sf_erf_Z(tmpg)/gsl_sf_erf_Q(tmpg); */ grad[j] += evaluator_evaluate ( (F->df)[j],columns+pnum,F->argname,values)*gsl_sf_erf_Z(tmpg)/( exp( gsl_sf_log_erfc(tmpg/sqrt(2)) )/2 ); } } /* cycle on one occurrences */ for(i=0;if,columns+pnum,F->argname,values); for(j=0;jdf)[j],columns+pnum,F->argname,values)*gsl_sf_erf_Z(tmpg)/(1-gsl_sf_erf_Q(tmpg)); */ grad[j] += -evaluator_evaluate ( (F->df)[j],columns+pnum,F->argname,values)*gsl_sf_erf_Z(tmpg)/( exp( gsl_sf_log_erfc(-tmpg/sqrt(2)) )/2 ); } } /* rescale log-likelihood */ /* for (j=0;jF; const size_t rows0=((struct objdata *)params)->rows0; const size_t rows1=((struct objdata *)params)->rows1; const size_t columns=((struct objdata *)params)->columns; double **data0=((struct objdata *)params)->data0; double **data1=((struct objdata *)params)->data1; double values[pnum+columns]; size_t i,j; double res=0; /* variables used in computation */ double tmpg; /* the value of the internal function */ /* initialize gradient */ for(i=0;if,columns+pnum,F->argname,values); /* cdfQ(x) = erfc(x/\sqrt(2))/2 */ /* res += -log( gsl_sf_erf_Q (tmpg) ); */ res += log(2)-gsl_sf_log_erfc(tmpg/sqrt(2)); for(j=0;jdf)[j],columns+pnum,F->argname,values)*gsl_sf_erf_Z(tmpg)/gsl_sf_erf_Q(tmpg); */ grad[j] += evaluator_evaluate ( (F->df)[j],columns+pnum,F->argname,values)*gsl_sf_erf_Z(tmpg)/( exp( gsl_sf_log_erfc(tmpg/sqrt(2)) )/2 ); } } /* cycle on one occurrences */ for(i=0;if,columns+pnum,F->argname,values); /* cdfP(x) = erfc(-x/\sqrt(2))/2 */ /* res += -log( 1-gsl_sf_erf_Q (tmpg) ); */ res += log(2)-gsl_sf_log_erfc(-tmpg/sqrt(2)); for(j=0;jdf)[j],columns+pnum,F->argname,values)*gsl_sf_erf_Z(tmpg)/(1-gsl_sf_erf_Q(tmpg)); */ grad[j] += -evaluator_evaluate ( (F->df)[j],columns+pnum,F->argname,values)*gsl_sf_erf_Z(tmpg)/( exp( gsl_sf_log_erfc(-tmpg/sqrt(2)) )/2 ); } } *fval=res; /* rescale log-likelihood */ /* for (j=0;jf,columns+Pnum,F->argname,values)); for(j=0;jdf)[j],columns+Pnum,F->argname,values); for(h=0;hdf)[h],columns+Pnum,F->argname,values); gsl_matrix_set (dJ,j,h,tmpdg1*tmpdg2*dtmp1*dtmp1); } } gsl_matrix_add (J,dJ); } for(i=0;if,columns+Pnum,F->argname,values)); for(j=0;jdf)[j],columns+Pnum,F->argname,values); for(h=0;hdf)[h],columns+Pnum,F->argname,values); gsl_matrix_set (dJ,j,h,tmpdg1*tmpdg2*dtmp1*dtmp1); } } gsl_matrix_add (J,dJ); } /* invert information matrix; dJ store temporary LU decomp. */ gsl_matrix_memcpy (dJ,J); gsl_linalg_LU_decomp (dJ,P,&signum); gsl_linalg_LU_invert (dJ,P,covar); /* gsl_matrix_scale (covar,a*a); */ gsl_matrix_free(J); gsl_matrix_free(dJ); gsl_permutation_free(P); } break; case 2: /* H^{-1} */ { gsl_permutation * P = gsl_permutation_alloc (Pnum); int signum; gsl_matrix *H = gsl_matrix_calloc (Pnum,Pnum); gsl_matrix *dH = gsl_matrix_calloc (Pnum,Pnum); /* notice that for Gaussian density f it is f'=-x*f */ /* compute Hessian */ for(i=0;if,columns+Pnum,F->argname,values); const double dtmp2 = gsl_sf_hazard(dtmp1); for(j=0;jdf)[j],columns+Pnum,F->argname,values); for(h=0;hdf)[h],columns+Pnum,F->argname,values); const double tmpddg = evaluator_evaluate ((F->ddf)[j][h],columns+Pnum,F->argname,values); gsl_matrix_set (dH,j,h,tmpddg*dtmp2-tmpdg1*tmpdg2*dtmp1*dtmp2+ tmpdg1*tmpdg2*dtmp2*dtmp2); } } gsl_matrix_add (H,dH); } for(i=0;if,columns+Pnum,F->argname,values); const double dtmp2 = gsl_sf_hazard(-dtmp1); for(j=0;jdf)[j],columns+Pnum,F->argname,values); for(h=0;hdf)[h],columns+Pnum,F->argname,values); const double tmpddg = evaluator_evaluate ((F->ddf)[j][h],columns+Pnum,F->argname,values); gsl_matrix_set (dH,j,h,-tmpddg*dtmp2+tmpdg1*tmpdg2*dtmp1*dtmp2+ tmpdg1*tmpdg2*dtmp2*dtmp2); } } gsl_matrix_add (H,dH); } /* invert Hessian; dH store temporary LU decomp. */ gsl_matrix_memcpy (dH,H); gsl_linalg_LU_decomp (dH,P,&signum); gsl_linalg_LU_invert (dH,P,covar); /* gsl_matrix_scale (covar,a*a); */ gsl_matrix_free(H); gsl_matrix_free(dH); gsl_permutation_free(P); } break; case 3: /* H^{-1} J H^{-1} */ { gsl_permutation * P = gsl_permutation_alloc (Pnum); int signum; gsl_matrix *H = gsl_matrix_calloc (Pnum,Pnum); gsl_matrix *dH = gsl_matrix_calloc (Pnum,Pnum); gsl_matrix *J = gsl_matrix_calloc (Pnum,Pnum); gsl_matrix *dJ = gsl_matrix_calloc (Pnum,Pnum); /* compute Hessian and information matrix */ for(i=0;if,columns+Pnum,F->argname,values); const double dtmp2 = gsl_sf_hazard(dtmp1); for(j=0;jdf)[j],columns+Pnum,F->argname,values); for(h=0;hdf)[h],columns+Pnum,F->argname,values); const double tmpddg = evaluator_evaluate ((F->ddf)[j][h],columns+Pnum,F->argname,values); gsl_matrix_set (dH,j,h,tmpddg*dtmp2-tmpdg1*tmpdg2*dtmp1*dtmp2+ tmpdg1*tmpdg2*pow(dtmp2,2)); gsl_matrix_set (dJ,j,h,tmpdg1*tmpdg2*pow(dtmp2,2)); } } gsl_matrix_add (H,dH); gsl_matrix_add (J,dJ); } for(i=0;if,columns+Pnum,F->argname,values); const double dtmp2 = gsl_sf_hazard(-dtmp1); for(j=0;jdf)[j],columns+Pnum,F->argname,values); for(h=0;hdf)[h],columns+Pnum,F->argname,values); const double tmpddg = evaluator_evaluate ((F->ddf)[j][h],columns+Pnum,F->argname,values); gsl_matrix_set (dH,j,h,-tmpddg*dtmp2+tmpdg1*tmpdg2*dtmp1*dtmp2+ tmpdg1*tmpdg2*pow(dtmp2,2)); gsl_matrix_set (dJ,j,h,tmpdg1*tmpdg2*pow(dtmp2,2)); } } gsl_matrix_add (H,dH); gsl_matrix_add (J,dJ); } /* invert Hessian; dH store temporary LU decomp. */ gsl_matrix_memcpy (dH,H); gsl_linalg_LU_decomp (dH,P,&signum); gsl_linalg_LU_invert (dH,P,H); /* dJ = H^{-1} J ; covar = dJ H^{-1} */ gsl_blas_dgemm (CblasNoTrans, CblasNoTrans,1.0,H,J,0.0,dJ); gsl_blas_dgemm (CblasNoTrans, CblasNoTrans,1.0,dJ,H,0.0,covar); /* gsl_matrix_scale (covar,a*a); */ gsl_matrix_free(H); gsl_matrix_free(dH); gsl_matrix_free(J); gsl_matrix_free(dJ); gsl_permutation_free(P); } break; } return covar; } /* Compute probabilities used in threshold statistics */ /* ================================================== */ void probabilities(const size_t pnum, const double *x,void *params){ const Function *F = ((struct objdata *)params)->F; const size_t rows0=((struct objdata *)params)->rows0; const size_t rows1=((struct objdata *)params)->rows1; const size_t columns=((struct objdata *)params)->columns; double **data0=((struct objdata *)params)->data0; double **data1=((struct objdata *)params)->data1; double *prob0=((struct objdata *)params)->prob0; double *prob1=((struct objdata *)params)->prob1; double values[pnum+columns]; size_t i,j; /* set the parameter values */ for(i=columns;if,columns+pnum,F->argname,values)); } /* cycle on one occurrences */ for(i=0;if,columns+pnum,F->argname,values)); } } /* Score function for the determination of optimal threshold */ /* ========================================================= */ double score(const double t, void *params){ const size_t rows0=((struct objdata *)params)->rows0; const size_t rows1=((struct objdata *)params)->rows1; double *prob0=((struct objdata *)params)->prob0; double *prob1=((struct objdata *)params)->prob1; size_t i; double errors0=0; double errors1=0; for(i=0;i t) errors0 +=1; for(i=0;iF; const size_t rows0=((struct objdata *)params)->rows0; const size_t rows1=((struct objdata *)params)->rows1; const size_t columns=((struct objdata *)params)->columns; double **data0=((struct objdata *)params)->data0; double **data1=((struct objdata *)params)->data1; /* double *prob0=((struct objdata *)params)->prob0; */ /* double *prob1=((struct objdata *)params)->prob1; */ double values[pnum-1+columns]; /* last parameter is the threshold */ const double weight0=((struct objdata *)params)->weight0; const double weight1=((struct objdata *)params)->weight1; size_t i,j; double res=0; /* variables used in computation */ double tmpg; /* the value of the internal function */ /* set the parameter values */ for(i=columns;if,columns+pnum-1,F->argname,values); /* prob0[i] = tmpF = gsl_cdf_ugaussian_P(tmpg); */ if( tmpg > x[pnum-1] ) res += weight0; } /* cycle on one occurrences */ for(i=0;if,columns+pnum-1,F->argname,values); /* prob1[i] = tmpF= gsl_cdf_ugaussian_P(tmpg); */ if(tmpg < x[pnum-1]) res += weight1; } *fval=res; } int main(int argc,char* argv[]){ int i; /* options and default values */ char *splitstring = strdup(" \t"); int o_verbose=0; int o_output=0; int o_varcovar=0; int o_zscore=0; int o_method=0; int o_threshold=0; /* =0 ignore threshold; in (0,1) take as given; =1 compute optimal only global; =2 compute optimal global and local */ int o_provided_t=0; /* threshold is provided */ int o_onlyglobal_t=0; /* threshold is computed only globally */ /* data from sdtin */ size_t rows0=0,rows1=0,columns=0; double **data0=NULL; double **data1=NULL; int *vartype=NULL; /* =0 reg. variate; =1 dummy; =2 dummy which cannot be estimated; =3 constant*/ Function F; /* definition of the function */ char **Param=NULL; /* parameter names */ double *Pval=NULL; /* parameter values */ gsl_matrix *Pcovar=NULL; /* covariance matrix */ size_t Pnum=0; /* parameters number */ double llmin; /* minimization parameters */ struct multimin_params mmparams={.01,.1,100,1e-6,1e-6,5,0}; /* estimated probabilities */ double *prob0=NULL; double *prob1=NULL; /* statistics */ double brier=0; /* brier score */ double threshold=0; /* threshold */ double thmin; /* value of the score function */ size_t thsteps=10; /* steps for threshold identification */ unsigned wrongs0=0; /* mistakes in pred. 0 */ unsigned wrongs1=0; /* mistakes in pred. 1 */ /* error handler */ gsl_error_handler_t old_handler; double scoremin=0; struct multimin_params scoremmparams={1,.01,100,1e-6,1e-4,4,0}; /* parameters for the likelihood expression */ struct objdata llparams; /* variables for reading command line options */ /* ------------------------------------------ */ char opt; /* ------------------------------------------ */ /* COMMAND LINE PROCESSING */ while((opt=getopt_long(argc,argv,"v:hF:O:V:A:t:g:zB:M:",gb_long_options, &gb_option_index))!=EOF){ if(opt==0){ gbutils_header(argv[0],stdout); exit(0); } else if(opt=='?'){ fprintf(stderr,"option %c not recognized\n",optopt); return(-1); } else if(opt=='h'){ /*print help*/ fprintf(stdout,"Non linear probit estimation. Minimize the negative log-likelihood\n"); fprintf(stdout,"\n"); fprintf(stdout," sum_{i in N_0} log(1-F(g(X_i))) + sum_{i in N_1} log(F(g(X_i)))\n"); fprintf(stdout,"\n"); fprintf(stdout,"where N_0 and N_1 are the sets of 0 and 1 observations, g is a generic \n"); fprintf(stdout,"function of the independent variables and F is the normal CDF. It is also\n"); fprintf(stdout,"possible to minimize the score function\n"); fprintf(stdout,"\n"); fprintf(stdout,"w_0 sum_{i in N_0} theta(F(g(X_i))-t) + \n"); fprintf(stdout," w_1 sum_{i in N_1} theta(t-F(g(X_i))) \n"); fprintf(stdout,"\n"); fprintf(stdout,"where theta is the Heaviside function and t a threshold level. Weights \n"); fprintf(stdout,"w_0 and w_1 scale the contribution of the two subpopulations. The first \n"); fprintf(stdout,"column of data contains 0/1 entries. Successive columns are independent \n"); fprintf(stdout,"variables. The model is specified by a function g(x1,x2...) where x1,.. \n"); fprintf(stdout,"stands for the first,second .. N-th column independent variables.\n"); fprintf(stdout,"\nUsage: %s [options] \n\n",argv[0]); fprintf(stdout,"options:\n"); fprintf(stdout," -O type of output (default 0)\n"); fprintf(stdout," 0 parameters \n"); fprintf(stdout," 1 parameters and errors \n"); fprintf(stdout," 2 and probabilities \n"); fprintf(stdout," 3 parameters and variance matrix\n"); fprintf(stdout," 4 marginal effects \n"); fprintf(stdout," -V variance matrix estimation (default 0)\n"); fprintf(stdout," 0 \n"); fprintf(stdout," 1 < J^{-1} > \n"); fprintf(stdout," 2 < H^{-1} > \n"); fprintf(stdout," 3 < H^{-1} J H^{-1} > \n"); fprintf(stdout," -z take zscore (not of 0/1 dummies)\n"); fprintf(stdout," -F input fields separators (default \" \\t\")\n"); fprintf(stdout," -v verbosity level (default 0)\n"); fprintf(stdout," 0 just results \n"); fprintf(stdout," 1 comment headers \n"); fprintf(stdout," 2 summary statistics \n"); fprintf(stdout," 3 covariance matrix \n"); fprintf(stdout," 4 minimization steps (default 10)\n"); fprintf(stdout," 5 model definition \n"); fprintf(stdout," -g set number of point for global optimal threshold identification \n"); fprintf(stdout," -h this help \n"); fprintf(stdout," -t set threshold value (default 0)\n"); fprintf(stdout," 0 ignore threshold \n"); fprintf(stdout," (0,1) user provided threshold \n"); fprintf(stdout," 1 compute optimal only global \n"); fprintf(stdout," 2 compute optimal \n"); fprintf(stdout," -M estimation method \n"); fprintf(stdout," 0 maximum likelihood \n"); fprintf(stdout," 1 min. score (w0=w1=1) \n"); fprintf(stdout," 2 min. score (w0=1/N0, w1=1/N1) \n"); fprintf(stdout," -A MLL optimization options (default 0.01,0.1,100,1e-6,1e-6,5)\n"); fprintf(stdout," fields are step,tol,iter,eps,msize,algo. Empty fields for default\n"); fprintf(stdout," step initial step size of the searching algorithm \n"); fprintf(stdout," tol line search tolerance iter: maximum number of iterations \n"); fprintf(stdout," eps gradient tolerance : stopping criteria ||gradient||0 && threshold <1){ /* user provided threshold */ o_provided_t=1; } else if (threshold == 1){ /* compute global optimal threshold */ o_provided_t=0; o_onlyglobal_t=1; } else if (threshold == 2){ /* compute optimal threshold */ o_provided_t=0; } else { fprintf(stderr,"ERROR (%s): wrong threshold specification: '%f'\n", GB_PROGNAME,threshold); exit(-1); } } else if(opt=='M'){ /*set the estimation method*/ o_method = atoi(optarg); if(o_method<0 || o_method>2){ fprintf(stderr,"ERROR (%s): method option '%d' not recognized. Try option -h.\n", GB_PROGNAME,o_method); exit(-1); } } else if(opt=='z'){ /* z-score the variates */ o_zscore=1; } else if(opt=='g'){ /* set the number of steps for global threshold identification */ thsteps = (size_t) atoi(optarg); } else if(opt=='F'){ /*set the fields separator string*/ free(splitstring); splitstring = strdup(optarg); } else if(opt=='O'){ /* set the type of output */ o_output = atoi(optarg); if(o_output<0 || o_output>6){ fprintf(stderr,"ERROR (%s): output option '%d' not recognized. Try option -h.\n", GB_PROGNAME,o_output); exit(-1); } } else if(opt=='V'){ /* set the type of covariance */ o_varcovar = atoi(optarg); if(o_varcovar<0 || o_varcovar>3){ fprintf(stderr,"ERROR (%s): variance option '%d' not recognized. Try option -h.\n", GB_PROGNAME,o_varcovar); exit(-1); } } else if(opt=='A'){ char *stmp1=strdup (optarg); char *stmp2; char *stmp3=stmp1; if( (stmp2=strsep (&stmp1,",")) != NULL){ if(strlen(stmp2)>0) mmparams.step_size=atof(stmp2); } if( (stmp2=strsep (&stmp1,",")) != NULL){ if(strlen(stmp2)>0) mmparams.tol=atof(stmp2); } if( (stmp2=strsep (&stmp1,",")) != NULL){ if(strlen(stmp2)>0) mmparams.maxiter=(unsigned) atoi(stmp2); } if( (stmp2=strsep (&stmp1,",")) != NULL){ if(strlen(stmp2)>0) mmparams.epsabs=atof(stmp2); } if( (stmp2=strsep (&stmp1,",")) != NULL){ if(strlen(stmp2)>0) mmparams.maxsize=atof(stmp2); } if( (stmp2=strsep (&stmp1,",")) != NULL){ if(strlen(stmp2)>0) mmparams.method= (unsigned) atoi(stmp2); } free(stmp3); } else if(opt=='B'){ char *stmp1=strdup (optarg); char *stmp2; char *stmp3=stmp1; if( (stmp2=strsep (&stmp1,",")) != NULL){ if(strlen(stmp2)>0) scoremmparams.step_size=atof(stmp2); } if( (stmp2=strsep (&stmp1,",")) != NULL){ if(strlen(stmp2)>0) scoremmparams.maxiter=(unsigned) atoi(stmp2); } if( (stmp2=strsep (&stmp1,",")) != NULL){ if(strlen(stmp2)>0) scoremmparams.maxsize=atof(stmp2); } free(stmp3); } else if(opt=='v'){ o_verbose = atoi(optarg); mmparams.verbosity=(o_verbose>2?o_verbose-2:0); scoremmparams.verbosity=(o_verbose>2?o_verbose-2:0); } } /* END OF COMMAND LINE PROCESSING */ /* initialize global variables */ initialize_program(argv[0]); /* define the dataset */ { size_t i,i0,i1,j; size_t rows=0; double **data=NULL; /* load data */ loadtable(&data,&rows,&columns,0,splitstring); /* check if there are dummies. vartype: 0 reg. variate; 1 dummy; 2 dummy which cannot be estimated; 3 constant*/ vartype = (int *) my_alloc((columns-1)*sizeof(int)); for(i=1;i0 && strlen(stmp4)>0){ Pnum++; Param=(char **) my_realloc((void *) Param,Pnum*sizeof(char *)); Pval=(double *) my_realloc((void *) Pval,Pnum*sizeof(double)); Param[Pnum-1] = strdup(stmp5); Pval[Pnum-1] = atof(stmp4); } } else{ /* allocate new function */ F.f = evaluator_create (stmp3); assert(F.f); } free(stmp3); } free(piece); } /* check that everything is correct */ if(Pnum==0){ fprintf(stderr,"ERROR (%s): please provide a parameter to estimate.\n", GB_PROGNAME); exit(-1); } { size_t i,j; char **NewParam=NULL; double *NewPval=NULL; size_t NewPnum=0; char **storedname=NULL; size_t storednum=0; /* retrieve list of arguments and their number */ /* notice that storedname is not allocated */ { int argnum; evaluator_get_variables (F.f,&storedname,&argnum); storednum = (size_t) argnum; } /* check the definition of the function: column specifications refer to existing columns and parameters are properly initialized */ for(i=0;i0?atoi(stmp1+1):0); if(index>columns){ fprintf(stderr,"ERROR (%s): column %zu not present in data\n", GB_PROGNAME,index); exit(-1); } if(vartype[index-1]==2) fprintf(stderr,"WARNING (%s): dummy variable in column %zu is completely separated\n",GB_PROGNAME,index); } else { for(j=0;j1){ F.ddf = (void ***) my_alloc(Pnum*sizeof(void **)); for(i=0;i3){ size_t i,j; fprintf(stderr," ------------------------------------------------------------\n"); fprintf (stderr,"\n Parameters and initial conditions:\n"); for(i=0;i1){ fprintf (stderr,"\n Model second derivatives:\n"); for(i=0;i2){ fprintf(stderr,"##--- START score minimization ---\n"); fprintf(stderr,"# initial score %f initial threshold %f\n",scoremin,Pval[Pnum-1]); } multimin(Pnum,Pval,&scoremin,xtype,xmin,xmax,score_obj_f,NULL,NULL,(void *) &llparams,scoremmparams); if(o_verbose>2){ fprintf(stderr,"# final score %f final threshold %f\n",scoremin,Pval[Pnum-1]); fprintf(stderr,"##--- END score minimization ---\n"); } free(xmin); free(xmax); free(xtype); } /* threshold is found by the minimization */ o_threshold=1; o_provided_t=1; threshold=Pval[Pnum-1]; } /* Compute implied probabilities */ /* ----------------------------- */ /* necessary for threshold computation and Brier score */ if(o_threshold==1 || o_verbose >1){ prob0 = (double *) my_alloc(rows0*sizeof(double)); prob1 = (double *) my_alloc(rows1*sizeof(double)); llparams.prob0 = prob0; llparams.prob1 = prob1; probabilities(Pnum-1,Pval,&llparams); /* threshold value, which is the last parameter, is not used */ } /* compute optimal threshold */ /* ------------------------- */ if(o_threshold==1 && o_provided_t==0) { size_t i; if(o_verbose>2){ fprintf(stderr,"##--- START global optimal threshold computation ---\n"); fprintf(stderr,"#grid size: %zd\n",thsteps); } thmin=1; for(i=0;i1) */ /* fprintf(stderr,"%f %f\n",dtmp1,dtmp2); */ if(dtmp22) fprintf(stderr,"# threshold global minimum : t=%f f=%f\n",threshold,thmin); if(o_verbose>2) fprintf(stderr,"##--- END global optimal threshold computation ---\n"); } /* local refinement minimization */ if(o_threshold==1 && o_provided_t==0 && o_onlyglobal_t==0){ /* variables for the local minimization */ int status; int iter = 0, max_iter = 100; const gsl_min_fminimizer_type *T; gsl_min_fminimizer *s; gsl_function F; /* ------------------------------------ */ double tmin,tmax=0; /* boundaries to threshold minimization */ /* initialization */ F.function = &score; F.params = &llparams; tmin=threshold-1/(thsteps-1.); tmax=threshold+1/(thsteps-1.); T = gsl_min_fminimizer_brent; s = gsl_min_fminimizer_alloc (T); gsl_min_fminimizer_set (s, &F,threshold,tmin,tmax); do { iter++; status = gsl_min_fminimizer_iterate (s); threshold = gsl_min_fminimizer_x_minimum (s); thmin = gsl_min_fminimizer_f_minimum (s); tmin = gsl_min_fminimizer_x_lower (s); tmax = gsl_min_fminimizer_x_upper (s); status = gsl_min_test_interval (tmin, tmax, 0.001, 0.0); /* if (status == GSL_SUCCESS) */ /* printf ("Converged:\n"); */ /* printf ("%5d [%.7f, %.7f] f=%f\n", */ /* iter, tmin, tmax, fmin); */ } while (status == GSL_CONTINUE && iter < max_iter); gsl_min_fminimizer_free (s); if(o_verbose>2){ fprintf(stderr,"# threshold local refinement: t=%f f=%f\n",threshold,thmin); fprintf(stderr,"##--- END optimal threshold computation ---\n"); } } /* build the covariance matrix */ /* --------------------------- */ if(o_method == 0){ if(o_output > 0) Pcovar = varcovar(o_varcovar,Pnum,Pval,llparams); } else { /* errors are unknown */ Pcovar = gsl_matrix_alloc (Pnum,Pnum); gsl_matrix_set_all (Pcovar,NAN); } /* compute brier score */ /* ------------------- */ if(o_verbose>1){ size_t i; for(i=0;i1 && o_threshold==1){ size_t i; for(i=0;ithreshold) wrongs0 +=1; for(i=0;i1 && o_output > 0){ size_t i,j; fprintf(stderr," ------------------------------------------------------------\n"); fprintf(stderr,"%*s point est. st. err. se/pe P{pe=0}",(int) strlen(Param[0])+2,""); if(o_verbose>2){ fprintf(stderr," var-covar = "); switch(o_varcovar){ case 0: fprintf(stderr,"\n\n"); break; case 1: fprintf(stderr,"J^{-1}\n\n"); break; case 2: fprintf(stderr,"H^{-1}\n\n"); break; case 3: fprintf(stderr,"H^{-1} J H^{-1}\n\n"); break; } } else fprintf(stderr,"\n"); for(i=0;i2){ fprintf(stderr," | "); for(j=0;j1) fprintf(stderr," ------------------------------------------------------------\n"); /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ if(o_verbose>1){ fprintf(stderr,"#\n"); fprintf(stderr,"#Method: "); switch(o_method){ case 0: fprintf(stderr,"Maximum Likelihood\n"); fprintf(stderr,"#log-likelihood : %f\n",-llmin); fprintf(stderr,"#number of observations : %zu\n",rows0+rows1); fprintf(stderr,"#log-likelihood/obs : %f\n",-llmin/(rows0+rows1)); if(o_threshold==1){ if(o_provided_t==0){ fprintf(stderr,"#optimal threshold : %f\n",threshold); fprintf(stderr,"#score function : %f\n",thmin); } else fprintf(stderr,"#provided threshold : %f\n",threshold); } break; case 1: case 2: fprintf(stderr,"Score minimization\n"); fprintf(stderr,"# weights w0=%f w1=%f\n", llparams.weight0,llparams.weight1); fprintf(stderr,"#score minimum %f\n",scoremin); fprintf(stderr,"#optimal threshold %f\n",Pval[Pnum-1]); break; } fprintf(stderr,"#\n"); fprintf(stderr,"#summary statistics \n"); if(o_threshold==1){ fprintf(stderr,"#errors of Type I : %d\n",wrongs1); fprintf(stderr,"#errors of Type II : %d\n",wrongs0); fprintf(stderr,"#correctly predicted 0 : %f\n",1.-((double) wrongs0)/rows0); fprintf(stderr,"#correctly predicted 1 : %f\n",1.-((double) wrongs1)/rows1); fprintf(stderr,"#total corr. pred. : %f\n",((double) rows0-wrongs0+rows1-wrongs1)/(rows0+rows1)); } fprintf(stderr,"#brier score : %f\n",brier); fprintf(stderr,"#\n"); } /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ switch(o_output){ case 0: /* point estimates */ { size_t i; /* +++++++++++++++++++++++++++++++++++ */ if(o_verbose>0){ fprintf(stdout,"#"); for(i=0;i0){ fprintf(stdout,"#"); for(i=0;i0){ */ /* fprintf(stderr,"#"); */ /* for(i=0;i0){ fprintf(stdout,"#"); fprintf(stdout,EMPTY_SEP,"obs."); for(i=0;i0){ fprintf(stdout,"#point estimates\n"); fprintf(stdout,"#"); for(i=0;i0){ fprintf(stdout,"#var-covar matrix\n"); fprintf(stdout,"#"); for(i=0;i0){ fprintf(stdout,"#"); for(i=0;i0){ fprintf(stdout,"#"); for(i=0;i0) Pnum--; evaluator_destroy(F.f); for(i=0;i