Commit 5280f2c2 authored by Jerome Benoit's avatar Jerome Benoit

Imported Upstream version 0.999b

parent 5517faec
......@@ -8,6 +8,11 @@ https://github.com/b-k/Apophenia/commits/master
!!Big.
]
August 2014
** default for apop_data_pack is .all_pages='y' (was 'n').
** remove apop_plot_lattice, apop_plot_triangle, apop_plot_line_and_scatter, apop_plot_qq.
Find them at https://github.com/b-k/apophenia/wiki/gnuplot_snippets .
March 2014
--Command-line tools print help should a user add a --help option.
......
AM_CFLAGS = -g -Wall -O3 $(SQLITE_CFLAGS) $(MYSQL_CFLAGS) $(OPENMP_CFLAGS)
AM_LDFLAGS = $(SQLITE_LDFLAGS) $(OPENMP_CFLAGS) $(MYSQL_LDFLAGS)
CC = $(PTHREAD_CC)
ACLOCAL_AMFLAGS=-I m4
ACLOCAL_AMFLAGS = -I m4
AUTOMAKE_OPTIONS = \
dist-xz \
dist-bzip2 \
dist-zip
AM_CFLAGS = -g -Wall -O3
## Library versioning (C:R:A == current:revision:age)
LIBAPOPHENIA_LT_VERSION = 0:0:0
SUBDIRS = transform model . cmd tests docs eg
lib_LTLIBRARIES = libapophenia.la
libapophenia_la_SOURCES = \
apop_arms.c apop_asst.c apop_bootstrap.c apop_conversions.c \
apop_data.c apop_db.c apop_fexact.c apop_hist.c \
apop_linear_algebra.c apop_linear_constraint.c apop_mapply.c \
apop_mcmc.c apop_missing_data.c apop_mle.c apop_model.c \
apop_name.c apop_output.c apop_rake.c \
apop_regression.c apop_settings.c apop_smoothing.c apop_sort.c \
apop_stats.c apop_tests.c apop_update.c apop_vtables.c \
asprintf.c \
transform/apop_dcompose.c \
transform/apop_dconstrain.c transform/apop_fix_params.c \
transform/apop_coordinate_transform.c \
transform/apop_mixture.c transform/apop_stack.c \
model/apop_bernoulli.c model/apop_beta.c \
model/apop_dirichlet.c \
model/apop_exponential.c model/apop_gamma.c \
model/apop_histogram.c model/apop_loess.c \
model/apop_multinomial.c model/apop_multivariate_normal.c \
model/apop_normal.c model/apop_ols.c \
model/apop_pmf.c model/apop_poisson.c \
model/apop_probit.c model/apop_t.c \
model/apop_uniform.c model/apop_wishart.c \
model/apop_yule.c model/apop_zipf.c
include_HEADERS = apop.h
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA= apophenia.pc
pkgconfigdir = `pkg-config --variable pc_path pkg-config | cut -f1 -d:`
SUBDIRS=. tests
lib_LTLIBRARIES = libapophenia.la
libapophenia_la_LD_VERSION_SCRIPT=
if HAVE_LD_VERSION_SCRIPT
libapophenia_la_LD_VERSION_SCRIPT+= -Wl,--version-script=$(top_srcdir)/apophenia.map
endif
SUBLIBS = \
libapopkernel.la \
transform/libapoptransform.la \
model/libapopmodel.la
libapophenia_la_SOURCES = \
asprintf.c
noinst_LTLIBRARIES = libapopkernel.la
noinst_HEADERS = apop_internal.h
#The programs
bin_PROGRAMS = apop_text_to_db apop_db_to_crosstab apop_plot_query
apop_text_to_db_SOURCES = cmd/apop_text_to_db.c
apop_db_to_crosstab_SOURCES = cmd/apop_db_to_crosstab.c
apop_plot_query_SOURCES = cmd/apop_plot_query.c
libapopkernel_la_SOURCES = \
apop_arms.c \
apop_asst.c \
apop_bootstrap.c \
apop_conversions.c \
apop_data.c \
apop_db.c \
apop_fexact.c \
apop_hist.c \
apop_linear_algebra.c \
apop_linear_constraint.c \
apop_mapply.c \
apop_mcmc.c \
apop_missing_data.c \
apop_mle.c apop_model.c \
apop_name.c \
apop_output.c \
apop_rake.c \
apop_regression.c \
apop_settings.c \
apop_smoothing.c \
apop_sort.c \
apop_stats.c \
apop_tests.c \
apop_update.c \
apop_vtables.c
apop_text_to_db_LDADD = libapophenia.la
apop_db_to_crosstab_LDADD = libapophenia.la
apop_plot_query_LDADD = libapophenia.la
apop_db_INCLUDES = \
apop_db_mysql.c \
apop_db_sqlite.c
VPATH= @srcdir@
apop_db.c: $(apop_db_INCLUDES)
EXTRA_DIST = docs/doxyconfig $(srcdir)/docs/*.png $(srcdir)/docs/*.gif \
$(srcdir)/docs/*.js \
docs/edit_group $(srcdir)/docs/*.html docs/typical.css \
docs/make_model_doc.awk docs/documentation.h rpm.spec \
eg apop_db_mysql.c apop_db_sqlite.c \
apop_internal.h COPYING2 apophenia.pc
libapopkernel_la_CFLAGS = \
$(PTHREAD_CFLAGS) \
$(OPENMP_CFLAGS) \
$(MYSQL_CFLAGS) \
$(SQLITE3_CFLAGS) \
$(GSL_CFLAGS)
DISTCLEANFILES = apophenia.pc* apophenia-uninstalled.pc apophenia-uninstalled.sh tests/*dat* tests/printing_sample 1
libapophenia_la_LDFLAGS = \
-version-info $(LIBAPOPHENIA_LT_VERSION) \
$(libapophenia_la_LD_VERSION_SCRIPT)
%.c: %.m4.c
m4 -P prep_variadics.m4 $< > $@
libapophenia_la_LIBADD = \
$(SUBLIBS) \
$(MYSQL_LDFLAGS) \
$(SQLITE3_LDFLAGS) \
$(GSL_LIBS) \
$(PTHREAD_LIBS) \
$(LIBM)
%.h: %.m4.h
m4 -P prep_variadics.m4 $< > $@
EXTRA_DIST = \
COPYING2 \
rpm.spec \
apophenia.pc.in \
apophenia.map
#The Doxygen documentation, which you'll have to call yourself (via make doc).
EXTRA_DIST += \
$(apop_db_INCLUDES)
## compatibility
doc:
-rm tests/apophenia/*h
cat model/*.c transform/*.c | awk -f docs/make_model_doc.awk > docs/model_doc.h
doxygen docs/doxyconfig
cp docs/flake.gif docs/search.gif docs/right.png docs/down.png html/
@sed -i -e 's|Outlineheader \([^ ]*\)\(.*\)</p>|<h2><a class="anchor" name="\1"><div class="trigger" onClick="showBranch('\''\1d'\'');swapFolder('\''\1f'\'')"><img src="right.png" border="0" id="\1f" alt="pip">\2</div></a></h2><div class="branch" id="\1d">|' \
-e 's|endofdiv</p>|</div>|' \
-e 's|ALLBUTTON|<span class="trigger" onClick="showAll();"<a>Expand all </a></span> \| <span class="trigger" onClick="closeAll();"<a>Collapse all </a></span>|' html/index.html html/outline.html
@sed -i -e '/index_x/s/- x -/ /' -e '/index_a/s/- a -/ /' -e '/Here is a list of all/d' -e '/<span>[a-z]<\/span><\/a><\/li>/d' html/globals.html
@sed -i -e '/div class="contents"/aSee also the <a class="el" href="group__models.html">models</a> and <a class="el" href="group__settings.html">settings</a> pages.' html/globals.html
@sed -i 's|<td width="100%"></td>||g' html/*html
@sed -i -f docs/edit_group html/group__models.html
cp docs/*js html/
sudo cp man/man3/* /usr/share/man/man3/
install-data-local:
@echo
@echo "OK. If you'd like to generate documentation via Doxygen, run make doc; to test, run make check."
@echo
-$(MAKE) -C docs doc
This diff is collapsed.
......@@ -8,6 +8,11 @@ https://github.com/b-k/Apophenia/commits/master
!!Big.
]
August 2014
** default for apop_data_pack is .all_pages='y' (was 'n').
** remove apop_plot_lattice, apop_plot_triangle, apop_plot_line_and_scatter, apop_plot_qq.
Find them at https://github.com/b-k/apophenia/wiki/gnuplot_snippets .
March 2014
--Command-line tools print help should a user add a --help option.
......
This diff is collapsed.
This diff is collapsed.
......@@ -553,14 +553,6 @@ double apop_matrix_map_all_sum(const gsl_matrix *in, double (*fn)(double));
// Some output routines
#ifdef APOP_NO_VARIADIC
void apop_plot_line_and_scatter(apop_data *data, apop_model *est, char const * output_name, FILE *output_pipe, char output_type, char output_append) ;
#else
void apop_plot_line_and_scatter_base(apop_data *data, apop_model *est, char const * output_name, FILE *output_pipe, char output_type, char output_append) ;
apop_varad_declare(void, apop_plot_line_and_scatter, apop_data *data; apop_model *est; char const * output_name; FILE *output_pipe; char output_type; char output_append);
#define apop_plot_line_and_scatter(...) apop_varad_link(apop_plot_line_and_scatter, __VA_ARGS__)
#endif
#ifdef APOP_NO_VARIADIC
void apop_plot_histogram(gsl_vector *data, size_t bin_count, char *with, char const *output_name, FILE *output_pipe, char output_type, char output_append) ;
#else
......@@ -569,30 +561,6 @@ double apop_matrix_map_all_sum(const gsl_matrix *in, double (*fn)(double));
#define apop_plot_histogram(...) apop_varad_link(apop_plot_histogram, __VA_ARGS__)
#endif
#ifdef APOP_NO_VARIADIC
void apop_plot_lattice(const apop_data *d, char const *output_name, FILE *output_pipe, char output_type, char output_append) ;
#else
void apop_plot_lattice_base(const apop_data *d, char const *output_name, FILE *output_pipe, char output_type, char output_append) ;
apop_varad_declare(void, apop_plot_lattice, const apop_data *d; char const *output_name; FILE *output_pipe; char output_type; char output_append);
#define apop_plot_lattice(...) apop_varad_link(apop_plot_lattice, __VA_ARGS__)
#endif
#ifdef APOP_NO_VARIADIC
void apop_plot_qq(gsl_vector *v, apop_model *m, char const *output_name, FILE *output_pipe, char output_type, char output_append, size_t bins, gsl_rng *r) ;
#else
void apop_plot_qq_base(gsl_vector *v, apop_model *m, char const *output_name, FILE *output_pipe, char output_type, char output_append, size_t bins, gsl_rng *r) ;
apop_varad_declare(void, apop_plot_qq, gsl_vector *v; apop_model *m; char const *output_name; FILE *output_pipe; char output_type; char output_append; size_t bins; gsl_rng *r);
#define apop_plot_qq(...) apop_varad_link(apop_plot_qq, __VA_ARGS__)
#endif
#ifdef APOP_NO_VARIADIC
void apop_plot_triangle(apop_data *in, char const *output_name, FILE *output_pipe, char output_type, char output_append) ;
#else
void apop_plot_triangle_base(apop_data *in, char const *output_name, FILE *output_pipe, char output_type, char output_append) ;
apop_varad_declare(void, apop_plot_triangle, apop_data *in; char const *output_name; FILE *output_pipe; char output_type; char output_append);
#define apop_plot_triangle(...) apop_varad_link(apop_plot_triangle, __VA_ARGS__)
#endif
#ifdef APOP_NO_VARIADIC
void apop_matrix_print(const gsl_matrix *data, char const *output_name, FILE *output_pipe, char output_type, char output_append) ;
......@@ -764,10 +732,10 @@ gsl_vector * apop_vector_unique_elements(const gsl_vector *v);
#ifdef APOP_NO_VARIADIC
double apop_kl_divergence(apop_model *from, apop_model *to, int draw_ct, gsl_rng *rng, apop_model *top, apop_model *bottom) ;
double apop_kl_divergence(apop_model *from, apop_model *to, int draw_ct, gsl_rng *rng) ;
#else
double apop_kl_divergence_base(apop_model *from, apop_model *to, int draw_ct, gsl_rng *rng, apop_model *top, apop_model *bottom) ;
apop_varad_declare(double, apop_kl_divergence, apop_model *from; apop_model *to; int draw_ct; gsl_rng *rng; apop_model *top; apop_model *bottom);
double apop_kl_divergence_base(apop_model *from, apop_model *to, int draw_ct, gsl_rng *rng) ;
apop_varad_declare(double, apop_kl_divergence, apop_model *from; apop_model *to; int draw_ct; gsl_rng *rng);
#define apop_kl_divergence(...) apop_varad_link(apop_kl_divergence, __VA_ARGS__)
#endif
......@@ -1264,7 +1232,7 @@ gsl_vector * v = &( apop_vv_##v );
/* Not (yet) for public use. \hideinitializer */
#define apop_subvector(v, start, len) ( \
((v) == NULL || (v)->size < ((start)+(len)) || (start) < 0) ? NULL \
: &(gsl_vector){.size=(len), .stride=1, .data=(v)->data+(start)})
: &(gsl_vector){.size=(len), .stride=(v)->stride, .data=(v)->data+(start*(v)->stride)})
/* Not (yet) for public use. \hideinitializer */
#define apop_mrow(m, row) ( \
......@@ -1404,14 +1372,14 @@ double apop_vector_skew(const gsl_vector *in);
*/
/** \def Apop_subm(m, srow, scol, nrow, ncol)
/** \def Apop_subm(data_to_view, srow, scol, nrows, ncols)
Generate a subview of a submatrix within a \c gsl_matrix. Like \ref Apop_r, et al., the view is an automatically-allocated variable that is lost once the program flow leaves the scope in which it is declared.
\param m The root matrix
\param data_to_view The root matrix
\param srow the first row (in the root matrix) of the top of the submatrix
\param scol the first column (in the root matrix) of the left edge of the submatrix
\param nrow number of rows in the submatrix
\param ncol number of columns in the submatrix
\param nrows number of rows in the submatrix
\param ncols number of columns in the submatrix
\hideinitializer */
/** \def Apop_row_t(m, row_name, v)
......@@ -1461,7 +1429,7 @@ The view expires as soon as the program leaves the current scope (like with the
/** \def Apop_rv(d, row)
A macro to generate a temporary one-row view of the matrix in an \ref apop_data set \c d, pulling out only
row \c row. The view is a \ref gsl_vector set.
row \c row. The view is a \c gsl_vector set.
\code
gsl_vector *v = Apop_rv(your_data, i);
......@@ -1476,7 +1444,7 @@ The view is automatically allocated, and disappears as soon as the program leave
/** \def Apop_cv(d, col)
A macro to generate a temporary one-column view of the matrix in an \ref apop_data
set \c d, pulling out only column \c col. The view is a \ref gsl_vector set.
set \c d, pulling out only column \c col. The view is a \c gsl_vector set.
As usual, column -1 is the vector element of the \ref apop_data set.
......@@ -1544,9 +1512,6 @@ double apop_query_to_float(const char * fmt, ...) __attribute__ ((format (printf
int apop_data_to_db(const apop_data *set, const char *tabname, char);
double apop_db_t_test(char * tab1, char *col1, char *tab2, char *col2); //deprecated
double apop_db_paired_t_test(char * tab1, char *col1, char *col2); //deprecated
//////Settings groups
......
......@@ -769,7 +769,7 @@ will return the original data set (stripped of text and names).
\param in an \c apop_data set. No default; if \c NULL, return \c NULL.
\param out If this is not \c NULL, then put the output here. The dimensions must match exactly. If \c NULL, then allocate a new data set. Default = \c NULL.
\param all_pages If \c 'y', then follow the <tt> ->more</tt> pointer to fill subsequent
pages; else fill only the first page. Informational pages will still be ignored, unless you set <tt>.use_info_pages='y'</tt> as well. Default = \c 'n'.
pages; else fill only the first page. Informational pages will still be ignored, unless you set <tt>.use_info_pages='y'</tt> as well. Default = \c 'y'.
\param use_info_pages Pages in XML-style brackets, such as <tt>\<Covariance\></tt> will
be ignored unless you set <tt>.use_info_pages='y'</tt>. Be sure that this is set to the
same thing when you both pack and unpack. Default: <tt>'n'</tt>.
......@@ -786,7 +786,7 @@ apop_varad_head(gsl_vector *, apop_data_pack){
const apop_data * apop_varad_var(in, NULL);
if (!in) return NULL;
gsl_vector * apop_varad_var(out, NULL);
char apop_varad_var(all_pages, 'n');
char apop_varad_var(all_pages, 'y');
char apop_varad_var(use_info_pages, 'n');
if (out) {
size_t total_size = sizecount(in, (all_pages == 'y' || all_pages == 'Y'), (use_info_pages =='y' || use_info_pages =='Y'));
......@@ -826,7 +826,7 @@ apop_varad_head(gsl_vector *, apop_data_pack){
in = in->more;
if (in->more){
vout = gsl_vector_subvector((gsl_vector *)out, offset, out->size - offset).vector;
apop_data_pack(in->more, &vout, .all_pages='y');
apop_data_pack(in->more, &vout);
}
}
return out;
......
......@@ -734,38 +734,3 @@ int apop_data_to_db(const apop_data *set, const char *tabname, const char output
free(q);
return 0;
}
// Some stats wrappers
/** Do a \f$t\f$-test entirely inside the database.
Returns only the two-tailed p-value.
\ingroup ttest
*/
double apop_db_t_test(char * tab1, char *col1, char *tab2, char *col2){
gsl_matrix *result1, *result2;
result1 = apop_query_to_matrix("select avg(%s), var(%s), count(*) from %s", col1, col1, tab1);
result2 = apop_query_to_matrix("select avg(%s), var(%s), count(*) from %s", col2, col2, tab2);
double a_avg = gsl_matrix_get(result1, 0, 0),
a_var = gsl_matrix_get(result1, 0, 1),
a_count = gsl_matrix_get(result1, 0, 2),
b_avg = gsl_matrix_get(result2, 0, 0),
b_var = gsl_matrix_get(result2, 0, 1),
b_count = gsl_matrix_get(result2, 0, 2),
stat = (a_avg - b_avg)/ sqrt(b_var/(b_count-1) + a_var/(a_count-1));
return fabs(1 - (1 - gsl_cdf_tdist_P(stat, a_count+b_count-2))*2); //two-tailify a one-tailed lookup.
}
/** Do a paired \f$t\f$-test entirely inside the database.
Returns only the two-tailed p-value.
\ingroup ttest
*/
double apop_db_paired_t_test(char * tab1, char *col1, char *col2){
gsl_matrix *result=
apop_query_to_matrix("select avg(%s - %s), var(%s - %s), count(*) from %s tab1",
col1,col2, col1, col2, tab1);
double avg = gsl_matrix_get(result, 0, 0),
var = gsl_matrix_get(result, 0, 1),
count = gsl_matrix_get(result, 0, 2),
stat = avg/ sqrt(var/(count-1));
return 2*GSL_MIN(gsl_cdf_tdist_P(stat, count-1),gsl_cdf_tdist_Q(stat, count-1));
}
......@@ -58,9 +58,11 @@ void apop_gsl_error(char const *reason, char const *file, int line, int gsl_errn
#define PRAGMA(x) _Pragma(#x)
#define OMP_critical(tag) PRAGMA(omp critical ( tag ))
#define OMP_for(...) _Pragma("omp parallel for") for(__VA_ARGS__)
#define OMP_for_reduce(red, ...) PRAGMA(omp parallel for reduction( red )) for(__VA_ARGS__)
#else
#define OMP_critical(tag)
#define OMP_for(...) for(__VA_ARGS__)
#define OMP_for_reduce(...) for(__VA_ARGS__)
#endif
#include "config.h"
......@@ -79,6 +81,6 @@ extern int vasprintf (char **res, const char *format, va_list args)
#endif
#include "apop.h"
void add_info_criteria(apop_data *d, apop_model *m, apop_model *est, double ll); //In apop_mle.c
void add_info_criteria(apop_data *d, apop_model *m, apop_model *est, double ll, int param_ct); //In apop_mle.c
apop_model *maybe_prep(apop_data *d, apop_model *m, _Bool *is_a_copy); //in apop_mcmc, for apop_update.
......@@ -309,7 +309,7 @@ apop_varad_head(apop_model *, apop_model_metropolis){
bool m_is_a_copy = 0;
m = maybe_prep(d, m, &m_is_a_copy);
s->last_ll = GSL_NEGINF;
gsl_vector * drawv = apop_data_pack(m->parameters, .all_pages='y');
gsl_vector * drawv = apop_data_pack(m->parameters);
Apop_stopif(s->burnin > 1, s->burnin/=(s->periods + 0.0),
1, "Burn-in should be a fraction of the number of periods, "
"not a whole number of periods. Rescaling to burnin=%g."
......
......@@ -98,7 +98,7 @@ static double one_d(double b, void *in){
static void apop_internal_numerical_gradient(apop_fn_with_params ll,
infostruct* info, gsl_vector *out, double delta){
double result, err;
gsl_vector *beta = apop_data_pack(info->model->parameters, NULL, .all_pages='y');
gsl_vector *beta = apop_data_pack(info->model->parameters);
infostruct i = *info;
i.f = &ll;
i.gp = &(grad_params){ .beta = gsl_vector_alloc(beta->size)};
......@@ -318,7 +318,7 @@ static double negshell (const gsl_vector *beta, void * in){
apop_data_unpack(beta, i->model->parameters);
if (i->use_constraint && i->model->constraint)
penalty = i->model->constraint(i->data, i->model);
if (penalty) apop_data_pack(i->model->parameters, (gsl_vector*) beta, .all_pages='y');
if (penalty) apop_data_pack(i->model->parameters, (gsl_vector*) beta);
double f_val = f(i->data, i->model);
out = penalty - f_val; //negative llikelihood
Apop_stopif(gsl_isnan(out), longjmp(i->bad_eval_jump, -1),
......@@ -332,13 +332,11 @@ static double negshell (const gsl_vector *beta, void * in){
if(gsl_isnan(this_ll)){
Apop_stopif(!i->model->log_likelihood && penalty > f_val, i->want_info='n',
1, "Your model's p evaluates as %g, and your penalty is %g, for an "
"adjusted p of %g. Please make sure that this is positive, perhaps by "
"rescaling your penalty. Continuing, but will not report covariance or other "
"log likelihood-based statistics.\n", f_val, penalty, f_val-penalty);
1, "Model's p=%g, penalty=%g, for a negative adjusted p=%g. "
"Continuing, but can not report covariance or other "
"log likelihood-based statistics.", f_val, penalty, f_val-penalty);
Apop_stopif(1, apop_data_show(i->model->parameters); i->want_info='n',
1, "NaN resulted from the following value tried by the maximum likelihood system. "
"Tighten your constraint? Log of a negative p?\n");
1, "NaN resulted from the following value tried by the maximum likelihood system.");
}
i->best_ll = GSL_MAX(i->best_ll, this_ll);
}
......@@ -362,7 +360,7 @@ Finally, reverse the sign, since the GSL is trying to minimize instead of maximi
/* In all cases, negshell gets called first, so the constraint is already
checked and beta nudged accordingly.
if(i->model->constraint && i->model->constraint(i->data, i->model))
apop_data_pack(i->model->parameters, (gsl_vector *) beta, .all_pages='y'); */
apop_data_pack(i->model->parameters, (gsl_vector *) beta); */
apop_score_type ms = apop_score_vtable_get(i->model);
if (ms) ms(i->data, g, i->model);
else {
......@@ -391,13 +389,10 @@ static int setup_starting_point(apop_mle_settings *mp, gsl_vector *x){
return 0;
}
void add_info_criteria(apop_data *d, apop_model *m, apop_model *est, double ll){
void add_info_criteria(apop_data *d, apop_model *m, apop_model *est, double ll, int param_ct){
//Did the sending function save last value of f()?
if (!ll) ll = apop_log_likelihood(d, m);
Get_vmsizes(est->parameters); //tsize
int param_ct = tsize;
if (!est->info) est->info = apop_data_alloc();
apop_data_add_named_elmt(est->info, "log likelihood", ll);
double AIC = 2*param_ct - 2 *ll;
......@@ -424,7 +419,7 @@ static void auxinfo(apop_data *params, infostruct *i, int status, double ll){
}
if (!est->info) est->info = apop_data_alloc();
apop_data_add_named_elmt(est->info, "status", status);
if (i->want_info=='y') add_info_criteria(i->data, i->model, est, ll);
if (i->want_info=='y') add_info_criteria(i->data, i->model, est, ll, i->beta->size);
}
static void apop_maximum_likelihood_w_d(apop_data * data, infostruct *i){
......@@ -560,6 +555,9 @@ static void dim_cycle(apop_data *d, apop_model *est, infostruct info){
apop_mle_settings *mp = Apop_settings_get_group(est, apop_mle);
double tol = mp->dim_cycle_tolerance;
int betasize = info.beta->size;
Apop_settings_set(est, apop_mle, dim_cycle_tolerance, 0);//so sub-estimations won't use this function.
gsl_vector *paramv = apop_data_pack(est->parameters);
apop_model *full_est = NULL; //an alias
do {
if (mp->verbose){
if (!(iteration++))
......@@ -567,7 +565,6 @@ static void dim_cycle(apop_data *d, apop_model *est, infostruct info){
printf("Iteration %i:\n", iteration);
}
last_ll = this_ll;
Apop_settings_set(est, apop_mle, dim_cycle_tolerance, 0);//so sub-estimations won't use this function.
for (int i=0; i< betasize; i++){
gsl_vector_set(info.beta, i, GSL_NAN);
apop_data_unpack(info.beta, est->parameters);
......@@ -575,14 +572,17 @@ static void dim_cycle(apop_data *d, apop_model *est, infostruct info){
apop_prep(d, m_onedim);
apop_maximum_likelihood(d, m_onedim);
gsl_vector_set(info.beta, i, m_onedim->parameters->vector->data[0]);
apop_model *full_est = apop_model_fix_params_get_base(m_onedim);//points to est, but filled.
full_est = apop_model_fix_params_get_base(m_onedim);//points to est, but filled.
this_ll = get_ll(d, full_est);//only used on the last iteration.
if (mp->verbose) printf("(%i):%g\t", i, this_ll), fflush(NULL);
apop_model_free(m_onedim);
}
if (mp->verbose) printf("\n");
apop_data_pack(full_est->parameters, paramv);
Apop_settings_add(est, apop_mle, starting_pt, paramv->data);
} while (fabs(this_ll - last_ll) > tol);
Apop_settings_set(est, apop_mle, dim_cycle_tolerance, tol);
gsl_vector_free(paramv);
}
void get_desires(apop_model *m, infostruct *info){
......@@ -658,7 +658,7 @@ void apop_maximum_likelihood(apop_data * data, apop_model *dist){
.path = mp->path,
.model = dist};
get_desires(dist, &info);
info.beta = apop_data_pack(dist->parameters, NULL, .all_pages='y');
info.beta = apop_data_pack(dist->parameters);
if (info.path) *info.path = apop_data_alloc();
if (setup_starting_point(mp, info.beta)) return;
info.model->data = data;
......@@ -740,12 +740,12 @@ apop_varad_head(apop_model *, apop_estimate_restart){
memcpy(prm0->starting_pt, prm->starting_pt, sizeof(double)*size);
}
else if (!strcmp(starting_pt, "np")){
v = apop_data_pack(copy->parameters, NULL, .all_pages='y');
v = apop_data_pack(copy->parameters);
prm->starting_pt = malloc(sizeof(double)*v->size);
memcpy(prm->starting_pt, v->data, sizeof(double)*v->size);
}
else if (e->parameters){//"ep" or default.
v = apop_data_pack(e->parameters, NULL, .all_pages='y');
v = apop_data_pack(e->parameters);
prm->starting_pt = malloc(sizeof(double)*v->size);
memcpy(prm->starting_pt, v->data, sizeof(double)*v->size);
}
......@@ -796,7 +796,7 @@ static double annealing_distance(void *xin, void *yin) {
static void annealing_check_constraint(infostruct *i){
apop_data_unpack(i->beta, i->model->parameters);
if (i->model->constraint && i->model->constraint(i->data, i->model))
apop_data_pack(i->model->parameters, i->beta, .all_pages='y');
apop_data_pack(i->model->parameters, i->beta);
}
static void annealing_step(const gsl_rng * r, void *in, double step_size){
......@@ -868,7 +868,7 @@ static void apop_annealing(infostruct *i){
.t_min = mp->t_min};
gsl_rng *r = mp->rng ? mp->rng : apop_rng_get_thread();
//these two are done at apop_maximum_likelihood:
//i->beta = apop_data_pack(ep->parameters, NULL, .all_pages='y');
//i->beta = apop_data_pack(ep->parameters);
//setup_starting_point(mp, i->beta);
int betasize = i->beta->size;
int apopstatus = -1;
......
This diff is collapsed.
......@@ -137,7 +137,7 @@ numbers: (1, 2, 3) and (-1.32, 0, 27) work identically.
A few examples:
\include "../tests/sort_example.c"
\include "sort_example.c"
\li This function uses the \ref designated syntax for inputs.
*/
......
......@@ -17,7 +17,7 @@ These functions simply take in a GSL vector and return its mean, variance, or ku
\see db_moments
For \ref apop_vector_var_m<tt>(vector, mean)</tt>, <tt>mean</tt> is the mean of the
For <tt>apop_vector_var_m(vector, mean)</tt>, <tt>mean</tt> is the mean of the
vector. This saves the trouble of re-calculating the mean if you've
already done so. E.g.,
......@@ -667,11 +667,6 @@ apop_data *apop_data_correlation(const apop_data *in){
return out;
}
static void get_one_row(apop_data *p, apop_data *a_row, int i, int min, int max){
for (int j=min; j< max; j++)
apop_data_set(a_row, 0, j-min, apop_data_get(p, i, j));
}
/** Kullback-Leibler divergence.
This measure of the divergence of one distribution from another
......@@ -682,9 +677,7 @@ static void get_one_row(apop_data *p, apop_data *a_row, int i, int min, int max)
\param from the \f$p\f$ in the above formula. (No default; must not be \c NULL)
\param to the \f$q\f$ in the above formula. (No default; must not be \c NULL)
\param draw_ct If I do the calculation via random draws, how many? (Default = 1e5)
\param rng A \c gsl_rng. If NULL, I'll take care of the RNG; see \ref apop_rng_get_thread. (Default = \c NULL)
\param top deprecated synonym for \c from.
\param bottom deprecated synonym for \c to.
\param rng A \c gsl_rng. If \c NULL or number of threads is greater than 1, I'll take care of the RNG; see \ref apop_rng_get_thread. (Default = \c NULL)
This function can take empirical histogram-type models (\ref apop_pmf) or continuous models like \ref apop_loess
or \ref apop_normal.
......@@ -703,21 +696,19 @@ If neither distribution is a PMF, then I'll take \c draw_ct random draws from \c
\li This function uses the \ref designated syntax for inputs.
*/
#ifdef APOP_NO_VARIADIC
double apop_kl_divergence(apop_model *from, apop_model *to, int draw_ct, gsl_rng *rng, apop_model *top, apop_model *bottom){
double apop_kl_divergence(apop_model *from, apop_model *to, int draw_ct, gsl_rng *rng){
#else
apop_varad_head(double, apop_kl_divergence){
apop_model * apop_varad_var(top, NULL);
apop_model * apop_varad_var(bottom, NULL);
apop_model * apop_varad_var(from, (top ? top : NULL));
apop_model * apop_varad_var(to, (bottom ? bottom : NULL));
Apop_assert(from, "The first model is NULL.");
Apop_assert(to, "The second model is NULL.");
apop_model * apop_varad_var(from, NULL);
apop_model * apop_varad_var(to, NULL);
Apop_stopif(!from, return NAN, 0, "The first model is NULL; returning NaN.");
Apop_stopif(!to, return NAN, 0, "The second model is NULL.");
double apop_varad_var(draw_ct, 1e5);
gsl_rng * apop_varad_var(rng, apop_rng_get_thread());
return apop_kl_divergence_base(from, to, draw_ct, rng, top, bottom);
return apop_kl_divergence_base(from, to, draw_ct, rng);
}
double apop_kl_divergence_base(apop_model *from, apop_model *to, int draw_ct, gsl_rng *rng, apop_model *top, apop_model *bottom){
double apop_kl_divergence_base(apop_model *from, apop_model *to, int draw_ct, gsl_rng *rng){
#endif
double div = 0;
Apop_notify(3, "p(from)\tp(to)\tfrom*log(from/to)\n");
......@@ -725,36 +716,33 @@ apop_varad_head(double, apop_kl_divergence){
apop_data *p = from->data;
apop_pmf_settings *settings = Apop_settings_get_group(from, apop_pmf);
Get_vmsizes(p); //maxsize
apop_data *a_row = apop_data_alloc(vsize, (msize1 ? 1 : 0), msize2);
for (int i=0; i < (vsize ? vsize : msize1); i++){
OMP_for_reduce (+:div, int i=0; i < maxsize; i++){
double pi = p->weights ? gsl_vector_get(p->weights, i)/settings->total_weight : 1./maxsize;
if (!pi){
Apop_notify(3, "0\t--\t0");
continue;
} //else:
get_one_row(p, a_row, i, firstcol, msize2);
double qi = apop_p(a_row, to);
Apop_assert_c(qi, GSL_NEGINF, 1, "The PMFs aren't synced: to-distribution has a value where "
"from-distribution doesn't (which produces infinite divergence).");
double qi = apop_p(Apop_r(p, i), to);
Apop_notify(3,"%g\t%g\t%g", pi, qi, pi ? pi * log(pi/qi):0);
Apop_stopif(!qi, div+=GSL_NEGINF; break, 1, "The PMFs aren't synced: from-distribution has a value where "
"to-distribution doesn't (which produces infinite divergence).");
div += pi * log(pi/qi);
}