apop_update.c 11.7 KB
 Jerome Benoit committed Aug 15, 2014 1 2 3  /** \file The \ref apop_update function. */  Jerome Benoit committed Jul 10, 2015 4 /* Copyright (c) 2006--2009, 2014 by Ben Klemens. Licensed under the GPLv2; see COPYING. */  Jerome Benoit committed Aug 15, 2014 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124  #include "apop_internal.h" #include /* This file in four parts: --an apop_model named product, purpose-built for apop_update to send to apop_model_metropolis --apop_mcmc settings and their defaults. --apop_update and its equipment, which has three cases: --conjugates, in which case see the functions --call Metropolis */ /* This will be used by apop_update to send to apop_mcmc below. To set it up, add a more pointer to an array of two models, the prior and likelihood. The total likelihood of a data point is (likelihood these parameters are drawn from prior)*(likelihood of these parameters and the data set using the likelihood fn) */ static long double product_ll(apop_data *d, apop_model *m){ apop_model **pl = m->more; gsl_vector *v = apop_data_pack(m->parameters); apop_data_unpack(v, pl[1]->parameters); gsl_vector_free(v); return apop_log_likelihood(m->parameters, pl[0]) + apop_log_likelihood(d, pl[1]); } static long double product_constraint(apop_data *data, apop_model *m){ apop_model **pl = m->more; gsl_vector *v = apop_data_pack(m->parameters); apop_data_unpack(v, pl[1]->parameters); gsl_vector_free(v); return pl[1]->constraint(data, pl[1]); } apop_model *product = &(apop_model){"product of two models", .log_likelihood=product_ll, .constraint=product_constraint}; ///////////the conjugate table static apop_model *betabinom(apop_data *data, apop_model *prior, apop_model *likelihood){ apop_model *outp = apop_model_copy(prior); if (!data && likelihood->parameters){ double n = likelihood->parameters->vector->data[0]; double p = likelihood->parameters->vector->data[1]; *gsl_vector_ptr(outp->parameters->vector, 0) += n*p; *gsl_vector_ptr(outp->parameters->vector, 1) += n*(1-p); } else { gsl_vector *hits = Apop_cv(data, 1); gsl_vector *misses = Apop_cv(data, 0); *gsl_vector_ptr(outp->parameters->vector, 0) += apop_sum(hits); *gsl_vector_ptr(outp->parameters->vector, 1) += apop_sum(misses); } return outp; } double countup(double in){return in!=0;} static apop_model *betabernie(apop_data *data, apop_model *prior, apop_model *likelihood){ apop_model *outp = apop_model_copy(prior); Get_vmsizes(data);//tsize double sum = apop_map_sum(data, .fn_d=countup, .part='a'); *gsl_vector_ptr(outp->parameters->vector, 0) += sum; *gsl_vector_ptr(outp->parameters->vector, 1) += tsize - sum; return outp; } static apop_model *gammaexpo(apop_data *data, apop_model *prior, apop_model *likelihood){ apop_model *outp = apop_model_copy(prior); Get_vmsizes(data); //maxsize *gsl_vector_ptr(outp->parameters->vector, 0) += maxsize; apop_data_set(outp->parameters, 1, .val=1./ (1./apop_data_get(outp->parameters, 1) + (data->matrix ? apop_matrix_sum(data->matrix) : 0) + (data->vector ? apop_sum(data->vector) : 0))); return outp; } static apop_model *gammapoisson(apop_data *data, apop_model *prior, apop_model *likelihood){ /* Posterior alpha = alpha_0 + sum x; posterior beta = beta_0/(beta_0*n + 1) */ apop_model *outp = apop_model_copy(prior); Get_vmsizes(data); //vsize, msize1,maxsize *gsl_vector_ptr(outp->parameters->vector, 0) += (vsize ? apop_sum(data->vector): 0) + (msize1 ? apop_matrix_sum(data->matrix): 0); double *beta = gsl_vector_ptr(outp->parameters->vector, 1); *beta = *beta/(*beta * maxsize + 1); return outp; } static apop_model *normnorm(apop_data *data, apop_model *prior, apop_model *likelihood){ /* output \f$(\mu, \sigma) = (\frac{\mu_0}{\sigma_0^2} + \frac{\sum_{i=1}^n x_i}{\sigma^2})/(\frac{1}{\sigma_0^2} + \frac{n}{\sigma^2}), (\frac{1}{\sigma_0^2} + \frac{n}{\sigma^2})^{-1}\f$ That is, the output is weighted by the number of data points for the likelihood. If you give me a parametrized normal, with no data, then I'll take the weight to be \f$n=1\f$. */ double mu_like, var_like; long int n; apop_model *outp = apop_model_copy(prior); apop_prep(data, outp); long double mu_pri = prior->parameters->vector->data[0]; long double var_pri = gsl_pow_2(prior->parameters->vector->data[1]); if (!data && likelihood->parameters){ mu_like = likelihood->parameters->vector->data[0]; var_like = gsl_pow_2(likelihood->parameters->vector->data[1]); n = 1; } else { n = data->matrix->size1 * data->matrix->size2; apop_matrix_mean_and_var(data->matrix, &mu_like, &var_like); } gsl_vector_set(outp->parameters->vector, 0, (mu_pri/var_pri + n*mu_like/var_like)/(1/var_pri + n/var_like)); gsl_vector_set(outp->parameters->vector, 1, pow((1/var_pri + n/var_like), -.5)); return outp; } /** Take in a prior and likelihood distribution, and output a posterior distribution.  Jerome Benoit committed Jul 10, 2015 125 126 127 \li This function first checks a table of conjugate distributions for the pair you sent in. If the models are listed on the table, then the function returns a corresponding closed-form model with updated parameters.  Jerome Benoit committed Aug 15, 2014 128   Jerome Benoit committed Jul 10, 2015 129 130 131 132 133 \li If the parameters aren't in the table of conjugate, and the prior distribution has a \c p or \c log_likelihood element, then use \ref apop_model_metropolis to generate the posterior. If you expect MCMC to run, you may add an \ref apop_mcmc_settings group to your prior to control the details of the search. See also the \ref apop_model_metropolis documentation.  Jerome Benoit committed Aug 15, 2014 134   Jerome Benoit committed Jul 10, 2015 135 136 137 138 \li If the prior does not have a \c p or \c log_likelihood but does have a \c draw element, then make draws from the prior and weight them by the \c p given by the likelihood distribution. This is not a rejection sampling method, so the burnin is ignored.  Jerome Benoit committed Aug 15, 2014 139   Jerome Benoit committed Jul 10, 2015 140 141 142 143 144 145 146 147 148 149 150 151 \param data The input data, that will be used by the likelihood function (default = \c NULL.) \param prior The prior \ref apop_model. If the system needs to estimate the posterior via MCMC, this needs to have a \c log_likelihood or \c p method. (No default, must not be \c NULL.) \param likelihood The likelihood \ref apop_model. If the system needs to estimate the posterior via MCMC, this needs to have a \c log_likelihood or \c p method (ll preferred). (No default, must not be \c NULL.) \param rng A \c gsl_rng, already initialized (e.g., via \ref apop_rng_alloc). (default: an RNG from \ref apop_rng_get_thread) \return an \ref apop_model struct representing the posterior, with updated parameters. \li In all cases, the output is a \ref apop_model that can be used as the input to this function, so you can chain Bayesian updating procedures. \li Here are the conjugate distributions currently defined:  Jerome Benoit committed Aug 15, 2014 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 
Likelihood Notes \ref apop_binomial "Binomial" \ref apop_bernoulli "Bernoulli" \ref apop_gamma "Gamma" Gamma likelihood represents the distribution of \f$\lambda^{-1}\f$, not plain \f$\lambda\f$ \ref apop_normal "Normal" Assumes prior with fixed \f$\sigma\f$; updates distribution for \f$\mu\f$ \ref apop_poisson "Poisson" Uses sum and size of the data  Jerome Benoit committed Jul 10, 2015 169 170 171 172 173 174 175 Here is a test function that compares the output via conjugate table and via Metropolis-Hastings sampling: \include test_updating.c \li The conjugate table is stored using a vtable; see \ref vtables for details. If you are writing a new vtable entry, the typedef new functions must conform to and the hash used for lookups are:  Jerome Benoit committed Aug 15, 2014 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190  \code typedef apop_model *(*apop_update_type)(apop_data *, apop_model , apop_model); #define apop_update_hash(m1, m2) ((size_t)(m1).draw + (size_t)((m2).log_likelihood ? (m2).log_likelihood : (m2).p)*33) \endcode \li This function uses the \ref designated syntax for inputs. */ #ifdef APOP_NO_VARIADIC apop_model * apop_update(apop_data *data, apop_model *prior, apop_model *likelihood, gsl_rng *rng){ #else apop_varad_head(apop_model *, apop_update){ apop_data *apop_varad_var(data, NULL); apop_model *apop_varad_var(prior, NULL); apop_model *apop_varad_var(likelihood, NULL);  Jerome Benoit committed Jul 10, 2015 191  gsl_rng *apop_varad_var(rng, apop_rng_get_thread(-1));  Jerome Benoit committed Aug 15, 2014 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208  return apop_update_base(data, prior, likelihood, rng); } apop_model * apop_update_base(apop_data *data, apop_model *prior, apop_model *likelihood, gsl_rng *rng){ #endif static int setup=0; if (!(setup++)){ apop_update_vtable_add(betabinom, apop_beta, apop_binomial); apop_update_vtable_add(betabernie, apop_beta, apop_bernoulli); apop_update_vtable_add(gammaexpo, apop_gamma, apop_exponential); apop_update_vtable_add(gammapoisson, apop_gamma, apop_poisson); apop_update_vtable_add(normnorm, apop_normal, apop_normal); } apop_update_type conj = apop_update_vtable_get(prior, likelihood); if (conj) return conj(data, prior, likelihood); apop_mcmc_settings *s = apop_settings_get_group(prior, apop_mcmc);  Jerome Benoit committed Jul 10, 2015 209 210  apop_prep(NULL, prior); //probably a no-op apop_prep(data, likelihood); //probably a no-op  Jerome Benoit committed Sep 08, 2014 211 212 213  gsl_vector *pack = apop_data_pack(likelihood->parameters); int tsize = pack->size; gsl_vector_free(pack);  Jerome Benoit committed Aug 15, 2014 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239  Apop_stopif(prior->dsize != tsize, return apop_model_copy(&(apop_model){.error='d'}), 0, "Size of a draw from the prior does not match " "the size of the likelihood's parameters (%i != %i).%s", prior->dsize, tsize, (tsize > prior->dsize) ? " Perhaps use apop_model_fix_params to reduce the " "likelihood's parameter count?" : ""); if (prior->p || prior->log_likelihood){ apop_model *p = apop_model_copy(product); //pending revision, a memory leak: p->more = malloc(sizeof(apop_model*)*2); ((apop_model**)p->more)[0] = apop_model_copy(prior); ((apop_model**)p->more)[1] = apop_model_copy(likelihood); p->more_size = sizeof(apop_model*) * 2; p->parameters = apop_data_alloc(prior->dsize); p->data = data; if (s) apop_settings_copy_group(p, prior, "apop_mcmc"); apop_model *out = apop_model_metropolis(data, rng, p); return out; } Apop_stopif(!prior->draw, return NULL, 0, "prior does not have a .p, .log_likelihood, or .draw element. I am stumped. Returning NULL."); if (!s) s = Apop_model_add_group(prior, apop_mcmc);  Jerome Benoit committed Sep 08, 2014 240  gsl_vector *draw = gsl_vector_alloc(tsize);  Jerome Benoit committed Aug 15, 2014 241 242 243  apop_data *out = apop_data_alloc(s->periods, tsize); out->weights = gsl_vector_alloc(s->periods);  Jerome Benoit committed Sep 08, 2014 244 245  apop_draw(draw->data, rng, prior); //set starting point. apop_data_unpack(draw, likelihood->parameters);  Jerome Benoit committed Aug 15, 2014 246 247 248  for (int i=0; i< s->periods; i++){ newdraw:  Jerome Benoit committed Sep 08, 2014 249 250  apop_draw(draw->data, rng, prior); apop_data_unpack(draw, likelihood->parameters);  Jerome Benoit committed Aug 15, 2014 251 252 253 254 255 256 257 258 259 260 261 262 263 264  long double p = apop_p(data, likelihood); Apop_notify(3, "p=%Lg for parameters:\t", p); if (apop_opts.verbose >=3) apop_data_print(likelihood->parameters); Apop_stopif(gsl_isnan(p), goto newdraw, 1, "Trouble evaluating the " "likelihood function at vector beginning with %g. " "Throwing it out and trying again.\n" , likelihood->parameters->vector->data[0]); apop_data_pack(likelihood->parameters, Apop_rv(out, i)); gsl_vector_set(out->weights, i, p); } apop_model *outp = apop_estimate(out, apop_pmf);  Jerome Benoit committed Sep 08, 2014 265  gsl_vector_free(draw);  Jerome Benoit committed Aug 15, 2014 266 267  return outp; }