apop_linear_constraint.c 8.85 KB
Newer Older
1 2 3 4

/** \file apop_linear_constraint.c 
  \c apop_linear_constraint finds a point that meets a set of linear constraints. This takes a lot of machinery, so it gets its own file.

5
Copyright (c) 2007, 2009 by Ben Klemens.  Licensed under the GPLv2; see COPYING.  
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
*/
#include "apop_internal.h"

static double magnitude(gsl_vector *v){
    double out;
    gsl_blas_ddot(v, v, &out);
    return out;
}

static void find_nearest_point(gsl_vector *V, double k, gsl_vector *B, gsl_vector *out){
    /* Find X such that BX =K and there is an S such that X + SB=V. */
    double S=0; //S = (BV-K)/B'B.
    gsl_blas_ddot(B, V, &S);
    S   -= k;
assert(!gsl_isnan(S));
    S   /= magnitude(B);
assert(!gsl_isnan(S));
    gsl_vector_memcpy(out, B); //X = -SB +V
    gsl_vector_scale(out, -S);
    gsl_vector_add(out, V);
assert(!gsl_isnan(gsl_vector_get(out,0)));
}

static int binds(gsl_vector const *v, double k, gsl_vector const *b, double margin){
    double d;
    gsl_blas_ddot(v, b, &d);
    return d < k + margin;
}

static double trig_bit(gsl_vector *dimv, gsl_vector *otherv, double off_by){
    double theta, costheta, dot, out;
    gsl_blas_ddot(dimv, otherv, &dot);
    costheta = dot/(magnitude(dimv)*magnitude(otherv));
    theta = acos(costheta);
    out = off_by/gsl_pow_2(sin(theta)); 
    return out;
}

/* The hard part is when your candidate point does not satisfy other
   constraints, so you need to translate the point until it meets the new hypersurface.
   How far is that? Project beta onto the new surface, and find the
   distance between that projection and the original surface. Then
   translate beta toward the original surface by that amount. The
   projection of the translated beta onto the new surface now also touches the old
   surface.
   */
static void get_candiate(gsl_vector *beta, apop_data *constraint, int current, gsl_vector *candidate, double margin){
    double k, ck, off_by, s;
    gsl_vector *pseudobeta        = NULL;
    gsl_vector *pseudocandidate   = NULL;
    gsl_vector *pseudocandidate2  = NULL;
    gsl_vector *fix               = NULL;
    gsl_vector *cc = Apop_rv(constraint, current);
    ck = gsl_vector_get(constraint->vector, current);
    find_nearest_point(beta, ck, cc, candidate);
    for (size_t i=0; i< constraint->vector->size; i++){
        if (i!=current){
            gsl_vector *other = Apop_rv(constraint, i);
            k   =apop_data_get(constraint, i, -1);
            if (binds(candidate, k, other, margin)){
                if (!pseudobeta){
                    pseudobeta          = gsl_vector_alloc(beta->size);
                    gsl_vector_memcpy(pseudobeta, beta);
                    pseudocandidate     = gsl_vector_alloc(beta->size);
                    pseudocandidate2    = gsl_vector_alloc(beta->size);
                    fix                 = gsl_vector_alloc(beta->size);
                }
                find_nearest_point(pseudobeta, k, other, pseudocandidate);
                find_nearest_point(pseudocandidate, ck, cc, pseudocandidate2);
                off_by  = apop_vector_distance(pseudocandidate, pseudocandidate2);
                s       = trig_bit(cc, other, off_by);
                gsl_vector_memcpy(fix, cc);
                gsl_vector_scale(fix, magnitude(cc));
                gsl_vector_scale(fix, s);
                gsl_vector_add(pseudobeta, fix);
                find_nearest_point(pseudobeta, k, other, candidate);
                gsl_vector_memcpy(pseudobeta, candidate);
            } 
        }
    }
    if (fix){ 
        gsl_vector_free(fix); gsl_vector_free(pseudobeta);
        gsl_vector_free(pseudocandidate); gsl_vector_free(pseudocandidate2);
    }
}

/** This is designed to be called from within the constraint method of your \ref
apop_model. Just write the constraint vector+matrix and this will do the rest.
94
See \ref constr for detailed discussion. 
95 96 97 98 99 100 101 102 103 104 105 106
 
\param beta    The proposed vector about to be tested. No default, must not be \c NULL.

\param constraint  
A vector/matrix pair [v | m1 m2 ... mn] where each row is interpreted as a less-than inequality:
\f$v < m1x1+ m2x2 + ... + mnxn\f$.  For example, say your constraints are 
\f$3 < 2x + 4y - 7z\f$ and \f$y\f$ is positive, i.e. \f$0 < y\f$.
Allocate and fill the matrix representing these two constraints via:
\code
apop_data *constr = apop_data_falloc((2,2,3), 3,  2, 4, 7,
                                              0,  0, 1, 0);
\endcode
107
. Default: each elements is greater than zero. For three parameters this would be equivalent to setting
108 109 110 111 112 113 114 115
\code
apop_data *constr = apop_data_falloc((3,3,3), 0,  1, 0, 0,
                                              0,  0, 1, 0,
                                              0,  0, 0, 1);
\endcode

\param margin If zero, then this is a >= constraint, otherwise I will return a point this amount within the borders. You could try \c GSL_DBL_EPSILON, which is the smallest value a \c double can hold, or something like 1e-3. Default = 0.

116 117
\return The penalty: the distance between beta and the closest point that meets the constraints.
If the constraint is met, the penalty is zero.
118 119
If the constraint is not met, this \c beta is shifted by \c margin (Euclidean distance) to meet the constraints. 

120 121 122 123
  \li If your \ref apop_data has more structure than a vector, try \ref apop_data_pack to pack it
into a vector. This is what \ref apop_maximum_likelihood does.
  \li The function doesn't check for odd cases like coplanar constraints.
  \li This function uses the \ref designated syntax for inputs.
124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
*/
#ifdef APOP_NO_VARIADIC
long double apop_linear_constraint(gsl_vector *beta, apop_data * constraint, double margin){
#else
apop_varad_head(long double, apop_linear_constraint){
    static threadlocal apop_data *default_constraint;
    gsl_vector * apop_varad_var(beta, NULL);
    double apop_varad_var(margin, 0);
    apop_data * apop_varad_var(constraint, NULL);
    Apop_assert(beta, "The vector to be checked is NULL.");
    if (!constraint){
        if (default_constraint && beta->size != default_constraint->vector->size){
            apop_data_free(default_constraint);
            default_constraint = NULL;
        }
        if (!default_constraint){
            default_constraint = apop_data_alloc(0,beta->size, beta->size);
            default_constraint->vector = gsl_vector_calloc(beta->size);
            gsl_matrix_set_identity(default_constraint->matrix);
        }
        constraint = default_constraint;
    }
    return apop_linear_constraint_base(beta, constraint, margin);
}

 long double apop_linear_constraint_base(gsl_vector *beta, apop_data * constraint, double margin){
#endif
    static threadlocal gsl_vector *closest_pt = NULL;
    static threadlocal gsl_vector *candidate  = NULL;
    static threadlocal gsl_vector *fix        = NULL;
    int constraint_ct = constraint->matrix->size1;
    int bindlist[constraint_ct];
    int i, bound = 0;
    /* For added efficiency, keep a scratch vector or two on hand. */
    if (closest_pt==NULL || closest_pt->size != constraint->matrix->size2){
        closest_pt  = gsl_vector_calloc(beta->size);
        candidate   = gsl_vector_alloc(beta->size);
        fix         = gsl_vector_alloc(beta->size);
        closest_pt->data[0] = GSL_NEGINF;
    }
    /* Do any constraints bind?*/
    memset(bindlist, 0, sizeof(int)*constraint_ct);
    for (i=0; i< constraint_ct; i++){
        gsl_vector *c = Apop_rv(constraint, i);
        bound       +=
        bindlist[i] = binds(beta, apop_data_get(constraint, i, -1), c, margin);
    }
    if (!bound) return 0;   //All constraints met.
    gsl_vector *base_beta = apop_vector_copy(beta);
    /* With only one constraint, it's easy. */
    if (constraint->vector->size==1){
        gsl_vector *c = Apop_rv(constraint, 0);
        find_nearest_point(base_beta, constraint->vector->data[0], c, beta);
        goto add_margin;
    }
    /* Finally, multiple constraints, at least one binding.
       For each surface, pick a candidate point.
       Check whether the point meets the other constraints. 
            if not, translate to a new point that works.
            [Do this by maintaining a pseudopoint that translates by the
            necessary amount.]
        Once you have a candidate point, compare its distance to the
        current favorite; keep the best.
     */
    for (i=0; i< constraint_ct; i++)
        if (bindlist[i]){
            get_candiate(base_beta, constraint, i, candidate, margin);
            if(apop_vector_distance(base_beta, candidate) < apop_vector_distance(base_beta, closest_pt))
                gsl_vector_memcpy(closest_pt, candidate);
        }
    gsl_vector_memcpy(beta, closest_pt);
add_margin:
    for (i=0; i< constraint_ct; i++){
        if(bindlist[i]){
            gsl_vector_memcpy(fix, Apop_rv(constraint, i));
            gsl_vector_scale(fix, magnitude(fix));
            gsl_vector_scale(fix, margin);
            gsl_vector_add(beta, fix);
        }
    }
    long double out = apop_vector_distance(base_beta, beta);
    gsl_vector_free(base_beta);
    return out;
}