#include "tommath_private.h"
#ifdef BN_MP_EXPTMOD_FAST_C
/* LibTomMath, multiple-precision integer library -- Tom St Denis
 *
 * LibTomMath is a library that provides multiple-precision
 * integer arithmetic as well as number theoretic functionality.
 *
 * The library was designed directly after the MPI library by
 * Michael Fromberger but has been written from scratch with
 * additional optimizations in place.
 *
 * SPDX-License-Identifier: Unlicense
 */

/* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
 *
 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
 * The value of k changes based on the size of the exponent.
 *
 * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
 */

/* Number of slots in the precomputed window table M[] used by
 * mp_exptmod_fast.  The table is indexed up to (1 << winsize) - 1, so the
 * size must cover the widest window the function may pick: 5 bits when
 * MP_LOW_MEM is defined (the function clamps winsize to 5 in that
 * configuration), 8 bits otherwise.
 */
#ifdef MP_LOW_MEM
#   define TAB_SIZE 32
#else
#   define TAB_SIZE 256
#endif

/* Computes Y = G**X mod P with a left-to-right sliding window.
 *
 * G       base
 * X       exponent; its bit length selects the window width
 * P       modulus
 * Y       receives the result; it is only written (via mp_exch) once every
 *         step has succeeded
 * redmode selects the reduction routine:
 *            0       Montgomery reduction
 *            1       diminished radix reduction (mp_dr_reduce)
 *            other   reduction for moduli of the form 2**k - b (mp_reduce_2k)
 *         No check is made here that P actually suits the chosen mode; the
 *         caller is expected to have selected it accordingly.
 *
 * Returns MP_OKAY on success.  Otherwise returns the error code of the
 * helper that failed, or MP_VAL when the routines required for the
 * requested redmode were not compiled in.  All temporaries are released on
 * every path.
 */
int mp_exptmod_fast(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y, int redmode)
{
   mp_int  M[TAB_SIZE], res;
   mp_digit buf, mp;
   int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;

   /* use a pointer to the reduction algorithm.  This allows us to use
    * one of many reduction algorithms without modding the guts of
    * the code with if statements everywhere.
    */
   int (*redux)(mp_int *x, const mp_int *n, mp_digit rho);

   /* find window size: wider windows for longer exponents */
   x = mp_count_bits(X);
   if (x <= 7) {
      winsize = 2;
   } else if (x <= 36) {
      winsize = 3;
   } else if (x <= 140) {
      winsize = 4;
   } else if (x <= 450) {
      winsize = 5;
   } else if (x <= 1303) {
      winsize = 6;
   } else if (x <= 3529) {
      winsize = 7;
   } else {
      winsize = 8;
   }

#ifdef MP_LOW_MEM
   /* keep the table within the smaller TAB_SIZE used in this configuration */
   if (winsize > 5) {
      winsize = 5;
   }
#endif

   /* init M array */
   /* init first cell */
   if ((err = mp_init_size(&M[1], P->alloc)) != MP_OKAY) {
      return err;
   }

   /* now init the second half of the array */
   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
      if ((err = mp_init_size(&M[x], P->alloc)) != MP_OKAY) {
         /* undo the cells initialised so far, then M[1] */
         for (y = 1<<(winsize-1); y < x; y++) {
            mp_clear(&M[y]);
         }
         mp_clear(&M[1]);
         return err;
      }
   }

   /* determine and setup reduction code */
   if (redmode == 0) {
#ifdef BN_MP_MONTGOMERY_SETUP_C
      /* now setup montgomery  */
      if ((err = mp_montgomery_setup(P, &mp)) != MP_OKAY) {
         goto LBL_M;
      }
#else
      err = MP_VAL;
      goto LBL_M;
#endif

      /* automatically pick the comba one if available (saves quite a few calls/ifs) */
#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
      if ((((P->used * 2) + 1) < (int)MP_WARRAY) &&
          (P->used < (1 << ((CHAR_BIT * sizeof(mp_word)) - (2 * DIGIT_BIT))))) {
         redux = fast_mp_montgomery_reduce;
      } else
#endif
      {
#ifdef BN_MP_MONTGOMERY_REDUCE_C
         /* use slower baseline Montgomery method */
         redux = mp_montgomery_reduce;
#else
         err = MP_VAL;
         goto LBL_M;
#endif
      }
   } else if (redmode == 1) {
#if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
      /* setup DR reduction for moduli of the form B**k - b */
      mp_dr_setup(P, &mp);
      redux = mp_dr_reduce;
#else
      err = MP_VAL;
      goto LBL_M;
#endif
   } else {
#if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
      /* setup DR reduction for moduli of the form 2**k - b */
      if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
         goto LBL_M;
      }
      redux = mp_reduce_2k;
#else
      err = MP_VAL;
      goto LBL_M;
#endif
   }

   /* setup result */
   if ((err = mp_init_size(&res, P->alloc)) != MP_OKAY) {
      goto LBL_M;
   }

   /* create M table
    *
    * M[x] holds the x-th power of the base, reduced with redux (for
    * redmode == 0 the base is first multiplied by R mod P, see below).
    *
    * Only M[1] and the upper half M[1 << (winsize-1)] .. M[(1 << winsize) - 1]
    * are initialised and computed; the other cells are never touched.
    */

   if (redmode == 0) {
#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
      /* now we need R mod m */
      if ((err = mp_montgomery_calc_normalization(&res, P)) != MP_OKAY) {
         goto LBL_RES;
      }

      /* now set M[1] to G * R mod m */
      if ((err = mp_mulmod(G, &res, P, &M[1])) != MP_OKAY) {
         goto LBL_RES;
      }
#else
      err = MP_VAL;
      goto LBL_RES;
#endif
   } else {
      /* plain residues: the accumulator starts at 1, M[1] = G mod P */
      mp_set(&res, 1uL);
      if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
         goto LBL_RES;
      }
   }

   /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
   if ((err = mp_copy(&M[1], &M[(size_t)1 << (winsize - 1)])) != MP_OKAY) {
      goto LBL_RES;
   }

   for (x = 0; x < (winsize - 1); x++) {
      if ((err = mp_sqr(&M[(size_t)1 << (winsize - 1)], &M[(size_t)1 << (winsize - 1)])) != MP_OKAY) {
         goto LBL_RES;
      }
      if ((err = redux(&M[(size_t)1 << (winsize - 1)], P, mp)) != MP_OKAY) {
         goto LBL_RES;
      }
   }

   /* create upper table: each cell is the previous one times M[1] */
   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
      if ((err = mp_mul(&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
         goto LBL_RES;
      }
      if ((err = redux(&M[x], P, mp)) != MP_OKAY) {
         goto LBL_RES;
      }
   }

   /* set initial mode and bit cnt
    *
    * mode 0: still skipping the leading zero bits of the exponent
    * mode 1: between windows, a zero bit costs a single squaring
    * mode 2: collecting bits into the current window
    */
   mode   = 0;
   bitcnt = 1;
   buf    = 0;
   digidx = X->used - 1;
   bitcpy = 0;
   bitbuf = 0;

   for (;;) {
      /* grab next digit as required */
      if (--bitcnt == 0) {
         /* if digidx == -1 we are out of digits so break */
         if (digidx == -1) {
            break;
         }
         /* read next digit and reset bitcnt */
         buf    = X->dp[digidx--];
         bitcnt = (int)DIGIT_BIT;
      }

      /* grab the next msb from the exponent */
      y     = (mp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
      buf <<= (mp_digit)1;

      /* if the bit is zero and mode == 0 then we ignore it
       * These represent the leading zero bits before the first 1 bit
       * in the exponent.  Technically this opt is not required but it
       * does lower the # of trivial squaring/reductions used
       */
      if ((mode == 0) && (y == 0)) {
         continue;
      }

      /* if the bit is zero and mode == 1 then we square */
      if ((mode == 1) && (y == 0)) {
         if ((err = mp_sqr(&res, &res)) != MP_OKAY) {
            goto LBL_RES;
         }
         if ((err = redux(&res, P, mp)) != MP_OKAY) {
            goto LBL_RES;
         }
         continue;
      }

      /* else we add it to the window (bits are stored from the top of the
       * winsize-bit field downwards)
       */
      bitbuf |= (y << (winsize - ++bitcpy));
      mode    = 2;

      if (bitcpy == winsize) {
         /* ok window is filled so square as required and multiply  */
         /* square first */
         for (x = 0; x < winsize; x++) {
            if ((err = mp_sqr(&res, &res)) != MP_OKAY) {
               goto LBL_RES;
            }
            if ((err = redux(&res, P, mp)) != MP_OKAY) {
               goto LBL_RES;
            }
         }

         /* then multiply */
         if ((err = mp_mul(&res, &M[bitbuf], &res)) != MP_OKAY) {
            goto LBL_RES;
         }
         if ((err = redux(&res, P, mp)) != MP_OKAY) {
            goto LBL_RES;
         }

         /* empty window and reset */
         bitcpy = 0;
         bitbuf = 0;
         mode   = 1;
      }
   }

   /* if bits remain then square/multiply: the partially filled window is
    * consumed one bit at a time using only M[1]
    */
   if ((mode == 2) && (bitcpy > 0)) {
      /* square then multiply if the bit is set */
      for (x = 0; x < bitcpy; x++) {
         if ((err = mp_sqr(&res, &res)) != MP_OKAY) {
            goto LBL_RES;
         }
         if ((err = redux(&res, P, mp)) != MP_OKAY) {
            goto LBL_RES;
         }

         /* get next bit of the window: shifting moves it into bit position
          * winsize, where it is tested
          */
         bitbuf <<= 1;
         if ((bitbuf & (1 << winsize)) != 0) {
            /* then multiply */
            if ((err = mp_mul(&res, &M[1], &res)) != MP_OKAY) {
               goto LBL_RES;
            }
            if ((err = redux(&res, P, mp)) != MP_OKAY) {
               goto LBL_RES;
            }
         }
      }
   }

   if (redmode == 0) {
      /* fixup result if Montgomery reduction is used
       * recall that any value in a Montgomery system is
       * actually multiplied by R mod n.  So we have
       * to reduce one more time to cancel out the factor
       * of R.
       */
      if ((err = redux(&res, P, mp)) != MP_OKAY) {
         goto LBL_RES;
      }
   }

   /* swap res with Y */
   mp_exch(&res, Y);
   err = MP_OKAY;
   /* LBL_RES releases the accumulator and falls through to LBL_M, which
    * releases the table cells initialised at the top of the function
    */
LBL_RES:
   mp_clear(&res);
LBL_M:
   mp_clear(&M[1]);
   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
      mp_clear(&M[x]);
   }
   return err;
}
#endif


/* ref:         HEAD -> master, tag: v1.1.0 */
/* git commit:  08549ad6bc8b0cede0b357a9c341c5c6473a9c55 */
/* commit time: 2019-01-28 20:32:32 +0100 */