/* Copyright (C) 2001-2019 Artifex Software, Inc.
   All Rights Reserved.

   This software is provided AS-IS with no warranty, either express or
   implied.

   This software is distributed under license and may not be copied,
   modified or distributed except as expressly authorized under the terms
   of the license contained in the file LICENSE in this distribution.

   Refer to licensing information at http://www.artifex.com or contact
   Artifex Software, Inc.,  1305 Grant Avenue - Suite 200, Novato,
   CA 94945, U.S.A., +1(415)492-9861, for further information.
*/

/* Number scanner for Ghostscript interpreter */
#include "math_.h"
#include "ghost.h"
#include "ierrors.h"
#include "scommon.h"
#include "iscan.h"
#include "iscannum.h"		/* defines interface */
#include "scanchar.h"
#include "store.h"

/*
 * Warning: this file has a "spaghetti" control structure.  But since this
 * code accounts for over 10% of the execution time of some PostScript
 * files, this is one of the few places we feel this is justified.
 */

/*
 * Scan a number.  If the number consumes the entire string, return 0;
 * if not, set *psp to the first character beyond the number and return 1.
 */
int
scan_number(const byte * str, const byte * end, int sign,
39
            ref * pref, const byte ** psp, int scanner_options)
40 41 42 43 44 45 46 47 48 49 50
{
    const byte *sp = str;
#define GET_NEXT(cvar, sp, end_action)\
  if (sp >= end) { end_action; } else cvar = *sp++

    /*
     * Powers of 10 up to 6 can be represented accurately as
     * a single-precision float.
     */
#define NUM_POWERS_10 6
    static const float powers_10[NUM_POWERS_10 + 1] = {
51
        1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6
52 53
    };
    static const double neg_powers_10[NUM_POWERS_10 + 1] = {
54
        1e0, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6
55 56
    };

57
    ps_int ival;
58 59 60 61
    double dval;
    int exp10;
    int code = 0;
    int c, d;
62 63
    ps_uint max_scan; /* max signed or unsigned int */
    ps_int max_ps_int_scan, min_ps_int_scan;
64 65 66 67
    const byte *const decoder = scan_char_decoder;
#define IS_DIGIT(d, c)\
  ((d = decoder[c]) < 10)
#define WOULD_OVERFLOW(val, d, maxv)\
68
  (val >= maxv / 10 && (val > maxv / 10 || d > (int64_t)(maxv % 10)))
69

70
    GET_NEXT(c, sp, return_error(gs_error_syntaxerror));
71
    if (!IS_DIGIT(d, c)) {
72
        if (c != '.')
73
            return_error(gs_error_syntaxerror);
74
        /* Might be a number starting with '.'. */
75
        GET_NEXT(c, sp, return_error(gs_error_syntaxerror));
76
        if (!IS_DIGIT(d, c))
77
            return_error(gs_error_syntaxerror);
78 79
        ival = 0;
        goto i2r;
80 81 82 83 84 85 86
    }
    /* Accumulate an integer in ival. */
    /* Do up to 4 digits without a loop, */
    /* since we know this can't overflow and since */
    /* most numbers have 4 (integer) digits or fewer. */
    ival = d;
    if (end - sp >= 3) {	/* just check once */
87 88 89 90 91 92 93 94 95 96 97 98 99 100
        if (!IS_DIGIT(d, (c = *sp))) {
            sp++;
            goto ind;
        }
        ival = ival * 10 + d;
        if (!IS_DIGIT(d, (c = sp[1]))) {
            sp += 2;
            goto ind;
        }
        ival = ival * 10 + d;
        sp += 3;
        if (!IS_DIGIT(d, (c = sp[-1])))
            goto ind;
        ival = ival * 10 + d;
101
    }
102 103 104 105 106 107

    max_ps_int_scan = scanner_options & SCAN_CPSI_MODE ? MAX_PS_INT32 : MAX_PS_INT;
    min_ps_int_scan = scanner_options & SCAN_CPSI_MODE ? MIN_PS_INT32 : MIN_PS_INT;

    max_scan = scanner_options & SCAN_PDF_UNSIGNED && sign >= 0 ? ~((ps_int)0) : max_ps_int_scan;

108
    for (;; ival = ival * 10 + d) {
109 110 111
        GET_NEXT(c, sp, goto iret);
        if (!IS_DIGIT(d, c))
            break;
112 113
        if (WOULD_OVERFLOW(((ps_uint)ival), d, max_scan)) {
            if (ival == max_ps_int_scan / 10 && d == (max_ps_int_scan % 10) + 1 && sign < 0) {
114
                GET_NEXT(c, sp, c = EOFC);
115
                dval = -(double)min_ps_int_scan;
116 117 118 119
                if (c == 'e' || c == 'E') {
                    exp10 = 0;
                    goto fs;
                } else if (c == '.') {
120
                    GET_NEXT(c, sp, c = EOFC);
121 122
                    exp10 = 0;
                    goto fd;
123
                } else if (!IS_DIGIT(d, c)) {
124
                    ival = min_ps_int_scan;
125 126 127
                    break;
                }
            } else
128
                dval = (double)ival;
129
            goto l2d;
130
        }
131 132 133
    }
  ind:				/* We saw a non-digit while accumulating an integer in ival. */
    switch (c) {
134 135 136 137 138 139
        case '.':
            GET_NEXT(c, sp, c = EOFC);
            goto i2r;
        default:
            *psp = sp;
            code = 1;
140 141
            break;
        case EOFC:
142 143 144 145 146
            break;
        case 'e':
        case 'E':
            if (sign < 0)
                ival = -ival;
147
            dval = (double)ival;
148 149 150 151 152
            exp10 = 0;
            goto fe;
        case '#':
            {
                const int radix = ival;
153
                ps_int uval = 0, imax;
154

155
                if (sign || radix < min_radix || radix > max_radix)
156
                    return_error(gs_error_syntaxerror);
157 158 159
                /* Avoid multiplies for power-of-2 radix. */
                if (!(radix & (radix - 1))) {
                    int shift;
160

161 162
                    switch (radix) {
                        case 2:
163
                            shift = 1, imax = MAX_PS_UINT >> 1;
164 165
                            break;
                        case 4:
166
                            shift = 2, imax = MAX_PS_UINT >> 2;
167 168
                            break;
                        case 8:
169
                            shift = 3, imax = MAX_PS_UINT >> 3;
170 171
                            break;
                        case 16:
172
                            shift = 4, imax = MAX_PS_UINT >> 4;
173 174
                            break;
                        case 32:
175
                            shift = 5, imax = MAX_PS_UINT >> 5;
176 177
                            break;
                        default:	/* can't happen */
178
                            return_error(gs_error_rangecheck);
179 180 181 182 183 184 185 186 187 188
                    }
                    for (;; uval = (uval << shift) + d) {
                        GET_NEXT(c, sp, break);
                        d = decoder[c];
                        if (d >= radix) {
                            *psp = sp;
                            code = 1;
                            break;
                        }
                        if (uval > imax)
189
                            return_error(gs_error_limitcheck);
190 191
                    }
                } else {
192
                    ps_int irem = MAX_PS_UINT % radix;
193

194
                    imax = MAX_PS_UINT / radix;
195 196 197 198 199 200 201 202 203 204 205
                    for (;; uval = uval * radix + d) {
                        GET_NEXT(c, sp, break);
                        d = decoder[c];
                        if (d >= radix) {
                            *psp = sp;
                            code = 1;
                            break;
                        }
                        if (uval >= imax &&
                            (uval > imax || d > irem)
                            )
206
                            return_error(gs_error_limitcheck);
207 208
                    }
                }
209 210 211 212 213 214 215 216
                if (scanner_options & SCAN_CPSI_MODE) {
                    ps_uint32 int1 = 0;
                    int1 |= (uval & 0xffffffff);
                    make_int(pref, (ps_int)((ps_int32)int1));
                }
                else
                    make_int(pref, uval);

217 218
                return code;
            }
219 220
    }
iret:
221 222 223 224 225 226
    if (scanner_options & SCAN_CPSI_MODE) {
        make_int(pref, (sign < 0 ? (ps_int32)-ival : (ps_int32)ival));
    }
    else {
        make_int(pref, (sign < 0 ? (ps_int)-ival : (ps_int)ival));
    }
227 228 229 230 231 232
    return code;

    /* Accumulate a double in dval. */
l2d:
    exp10 = 0;
    for (;;) {
233 234 235 236
        dval = dval * 10 + d;
        GET_NEXT(c, sp, c = EOFC);
        if (!IS_DIGIT(d, c))
            break;
237 238
    }
    switch (c) {
239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255
        case '.':
            GET_NEXT(c, sp, c = EOFC);
            exp10 = 0;
            goto fd;
        default:
            *psp = sp;
            code = 1;
            /* falls through */
        case EOFC:
            if (sign < 0)
                dval = -dval;
            goto rret;
        case 'e':
        case 'E':
            exp10 = 0;
            goto fs;
        case '#':
256
            return_error(gs_error_syntaxerror);
257 258 259 260 261 262
    }

    /* We saw a '.' while accumulating an integer in ival. */
i2r:
    exp10 = 0;
    while (IS_DIGIT(d, c) || c == '-') {
263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278
        /*
         * PostScript gives an error on numbers with a '-' following a '.'
         * Adobe Acrobat Reader (PDF) apparently doesn't treat this as an
         * error. Experiments show that the numbers following the '-' are
         * ignored, so we swallow the fractional part. SCAN_PDF_INV_NUM
         *  enables this compatibility kloodge.
         */
        if (c == '-') {
            if ((SCAN_PDF_INV_NUM & scanner_options) == 0)
                break;
            do {
                GET_NEXT(c, sp, c = EOFC);
            } while (IS_DIGIT(d, c));
            break;
        }
        if (WOULD_OVERFLOW(ival, d, max_int)) {
279
            dval = (double)ival;
280 281 282 283 284
            goto fd;
        }
        ival = ival * 10 + d;
        exp10--;
        GET_NEXT(c, sp, c = EOFC);
285 286
    }
    if (sign < 0)
287
        ival = -ival;
288 289
    /* Take a shortcut for the common case */
    if (!(c == 'e' || c == 'E' || exp10 < -NUM_POWERS_10)) {	/* Check for trailing garbage */
290 291 292 293
        if (c != EOFC)
            *psp = sp, code = 1;
        make_real(pref, ival * neg_powers_10[-exp10]);
        return code;
294
    }
295
    dval = (double)ival;
296 297 298 299 300
    goto fe;

    /* Now we are accumulating a double in dval. */
fd:
    while (IS_DIGIT(d, c)) {
301 302 303
        dval = dval * 10 + d;
        exp10--;
        GET_NEXT(c, sp, c = EOFC);
304 305 306
    }
fs:
    if (sign < 0)
307
        dval = -dval;
308 309 310
fe:
    /* Now dval contains the value, negated if necessary. */
    switch (c) {
311 312 313 314 315
        case 'e':
        case 'E':
            {			/* Check for a following exponent. */
                int esign = 0;
                int iexp;
316

317
                GET_NEXT(c, sp, return_error(gs_error_syntaxerror));
318 319 320
                switch (c) {
                    case '-':
                        esign = 1;
321
                        /* fall through */
322
                    case '+':
323
                        GET_NEXT(c, sp, return_error(gs_error_syntaxerror));
324 325 326
                }
                /* Scan the exponent.  We limit it arbitrarily to 999. */
                if (!IS_DIGIT(d, c))
327
                    return_error(gs_error_syntaxerror);
328 329 330 331 332 333 334 335 336
                iexp = d;
                for (;; iexp = iexp * 10 + d) {
                    GET_NEXT(c, sp, break);
                    if (!IS_DIGIT(d, c)) {
                        *psp = sp;
                        code = 1;
                        break;
                    }
                    if (iexp > 99)
337
                        return_error(gs_error_limitcheck);
338 339 340 341 342 343 344 345 346 347 348 349
                }
                if (esign)
                    exp10 -= iexp;
                else
                    exp10 += iexp;
                break;
            }
        default:
            *psp = sp;
            code = 1;
        case EOFC:
            ;
350 351 352
    }
    /* Compute dval * 10^exp10. */
    if (exp10 > 0) {
353 354 355 356 357
        while (exp10 > NUM_POWERS_10)
            dval *= powers_10[NUM_POWERS_10],
                exp10 -= NUM_POWERS_10;
        if (exp10 > 0)
            dval *= powers_10[exp10];
358
    } else if (exp10 < 0) {
359 360 361 362 363
        while (exp10 < -NUM_POWERS_10)
            dval /= powers_10[NUM_POWERS_10],
                exp10 += NUM_POWERS_10;
        if (exp10 < 0)
            dval /= powers_10[-exp10];
364 365 366 367 368 369 370
    }
    /*
     * Check for an out-of-range result.  Currently we don't check for
     * absurdly large numbers of digits in the accumulation loops,
     * but we should.
     */
    if (dval >= 0) {
371
        if (dval > MAX_FLOAT)
372
            return_error(gs_error_limitcheck);
373
    } else {
374
        if (dval < -MAX_FLOAT)
375
            return_error(gs_error_limitcheck);
376 377 378 379 380
    }
rret:
    make_real(pref, dval);
    return code;
}