doscan.c
上传用户:qaz666999
上传日期:2022-08-06
资源大小:2570k
文件大小:20k
- /* __gmp_doscan -- formatted input internals.
- THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
- CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
- FUTURE GNU MP RELEASES.
- Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
- This file is part of the GNU MP Library.
- The GNU MP Library is free software; you can redistribute it and/or modify
- it under the terms of the GNU Lesser General Public License as published by
- the Free Software Foundation; either version 3 of the License, or (at your
- option) any later version.
- The GNU MP Library is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
- License for more details.
- You should have received a copy of the GNU Lesser General Public License
- along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
- #define _GNU_SOURCE /* for DECIMAL_POINT in langinfo.h */
- #include "config.h"
- #if HAVE_STDARG
- #include <stdarg.h>
- #else
- #include <varargs.h>
- #endif
- #include <ctype.h>
- #include <stddef.h> /* for ptrdiff_t */
- #include <stdio.h>
- #include <stdlib.h> /* for strtol */
- #include <string.h>
- #if HAVE_LANGINFO_H
- #include <langinfo.h> /* for nl_langinfo */
- #endif
- #if HAVE_LOCALE_H
- #include <locale.h> /* for localeconv */
- #endif
- #if HAVE_INTTYPES_H
- # include <inttypes.h> /* for intmax_t */
- #else
- # if HAVE_STDINT_H
- # include <stdint.h>
- # endif
- #endif
- #if HAVE_SYS_TYPES_H
- #include <sys/types.h> /* for quad_t */
- #endif
- #include "gmp.h"
- #include "gmp-impl.h"
- /* Change this to "#define TRACE(x) x" for some traces. */
- #define TRACE(x)
- /* General:
- It's necessary to parse up the format string to recognise the GMP
- extra types F, Q and Z. Other types and conversions are passed
- across to the standard sscanf or fscanf via funs->scan, for ease of
- implementation. This is essential in the case of something like glibc
- %p where the pointer format isn't actually documented.
- Because funs->scan doesn't get the whole input it can't put the right
- values in for %n, so that's handled in __gmp_doscan. Neither sscanf
- nor fscanf directly indicate how many characters were read, so an
- extra %n is appended to each run for that. For fscanf this merely
- supports our %n output, but for sscanf it lets funs->step move us
- along the input string.
- Whitespace and literal matches in the format string, including %%,
- are handled directly within __gmp_doscan. This is reasonably
- efficient, and avoids some suspicious behaviour observed in various
- system libc's. GLIBC 2.2.4 for instance returns 0 on
- sscanf(" ", " x")
- or
- sscanf(" ", " x%d",&n)
- whereas we think they should return EOF, since end-of-string is
- reached when a match of "x" is required.
- For standard % conversions, funs->scan is called once for each
- conversion. If we had vfscanf and vsscanf and could rely on their
- fixed text matching behaviour then we could call them with multiple
- consecutive standard conversions. But plain fscanf and sscanf work
- fine, and parsing one field at a time shouldn't be too much of a
- slowdown.
- gmpscan:
- gmpscan reads a gmp type. It's only used from one place, but is a
- separate subroutine to avoid a big chunk of complicated code in the
- middle of __gmp_doscan. Within gmpscan a couple of loopbacks make it
- possible to share code for parsing integers, rationals and floats.
- In gmpscan normally one char of lookahead is maintained, but when width
- is reached that stops, on the principle that an fgetc/ungetc of a char
- past where we're told to stop would be undesirable. "chars" is how many
- characters have been read so far, including the current c. When
- chars==width and another character is desired then a jump is done to the
- "convert" stage. c is invalid and mustn't be unget'ed in this case;
- chars is set to width+1 to indicate that.
- gmpscan normally returns the number of characters read. -1 means an
- invalid field, -2 means EOF reached before any matching characters
- were read.
- For hex floats, the mantissa part is passed to mpf_set_str, then the
- exponent is applied with mpf_mul_exp or mpf_div_2exp. This is easier
- than teaching mpf_set_str about an exponent factor (ie. 2) differing
- from the mantissa radix point factor (ie. 16). mpf_mul_exp and
- mpf_div_2exp will preserve the application requested precision, so
- nothing in that respect is lost by making this a two-step process.
- Matching and errors:
- C99 7.19.6.2 paras 9 and 10 say an input item is read as the longest
- string which is a match for the appropriate type, or a prefix of a
- match. With that done, if it's only a prefix then the result is a
- matching failure, ie. invalid input.
- This rule seems fairly clear, but doesn't seem to be universally
- applied in system C libraries. Even GLIBC doesn't seem to get it
- right, insofar as it seems to accept some apparently invalid forms.
- Eg. glibc 2.3.1 accepts "0x" for a "%i", where a reading of the
- standard would suggest a non-empty sequence of digits should be
- required after an "0x".
- A footnote to 7.19.6.2 para 17 notes how this input item reading can
- mean inputs acceptable to strtol are not acceptable to fscanf. We
- think this confirms our reading of "0x" as invalid.
- Clearly gmp_sscanf could backtrack to a longest input which was a
- valid match for a given item, but this is not done, since C99 says
- sscanf is identical to fscanf, so we make gmp_sscanf identical to
- gmp_fscanf.
- Types:
- C99 says "ll" is for long long, and "L" is for long double floats.
- Unfortunately in GMP 4.1.1 we documented the two as equivalent. This
- doesn't affect us directly, since both are passed through to plain
- scanf. It seems wisest not to try to enforce the C99 rule. This is
- consistent with what we said before, though whether it actually
- worked was always up to the C library.
- Alternatives:
- Consideration was given to using separate code for gmp_fscanf and
- gmp_sscanf. The sscanf case could zip across a string doing literal
- matches or recognising digits in gmpscan, rather than making a
- function call fun->get per character. The fscanf could use getc
- rather than fgetc too, which might help those systems where getc is a
- macro or otherwise inlined. But none of this scanning and converting
- will be particularly fast, so the two are done together to keep it a
- little simpler for now.
- Various multibyte string issues are not addressed, for a start C99
- scanf says the format string is multibyte. Since we pass %c, %s and
- %[ to the system scanf, they might do multibyte reads already, but
- it's another matter whether or not that can be used, since our digit
- and whitespace parsing is only unibyte. The plan is to quietly
- ignore multibyte locales for now. This is not as bad as it sounds,
- since GMP is presumably used mostly on numbers, which can be
- perfectly adequately treated in plain ASCII.
- */
- struct gmp_doscan_params_t {
- int base;
- int ignore;
- char type;
- int width;
- };
- #define GET(c)
- do {
- ASSERT (chars <= width);
- chars++;
- if (chars > width)
- goto convert;
- (c) = (*funs->get) (data);
- } while (0)
- /* store into "s", extending if necessary */
- #define STORE(c)
- do {
- ASSERT (s_upto <= s_alloc);
- if (s_upto >= s_alloc)
- {
- size_t s_alloc_new = s_alloc + S_ALLOC_STEP;
- s = __GMP_REALLOCATE_FUNC_TYPE (s, s_alloc, s_alloc_new, char);
- s_alloc = s_alloc_new;
- }
- s[s_upto++] = c;
- } while (0)
- #define S_ALLOC_STEP 512
- static int
- gmpscan (const struct gmp_doscan_funs_t *funs, void *data,
- const struct gmp_doscan_params_t *p, void *dst)
- {
- int chars, c, base, first, width, seen_point, seen_digit, hexfloat;
- size_t s_upto, s_alloc, hexexp;
- char *s;
- int invalid = 0;
- TRACE (printf ("gmpscann"));
- ASSERT (p->type == 'F' || p->type == 'Q' || p->type == 'Z');
- c = (*funs->get) (data);
- if (c == EOF)
- return -2;
- chars = 1;
- first = 1;
- seen_point = 0;
- width = (p->width == 0 ? INT_MAX-1 : p->width);
- base = p->base;
- s_alloc = S_ALLOC_STEP;
- s = __GMP_ALLOCATE_FUNC_TYPE (s_alloc, char);
- s_upto = 0;
- hexfloat = 0;
- hexexp = 0;
- another:
- seen_digit = 0;
- if (c == '-')
- {
- STORE (c);
- goto get_for_sign;
- }
- else if (c == '+')
- {
- /* don't store '+', it's not accepted by mpz_set_str etc */
- get_for_sign:
- GET (c);
- }
- if (base == 0)
- {
- base = 10; /* decimal if no base indicator */
- if (c == '0')
- {
- seen_digit = 1; /* 0 alone is a valid number */
- if (p->type != 'F')
- base = 8; /* leading 0 is octal, for non-floats */
- STORE (c);
- GET (c);
- if (c == 'x' || c == 'X')
- {
- base = 16;
- seen_digit = 0; /* must have digits after an 0x */
- if (p->type == 'F') /* don't pass 'x' to mpf_set_str_point */
- hexfloat = 1;
- else
- STORE (c);
- GET (c);
- }
- }
- }
- digits:
- for (;;)
- {
- if (base == 16)
- {
- if (! isxdigit (c))
- break;
- }
- else
- {
- if (! isdigit (c))
- break;
- if (base == 8 && (c == '8' || c == '9'))
- break;
- }
- seen_digit = 1;
- STORE (c);
- GET (c);
- }
- if (first)
- {
- /* decimal point */
- if (p->type == 'F' && ! seen_point)
- {
- /* For a multi-character decimal point, if the first character is
- present then all of it must be, otherwise the input is
- considered invalid. */
- const char *point = GMP_DECIMAL_POINT;
- int pc = (unsigned char) *point++;
- if (c == pc)
- {
- for (;;)
- {
- STORE (c);
- GET (c);
- pc = (unsigned char) *point++;
- if (pc == ' ')
- break;
- if (c != pc)
- goto set_invalid;
- }
- seen_point = 1;
- goto digits;
- }
- }
- /* exponent */
- if (p->type == 'F')
- {
- if (hexfloat && (c == 'p' || c == 'P'))
- {
- hexexp = s_upto; /* exponent location */
- base = 10; /* exponent in decimal */
- goto exponent;
- }
- else if (! hexfloat && (c == 'e' || c == 'E'))
- {
- exponent:
- /* must have at least one digit in the mantissa, just an exponent
- is not good enough */
- if (! seen_digit)
- goto set_invalid;
- do_second:
- first = 0;
- STORE (c);
- GET (c);
- goto another;
- }
- }
- /* denominator */
- if (p->type == 'Q' && c == '/')
- {
- /* must have at least one digit in the numerator */
- if (! seen_digit)
- goto set_invalid;
- /* now look for at least one digit in the denominator */
- seen_digit = 0;
- /* allow the base to be redetermined for "%i" */
- base = p->base;
- goto do_second;
- }
- }
- convert:
- if (! seen_digit)
- {
- set_invalid:
- invalid = 1;
- goto done;
- }
- if (! p->ignore)
- {
- STORE (' ');
- TRACE (printf (" convert "%s"n", s));
- /* We ought to have parsed out a valid string above, so just test
- mpz_set_str etc with an ASSERT. */
- switch (p->type) {
- case 'F':
- {
- mpf_ptr f = (mpf_ptr) dst;
- if (hexexp != 0)
- s[hexexp] = ' ';
- ASSERT_NOCARRY (mpf_set_str (f, s, hexfloat ? 16 : 10));
- if (hexexp != 0)
- {
- char *dummy;
- long exp;
- exp = strtol (s + hexexp + 1, &dummy, 10);
- if (exp >= 0)
- mpf_mul_2exp (f, f, (unsigned long) exp);
- else
- mpf_div_2exp (f, f, - (unsigned long) exp);
- }
- }
- break;
- case 'Q':
- ASSERT_NOCARRY (mpq_set_str ((mpq_ptr) dst, s, p->base));
- break;
- case 'Z':
- ASSERT_NOCARRY (mpz_set_str ((mpz_ptr) dst, s, p->base));
- break;
- default:
- ASSERT (0);
- /*FALLTHRU*/
- break;
- }
- }
- done:
- ASSERT (chars <= width+1);
- if (chars != width+1)
- {
- (*funs->unget) (c, data);
- TRACE (printf (" ungetc %d, to give %d charsn", c, chars-1));
- }
- chars--;
- (*__gmp_free_func) (s, s_alloc);
- if (invalid)
- {
- TRACE (printf (" invalidn"));
- return -1;
- }
- TRACE (printf (" return %d chars (cf width %d)n", chars, width));
- return chars;
- }
- /* Read and discard whitespace, if any. Return number of chars skipped.
- Whitespace skipping never provokes the EOF return from __gmp_doscan, so
- it's not necessary to watch for EOF from funs->get, */
- static int
- skip_white (const struct gmp_doscan_funs_t *funs, void *data)
- {
- int c;
- int ret = 0;
- do
- {
- c = (funs->get) (data);
- ret++;
- }
- while (isspace (c));
- (funs->unget) (c, data);
- ret--;
- TRACE (printf (" skip white %dn", ret));
- return ret;
- }
- int
- __gmp_doscan (const struct gmp_doscan_funs_t *funs, void *data,
- const char *orig_fmt, va_list orig_ap)
- {
- struct gmp_doscan_params_t param;
- va_list ap;
- char *alloc_fmt;
- const char *fmt, *this_fmt, *end_fmt;
- size_t orig_fmt_len, alloc_fmt_size, len;
- int new_fields, new_chars;
- char fchar;
- int fields = 0;
- int chars = 0;
- TRACE (printf ("__gmp_doscan "%s"n", orig_fmt);
- if (funs->scan == (gmp_doscan_scan_t) sscanf)
- printf (" s="%s"n", * (const char **) data));
- /* Don't modify orig_ap, if va_list is actually an array and hence call by
- reference. It could be argued that it'd be more efficient to leave
- callers to make a copy if they care, but doing so here is going to be a
- very small part of the total work, and we may as well keep applications
- out of trouble. */
- va_copy (ap, orig_ap);
- /* Parts of the format string are going to be copied so that a " %n" can
- be appended. alloc_fmt is some space for that. orig_fmt_len+4 will be
- needed if fmt consists of a single "%" specifier, but otherwise is an
- overestimate. We're not going to be very fast here, so use
- __gmp_allocate_func rather than TMP_ALLOC. */
- orig_fmt_len = strlen (orig_fmt);
- alloc_fmt_size = orig_fmt_len + 4;
- alloc_fmt = __GMP_ALLOCATE_FUNC_TYPE (alloc_fmt_size, char);
- fmt = orig_fmt;
- end_fmt = orig_fmt + orig_fmt_len;
- for (;;)
- {
- next:
- fchar = *fmt++;
- if (fchar == ' ')
- break;
- if (isspace (fchar))
- {
- chars += skip_white (funs, data);
- continue;
- }
- if (fchar != '%')
- {
- int c;
- literal:
- c = (funs->get) (data);
- if (c != fchar)
- {
- (funs->unget) (c, data);
- if (c == EOF)
- {
- eof_no_match:
- if (fields == 0)
- fields = EOF;
- }
- goto done;
- }
- chars++;
- continue;
- }
- param.type = ' ';
- param.base = 0; /* for e,f,g,i */
- param.ignore = 0;
- param.width = 0;
- this_fmt = fmt-1;
- TRACE (printf (" this_fmt "%s"n", this_fmt));
- for (;;)
- {
- ASSERT (fmt <= end_fmt);
- fchar = *fmt++;
- switch (fchar) {
- case ' ': /* unterminated % sequence */
- ASSERT (0);
- goto done;
- case '%': /* literal % */
- goto literal;
- case '[': /* character range */
- fchar = *fmt++;
- if (fchar == '^')
- fchar = *fmt++;
- /* ']' allowed as the first char (possibly after '^') */
- if (fchar == ']')
- fchar = *fmt++;
- for (;;)
- {
- ASSERT (fmt <= end_fmt);
- if (fchar == ' ')
- {
- /* unterminated % sequence */
- ASSERT (0);
- goto done;
- }
- if (fchar == ']')
- break;
- fchar = *fmt++;
- }
- /*FALLTHRU*/
- case 'c': /* characters */
- case 's': /* string of non-whitespace */
- case 'p': /* pointer */
- libc_type:
- len = fmt - this_fmt;
- memcpy (alloc_fmt, this_fmt, len);
- alloc_fmt[len++] = '%';
- alloc_fmt[len++] = 'n';
- alloc_fmt[len] = ' ';
- TRACE (printf (" scan "%s"n", alloc_fmt);
- if (funs->scan == (gmp_doscan_scan_t) sscanf)
- printf (" s="%s"n", * (const char **) data));
- new_chars = -1;
- if (param.ignore)
- {
- new_fields = (*funs->scan) (data, alloc_fmt, &new_chars, NULL);
- ASSERT (new_fields == 0 || new_fields == EOF);
- }
- else
- {
- void *arg = va_arg (ap, void *);
- new_fields = (*funs->scan) (data, alloc_fmt, arg, &new_chars);
- ASSERT (new_fields==0 || new_fields==1 || new_fields==EOF);
- if (new_fields == 0)
- goto done; /* invalid input */
- if (new_fields == 1)
- ASSERT (new_chars != -1);
- }
- TRACE (printf (" new_fields %d new_chars %dn",
- new_fields, new_chars));
- if (new_fields == -1)
- goto eof_no_match; /* EOF before anything matched */
- /* Under param.ignore, when new_fields==0 we don't know if
- it's a successful match or an invalid field. new_chars
- won't have been assigned if it was an invalid field. */
- if (new_chars == -1)
- goto done; /* invalid input */
- chars += new_chars;
- (*funs->step) (data, new_chars);
- increment_fields:
- if (! param.ignore)
- fields++;
- goto next;
- case 'd': /* decimal */
- case 'u': /* decimal */
- param.base = 10;
- goto numeric;
- case 'e': /* float */
- case 'E': /* float */
- case 'f': /* float */
- case 'g': /* float */
- case 'G': /* float */
- case 'i': /* integer with base marker */
- numeric:
- if (param.type != 'F' && param.type != 'Q' && param.type != 'Z')
- goto libc_type;
- chars += skip_white (funs, data);
- new_chars = gmpscan (funs, data, ¶m,
- param.ignore ? NULL : va_arg (ap, void*));
- if (new_chars == -2)
- goto eof_no_match;
- if (new_chars == -1)
- goto done;
- ASSERT (new_chars >= 0);
- chars += new_chars;
- goto increment_fields;
- case 'a': /* glibc allocate string */
- case ''': /* glibc digit groupings */
- break;
- case 'F': /* mpf_t */
- case 'j': /* intmax_t */
- case 'L': /* long long */
- case 'q': /* quad_t */
- case 'Q': /* mpq_t */
- case 't': /* ptrdiff_t */
- case 'z': /* size_t */
- case 'Z': /* mpz_t */
- set_type:
- param.type = fchar;
- break;
- case 'h': /* short or char */
- if (param.type != 'h')
- goto set_type;
- param.type = 'H'; /* internal code for "hh" */
- break;
- goto numeric;
- case 'l': /* long, long long, double or long double */
- if (param.type != 'l')
- goto set_type;
- param.type = 'L'; /* "ll" means "L" */
- break;
- case 'n':
- if (! param.ignore)
- {
- void *p;
- p = va_arg (ap, void *);
- TRACE (printf (" store %%n to %pn", p));
- switch (param.type) {
- case ' ': * (int *) p = chars; break;
- case 'F': mpf_set_si ((mpf_ptr) p, (long) chars); break;
- case 'H': * (char *) p = chars; break;
- case 'h': * (short *) p = chars; break;
- #if HAVE_INTMAX_T
- case 'j': * (intmax_t *) p = chars; break;
- #else
- case 'j': ASSERT_FAIL (intmax_t not available); break;
- #endif
- case 'l': * (long *) p = chars; break;
- #if HAVE_QUAD_T && HAVE_LONG_LONG
- case 'q':
- ASSERT_ALWAYS (sizeof (quad_t) == sizeof (long long));
- /*FALLTHRU*/
- #else
- case 'q': ASSERT_FAIL (quad_t not available); break;
- #endif
- #if HAVE_LONG_LONG
- case 'L': * (long long *) p = chars; break;
- #else
- case 'L': ASSERT_FAIL (long long not available); break;
- #endif
- case 'Q': mpq_set_si ((mpq_ptr) p, (long) chars, 1L); break;
- #if HAVE_PTRDIFF_T
- case 't': * (ptrdiff_t *) p = chars; break;
- #else
- case 't': ASSERT_FAIL (ptrdiff_t not available); break;
- #endif
- case 'z': * (size_t *) p = chars; break;
- case 'Z': mpz_set_si ((mpz_ptr) p, (long) chars); break;
- default: ASSERT (0); break;
- }
- }
- goto next;
- case 'o':
- param.base = 8;
- goto numeric;
- case 'x':
- case 'X':
- param.base = 16;
- goto numeric;
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- param.width = 0;
- do {
- param.width = param.width * 10 + (fchar-'0');
- fchar = *fmt++;
- } while (isdigit (fchar));
- fmt--; /* unget the non-digit */
- break;
- case '*':
- param.ignore = 1;
- break;
- default:
- /* something invalid in a % sequence */
- ASSERT (0);
- goto next;
- }
- }
- }
- done:
- (*__gmp_free_func) (alloc_fmt, alloc_fmt_size);
- return fields;
- }