gen-psqr.c
上传用户:qaz666999
上传日期:2022-08-06
资源大小:2570k
文件大小:17k
- /* Generate perfect square testing data.
- Copyright 2002, 2003, 2004 Free Software Foundation, Inc.
- This file is part of the GNU MP Library.
- The GNU MP Library is free software; you can redistribute it and/or modify
- it under the terms of the GNU Lesser General Public License as published by
- the Free Software Foundation; either version 3 of the License, or (at your
- option) any later version.
- The GNU MP Library is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
- License for more details.
- You should have received a copy of the GNU Lesser General Public License
- along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
- #include <stdio.h>
- #include <stdlib.h>
- #include "dumbmp.c"
- /* The aim of this program is to choose either mpn_mod_34lsub1 or mpn_mod_1
- (plus a PERFSQR_PP modulus), and generate tables indicating quadratic
- residues and non-residues modulo small factors of that modulus.
- For the usual 32 or 64 bit cases mpn_mod_34lsub1 gets used. That
- function exists specifically because 2^24-1 and 2^48-1 have nice sets of
- prime factors. For other limb sizes it's considered, but if it doesn't
- have good factors then mpn_mod_1 will be used instead.
- When mpn_mod_1 is used, the modulus PERFSQR_PP is created from a
- selection of small primes, chosen to fill PERFSQR_MOD_BITS of a limb,
- with that bit count chosen so (2*GMP_LIMB_BITS)*2^PERFSQR_MOD_BITS <=
- GMP_LIMB_MAX, allowing PERFSQR_MOD_IDX in mpn/generic/perfsqr.c to do its
- calculation within a single limb.
- In either case primes can be combined to make divisors. The table data
- then effectively indicates remainders which are quadratic residues mod
- all the primes. This sort of combining reduces the number of steps
- needed after mpn_mod_34lsub1 or mpn_mod_1, saving code size and time.
- Nothing is gained or lost in terms of detections, the same total fraction
- of non-residues will be identified.
- Nothing particularly sophisticated is attempted for combining factors to
- make divisors. This is probably a kind of knapsack problem so it'd be
- too hard to attempt anything completely general. For the usual 32 and 64
- bit limbs we get a good enough result just pairing the biggest and
- smallest which fit together, repeatedly.
- Another aim is to get powerful combinations, ie. divisors which identify
- biggest fraction of non-residues, and have those run first. Again for
- the usual 32 and 64 bits it seems good enough just to pair for big
- divisors then sort according to the resulting fraction of non-residues
- identified.
- Also in this program, a table sq_res_0x100 of residues modulo 256 is
- generated. This simply fills bits into limbs of the appropriate
- build-time GMP_LIMB_BITS each.
- */
- /* Normally we aren't using const in gen*.c programs, so as not to have to
- bother figuring out if it works, but using it with f_cmp_divisor and
- f_cmp_fraction avoids warnings from the qsort calls. */
- /* Same tests as gmp.h. */
- #if defined (__STDC__)
- || defined (__cplusplus)
- || defined (_AIX)
- || defined (__DECC)
- || (defined (__mips) && defined (_SYSTYPE_SVR4))
- || defined (_MSC_VER)
- || defined (_WIN32)
- #define HAVE_CONST 1
- #endif
- #if ! HAVE_CONST
- #define const
- #endif
- mpz_t *sq_res_0x100; /* table of limbs */
- int nsq_res_0x100; /* elements in sq_res_0x100 array */
- int sq_res_0x100_num; /* squares in sq_res_0x100 */
- double sq_res_0x100_fraction; /* sq_res_0x100_num / 256 */
- int mod34_bits; /* 3*GMP_NUMB_BITS/4 */
- int mod_bits; /* bits from PERFSQR_MOD_34 or MOD_PP */
- int max_divisor; /* all divisors <= max_divisor */
- int max_divisor_bits; /* ceil(log2(max_divisor)) */
- double total_fraction; /* of squares */
- mpz_t pp; /* product of primes, or 0 if mod_34lsub1 used */
- mpz_t pp_norm; /* pp shifted so NUMB high bit set */
- mpz_t pp_inverted; /* invert_limb style inverse */
- mpz_t mod_mask; /* 2^mod_bits-1 */
- char mod34_excuse[128]; /* why mod_34lsub1 not used (if it's not) */
- /* raw list of divisors of 2^mod34_bits-1 or pp, just to show in a comment */
- struct rawfactor_t {
- int divisor;
- int multiplicity;
- };
- struct rawfactor_t *rawfactor;
- int nrawfactor;
- /* factors of 2^mod34_bits-1 or pp and associated data, after combining etc */
- struct factor_t {
- int divisor;
- mpz_t inverse; /* 1/divisor mod 2^mod_bits */
- mpz_t mask; /* indicating squares mod divisor */
- double fraction; /* squares/total */
- };
- struct factor_t *factor;
- int nfactor; /* entries in use in factor array */
- int factor_alloc; /* entries allocated to factor array */
- int
- f_cmp_divisor (const void *parg, const void *qarg)
- {
- const struct factor_t *p, *q;
- p = parg;
- q = qarg;
- if (p->divisor > q->divisor)
- return 1;
- else if (p->divisor < q->divisor)
- return -1;
- else
- return 0;
- }
- int
- f_cmp_fraction (const void *parg, const void *qarg)
- {
- const struct factor_t *p, *q;
- p = parg;
- q = qarg;
- if (p->fraction > q->fraction)
- return 1;
- else if (p->fraction < q->fraction)
- return -1;
- else
- return 0;
- }
- /* Remove array[idx] by copying the remainder down, and adjust narray
- accordingly. */
- #define COLLAPSE_ELEMENT(array, idx, narray)
- do {
- mem_copyi ((char *) &(array)[idx],
- (char *) &(array)[idx+1],
- ((narray)-((idx)+1)) * sizeof (array[0]));
- (narray)--;
- } while (0)
- /* return n*2^p mod m */
- int
- mul_2exp_mod (int n, int p, int m)
- {
- int i;
- for (i = 0; i < p; i++)
- n = (2 * n) % m;
- return n;
- }
- /* return -n mod m */
- int
- neg_mod (int n, int m)
- {
- ASSERT (n >= 0 && n < m);
- return (n == 0 ? 0 : m-n);
- }
- /* Set "mask" to a value such that "mask & (1<<idx)" is non-zero if
- "-(idx<<mod_bits)" can be a square modulo m. */
- void
- square_mask (mpz_t mask, int m)
- {
- int p, i, r, idx;
- p = mul_2exp_mod (1, mod_bits, m);
- p = neg_mod (p, m);
- mpz_set_ui (mask, 0L);
- for (i = 0; i < m; i++)
- {
- r = (i * i) % m;
- idx = (r * p) % m;
- mpz_setbit (mask, (unsigned long) idx);
- }
- }
- void
- generate_sq_res_0x100 (int limb_bits)
- {
- int i, res;
- nsq_res_0x100 = (0x100 + limb_bits - 1) / limb_bits;
- sq_res_0x100 = (mpz_t *) xmalloc (nsq_res_0x100 * sizeof (*sq_res_0x100));
- for (i = 0; i < nsq_res_0x100; i++)
- mpz_init_set_ui (sq_res_0x100[i], 0L);
- for (i = 0; i < 0x100; i++)
- {
- res = (i * i) % 0x100;
- mpz_setbit (sq_res_0x100[res / limb_bits],
- (unsigned long) (res % limb_bits));
- }
- sq_res_0x100_num = 0;
- for (i = 0; i < nsq_res_0x100; i++)
- sq_res_0x100_num += mpz_popcount (sq_res_0x100[i]);
- sq_res_0x100_fraction = (double) sq_res_0x100_num / 256.0;
- }
- void
- generate_mod (int limb_bits, int nail_bits)
- {
- int numb_bits = limb_bits - nail_bits;
- int i, divisor;
- mpz_init_set_ui (pp, 0L);
- mpz_init_set_ui (pp_norm, 0L);
- mpz_init_set_ui (pp_inverted, 0L);
- /* no more than limb_bits many factors in a one limb modulus (and of
- course in reality nothing like that many) */
- factor_alloc = limb_bits;
- factor = (struct factor_t *) xmalloc (factor_alloc * sizeof (*factor));
- rawfactor = (struct rawfactor_t *)
- xmalloc (factor_alloc * sizeof (*rawfactor));
- if (numb_bits % 4 != 0)
- {
- strcpy (mod34_excuse, "GMP_NUMB_BITS % 4 != 0");
- goto use_pp;
- }
- max_divisor = 2*limb_bits;
- max_divisor_bits = log2_ceil (max_divisor);
- if (numb_bits / 4 < max_divisor_bits)
- {
- /* Wind back to one limb worth of max_divisor, if that will let us use
- mpn_mod_34lsub1. */
- max_divisor = limb_bits;
- max_divisor_bits = log2_ceil (max_divisor);
- if (numb_bits / 4 < max_divisor_bits)
- {
- strcpy (mod34_excuse, "GMP_NUMB_BITS / 4 too small");
- goto use_pp;
- }
- }
- {
- /* Can use mpn_mod_34lsub1, find small factors of 2^mod34_bits-1. */
- mpz_t m, q, r;
- int multiplicity;
- mod34_bits = (numb_bits / 4) * 3;
- /* mpn_mod_34lsub1 returns a full limb value, PERFSQR_MOD_34 folds it at
- the mod34_bits mark, adding the two halves for a remainder of at most
- mod34_bits+1 many bits */
- mod_bits = mod34_bits + 1;
- mpz_init_set_ui (m, 1L);
- mpz_mul_2exp (m, m, mod34_bits);
- mpz_sub_ui (m, m, 1L);
- mpz_init (q);
- mpz_init (r);
- for (i = 3; i <= max_divisor; i++)
- {
- if (! isprime (i))
- continue;
- mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
- if (mpz_sgn (r) != 0)
- continue;
- /* if a repeated prime is found it's used as an i^n in one factor */
- divisor = 1;
- multiplicity = 0;
- do
- {
- if (divisor > max_divisor / i)
- break;
- multiplicity++;
- mpz_set (m, q);
- mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
- }
- while (mpz_sgn (r) == 0);
- ASSERT (nrawfactor < factor_alloc);
- rawfactor[nrawfactor].divisor = i;
- rawfactor[nrawfactor].multiplicity = multiplicity;
- nrawfactor++;
- }
- mpz_clear (m);
- mpz_clear (q);
- mpz_clear (r);
- }
- if (nrawfactor <= 2)
- {
- mpz_t new_pp;
- sprintf (mod34_excuse, "only %d small factor%s",
- nrawfactor, nrawfactor == 1 ? "" : "s");
- use_pp:
- /* reset to two limbs of max_divisor, in case the mpn_mod_34lsub1 code
- tried with just one */
- max_divisor = 2*limb_bits;
- max_divisor_bits = log2_ceil (max_divisor);
- mpz_init (new_pp);
- nrawfactor = 0;
- mod_bits = MIN (numb_bits, limb_bits - max_divisor_bits);
- /* one copy of each small prime */
- mpz_set_ui (pp, 1L);
- for (i = 3; i <= max_divisor; i++)
- {
- if (! isprime (i))
- continue;
- mpz_mul_ui (new_pp, pp, (unsigned long) i);
- if (mpz_sizeinbase (new_pp, 2) > mod_bits)
- break;
- mpz_set (pp, new_pp);
- ASSERT (nrawfactor < factor_alloc);
- rawfactor[nrawfactor].divisor = i;
- rawfactor[nrawfactor].multiplicity = 1;
- nrawfactor++;
- }
- /* Plus an extra copy of one or more of the primes selected, if that
- still fits in max_divisor and the total in mod_bits. Usually only
- 3 or 5 will be candidates */
- for (i = nrawfactor-1; i >= 0; i--)
- {
- if (rawfactor[i].divisor > max_divisor / rawfactor[i].divisor)
- continue;
- mpz_mul_ui (new_pp, pp, (unsigned long) rawfactor[i].divisor);
- if (mpz_sizeinbase (new_pp, 2) > mod_bits)
- continue;
- mpz_set (pp, new_pp);
- rawfactor[i].multiplicity++;
- }
- mod_bits = mpz_sizeinbase (pp, 2);
- mpz_set (pp_norm, pp);
- while (mpz_sizeinbase (pp_norm, 2) < numb_bits)
- mpz_add (pp_norm, pp_norm, pp_norm);
- mpz_preinv_invert (pp_inverted, pp_norm, numb_bits);
- mpz_clear (new_pp);
- }
- /* start the factor array */
- for (i = 0; i < nrawfactor; i++)
- {
- int j;
- ASSERT (nfactor < factor_alloc);
- factor[nfactor].divisor = 1;
- for (j = 0; j < rawfactor[i].multiplicity; j++)
- factor[nfactor].divisor *= rawfactor[i].divisor;
- nfactor++;
- }
- combine:
- /* Combine entries in the factor array. Combine the smallest entry with
- the biggest one that will fit with it (ie. under max_divisor), then
- repeat that with the new smallest entry. */
- qsort (factor, nfactor, sizeof (factor[0]), f_cmp_divisor);
- for (i = nfactor-1; i >= 1; i--)
- {
- if (factor[i].divisor <= max_divisor / factor[0].divisor)
- {
- factor[0].divisor *= factor[i].divisor;
- COLLAPSE_ELEMENT (factor, i, nfactor);
- goto combine;
- }
- }
- total_fraction = 1.0;
- for (i = 0; i < nfactor; i++)
- {
- mpz_init (factor[i].inverse);
- mpz_invert_ui_2exp (factor[i].inverse,
- (unsigned long) factor[i].divisor,
- (unsigned long) mod_bits);
- mpz_init (factor[i].mask);
- square_mask (factor[i].mask, factor[i].divisor);
- /* fraction of possible squares */
- factor[i].fraction = (double) mpz_popcount (factor[i].mask)
- / factor[i].divisor;
- /* total fraction of possible squares */
- total_fraction *= factor[i].fraction;
- }
- /* best tests first (ie. smallest fraction) */
- qsort (factor, nfactor, sizeof (factor[0]), f_cmp_fraction);
- }
- void
- print (int limb_bits, int nail_bits)
- {
- int i;
- mpz_t mhi, mlo;
- printf ("/* This file generated by gen-psqr.c - DO NOT EDIT. */n");
- printf ("n");
- printf ("#if GMP_LIMB_BITS != %d || GMP_NAIL_BITS != %dn",
- limb_bits, nail_bits);
- printf ("Error, error, this data is for %d bit limb and %d bit nailn",
- limb_bits, nail_bits);
- printf ("#endifn");
- printf ("n");
- printf ("/* Non-zero bit indicates a quadratic residue mod 0x100.n");
- printf (" This test identifies %.2f%% as non-squares (%d/256). */n",
- (1.0 - sq_res_0x100_fraction) * 100.0,
- 0x100 - sq_res_0x100_num);
- printf ("static const mp_limb_tn");
- printf ("sq_res_0x100[%d] = {n", nsq_res_0x100);
- for (i = 0; i < nsq_res_0x100; i++)
- {
- printf (" CNST_LIMB(0x");
- mpz_out_str (stdout, 16, sq_res_0x100[i]);
- printf ("),n");
- }
- printf ("};n");
- printf ("n");
- if (mpz_sgn (pp) != 0)
- {
- printf ("/* mpn_mod_34lsub1 not used due to %s */n", mod34_excuse);
- printf ("/* PERFSQR_PP = ");
- }
- else
- printf ("/* 2^%d-1 = ", mod34_bits);
- for (i = 0; i < nrawfactor; i++)
- {
- if (i != 0)
- printf (" * ");
- printf ("%d", rawfactor[i].divisor);
- if (rawfactor[i].multiplicity != 1)
- printf ("^%d", rawfactor[i].multiplicity);
- }
- printf (" %s*/n", mpz_sgn (pp) == 0 ? "... " : "");
- printf ("#define PERFSQR_MOD_BITS %dn", mod_bits);
- if (mpz_sgn (pp) != 0)
- {
- printf ("#define PERFSQR_PP CNST_LIMB(0x");
- mpz_out_str (stdout, 16, pp);
- printf (")n");
- printf ("#define PERFSQR_PP_NORM CNST_LIMB(0x");
- mpz_out_str (stdout, 16, pp_norm);
- printf (")n");
- printf ("#define PERFSQR_PP_INVERTED CNST_LIMB(0x");
- mpz_out_str (stdout, 16, pp_inverted);
- printf (")n");
- }
- printf ("n");
- mpz_init (mhi);
- mpz_init (mlo);
- printf ("/* This test identifies %.2f%% as non-squares. */n",
- (1.0 - total_fraction) * 100.0);
- printf ("#define PERFSQR_MOD_TEST(up, usize) \n");
- printf (" do { \n");
- printf (" mp_limb_t r; \n");
- if (mpz_sgn (pp) != 0)
- printf (" PERFSQR_MOD_PP (r, up, usize); \n");
- else
- printf (" PERFSQR_MOD_34 (r, up, usize); \n");
- for (i = 0; i < nfactor; i++)
- {
- printf (" \n");
- printf (" /* %5.2f%% */ \n",
- (1.0 - factor[i].fraction) * 100.0);
- printf (" PERFSQR_MOD_%d (r, CNST_LIMB(%2d), CNST_LIMB(0x",
- factor[i].divisor <= limb_bits ? 1 : 2,
- factor[i].divisor);
- mpz_out_str (stdout, 16, factor[i].inverse);
- printf ("), \n");
- printf (" CNST_LIMB(0x");
- if ( factor[i].divisor <= limb_bits)
- {
- mpz_out_str (stdout, 16, factor[i].mask);
- }
- else
- {
- mpz_tdiv_r_2exp (mlo, factor[i].mask, (unsigned long) limb_bits);
- mpz_tdiv_q_2exp (mhi, factor[i].mask, (unsigned long) limb_bits);
- mpz_out_str (stdout, 16, mhi);
- printf ("), CNST_LIMB(0x");
- mpz_out_str (stdout, 16, mlo);
- }
- printf (")); \n");
- }
- printf (" } while (0)n");
- printf ("n");
- printf ("/* Grand total sq_res_0x100 and PERFSQR_MOD_TEST, %.2f%% non-squares. */n",
- (1.0 - (total_fraction * 44.0/256.0)) * 100.0);
- printf ("n");
- printf ("/* helper for tests/mpz/t-perfsqr.c */n");
- printf ("#define PERFSQR_DIVISORS { 256,");
- for (i = 0; i < nfactor; i++)
- printf (" %d,", factor[i].divisor);
- printf (" }n");
- mpz_clear (mhi);
- mpz_clear (mlo);
- }
- int
- main (int argc, char *argv[])
- {
- int limb_bits, nail_bits;
- if (argc != 3)
- {
- fprintf (stderr, "Usage: gen-psqr <limbbits> <nailbits>n");
- exit (1);
- }
- limb_bits = atoi (argv[1]);
- nail_bits = atoi (argv[2]);
- if (limb_bits <= 0
- || nail_bits < 0
- || nail_bits >= limb_bits)
- {
- fprintf (stderr, "Invalid limb/nail bits: %d %dn",
- limb_bits, nail_bits);
- exit (1);
- }
- generate_sq_res_0x100 (limb_bits);
- generate_mod (limb_bits, nail_bits);
- print (limb_bits, nail_bits);
- return 0;
- }