hgcd2.c
上传用户:qaz666999
上传日期:2022-08-06
资源大小:2570k
文件大小:10k
- /* hgcd2.c
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
- Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008 Free Software
- Foundation, Inc.
- This file is part of the GNU MP Library.
- The GNU MP Library is free software; you can redistribute it and/or modify
- it under the terms of the GNU Lesser General Public License as published by
- the Free Software Foundation; either version 3 of the License, or (at your
- option) any later version.
- The GNU MP Library is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
- License for more details.
- You should have received a copy of the GNU Lesser General Public License
- along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
- #include "gmp.h"
- #include "gmp-impl.h"
- #include "longlong.h"
- #if GMP_NAIL_BITS == 0
- /* Copied from the old mpn/generic/gcdext.c, and modified slightly to return
- the remainder. */
- /* Single-limb division optimized for small quotients. */
- static inline mp_limb_t
- div1 (mp_ptr rp,
- mp_limb_t n0,
- mp_limb_t d0)
- {
- mp_limb_t q = 0;
- if ((mp_limb_signed_t) n0 < 0)
- {
- int cnt;
- for (cnt = 1; (mp_limb_signed_t) d0 >= 0; cnt++)
- {
- d0 = d0 << 1;
- }
- q = 0;
- while (cnt)
- {
- q <<= 1;
- if (n0 >= d0)
- {
- n0 = n0 - d0;
- q |= 1;
- }
- d0 = d0 >> 1;
- cnt--;
- }
- }
- else
- {
- int cnt;
- for (cnt = 0; n0 >= d0; cnt++)
- {
- d0 = d0 << 1;
- }
- q = 0;
- while (cnt)
- {
- d0 = d0 >> 1;
- q <<= 1;
- if (n0 >= d0)
- {
- n0 = n0 - d0;
- q |= 1;
- }
- cnt--;
- }
- }
- *rp = n0;
- return q;
- }
- /* Two-limb division optimized for small quotients. */
- static inline mp_limb_t
- div2 (mp_ptr rp,
- mp_limb_t nh, mp_limb_t nl,
- mp_limb_t dh, mp_limb_t dl)
- {
- mp_limb_t q = 0;
- if ((mp_limb_signed_t) nh < 0)
- {
- int cnt;
- for (cnt = 1; (mp_limb_signed_t) dh >= 0; cnt++)
- {
- dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
- dl = dl << 1;
- }
- while (cnt)
- {
- q <<= 1;
- if (nh > dh || (nh == dh && nl >= dl))
- {
- sub_ddmmss (nh, nl, nh, nl, dh, dl);
- q |= 1;
- }
- dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
- dh = dh >> 1;
- cnt--;
- }
- }
- else
- {
- int cnt;
- for (cnt = 0; nh > dh || (nh == dh && nl >= dl); cnt++)
- {
- dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
- dl = dl << 1;
- }
- while (cnt)
- {
- dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
- dh = dh >> 1;
- q <<= 1;
- if (nh > dh || (nh == dh && nl >= dl))
- {
- sub_ddmmss (nh, nl, nh, nl, dh, dl);
- q |= 1;
- }
- cnt--;
- }
- }
- rp[0] = nl;
- rp[1] = nh;
- return q;
- }
#if 0
/* Alternative div2 that uses fewer branches, kept disabled because it
   measured slightly slower than the version above.

   NOTE(review): when dcnt == 0, the expression
   dl >> (GMP_LIMB_BITS - dcnt) shifts by the full limb width, which is
   undefined behavior in C even though the result is then masked out by
   -(dcnt > 0).  Confirm and fix before ever enabling this path.  */
static inline mp_limb_t
div2 (mp_ptr rp,
      mp_limb_t nh, mp_limb_t nl,
      mp_limb_t dh, mp_limb_t dl)
{
  mp_limb_t q = 0;
  int ncnt;
  int dcnt;

  /* Align the divisor's leading bit with the numerator's.  */
  count_leading_zeros (ncnt, nh);
  count_leading_zeros (dcnt, dh);
  dcnt -= ncnt;

  dh = (dh << dcnt) + (-(dcnt > 0) & (dl >> (GMP_LIMB_BITS - dcnt)));
  dl <<= dcnt;

  /* One conditional (branch-free) subtraction per quotient bit.  */
  do
    {
      mp_limb_t bit;
      q <<= 1;
      if (UNLIKELY (nh == dh))
	bit = (nl >= dl);
      else
	bit = (nh > dh);

      q |= bit;

      /* -bit is an all-ones or all-zeros mask, so this subtracts either
	 the divisor or zero without branching.  */
      sub_ddmmss (nh, nl, nh, nl, (-bit) & dh, (-bit) & dl);

      dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
      dh = dh >> 1;
    }
  while (dcnt--);

  rp[0] = nl;
  rp[1] = nh;
  return q;
}
#endif
- #else /* GMP_NAIL_BITS != 0 */
- /* Check all functions for nail support. */
- /* hgcd2 should be defined to take inputs including nail bits, and
- produce a matrix with elements also including nail bits. This is
- necessary, for the matrix elements to be useful with mpn_mul_1,
- mpn_addmul_1 and friends. */
- #error Not implemented
- #endif /* GMP_NAIL_BITS != 0 */
- /* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit. Constructs
- matrix M. Returns 1 if we make progress, i.e. can perform at least
- one subtraction. Otherwise returns zero.. */
- /* FIXME: Possible optimizations:
- The div2 function starts with checking the most significant bit of
- the numerator. We can maintained normalized operands here, call
- hgcd with normalized operands only, which should make the code
- simpler and possibly faster.
- Experiment with table lookups on the most significant bits.
- This function is also a candidate for assembler implementation.
- */
/* Reduces the two-limb operands (ah,al) and (bh,bl) by repeated mutual
   division with small quotients, accumulating the elementary quotient
   matrices into M.  Returns 1 and fills in M->u if at least one
   reduction step was performed; returns 0 (leaving M untouched) when no
   progress is possible.  */
int
mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
	   struct hgcd_matrix1 *M)
{
  mp_limb_t u00, u01, u10, u11;

  /* No progress is guaranteed unless both high limbs are >= 2.  */
  if (ah < 2 || bh < 2)
    return 0;

  /* First subtraction of the smaller operand from the larger; this also
     initializes M to the corresponding elementary matrix.  */
  if (ah > bh || (ah == bh && al > bl))
    {
      sub_ddmmss (ah, al, ah, al, bh, bl);
      if (ah < 2)
	return 0;

      u00 = u01 = u11 = 1;
      u10 = 0;
    }
  else
    {
      sub_ddmmss (bh, bl, bh, bl, ah, al);
      if (bh < 2)
	return 0;

      u00 = u10 = u11 = 1;
      u01 = 0;
    }

  if (ah < bh)
    goto subtract_a;

  /* Double precision loop: each iteration reduces a by b (first half)
     then b by a (second half, entered at subtract_a).  */
  for (;;)
    {
      ASSERT (ah >= bh);
      if (ah == bh)
	goto done;

      if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
	{
	  /* High limbs are nearly exhausted; pack the top GMP_LIMB_BITS
	     significant bits of each operand into a single limb and
	     switch to the single precision loop below.  */
	  ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
	  bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));

	  break;
	}

      /* Subtract a -= q b, and multiply M from the right by (1 q ; 0
	 1), affecting the second column of M. */
      ASSERT (ah > bh);
      sub_ddmmss (ah, al, ah, al, bh, bl);

      if (ah < 2)
	goto done;

      if (ah <= bh)
	{
	  /* Use q = 1 */
	  u01 += u00;
	  u11 += u10;
	}
      else
	{
	  mp_limb_t r[2];
	  mp_limb_t q = div2 (r, ah, al, bh, bl);
	  al = r[0]; ah = r[1];
	  if (ah < 2)
	    {
	      /* A is too small, but q is correct. */
	      u01 += q * u00;
	      u11 += q * u10;
	      goto done;
	    }
	  /* One subtraction was already done above, so the total
	     quotient is q + 1.  */
	  q++;
	  u01 += q * u00;
	  u11 += q * u10;
	}

    subtract_a:
      ASSERT (bh >= ah);
      if (ah == bh)
	goto done;

      if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
	{
	  /* Same packing as above, but resume the single precision loop
	     in its b-reducing half.  */
	  ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
	  bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));

	  goto subtract_a1;
	}

      /* Subtract b -= q a, and multiply M from the right by (1 0 ; q
	 1), affecting the first column of M. */
      sub_ddmmss (bh, bl, bh, bl, ah, al);

      if (bh < 2)
	goto done;

      if (bh <= ah)
	{
	  /* Use q = 1 */
	  u00 += u01;
	  u10 += u11;
	}
      else
	{
	  mp_limb_t r[2];
	  mp_limb_t q = div2 (r, bh, bl, ah, al);
	  bl = r[0]; bh = r[1];
	  if (bh < 2)
	    {
	      /* B is too small, but q is correct. */
	      u00 += q * u01;
	      u10 += q * u11;
	      goto done;
	    }
	  q++;
	  u00 += q * u01;
	  u10 += q * u11;
	}
    }

  /* NOTE: Since we discard the least significant half limb, we don't
     get a truly maximal M (corresponding to |a - b| <
     2^{GMP_LIMB_BITS +1}). */

  /* Single precision loop */
  for (;;)
    {
      ASSERT (ah >= bh);
      if (ah == bh)
	break;

      ah -= bh;
      if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
	break;

      if (ah <= bh)
	{
	  /* Use q = 1 */
	  u01 += u00;
	  u11 += u10;
	}
      else
	{
	  mp_limb_t r;
	  mp_limb_t q = div1 (&r, ah, bh);
	  ah = r;
	  if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
	    {
	      /* A is too small, but q is correct. */
	      u01 += q * u00;
	      u11 += q * u10;
	      break;
	    }
	  q++;
	  u01 += q * u00;
	  u11 += q * u10;
	}

    subtract_a1:
      ASSERT (bh >= ah);
      if (ah == bh)
	break;

      bh -= ah;
      if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
	break;

      if (bh <= ah)
	{
	  /* Use q = 1 */
	  u00 += u01;
	  u10 += u11;
	}
      else
	{
	  mp_limb_t r;
	  mp_limb_t q = div1 (&r, bh, ah);
	  bh = r;
	  if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
	    {
	      /* B is too small, but q is correct. */
	      u00 += q * u01;
	      u10 += q * u11;
	      break;
	    }
	  q++;
	  u00 += q * u01;
	  u10 += q * u11;
	}
    }

 done:
  M->u[0][0] = u00; M->u[0][1] = u01;
  M->u[1][0] = u10; M->u[1][1] = u11;

  return 1;
}
- /* Sets (r;b) = (a;b) M, with M = (u00, u01; u10, u11). Vector must
- * have space for n + 1 limbs. Uses three buffers to avoid a copy*/
- mp_size_t
- mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *M,
- mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
- {
- mp_limb_t ah, bh;
- /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as
- r = u00 * a
- r += u10 * b
- b *= u11
- b += u01 * a
- */
- #if HAVE_NATIVE_mpn_addaddmul_1msb0
- ah = mpn_addaddmul_1msb0 (rp, ap, bp, n, M->u[0][0], M->u[1][0]);
- bh = mpn_addaddmul_1msb0 (bp, bp, ap, n, M->u[1][1], M->u[0][1]);
- #else
- ah = mpn_mul_1 (rp, ap, n, M->u[0][0]);
- ah += mpn_addmul_1 (rp, bp, n, M->u[1][0]);
- bh = mpn_mul_1 (bp, bp, n, M->u[1][1]);
- bh += mpn_addmul_1 (bp, ap, n, M->u[0][1]);
- #endif
- rp[n] = ah;
- bp[n] = bh;
- n += (ah | bh) > 0;
- return n;
- }
- /* Sets (r;b) = M^{-1}(a;b), with M^{-1} = (u11, -u01; -u10, u00) from
- the left. Uses three buffers, to avoid a copy. */
- mp_size_t
- mpn_hgcd_mul_matrix1_inverse_vector (const struct hgcd_matrix1 *M,
- mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
- {
- mp_limb_t h0, h1;
- /* Compute (r;b) <-- (u11 a - u01 b; -u10 a + u00 b) as
- r = u11 * a
- r -= u01 * b
- b *= u00
- b -= u10 * a
- */
- h0 = mpn_mul_1 (rp, ap, n, M->u[1][1]);
- h1 = mpn_submul_1 (rp, bp, n, M->u[0][1]);
- ASSERT (h0 == h1);
- h0 = mpn_mul_1 (bp, bp, n, M->u[0][0]);
- h1 = mpn_submul_1 (bp, ap, n, M->u[1][0]);
- ASSERT (h0 == h1);
- n -= (rp[n-1] | bp[n-1]) == 0;
- return n;
- }