speed.h
上传用户:qaz666999
上传日期:2022-08-06
资源大小:2570k
文件大小:101k
- /* Header for speed and threshold things.
- Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2008, 2009, 2010 Free
- Software Foundation, Inc.
- This file is part of the GNU MP Library.
- The GNU MP Library is free software; you can redistribute it and/or modify
- it under the terms of the GNU Lesser General Public License as published by
- the Free Software Foundation; either version 3 of the License, or (at your
- option) any later version.
- The GNU MP Library is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
- License for more details.
- You should have received a copy of the GNU Lesser General Public License
- along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
- #ifndef __SPEED_H__
- #define __SPEED_H__
- /* Pad ptr,oldsize with zero limbs (at the most significant end) to make it
- newsize long. */
- #define MPN_ZERO_EXTEND(ptr, oldsize, newsize)
- do {
- ASSERT ((newsize) >= (oldsize));
- MPN_ZERO ((ptr)+(oldsize), (newsize)-(oldsize));
- } while (0)
- /* A mask of the least significant n bits. Note 1<<32 doesn't give zero on
- x86 family CPUs, hence the separate case for GMP_LIMB_BITS. */
- #define MP_LIMB_T_LOWBITMASK(n)
- ((n) == GMP_LIMB_BITS ? MP_LIMB_T_MAX : ((mp_limb_t) 1 << (n)) - 1)
- /* align must be a power of 2 here, usually CACHE_LINE_SIZE is a good choice */
- #define TMP_ALLOC_ALIGNED(bytes, align)
- align_pointer (TMP_ALLOC ((bytes) + (align)-1), (align))
- #define TMP_ALLOC_LIMBS_ALIGNED(limbs, align)
- ((mp_ptr) TMP_ALLOC_ALIGNED ((limbs)*sizeof(mp_limb_t), align))
- /* CACHE_LINE_SIZE is our default alignment for speed operands, and the
- limit on what s->align_xp etc and then request for off-alignment. Maybe
- this should be an option of some sort, but in any case here are some line
- sizes,
- bytes
- 32 pentium
- 64 athlon
- 64 itanium-2 L1
- 128 itanium-2 L2
- */
- #define CACHE_LINE_SIZE 64 /* bytes */
- #define SPEED_TMP_ALLOC_ADJUST_MASK (CACHE_LINE_SIZE/BYTES_PER_MP_LIMB - 1)
- /* Set ptr to a TMP_ALLOC block of the given limbs, with the given limb
- alignment. */
- #define SPEED_TMP_ALLOC_LIMBS(ptr, limbs, align)
- do {
- mp_ptr __ptr;
- mp_size_t __ptr_align, __ptr_add;
-
- ASSERT ((CACHE_LINE_SIZE % BYTES_PER_MP_LIMB) == 0);
- __ptr = TMP_ALLOC_LIMBS ((limbs) + SPEED_TMP_ALLOC_ADJUST_MASK);
- __ptr_align = (__ptr - (mp_ptr) NULL);
- __ptr_add = ((align) - __ptr_align) & SPEED_TMP_ALLOC_ADJUST_MASK;
- (ptr) = __ptr + __ptr_add;
- } while (0)
- /* This is the size for s->xp_block and s->yp_block, used in certain
- routines that want to run across many different data values and use
- s->size for a different purpose, eg. SPEED_ROUTINE_MPN_GCD_1.
- 512 means 2kbytes of data for each of xp_block and yp_block, making 4k
- total, which should fit easily in any L1 data cache. */
- #define SPEED_BLOCK_SIZE 512 /* limbs */
- extern double speed_unittime;
- extern double speed_cycletime;
- extern int speed_precision;
- extern char speed_time_string[];
- void speed_time_init __GMP_PROTO ((void));
- void speed_cycletime_fail __GMP_PROTO ((const char *str));
- void speed_cycletime_init __GMP_PROTO ((void));
- void speed_cycletime_need_cycles __GMP_PROTO ((void));
- void speed_cycletime_need_seconds __GMP_PROTO ((void));
- void speed_starttime __GMP_PROTO ((void));
- double speed_endtime __GMP_PROTO ((void));
- struct speed_params {
- unsigned reps; /* how many times to run the routine */
- mp_ptr xp; /* first argument */
- mp_ptr yp; /* second argument */
- mp_size_t size; /* size of both arguments */
- mp_limb_t r; /* user supplied parameter */
- mp_size_t align_xp; /* alignment of xp */
- mp_size_t align_yp; /* alignment of yp */
- mp_size_t align_wp; /* intended alignment of wp */
- mp_size_t align_wp2; /* intended alignment of wp2 */
- mp_ptr xp_block; /* first special SPEED_BLOCK_SIZE block */
- mp_ptr yp_block; /* second special SPEED_BLOCK_SIZE block */
- double time_divisor; /* optionally set by the speed routine */
- /* used by the cache priming things */
- int cache;
- unsigned src_num, dst_num;
- struct {
- mp_ptr ptr;
- mp_size_t size;
- } src[3], dst[3];
- };
- typedef double (*speed_function_t) __GMP_PROTO ((struct speed_params *s));
- double speed_measure __GMP_PROTO ((speed_function_t fun, struct speed_params *s));
- /* Prototypes for speed measuring routines */
- double speed_back_to_back __GMP_PROTO ((struct speed_params *s));
- double speed_count_leading_zeros __GMP_PROTO ((struct speed_params *s));
- double speed_count_trailing_zeros __GMP_PROTO ((struct speed_params *s));
- double speed_find_a __GMP_PROTO ((struct speed_params *s));
- double speed_gmp_allocate_free __GMP_PROTO ((struct speed_params *s));
- double speed_gmp_allocate_reallocate_free __GMP_PROTO ((struct speed_params *s));
- double speed_invert_limb __GMP_PROTO ((struct speed_params *s));
- double speed_malloc_free __GMP_PROTO ((struct speed_params *s));
- double speed_malloc_realloc_free __GMP_PROTO ((struct speed_params *s));
- double speed_memcpy __GMP_PROTO ((struct speed_params *s));
- double speed_binvert_limb __GMP_PROTO ((struct speed_params *s));
- double speed_binvert_limb_mul1 __GMP_PROTO ((struct speed_params *s));
- double speed_binvert_limb_loop __GMP_PROTO ((struct speed_params *s));
- double speed_binvert_limb_cond __GMP_PROTO ((struct speed_params *s));
- double speed_binvert_limb_arith __GMP_PROTO ((struct speed_params *s));
- double speed_mpf_init_clear __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_add_n __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_addlsh1_n __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_addlsh2_n __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_add_n_sub_n __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_and_n __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_andn_n __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_addmul_1 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_addmul_2 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_addmul_3 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_addmul_4 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_addmul_5 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_addmul_6 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_addmul_7 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_addmul_8 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_com __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_copyd __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_copyi __GMP_PROTO ((struct speed_params *s));
- double speed_MPN_COPY __GMP_PROTO ((struct speed_params *s));
- double speed_MPN_COPY_DECR __GMP_PROTO ((struct speed_params *s));
- double speed_MPN_COPY_INCR __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_divexact_1 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_divexact_by3 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_bdiv_q_1 __GMP_PROTO ((struct speed_params *));
- double speed_mpn_pi1_bdiv_q_1 __GMP_PROTO ((struct speed_params *));
- double speed_mpn_bdiv_dbm1c __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_divrem_1 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_divrem_1f __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_divrem_1c __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_divrem_1cf __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_divrem_1_div __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_divrem_1f_div __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_divrem_1_inv __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_divrem_1f_inv __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_divrem_2 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_divrem_2_div __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_divrem_2_inv __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_fib2_ui __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_matrix22_mul __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_hgcd __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_hgcd_lehmer __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_gcd __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_gcd_1 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_gcd_1N __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_gcdext __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_gcdext_double __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_gcdext_one_double __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_gcdext_one_single __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_gcdext_single __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_get_str __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_hamdist __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_ior_n __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_iorn_n __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_jacobi_base __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_jacobi_base_1 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_jacobi_base_2 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_jacobi_base_3 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_lshift __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_lshiftc __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mod_1 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mod_1c __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mod_1_div __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mod_1_inv __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mod_1_1 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mod_1_2 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mod_1_3 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mod_1_4 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mod_34lsub1 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_modexact_1_odd __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_modexact_1c_odd __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mul_1 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mul_1_inplace __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mul_2 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mul_3 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mul_4 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mul __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mul_basecase __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mul_fft __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mul_fft_sqr __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_fft_mul __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_fft_sqr __GMP_PROTO ((struct speed_params *s));
- #if WANT_OLD_FFT_FULL
- double speed_mpn_mul_fft_full __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mul_fft_full_sqr __GMP_PROTO ((struct speed_params *s));
- #endif
- double speed_mpn_nussbaumer_mul __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_nussbaumer_mul_sqr __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mul_n __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mul_n_sqr __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mullo_n __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mullo_basecase __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_nand_n __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_nior_n __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_popcount __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_preinv_divrem_1 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_preinv_divrem_1f __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_preinv_mod_1 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_sbpi1_div_qr __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_dcpi1_div_qr __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_sbpi1_divappr_q __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_dcpi1_divappr_q __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mu_div_qr __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mu_divappr_q __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mupi_div_qr __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mu_div_q __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_sbpi1_bdiv_qr __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_dcpi1_bdiv_qr __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_sbpi1_bdiv_q __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_dcpi1_bdiv_q __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mu_bdiv_q __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mu_bdiv_qr __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_invert __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_invertappr __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_ni_invertappr __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_binvert __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_redc_1 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_redc_2 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_redc_n __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_rsblsh1_n __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_rsblsh2_n __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_rsh1add_n __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_rsh1sub_n __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_rshift __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_sb_divrem_m3 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_sb_divrem_m3_div __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_sb_divrem_m3_inv __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_set_str __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_bc_set_str __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_dc_set_str __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_set_str_pre __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_sqr_basecase __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_sqr_diagonal __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_sqr __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_sqrtrem __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_rootrem __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_sub_n __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_sublsh1_n __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_sublsh2_n __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_submul_1 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_toom2_sqr __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_toom3_sqr __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_toom4_sqr __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_toom6_sqr __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_toom8_sqr __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_toom22_mul __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_toom33_mul __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_toom44_mul __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_toom6h_mul __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_toom8h_mul __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_toom32_mul __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_toom42_mul __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_toom43_mul __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_toom63_mul __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_toom32_for_toom43_mul __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_toom43_for_toom32_mul __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_toom32_for_toom53_mul __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_toom53_for_toom32_mul __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_toom42_for_toom53_mul __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_toom53_for_toom42_mul __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mulmod_bnm1 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_bc_mulmod_bnm1 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_mulmod_bnm1_rounded __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_sqrmod_bnm1 __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_udiv_qrnnd __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_udiv_qrnnd_r __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_umul_ppmm __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_umul_ppmm_r __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_xnor_n __GMP_PROTO ((struct speed_params *s));
- double speed_mpn_xor_n __GMP_PROTO ((struct speed_params *s));
- double speed_MPN_ZERO __GMP_PROTO ((struct speed_params *s));
- double speed_mpq_init_clear __GMP_PROTO ((struct speed_params *s));
- double speed_mpz_add __GMP_PROTO ((struct speed_params *s));
- double speed_mpz_bin_uiui __GMP_PROTO ((struct speed_params *s));
- double speed_mpz_fac_ui __GMP_PROTO ((struct speed_params *s));
- double speed_mpz_fib_ui __GMP_PROTO ((struct speed_params *s));
- double speed_mpz_fib2_ui __GMP_PROTO ((struct speed_params *s));
- double speed_mpz_init_clear __GMP_PROTO ((struct speed_params *s));
- double speed_mpz_init_realloc_clear __GMP_PROTO ((struct speed_params *s));
- double speed_mpz_jacobi __GMP_PROTO ((struct speed_params *s));
- double speed_mpz_lucnum_ui __GMP_PROTO ((struct speed_params *s));
- double speed_mpz_lucnum2_ui __GMP_PROTO ((struct speed_params *s));
- double speed_mpz_mod __GMP_PROTO ((struct speed_params *s));
- double speed_mpz_powm __GMP_PROTO ((struct speed_params *s));
- double speed_mpz_powm_mod __GMP_PROTO ((struct speed_params *s));
- double speed_mpz_powm_redc __GMP_PROTO ((struct speed_params *s));
- double speed_mpz_powm_ui __GMP_PROTO ((struct speed_params *s));
- double speed_mpz_urandomb __GMP_PROTO ((struct speed_params *s));
- double speed_gmp_randseed __GMP_PROTO ((struct speed_params *s));
- double speed_gmp_randseed_ui __GMP_PROTO ((struct speed_params *s));
- double speed_noop __GMP_PROTO ((struct speed_params *s));
- double speed_noop_wxs __GMP_PROTO ((struct speed_params *s));
- double speed_noop_wxys __GMP_PROTO ((struct speed_params *s));
- double speed_operator_div __GMP_PROTO ((struct speed_params *s));
- double speed_operator_mod __GMP_PROTO ((struct speed_params *s));
- double speed_udiv_qrnnd __GMP_PROTO ((struct speed_params *s));
- double speed_udiv_qrnnd_preinv1 __GMP_PROTO ((struct speed_params *s));
- double speed_udiv_qrnnd_preinv2 __GMP_PROTO ((struct speed_params *s));
- double speed_udiv_qrnnd_c __GMP_PROTO ((struct speed_params *s));
- double speed_umul_ppmm __GMP_PROTO ((struct speed_params *s));
- /* Prototypes for other routines */
- /* low 32-bits in p[0], high 32-bits in p[1] */
- void speed_cyclecounter __GMP_PROTO ((unsigned p[2]));
- void mftb_function __GMP_PROTO ((unsigned p[2]));
- /* In i386 gcc -fPIC, ebx is a fixed register and can't be declared a dummy
- output or a clobber for the cpuid, hence an explicit save and restore. A
- clobber as such doesn't provoke an error unfortunately (gcc 3.0), so use
- the dummy output style in non-PIC, so there's an error if somehow -fPIC
- is used without a -DPIC to tell us about it. */
- #if defined(__GNUC__) && ! defined (NO_ASM)
- && (defined (__i386__) || defined (__i486__))
- #ifdef PIC
- #define speed_cyclecounter(p)
- do {
- int __speed_cyclecounter__save_ebx;
- int __speed_cyclecounter__dummy;
- __asm__ __volatile__ ("movl %%ebx, %1n"
- "cpuidn"
- "movl %1, %%ebxn"
- "rdtsc"
- : "=a" ((p)[0]),
- "=&rm" (__speed_cyclecounter__save_ebx),
- "=c" (__speed_cyclecounter__dummy),
- "=d" ((p)[1]));
- } while (0)
- #else
- #define speed_cyclecounter(p)
- do {
- int __speed_cyclecounter__dummy1;
- int __speed_cyclecounter__dummy2;
- __asm__ __volatile__ ("cpuidn"
- "rdtsc"
- : "=a" ((p)[0]),
- "=b" (__speed_cyclecounter__dummy1),
- "=c" (__speed_cyclecounter__dummy2),
- "=d" ((p)[1]));
- } while (0)
- #endif
- #endif
- double speed_cyclecounter_diff __GMP_PROTO ((const unsigned [2], const unsigned [2]));
- int gettimeofday_microseconds_p __GMP_PROTO ((void));
- int getrusage_microseconds_p __GMP_PROTO ((void));
- int cycles_works_p __GMP_PROTO ((void));
- long clk_tck __GMP_PROTO ((void));
- double freq_measure __GMP_PROTO ((const char *, double (*)(void)));
- int double_cmp_ptr __GMP_PROTO ((const double *, const double *));
- void pentium_wbinvd __GMP_PROTO ((void));
- typedef int (*qsort_function_t) __GMP_PROTO ((const void *, const void *));
- void noop __GMP_PROTO ((void));
- void noop_1 __GMP_PROTO ((mp_limb_t));
- void noop_wxs __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
- void noop_wxys __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
- void mpn_cache_fill __GMP_PROTO ((mp_srcptr, mp_size_t));
- void mpn_cache_fill_dummy __GMP_PROTO ((mp_limb_t));
- void speed_cache_fill __GMP_PROTO ((struct speed_params *));
- void speed_operand_src __GMP_PROTO ((struct speed_params *, mp_ptr, mp_size_t));
- void speed_operand_dst __GMP_PROTO ((struct speed_params *, mp_ptr, mp_size_t));
- extern int speed_option_addrs;
- extern int speed_option_verbose;
- void speed_option_set __GMP_PROTO((const char *));
- mp_limb_t mpn_divrem_1_div __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
- mp_limb_t mpn_divrem_1_inv __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
- mp_limb_t mpn_divrem_2_div __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr));
- mp_limb_t mpn_divrem_2_inv __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr));
- int mpn_jacobi_base_1 __GMP_PROTO ((mp_limb_t, mp_limb_t, int));
- int mpn_jacobi_base_2 __GMP_PROTO ((mp_limb_t, mp_limb_t, int));
- int mpn_jacobi_base_3 __GMP_PROTO ((mp_limb_t, mp_limb_t, int));
- mp_limb_t mpn_mod_1_div __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t));
- mp_limb_t mpn_mod_1_inv __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t));
- mp_size_t mpn_gcd_binary
- __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
- mp_size_t mpn_gcd_accel
- __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
- mp_size_t mpn_gcdext_one_double
- __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
- mp_size_t mpn_gcdext_one_single
- __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
- mp_size_t mpn_gcdext_single
- __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
- mp_size_t mpn_gcdext_double
- __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
- mp_limb_t mpn_sb_divrem_mn_div __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
- mp_limb_t mpn_sb_divrem_mn_inv __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
- mp_size_t mpn_set_str_basecase __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, int));
- void mpn_pre_set_str __GMP_PROTO ((mp_ptr, unsigned char *, size_t, powers_t *, mp_ptr));
- void mpz_powm_mod __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr));
- void mpz_powm_redc __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr));
- int speed_routine_count_zeros_setup
- __GMP_PROTO ((struct speed_params *, mp_ptr, int, int));
- /* "get" is called repeatedly until it ticks over, just in case on a fast
- processor it takes less than a microsecond, though this is probably
- unlikely if it's a system call.
- speed_cyclecounter is called on the same side of the "get" for the start
- and end measurements. It doesn't matter how long it takes from the "get"
- sample to the cycles sample, since that period will cancel out in the
- difference calculation (assuming it's the same each time).
- Letting the test run for more than a process time slice is probably only
- going to reduce accuracy, especially for getrusage when the cycle counter
- is real time, or for gettimeofday if the cycle counter is in fact process
- time. Use CLK_TCK/2 as a reasonable stop.
- It'd be desirable to be quite accurate here. The default speed_precision
- for a cycle counter is 10000 cycles, so to mix that with getrusage or
- gettimeofday the frequency should be at least that accurate. But running
- measurements for 10000 microseconds (or more) is too long. Be satisfied
- with just a half clock tick (5000 microseconds usually). */
- #define FREQ_MEASURE_ONE(name, type, get, getc, sec, usec)
- do {
- type st1, st, et1, et;
- unsigned sc[2], ec[2];
- long dt, half_tick;
- double dc, cyc;
-
- half_tick = (1000000L / clk_tck()) / 2;
-
- get (st1);
- do {
- get (st);
- } while (usec(st) == usec(st1) && sec(st) == sec(st1));
-
- getc (sc);
-
- for (;;)
- {
- get (et1);
- do {
- get (et);
- } while (usec(et) == usec(et1) && sec(et) == sec(et1));
-
- getc (ec);
-
- dc = speed_cyclecounter_diff (ec, sc);
-
- /* allow secs to cancel before multiplying */
- dt = sec(et) - sec(st);
- dt = dt * 1000000L + (usec(et) - usec(st));
-
- if (dt >= half_tick)
- break;
- }
-
- cyc = dt * 1e-6 / dc;
-
- if (speed_option_verbose >= 2)
- printf ("freq_measure_%s_one() dc=%.6g dt=%ld cyc=%.6gn",
- name, dc, dt, cyc);
-
- return dt * 1e-6 / dc;
-
- } while (0)
- /* The measuring routines use these big macros to save duplication for
- similar forms. They also get used for some automatically generated
- measuring of new implementations of functions.
- Having something like SPEED_ROUTINE_BINARY_N as a subroutine accepting a
- function pointer is considered undesirable since it's not the way a
- normal application will be calling, and some processors might do
- different things with an indirect call, like not branch predicting, or
- doing a full pipe flush. At least some of the "functions" measured are
- actually macros too.
- The net effect is to bloat the object code, possibly in a big way, but
- only what's being measured is being run, so that doesn't matter.
- The loop forms don't try to cope with __GMP_ATTRIBUTE_PURE or
- ATTRIBUTE_CONST on the called functions. Adding a cast to a non-pure
- function pointer doesn't work in gcc 3.2. Using an actual non-pure
- function pointer variable works, but stands a real risk of a
- non-optimizing compiler generating unnecessary overheads in the call.
- Currently the best idea is not to use those attributes for a timing
- program build. __GMP_NO_ATTRIBUTE_CONST_PURE will tell gmp.h and
- gmp-impl.h to omit them from routines there. */
- #define SPEED_RESTRICT_COND(cond) if (!(cond)) return -1.0;
- /* For mpn_copy or similar. */
- #define SPEED_ROUTINE_MPN_COPY(function)
- {
- mp_ptr wp;
- unsigned i;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 0);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);
-
- speed_operand_src (s, s->xp, s->size);
- speed_operand_dst (s, wp, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- function (wp, s->xp, s->size);
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_COPYC(function)
- {
- mp_ptr wp;
- unsigned i;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 0);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);
-
- speed_operand_src (s, s->xp, s->size);
- speed_operand_dst (s, wp, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- function (wp, s->xp, s->size, 0);
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- /* s->size is still in limbs, and it's limbs which are copied, but
- "function" takes a size in bytes not limbs. */
- #define SPEED_ROUTINE_MPN_COPY_BYTES(function)
- {
- mp_ptr wp;
- unsigned i;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 0);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);
-
- speed_operand_src (s, s->xp, s->size);
- speed_operand_dst (s, wp, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- function (wp, s->xp, s->size * BYTES_PER_MP_LIMB);
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- /* For mpn_add_n, mpn_sub_n, or similar. */
- #define SPEED_ROUTINE_MPN_BINARY_N_CALL(call)
- {
- mp_ptr wp;
- mp_ptr xp, yp;
- unsigned i;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);
-
- xp = s->xp;
- yp = s->yp;
-
- if (s->r == 0) ;
- else if (s->r == 1) { xp = wp; }
- else if (s->r == 2) { yp = wp; }
- else if (s->r == 3) { xp = wp; yp = wp; }
- else if (s->r == 4) { yp = xp; }
- else {
- TMP_FREE;
- return -1.0;
- }
-
- /* initialize wp if operand overlap */
- if (xp == wp || yp == wp)
- MPN_COPY (wp, s->xp, s->size);
-
- speed_operand_src (s, xp, s->size);
- speed_operand_src (s, yp, s->size);
- speed_operand_dst (s, wp, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- call;
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- /* For mpn_add_n, mpn_sub_n, or similar. */
- #define SPEED_ROUTINE_MPN_ADDSUB_N_CALL(call)
- {
- mp_ptr ap, sp;
- mp_ptr xp, yp;
- unsigned i;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (ap, s->size, s->align_wp);
- SPEED_TMP_ALLOC_LIMBS (sp, s->size, s->align_wp);
-
- xp = s->xp;
- yp = s->yp;
-
- if ((s->r & 1) != 0) { xp = ap; }
- if ((s->r & 2) != 0) { yp = ap; }
- if ((s->r & 4) != 0) { xp = sp; }
- if ((s->r & 8) != 0) { yp = sp; }
- if ((s->r & 3) == 3 || (s->r & 12) == 12)
- {
- TMP_FREE;
- return -1.0;
- }
-
- /* initialize ap if operand overlap */
- if (xp == ap || yp == ap)
- MPN_COPY (ap, s->xp, s->size);
- /* initialize sp if operand overlap */
- if (xp == sp || yp == sp)
- MPN_COPY (sp, s->xp, s->size);
-
- speed_operand_src (s, xp, s->size);
- speed_operand_src (s, yp, s->size);
- speed_operand_dst (s, ap, s->size);
- speed_operand_dst (s, sp, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- call;
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_BINARY_N(function)
- SPEED_ROUTINE_MPN_BINARY_N_CALL ((*function) (wp, xp, yp, s->size))
- #define SPEED_ROUTINE_MPN_BINARY_NC(function)
- SPEED_ROUTINE_MPN_BINARY_N_CALL ((*function) (wp, xp, yp, s->size, 0))
- /* For mpn_lshift, mpn_rshift, mpn_mul_1, with r, or similar. */
- #define SPEED_ROUTINE_MPN_UNARY_1_CALL(call)
- {
- mp_ptr wp;
- unsigned i;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);
-
- speed_operand_src (s, s->xp, s->size);
- speed_operand_dst (s, wp, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- call;
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_UNARY_1(function)
- SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r))
- #define SPEED_ROUTINE_MPN_UNARY_1C(function)
- SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r, 0))
- /* FIXME: wp is uninitialized here, should start it off from xp */
- #define SPEED_ROUTINE_MPN_UNARY_1_INPLACE(function)
- SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, wp, s->size, s->r))
- #define SPEED_ROUTINE_MPN_DIVEXACT_1(function)
- SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r))
- #define SPEED_ROUTINE_MPN_BDIV_Q_1(function)
- SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r))
- #define SPEED_ROUTINE_MPN_PI1_BDIV_Q_1_CALL(call)
- {
- unsigned shift;
- mp_limb_t dinv;
-
- SPEED_RESTRICT_COND (s->size > 0);
- SPEED_RESTRICT_COND (s->r != 0);
-
- count_trailing_zeros (shift, s->r);
- binvert_limb (dinv, s->r >> shift);
-
- SPEED_ROUTINE_MPN_UNARY_1_CALL (call);
- }
- #define SPEED_ROUTINE_MPN_PI1_BDIV_Q_1(function)
- SPEED_ROUTINE_MPN_PI1_BDIV_Q_1_CALL
- ((*function) (wp, s->xp, s->size, s->r, dinv, shift))
- #define SPEED_ROUTINE_MPN_BDIV_DBM1C(function)
- SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r, 0))
- #define SPEED_ROUTINE_MPN_DIVREM_1(function)
- SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, 0, s->xp, s->size, s->r))
- #define SPEED_ROUTINE_MPN_DIVREM_1C(function)
- SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, 0, s->xp, s->size, s->r, 0))
- #define SPEED_ROUTINE_MPN_DIVREM_1F(function)
- SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->size, s->xp, 0, s->r))
- #define SPEED_ROUTINE_MPN_DIVREM_1CF(function)
- SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->size, s->xp, 0, s->r, 0))
- #define SPEED_ROUTINE_MPN_PREINV_DIVREM_1_CALL(call)
- {
- unsigned shift;
- mp_limb_t dinv;
-
- SPEED_RESTRICT_COND (s->size >= 0);
- SPEED_RESTRICT_COND (s->r != 0);
-
- count_leading_zeros (shift, s->r);
- invert_limb (dinv, s->r << shift);
-
- SPEED_ROUTINE_MPN_UNARY_1_CALL (call);
- }
- #define SPEED_ROUTINE_MPN_PREINV_DIVREM_1(function)
- SPEED_ROUTINE_MPN_PREINV_DIVREM_1_CALL
- ((*function) (wp, 0, s->xp, s->size, s->r, dinv, shift))
- /* s->size limbs worth of fraction part */
- #define SPEED_ROUTINE_MPN_PREINV_DIVREM_1F(function)
- SPEED_ROUTINE_MPN_PREINV_DIVREM_1_CALL
- ((*function) (wp, s->size, s->xp, 0, s->r, dinv, shift))
- /* s->r is duplicated to form the multiplier, defaulting to
- MP_BASES_BIG_BASE_10. Not sure if that's particularly useful, but at
- least it provides some control. */
- #define SPEED_ROUTINE_MPN_UNARY_N(function,N)
- {
- mp_ptr wp;
- mp_size_t wn;
- unsigned i;
- double t;
- mp_limb_t yp[N];
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= N);
-
- TMP_MARK;
- wn = s->size + N-1;
- SPEED_TMP_ALLOC_LIMBS (wp, wn, s->align_wp);
- for (i = 0; i < N; i++)
- yp[i] = (s->r != 0 ? s->r : MP_BASES_BIG_BASE_10);
-
- speed_operand_src (s, s->xp, s->size);
- speed_operand_src (s, yp, (mp_size_t) N);
- speed_operand_dst (s, wp, wn);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- function (wp, s->xp, s->size, yp);
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_UNARY_2(function)
- SPEED_ROUTINE_MPN_UNARY_N (function, 2)
- #define SPEED_ROUTINE_MPN_UNARY_3(function)
- SPEED_ROUTINE_MPN_UNARY_N (function, 3)
- #define SPEED_ROUTINE_MPN_UNARY_4(function)
- SPEED_ROUTINE_MPN_UNARY_N (function, 4)
- #define SPEED_ROUTINE_MPN_UNARY_5(function)
- SPEED_ROUTINE_MPN_UNARY_N (function, 5)
- #define SPEED_ROUTINE_MPN_UNARY_6(function)
- SPEED_ROUTINE_MPN_UNARY_N (function, 6)
- #define SPEED_ROUTINE_MPN_UNARY_7(function)
- SPEED_ROUTINE_MPN_UNARY_N (function, 7)
- #define SPEED_ROUTINE_MPN_UNARY_8(function)
- SPEED_ROUTINE_MPN_UNARY_N (function, 8)
- /* For mpn_mul, mpn_mul_basecase, xsize=r, ysize=s->size. */
- #define SPEED_ROUTINE_MPN_MUL(function)
- {
- mp_ptr wp, xp;
- mp_size_t size1;
- unsigned i;
- double t;
- TMP_DECL;
-
- size1 = (s->r == 0 ? s->size : s->r);
-
- SPEED_RESTRICT_COND (s->size >= 1);
- SPEED_RESTRICT_COND (size1 >= s->size);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (wp, size1 + s->size, s->align_wp);
- SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp);
-
- speed_operand_src (s, xp, size1);
- speed_operand_src (s, s->yp, s->size);
- speed_operand_dst (s, wp, size1 + s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- function (wp, xp, size1, s->yp, s->size);
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_MUL_N_CALL(call)
- {
- mp_ptr wp;
- unsigned i;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (wp, 2*s->size, s->align_wp);
-
- speed_operand_src (s, s->xp, s->size);
- speed_operand_src (s, s->yp, s->size);
- speed_operand_dst (s, wp, 2*s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- call;
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_MUL_N(function)
- SPEED_ROUTINE_MPN_MUL_N_CALL (function (wp, s->xp, s->yp, s->size));
- #define SPEED_ROUTINE_MPN_MULLO_N_CALL(call)
- {
- mp_ptr wp;
- unsigned i;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);
-
- speed_operand_src (s, s->xp, s->size);
- speed_operand_src (s, s->yp, s->size);
- speed_operand_dst (s, wp, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- call;
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_MULLO_N(function)
- SPEED_ROUTINE_MPN_MULLO_N_CALL (function (wp, s->xp, s->yp, s->size));
- /* For mpn_mul_basecase, xsize=r, ysize=s->size. */
- #define SPEED_ROUTINE_MPN_MULLO_BASECASE(function)
- {
- mp_ptr wp;
- unsigned i;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);
-
- speed_operand_src (s, s->xp, s->size);
- speed_operand_src (s, s->yp, s->size);
- speed_operand_dst (s, wp, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- function (wp, s->xp, s->yp, s->size);
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL(call)
- {
- mp_ptr wp, tp;
- unsigned i;
- double t;
- mp_size_t itch;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- itch = mpn_mulmod_bnm1_itch (s->size, s->size, s->size);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (wp, 2 * s->size, s->align_wp);
- SPEED_TMP_ALLOC_LIMBS (tp, itch, s->align_wp2);
-
- speed_operand_src (s, s->xp, s->size);
- speed_operand_src (s, s->yp, s->size);
- speed_operand_dst (s, wp, 2 * s->size);
- speed_operand_dst (s, tp, itch);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- call;
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_MULMOD_BNM1_ROUNDED(function)
- {
- mp_ptr wp, tp;
- unsigned i;
- double t;
- mp_size_t size, itch;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- size = mpn_mulmod_bnm1_next_size (s->size);
- itch = mpn_mulmod_bnm1_itch (size, size, size);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (wp, size, s->align_wp);
- SPEED_TMP_ALLOC_LIMBS (tp, itch, s->align_wp2);
-
- speed_operand_src (s, s->xp, s->size);
- speed_operand_src (s, s->yp, s->size);
- speed_operand_dst (s, wp, size);
- speed_operand_dst (s, tp, itch);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- function (wp, size, s->xp, s->size, s->yp, s->size, tp);
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_MUL_N_TSPACE(call, tsize, minsize)
- {
- mp_ptr wp, tspace;
- unsigned i;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= minsize);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (wp, 2*s->size, s->align_wp);
- SPEED_TMP_ALLOC_LIMBS (tspace, tsize, s->align_wp2);
-
- speed_operand_src (s, s->xp, s->size);
- speed_operand_src (s, s->yp, s->size);
- speed_operand_dst (s, wp, 2*s->size);
- speed_operand_dst (s, tspace, tsize);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- call;
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_TOOM22_MUL_N(function)
- SPEED_ROUTINE_MPN_MUL_N_TSPACE
- (function (wp, s->xp, s->size, s->yp, s->size, tspace),
- mpn_toom22_mul_itch (s->size, s->size),
- MPN_TOOM22_MUL_MINSIZE)
- #define SPEED_ROUTINE_MPN_TOOM33_MUL_N(function)
- SPEED_ROUTINE_MPN_MUL_N_TSPACE
- (function (wp, s->xp, s->size, s->yp, s->size, tspace),
- mpn_toom33_mul_itch (s->size, s->size),
- MPN_TOOM33_MUL_MINSIZE)
- #define SPEED_ROUTINE_MPN_TOOM44_MUL_N(function)
- SPEED_ROUTINE_MPN_MUL_N_TSPACE
- (function (wp, s->xp, s->size, s->yp, s->size, tspace),
- mpn_toom44_mul_itch (s->size, s->size),
- MPN_TOOM44_MUL_MINSIZE)
- #define SPEED_ROUTINE_MPN_TOOM6H_MUL_N(function)
- SPEED_ROUTINE_MPN_MUL_N_TSPACE
- (function (wp, s->xp, s->size, s->yp, s->size, tspace),
- mpn_toom6h_mul_itch (s->size, s->size),
- MPN_TOOM6H_MUL_MINSIZE)
- #define SPEED_ROUTINE_MPN_TOOM8H_MUL_N(function)
- SPEED_ROUTINE_MPN_MUL_N_TSPACE
- (function (wp, s->xp, s->size, s->yp, s->size, tspace),
- mpn_toom8h_mul_itch (s->size, s->size),
- MPN_TOOM8H_MUL_MINSIZE)
- #define SPEED_ROUTINE_MPN_TOOM32_MUL(function)
- SPEED_ROUTINE_MPN_MUL_N_TSPACE
- (function (wp, s->xp, s->size, s->yp, 2*s->size/3, tspace),
- mpn_toom32_mul_itch (s->size, 2*s->size/3),
- MPN_TOOM32_MUL_MINSIZE)
- #define SPEED_ROUTINE_MPN_TOOM42_MUL(function)
- SPEED_ROUTINE_MPN_MUL_N_TSPACE
- (function (wp, s->xp, s->size, s->yp, s->size/2, tspace),
- mpn_toom42_mul_itch (s->size, s->size/2),
- MPN_TOOM42_MUL_MINSIZE)
- #define SPEED_ROUTINE_MPN_TOOM43_MUL(function)
- SPEED_ROUTINE_MPN_MUL_N_TSPACE
- (function (wp, s->xp, s->size, s->yp, s->size*3/4, tspace),
- mpn_toom43_mul_itch (s->size, s->size*3/4),
- MPN_TOOM43_MUL_MINSIZE)
- #define SPEED_ROUTINE_MPN_TOOM63_MUL(function)
- SPEED_ROUTINE_MPN_MUL_N_TSPACE
- (function (wp, s->xp, s->size, s->yp, s->size/2, tspace),
- mpn_toom63_mul_itch (s->size, s->size/2),
- MPN_TOOM63_MUL_MINSIZE)
- #define SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL(function)
- SPEED_ROUTINE_MPN_MUL_N_TSPACE
- (function (wp, s->xp, s->size, s->yp, 17*s->size/24, tspace),
- mpn_toom32_mul_itch (s->size, 17*s->size/24),
- MPN_TOOM32_MUL_MINSIZE)
- #define SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM32_MUL(function)
- SPEED_ROUTINE_MPN_MUL_N_TSPACE
- (function (wp, s->xp, s->size, s->yp, 17*s->size/24, tspace),
- mpn_toom43_mul_itch (s->size, 17*s->size/24),
- MPN_TOOM43_MUL_MINSIZE)
- #define SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM53_MUL(function)
- SPEED_ROUTINE_MPN_MUL_N_TSPACE
- (function (wp, s->xp, s->size, s->yp, 19*s->size/30, tspace),
- mpn_toom32_mul_itch (s->size, 19*s->size/30),
- MPN_TOOM32_MUL_MINSIZE)
- #define SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM32_MUL(function)
- SPEED_ROUTINE_MPN_MUL_N_TSPACE
- (function (wp, s->xp, s->size, s->yp, 19*s->size/30, tspace),
- mpn_toom53_mul_itch (s->size, 19*s->size/30),
- MPN_TOOM53_MUL_MINSIZE)
- #define SPEED_ROUTINE_MPN_TOOM42_FOR_TOOM53_MUL(function)
- SPEED_ROUTINE_MPN_MUL_N_TSPACE
- (function (wp, s->xp, s->size, s->yp, 11*s->size/20, tspace),
- mpn_toom42_mul_itch (s->size, 11*s->size/20),
- MPN_TOOM42_MUL_MINSIZE)
- #define SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM42_MUL(function)
- SPEED_ROUTINE_MPN_MUL_N_TSPACE
- (function (wp, s->xp, s->size, s->yp, 11*s->size/20, tspace),
- mpn_toom53_mul_itch (s->size, 11*s->size/20),
- MPN_TOOM53_MUL_MINSIZE)
- #define SPEED_ROUTINE_MPN_SQR_CALL(call)
- {
- mp_ptr wp;
- unsigned i;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (wp, 2*s->size, s->align_wp);
-
- speed_operand_src (s, s->xp, s->size);
- speed_operand_dst (s, wp, 2*s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- call;
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_SQR(function)
- SPEED_ROUTINE_MPN_SQR_CALL (function (wp, s->xp, s->size))
- #define SPEED_ROUTINE_MPN_SQR_DIAGONAL(function)
- SPEED_ROUTINE_MPN_SQR (function)
- #define SPEED_ROUTINE_MPN_SQR_TSPACE(call, tsize, minsize)
- {
- mp_ptr wp, tspace;
- unsigned i;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= minsize);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (wp, 2*s->size, s->align_wp);
- SPEED_TMP_ALLOC_LIMBS (tspace, tsize, s->align_wp2);
-
- speed_operand_src (s, s->xp, s->size);
- speed_operand_dst (s, wp, 2*s->size);
- speed_operand_dst (s, tspace, tsize);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- call;
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_TOOM2_SQR(function)
- SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace),
- mpn_toom2_sqr_itch (s->size),
- MPN_TOOM2_SQR_MINSIZE)
- #define SPEED_ROUTINE_MPN_TOOM3_SQR(function)
- SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace),
- mpn_toom3_sqr_itch (s->size),
- MPN_TOOM3_SQR_MINSIZE)
- #define SPEED_ROUTINE_MPN_TOOM4_SQR(function)
- SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace),
- mpn_toom4_sqr_itch (s->size),
- MPN_TOOM4_SQR_MINSIZE)
- #define SPEED_ROUTINE_MPN_TOOM6_SQR(function)
- SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace),
- mpn_toom6_sqr_itch (s->size),
- MPN_TOOM6_SQR_MINSIZE)
- #define SPEED_ROUTINE_MPN_TOOM8_SQR(function)
- SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace),
- mpn_toom8_sqr_itch (s->size),
- MPN_TOOM8_SQR_MINSIZE)
- #define SPEED_ROUTINE_MPN_MOD_CALL(call)
- {
- unsigned i;
-
- SPEED_RESTRICT_COND (s->size >= 0);
-
- speed_operand_src (s, s->xp, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- call;
- while (--i != 0);
-
- return speed_endtime ();
- }
- #define SPEED_ROUTINE_MPN_MOD_1(function)
- SPEED_ROUTINE_MPN_MOD_CALL ((*function) (s->xp, s->size, s->r))
- #define SPEED_ROUTINE_MPN_MOD_1C(function)
- SPEED_ROUTINE_MPN_MOD_CALL ((*function)(s->xp, s->size, s->r, CNST_LIMB(0)))
- #define SPEED_ROUTINE_MPN_MODEXACT_1_ODD(function)
- SPEED_ROUTINE_MPN_MOD_CALL (function (s->xp, s->size, s->r));
- #define SPEED_ROUTINE_MPN_MODEXACT_1C_ODD(function)
- SPEED_ROUTINE_MPN_MOD_CALL (function (s->xp, s->size, s->r, CNST_LIMB(0)));
- #define SPEED_ROUTINE_MPN_MOD_34LSUB1(function)
- SPEED_ROUTINE_MPN_MOD_CALL ((*function) (s->xp, s->size))
- #define SPEED_ROUTINE_MPN_PREINV_MOD_1(function)
- {
- unsigned i;
- mp_limb_t inv;
-
- SPEED_RESTRICT_COND (s->size >= 0);
- SPEED_RESTRICT_COND (s->r & GMP_LIMB_HIGHBIT);
-
- invert_limb (inv, s->r);
- speed_operand_src (s, s->xp, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- (*function) (s->xp, s->size, s->r, inv);
- while (--i != 0);
-
- return speed_endtime ();
- }
- #define SPEED_ROUTINE_MPN_MOD_1_1(function,pfunc)
- {
- unsigned i;
- mp_limb_t inv[4];
-
- SPEED_RESTRICT_COND (s->size >= 2);
-
- mpn_mod_1_1p_cps (inv, s->r);
- speed_operand_src (s, s->xp, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do {
- pfunc (inv, s->r);
- function (s->xp, s->size, s->r, inv);
- } while (--i != 0);
-
- return speed_endtime ();
- }
- #define SPEED_ROUTINE_MPN_MOD_1_N(function,pfunc,N)
- {
- unsigned i;
- mp_limb_t inv[N+3];
-
- SPEED_RESTRICT_COND (s->size >= 1);
- SPEED_RESTRICT_COND (s->r <= ~(mp_limb_t)0 / N);
-
- speed_operand_src (s, s->xp, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do {
- pfunc (inv, s->r);
- function (s->xp, s->size, s->r, inv);
- } while (--i != 0);
-
- return speed_endtime ();
- }
- /* A division of 2*s->size by s->size limbs */
- #define SPEED_ROUTINE_MPN_DC_DIVREM_CALL(call)
- {
- unsigned i;
- mp_ptr a, d, q, r;
- double t;
- gmp_pi1_t dinv;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (a, 2*s->size, s->align_xp);
- SPEED_TMP_ALLOC_LIMBS (d, s->size, s->align_yp);
- SPEED_TMP_ALLOC_LIMBS (q, s->size+1, s->align_wp);
- SPEED_TMP_ALLOC_LIMBS (r, s->size, s->align_wp2);
-
- MPN_COPY (a, s->xp, s->size);
- MPN_COPY (a+s->size, s->xp, s->size);
-
- MPN_COPY (d, s->yp, s->size);
-
- /* normalize the data */
- d[s->size-1] |= GMP_NUMB_HIGHBIT;
- a[2*s->size-1] = d[s->size-1] - 1;
-
- invert_pi1 (dinv, d[s->size-1], d[s->size-2]);
-
- speed_operand_src (s, a, 2*s->size);
- speed_operand_src (s, d, s->size);
- speed_operand_dst (s, q, s->size+1);
- speed_operand_dst (s, r, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- call;
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- /* A remainder 2*s->size by s->size limbs */
- #define SPEED_ROUTINE_MPZ_MOD(function)
- {
- unsigned i;
- mpz_t a, d, r;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- mpz_init_set_n (d, s->yp, s->size);
-
- /* high part less than d, low part a duplicate copied in */
- mpz_init_set_n (a, s->xp, s->size);
- mpz_mod (a, a, d);
- mpz_mul_2exp (a, a, GMP_LIMB_BITS * s->size);
- MPN_COPY (PTR(a), s->xp, s->size);
-
- mpz_init (r);
-
- speed_operand_src (s, PTR(a), SIZ(a));
- speed_operand_src (s, PTR(d), SIZ(d));
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- function (r, a, d);
- while (--i != 0);
- return speed_endtime ();
- }
- #define SPEED_ROUTINE_MPN_PI1_DIV(function, INV, DMIN, QMIN)
- {
- unsigned i;
- mp_ptr dp, tp, ap, qp;
- gmp_pi1_t inv;
- double t;
- mp_size_t size1;
- TMP_DECL;
-
- size1 = (s->r == 0 ? 2 * s->size : s->r);
-
- SPEED_RESTRICT_COND (s->size >= DMIN);
- SPEED_RESTRICT_COND (size1 - s->size >= QMIN);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (ap, size1, s->align_xp);
- SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);
- SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp);
- SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_wp2);
-
- /* we don't fill in dividend completely when size1 > s->size */
- MPN_COPY (ap, s->xp, s->size);
- MPN_COPY (ap + size1 - s->size, s->xp, s->size);
-
- MPN_COPY (dp, s->yp, s->size);
-
- /* normalize the data */
- dp[s->size-1] |= GMP_NUMB_HIGHBIT;
- ap[size1 - 1] = dp[s->size - 1] - 1;
-
- invert_pi1 (inv, dp[s->size-1], dp[s->size-2]);
-
- speed_operand_src (s, ap, size1);
- speed_operand_dst (s, tp, size1);
- speed_operand_src (s, dp, s->size);
- speed_operand_dst (s, qp, size1 - s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do {
- MPN_COPY (tp, ap, size1);
- function (qp, tp, size1, dp, s->size, INV);
- } while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_MU_DIV_Q(function,itchfn)
- {
- unsigned i;
- mp_ptr dp, tp, qp, scratch;
- double t;
- mp_size_t itch;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 2);
-
- itch = itchfn (2 * s->size, s->size, 0);
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);
- SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp);
- SPEED_TMP_ALLOC_LIMBS (tp, 2 * s->size, s->align_xp);
- SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2);
-
- MPN_COPY (tp, s->xp, s->size);
- MPN_COPY (tp+s->size, s->xp, s->size);
-
- /* normalize the data */
- dp[s->size-1] |= GMP_NUMB_HIGHBIT;
- tp[2*s->size-1] = dp[s->size-1] - 1;
-
- speed_operand_dst (s, qp, s->size);
- speed_operand_src (s, tp, 2 * s->size);
- speed_operand_src (s, dp, s->size);
- speed_operand_dst (s, scratch, itch);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do {
- function (qp, tp, 2 * s->size, dp, s->size, scratch);
- } while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_MU_DIV_QR(function,itchfn)
- {
- unsigned i;
- mp_ptr dp, tp, qp, rp, scratch;
- double t;
- mp_size_t size1, itch;
- TMP_DECL;
-
- size1 = (s->r == 0 ? 2 * s->size : s->r);
-
- SPEED_RESTRICT_COND (s->size >= 2);
- SPEED_RESTRICT_COND (size1 >= s->size);
-
- itch = itchfn (size1, s->size, 0);
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);
- SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp);
- SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_xp);
- SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2);
- SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); /* alignment? */
-
- /* we don't fill in dividend completely when size1 > s->size */
- MPN_COPY (tp, s->xp, s->size);
- MPN_COPY (tp + size1 - s->size, s->xp, s->size);
-
- MPN_COPY (dp, s->yp, s->size);
-
- /* normalize the data */
- dp[s->size-1] |= GMP_NUMB_HIGHBIT;
- tp[size1 - 1] = dp[s->size - 1] - 1;
-
- speed_operand_dst (s, qp, size1 - s->size);
- speed_operand_dst (s, rp, s->size);
- speed_operand_src (s, tp, size1);
- speed_operand_src (s, dp, s->size);
- speed_operand_dst (s, scratch, itch);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do {
- function (qp, rp, tp, size1, dp, s->size, scratch);
- } while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_MUPI_DIV_QR(function,itchfn)
- {
- unsigned i;
- mp_ptr dp, tp, qp, rp, ip, scratch;
- double t;
- mp_size_t size1, itch;
- TMP_DECL;
-
- size1 = (s->r == 0 ? 2 * s->size : s->r);
-
- SPEED_RESTRICT_COND (s->size >= 2);
- SPEED_RESTRICT_COND (size1 >= s->size);
-
- itch = itchfn (size1, s->size, 0);
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);
- SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp);
- SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_xp);
- SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2);
- SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); /* alignment? */
- SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_wp2); /* alignment? */
-
- /* we don't fill in dividend completely when size1 > s->size */
- MPN_COPY (tp, s->xp, s->size);
- MPN_COPY (tp + size1 - s->size, s->xp, s->size);
-
- MPN_COPY (dp, s->yp, s->size);
-
- /* normalize the data */
- dp[s->size-1] |= GMP_NUMB_HIGHBIT;
- tp[size1 - 1] = dp[s->size-1] - 1;
-
- mpn_invert (ip, dp, s->size, NULL);
-
- speed_operand_dst (s, qp, size1 - s->size);
- speed_operand_dst (s, rp, s->size);
- speed_operand_src (s, tp, size1);
- speed_operand_src (s, dp, s->size);
- speed_operand_src (s, ip, s->size);
- speed_operand_dst (s, scratch, itch);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do {
- function (qp, rp, tp, size1, dp, s->size, ip, s->size, scratch);
- } while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_PI1_BDIV_QR(function)
- {
- unsigned i;
- mp_ptr dp, tp, ap, qp;
- mp_limb_t inv;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size, s->align_xp);
- SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);
- SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp);
- SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size, s->align_wp2);
-
- MPN_COPY (ap, s->xp, s->size);
- MPN_COPY (ap+s->size, s->xp, s->size);
-
- /* divisor must be odd */
- MPN_COPY (dp, s->yp, s->size);
- dp[0] |= 1;
- binvert_limb (inv, dp[0]);
- inv = -inv;
-
- speed_operand_src (s, ap, 2*s->size);
- speed_operand_dst (s, tp, 2*s->size);
- speed_operand_src (s, dp, s->size);
- speed_operand_dst (s, qp, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do {
- MPN_COPY (tp, ap, 2*s->size);
- function (qp, tp, 2*s->size, dp, s->size, inv);
- } while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_PI1_BDIV_Q(function)
- {
- unsigned i;
- mp_ptr dp, tp, qp;
- mp_limb_t inv;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);
- SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp);
- SPEED_TMP_ALLOC_LIMBS (tp, s->size, s->align_wp2);
-
- /* divisor must be odd */
- MPN_COPY (dp, s->yp, s->size);
- dp[0] |= 1;
- binvert_limb (inv, dp[0]);
- inv = -inv;
-
- speed_operand_src (s, s->xp, s->size);
- speed_operand_dst (s, tp, s->size);
- speed_operand_src (s, dp, s->size);
- speed_operand_dst (s, qp, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do {
- MPN_COPY (tp, s->xp, s->size);
- function (qp, tp, s->size, dp, s->size, inv);
- } while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_MU_BDIV_Q(function,itchfn)
- {
- unsigned i;
- mp_ptr dp, qp, scratch;
- double t;
- mp_size_t itch;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 2);
-
- itch = itchfn (s->size, s->size);
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);
- SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp);
- SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2);
-
- /* divisor must be odd */
- MPN_COPY (dp, s->yp, s->size);
- dp[0] |= 1;
-
- speed_operand_dst (s, qp, s->size);
- speed_operand_src (s, s->xp, s->size);
- speed_operand_src (s, dp, s->size);
- speed_operand_dst (s, scratch, itch);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do {
- function (qp, s->xp, s->size, dp, s->size, scratch);
- } while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_MU_BDIV_QR(function,itchfn)
- {
- unsigned i;
- mp_ptr dp, tp, qp, rp, scratch;
- double t;
- mp_size_t itch;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 2);
-
- itch = itchfn (2 * s->size, s->size);
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);
- SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp);
- SPEED_TMP_ALLOC_LIMBS (tp, 2 * s->size, s->align_xp);
- SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2);
- SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); /* alignment? */
-
- MPN_COPY (tp, s->xp, s->size);
- MPN_COPY (tp+s->size, s->xp, s->size);
-
- /* divisor must be odd */
- MPN_COPY (dp, s->yp, s->size);
- dp[0] |= 1;
-
- speed_operand_dst (s, qp, s->size);
- speed_operand_dst (s, rp, s->size);
- speed_operand_src (s, tp, 2 * s->size);
- speed_operand_src (s, dp, s->size);
- speed_operand_dst (s, scratch, itch);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do {
- function (qp, rp, tp, 2 * s->size, dp, s->size, scratch);
- } while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_INVERT(function,itchfn)
- {
- long i;
- mp_ptr up, tp, ip;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp);
- SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp);
- SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp);
-
- MPN_COPY (up, s->xp, s->size);
-
- /* normalize the data */
- up[s->size-1] |= GMP_NUMB_HIGHBIT;
-
- speed_operand_src (s, up, s->size);
- speed_operand_dst (s, tp, s->size);
- speed_operand_dst (s, ip, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- function (ip, up, s->size, tp);
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_INVERTAPPR(function,itchfn)
- {
- long i;
- mp_ptr up, tp, ip;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp);
- SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp);
- SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp);
-
- MPN_COPY (up, s->xp, s->size);
-
- /* normalize the data */
- up[s->size-1] |= GMP_NUMB_HIGHBIT;
-
- speed_operand_src (s, up, s->size);
- speed_operand_dst (s, tp, s->size);
- speed_operand_dst (s, ip, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- function (ip, up, s->size, tp);
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_NI_INVERTAPPR(function,itchfn)
- {
- long i;
- mp_ptr up, tp, ip;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 3);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp);
- SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp);
- SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp);
-
- MPN_COPY (up, s->xp, s->size);
-
- /* normalize the data */
- up[s->size-1] |= GMP_NUMB_HIGHBIT;
-
- speed_operand_src (s, up, s->size);
- speed_operand_dst (s, tp, s->size);
- speed_operand_dst (s, ip, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- function (ip, up, s->size, tp);
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_BINVERT(function,itchfn)
- {
- long i;
- mp_ptr up, tp, ip;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp);
- SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp);
- SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp);
-
- MPN_COPY (up, s->xp, s->size);
-
- /* normalize the data */
- up[0] |= 1;
-
- speed_operand_src (s, up, s->size);
- speed_operand_dst (s, tp, s->size);
- speed_operand_dst (s, ip, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- function (ip, up, s->size, tp);
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_REDC_1(function)
- {
- unsigned i;
- mp_ptr cp, mp, tp, ap;
- mp_limb_t inv;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size+1, s->align_xp);
- SPEED_TMP_ALLOC_LIMBS (mp, s->size, s->align_yp);
- SPEED_TMP_ALLOC_LIMBS (cp, s->size, s->align_wp);
- SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size+1, s->align_wp2);
-
- MPN_COPY (ap, s->xp, s->size);
- MPN_COPY (ap+s->size, s->xp, s->size);
-
- /* modulus must be odd */
- MPN_COPY (mp, s->yp, s->size);
- mp[0] |= 1;
- binvert_limb (inv, mp[0]);
- inv = -inv;
-
- speed_operand_src (s, ap, 2*s->size+1);
- speed_operand_dst (s, tp, 2*s->size+1);
- speed_operand_src (s, mp, s->size);
- speed_operand_dst (s, cp, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do {
- MPN_COPY (tp, ap, 2*s->size);
- function (cp, tp, mp, s->size, inv);
- } while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_REDC_2(function)
- {
- unsigned i;
- mp_ptr cp, mp, tp, ap;
- mp_limb_t invp[2];
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size+1, s->align_xp);
- SPEED_TMP_ALLOC_LIMBS (mp, s->size, s->align_yp);
- SPEED_TMP_ALLOC_LIMBS (cp, s->size, s->align_wp);
- SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size+1, s->align_wp2);
-
- MPN_COPY (ap, s->xp, s->size);
- MPN_COPY (ap+s->size, s->xp, s->size);
-
- /* modulus must be odd */
- MPN_COPY (mp, s->yp, s->size);
- mp[0] |= 1;
- mpn_binvert (invp, mp, 2, tp);
- invp[0] = -invp[0]; invp[1] = ~invp[1];
-
- speed_operand_src (s, ap, 2*s->size+1);
- speed_operand_dst (s, tp, 2*s->size+1);
- speed_operand_src (s, mp, s->size);
- speed_operand_dst (s, cp, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do {
- MPN_COPY (tp, ap, 2*s->size);
- function (cp, tp, mp, s->size, invp);
- } while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_REDC_N(function)
- {
- unsigned i;
- mp_ptr cp, mp, tp, ap, invp;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size > 8);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size+1, s->align_xp);
- SPEED_TMP_ALLOC_LIMBS (mp, s->size, s->align_yp);
- SPEED_TMP_ALLOC_LIMBS (cp, s->size, s->align_wp);
- SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size+1, s->align_wp2);
- SPEED_TMP_ALLOC_LIMBS (invp, s->size, s->align_wp2); /* align? */
-
- MPN_COPY (ap, s->xp, s->size);
- MPN_COPY (ap+s->size, s->xp, s->size);
-
- /* modulus must be odd */
- MPN_COPY (mp, s->yp, s->size);
- mp[0] |= 1;
- mpn_binvert (invp, mp, s->size, tp);
-
- speed_operand_src (s, ap, 2*s->size+1);
- speed_operand_dst (s, tp, 2*s->size+1);
- speed_operand_src (s, mp, s->size);
- speed_operand_dst (s, cp, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do {
- MPN_COPY (tp, ap, 2*s->size);
- function (cp, tp, mp, s->size, invp);
- } while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_POPCOUNT(function)
- {
- unsigned i;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- speed_operand_src (s, s->xp, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- function (s->xp, s->size);
- while (--i != 0);
-
- return speed_endtime ();
- }
- #define SPEED_ROUTINE_MPN_HAMDIST(function)
- {
- unsigned i;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- speed_operand_src (s, s->xp, s->size);
- speed_operand_src (s, s->yp, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- function (s->xp, s->yp, s->size);
- while (--i != 0);
-
- return speed_endtime ();
- }
- #define SPEED_ROUTINE_MPZ_UI(function)
- {
- mpz_t z;
- unsigned i;
- double t;
-
- SPEED_RESTRICT_COND (s->size >= 0);
-
- mpz_init (z);
-
- speed_starttime ();
- i = s->reps;
- do
- function (z, s->size);
- while (--i != 0);
- t = speed_endtime ();
-
- mpz_clear (z);
- return t;
- }
- #define SPEED_ROUTINE_MPZ_FAC_UI(function) SPEED_ROUTINE_MPZ_UI(function)
- #define SPEED_ROUTINE_MPZ_FIB_UI(function) SPEED_ROUTINE_MPZ_UI(function)
- #define SPEED_ROUTINE_MPZ_LUCNUM_UI(function) SPEED_ROUTINE_MPZ_UI(function)
- #define SPEED_ROUTINE_MPZ_2_UI(function)
- {
- mpz_t z, z2;
- unsigned i;
- double t;
-
- SPEED_RESTRICT_COND (s->size >= 0);
-
- mpz_init (z);
- mpz_init (z2);
-
- speed_starttime ();
- i = s->reps;
- do
- function (z, z2, s->size);
- while (--i != 0);
- t = speed_endtime ();
-
- mpz_clear (z);
- mpz_clear (z2);
- return t;
- }
- #define SPEED_ROUTINE_MPZ_FIB2_UI(function) SPEED_ROUTINE_MPZ_2_UI(function)
- #define SPEED_ROUTINE_MPZ_LUCNUM2_UI(function) SPEED_ROUTINE_MPZ_2_UI(function)
- #define SPEED_ROUTINE_MPN_FIB2_UI(function)
- {
- mp_ptr fp, f1p;
- mp_size_t alloc;
- unsigned i;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 0);
-
- TMP_MARK;
- alloc = MPN_FIB2_SIZE (s->size);
- SPEED_TMP_ALLOC_LIMBS (fp, alloc, s->align_xp);
- SPEED_TMP_ALLOC_LIMBS (f1p, alloc, s->align_yp);
-
- speed_starttime ();
- i = s->reps;
- do
- function (fp, f1p, s->size);
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- /* Calculate b^e mod m for random b and m of s->size limbs and random e of 6
- limbs. m is forced to odd so that redc can be used. e is limited in
- size so the calculation doesn't take too long. */
- #define SPEED_ROUTINE_MPZ_POWM(function)
- {
- mpz_t r, b, e, m;
- unsigned i;
- double t;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- mpz_init (r);
- mpz_init_set_n (b, s->xp, s->size);
- mpz_init_set_n (m, s->yp, s->size);
- mpz_setbit (m, 0); /* force m to odd */
- mpz_init_set_n (e, s->xp_block, 6);
-
- speed_starttime ();
- i = s->reps;
- do
- function (r, b, e, m);
- while (--i != 0);
- t = speed_endtime ();
-
- mpz_clear (r);
- mpz_clear (b);
- mpz_clear (e);
- mpz_clear (m);
- return t;
- }
- /* (m-2)^0xAAAAAAAA mod m */
- #define SPEED_ROUTINE_MPZ_POWM_UI(function)
- {
- mpz_t r, b, m;
- unsigned long e;
- unsigned i;
- double t;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- mpz_init (r);
-
- /* force m to odd */
- mpz_init (m);
- mpz_set_n (m, s->xp, s->size);
- PTR(m)[0] |= 1;
-
- e = (~ (unsigned long) 0) / 3;
- if (s->r != 0)
- e = s->r;
-
- mpz_init_set (b, m);
- mpz_sub_ui (b, b, 2);
- /* printf ("%Xn", mpz_get_ui(m)); */
- i = s->reps;
- speed_starttime ();
- do
- function (r, b, e, m);
- while (--i != 0);
- t = speed_endtime ();
-
- mpz_clear (r);
- mpz_clear (b);
- mpz_clear (m);
- return t;
- }
- #define SPEED_ROUTINE_MPN_ADDSUB_CALL(call)
- {
- mp_ptr wp, wp2, xp, yp;
- unsigned i;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 0);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);
- SPEED_TMP_ALLOC_LIMBS (wp2, s->size, s->align_wp2);
- xp = s->xp;
- yp = s->yp;
-
- if (s->r == 0) ;
- else if (s->r == 1) { xp = wp; }
- else if (s->r == 2) { yp = wp2; }
- else if (s->r == 3) { xp = wp; yp = wp2; }
- else if (s->r == 4) { xp = wp2; yp = wp; }
- else {
- TMP_FREE;
- return -1.0;
- }
- if (xp != s->xp) MPN_COPY (xp, s->xp, s->size);
- if (yp != s->yp) MPN_COPY (yp, s->yp, s->size);
-
- speed_operand_src (s, xp, s->size);
- speed_operand_src (s, yp, s->size);
- speed_operand_dst (s, wp, s->size);
- speed_operand_dst (s, wp2, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- call;
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_ADDSUB_N(function)
- SPEED_ROUTINE_MPN_ADDSUB_CALL
- (function (wp, wp2, xp, yp, s->size));
- #define SPEED_ROUTINE_MPN_ADDSUB_NC(function)
- SPEED_ROUTINE_MPN_ADDSUB_CALL
- (function (wp, wp2, xp, yp, s->size, 0));
- /* Doing an Nx1 gcd with the given r. */
- #define SPEED_ROUTINE_MPN_GCD_1N(function)
- {
- mp_ptr xp;
- unsigned i;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 1);
- SPEED_RESTRICT_COND (s->r != 0);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (xp, s->size, s->align_xp);
- MPN_COPY (xp, s->xp, s->size);
- xp[0] |= refmpn_zero_p (xp, s->size);
-
- speed_operand_src (s, s->xp, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- function (xp, s->size, s->r);
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- /* SPEED_BLOCK_SIZE many one GCDs of s->size bits each. */
- #define SPEED_ROUTINE_MPN_GCD_1_CALL(setup, call)
- {
- unsigned i, j;
- mp_ptr px, py;
- mp_limb_t x_mask, y_mask;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 1);
- SPEED_RESTRICT_COND (s->size <= mp_bits_per_limb);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (px, SPEED_BLOCK_SIZE, s->align_xp);
- SPEED_TMP_ALLOC_LIMBS (py, SPEED_BLOCK_SIZE, s->align_yp);
- MPN_COPY (px, s->xp_block, SPEED_BLOCK_SIZE);
- MPN_COPY (py, s->yp_block, SPEED_BLOCK_SIZE);
-
- x_mask = MP_LIMB_T_LOWBITMASK (s->size);
- y_mask = MP_LIMB_T_LOWBITMASK (s->r != 0 ? s->r : s->size);
- for (i = 0; i < SPEED_BLOCK_SIZE; i++)
- {
- px[i] &= x_mask; px[i] += (px[i] == 0);
- py[i] &= y_mask; py[i] += (py[i] == 0);
- setup;
- }
-
- speed_operand_src (s, px, SPEED_BLOCK_SIZE);
- speed_operand_src (s, py, SPEED_BLOCK_SIZE);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- {
- j = SPEED_BLOCK_SIZE;
- do
- {
- call;
- }
- while (--j != 0);
- }
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
-
- s->time_divisor = SPEED_BLOCK_SIZE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_GCD_1(function)
- SPEED_ROUTINE_MPN_GCD_1_CALL( , function (&px[j-1], 1, py[j-1]))
- #define SPEED_ROUTINE_MPN_JACBASE(function)
- SPEED_ROUTINE_MPN_GCD_1_CALL
- ({
- /* require x<y, y odd, y!=1 */
- px[i] %= py[i];
- px[i] |= 1;
- py[i] |= 1;
- if (py[i]==1) py[i]=3;
- },
- function (px[j-1], py[j-1], 0))
- /* Run some GCDs of s->size limbs each. The number of different data values
- is decreased as s->size**2, since GCD is a quadratic algorithm.
- SPEED_ROUTINE_MPN_GCD runs more times than SPEED_ROUTINE_MPN_GCDEXT
- though, because the plain gcd is about twice as fast as gcdext. */
- #define SPEED_ROUTINE_MPN_GCD_CALL(datafactor, call)
- {
- unsigned i;
- mp_size_t j, pieces, psize;
- mp_ptr wp, wp2, xtmp, ytmp, px, py;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (xtmp, s->size+1, s->align_xp);
- SPEED_TMP_ALLOC_LIMBS (ytmp, s->size+1, s->align_yp);
- SPEED_TMP_ALLOC_LIMBS (wp, s->size+1, s->align_wp);
- SPEED_TMP_ALLOC_LIMBS (wp2, s->size+1, s->align_wp2);
-
- pieces = SPEED_BLOCK_SIZE * datafactor / s->size / s->size;
- pieces = MIN (pieces, SPEED_BLOCK_SIZE / s->size);
- pieces = MAX (pieces, 1);
-
- psize = pieces * s->size;
- px = TMP_ALLOC_LIMBS (psize);
- py = TMP_ALLOC_LIMBS (psize);
- MPN_COPY (px, pieces==1 ? s->xp : s->xp_block, psize);
- MPN_COPY (py, pieces==1 ? s->yp : s->yp_block, psize);
-
- /* Requirements: x >= y, y must be odd, high limbs != 0.
- No need to ensure random numbers are really great. */
- for (j = 0; j < pieces; j++)
- {
- mp_ptr x = px + j * s->size;
- mp_ptr y = py + j * s->size;
- if (x[s->size - 1] == 0) x[s->size - 1] = 1;
- if (y[s->size - 1] == 0) y[s->size - 1] = 1;
-
- if (x[s->size - 1] < y[s->size - 1])
- MP_LIMB_T_SWAP (x[s->size - 1], y[s->size - 1]);
- else if (x[s->size - 1] == y[s->size - 1])
- {
- x[s->size - 1] = 2;
- y[s->size - 1] = 1;
- }
- y[0] |= 1;
- }
-
- speed_operand_src (s, px, psize);
- speed_operand_src (s, py, psize);
- speed_operand_dst (s, xtmp, s->size);
- speed_operand_dst (s, ytmp, s->size);
- speed_operand_dst (s, wp, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- {
- j = pieces;
- do
- {
- MPN_COPY (xtmp, px+(j - 1)*s->size, s->size);
- MPN_COPY (ytmp, py+(j - 1)*s->size, s->size);
- call;
- }
- while (--j != 0);
- }
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
-
- s->time_divisor = pieces;
- return t;
- }
- #define SPEED_ROUTINE_MPN_GCD(function)
- SPEED_ROUTINE_MPN_GCD_CALL (8, function (wp, xtmp, s->size, ytmp, s->size))
- #define SPEED_ROUTINE_MPN_GCDEXT(function)
- SPEED_ROUTINE_MPN_GCD_CALL
- (4, { mp_size_t wp2size;
- function (wp, wp2, &wp2size, xtmp, s->size, ytmp, s->size); })
- #define SPEED_ROUTINE_MPN_GCDEXT_ONE(function)
- {
- unsigned i;
- mp_size_t j, pieces, psize, wp2size;
- mp_ptr wp, wp2, xtmp, ytmp, px, py;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- TMP_MARK;
-
- SPEED_TMP_ALLOC_LIMBS (xtmp, s->size+1, s->align_xp);
- SPEED_TMP_ALLOC_LIMBS (ytmp, s->size+1, s->align_yp);
- MPN_COPY (xtmp, s->xp, s->size);
- MPN_COPY (ytmp, s->yp, s->size);
-
- SPEED_TMP_ALLOC_LIMBS (wp, s->size+1, s->align_wp);
- SPEED_TMP_ALLOC_LIMBS (wp2, s->size+1, s->align_wp2);
-
- pieces = SPEED_BLOCK_SIZE / 3;
- psize = 3 * pieces;
- px = TMP_ALLOC_LIMBS (psize);
- py = TMP_ALLOC_LIMBS (psize);
- MPN_COPY (px, s->xp_block, psize);
- MPN_COPY (py, s->yp_block, psize);
-
- /* x must have at least as many bits as y,
- high limbs must be non-zero */
- for (j = 0; j < pieces; j++)
- {
- mp_ptr x = px+3*j;
- mp_ptr y = py+3*j;
- x[2] += (x[2] == 0);
- y[2] += (y[2] == 0);
- if (x[2] < y[2])
- MP_LIMB_T_SWAP (x[2], y[2]);
- }
-
- speed_operand_src (s, px, psize);
- speed_operand_src (s, py, psize);
- speed_operand_dst (s, xtmp, s->size);
- speed_operand_dst (s, ytmp, s->size);
- speed_operand_dst (s, wp, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- {
- mp_ptr x = px;
- mp_ptr y = py;
- mp_ptr xth = &xtmp[s->size-3];
- mp_ptr yth = &ytmp[s->size-3];
- j = pieces;
- do
- {
- xth[0] = x[0], xth[1] = x[1], xth[2] = x[2];
- yth[0] = y[0], yth[1] = y[1], yth[2] = y[2];
-
- ytmp[0] |= 1; /* y must be odd, */
-
- function (wp, wp2, &wp2size, xtmp, s->size, ytmp, s->size);
-
- x += 3;
- y += 3;
- }
- while (--j != 0);
- }
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
-
- s->time_divisor = pieces;
- return t;
- }
- #define SPEED_ROUTINE_MPZ_JACOBI(function)
- {
- mpz_t a, b;
- unsigned i;
- mp_size_t j, pieces, psize;
- mp_ptr px, py;
- double t;
- TMP_DECL;
-
- TMP_MARK;
- pieces = SPEED_BLOCK_SIZE / MAX (s->size, 1);
- pieces = MAX (pieces, 1);
- s->time_divisor = pieces;
-
- psize = pieces * s->size;
- px = TMP_ALLOC_LIMBS (psize);
- py = TMP_ALLOC_LIMBS (psize);
- MPN_COPY (px, pieces==1 ? s->xp : s->xp_block, psize);
- MPN_COPY (py, pieces==1 ? s->yp : s->yp_block, psize);
-
- for (j = 0; j < pieces; j++)
- {
- mp_ptr x = px+j*s->size;
- mp_ptr y = py+j*s->size;
-
- /* y odd */
- y[0] |= 1;
-
- /* high limbs non-zero */
- if (x[s->size-1] == 0) x[s->size-1] = 1;
- if (y[s->size-1] == 0) y[s->size-1] = 1;
- }
-
- SIZ(a) = s->size;
- SIZ(b) = s->size;
-
- speed_operand_src (s, px, psize);
- speed_operand_src (s, py, psize);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- {
- j = pieces;
- do
- {
- PTR(a) = px+(j-1)*s->size;
- PTR(b) = py+(j-1)*s->size;
- function (a, b);
- }
- while (--j != 0);
- }
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_DIVREM_2(function)
- {
- mp_ptr wp, xp;
- mp_limb_t yp[2];
- unsigned i;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 2);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (xp, s->size, s->align_xp);
- SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);
-
- /* source is destroyed */
- MPN_COPY (xp, s->xp, s->size);
-
- /* divisor must be normalized */
- MPN_COPY (yp, s->yp_block, 2);
- yp[1] |= GMP_NUMB_HIGHBIT;
-
- speed_operand_src (s, xp, s->size);
- speed_operand_src (s, yp, 2);
- speed_operand_dst (s, wp, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- function (wp, 0, xp, s->size, yp);
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MODLIMB_INVERT(function)
- {
- unsigned i, j;
- mp_ptr xp;
- mp_limb_t n = 1;
- double t;
-
- xp = s->xp_block-1;
-
- speed_operand_src (s, s->xp_block, SPEED_BLOCK_SIZE);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- {
- j = SPEED_BLOCK_SIZE;
- do
- {
- /* randomized but successively dependent */
- n += (xp[j] << 1);
-
- function (n, n);
- }
- while (--j != 0);
- }
- while (--i != 0);
- t = speed_endtime ();
-
- /* make sure the compiler won't optimize away n */
- noop_1 (n);
-
- s->time_divisor = SPEED_BLOCK_SIZE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_SQRTREM(function)
- {
- mp_ptr wp, wp2;
- unsigned i;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);
- SPEED_TMP_ALLOC_LIMBS (wp2, s->size, s->align_wp2);
-
- speed_operand_src (s, s->xp, s->size);
- speed_operand_dst (s, wp, s->size);
- speed_operand_dst (s, wp2, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- function (wp, wp2, s->xp, s->size);
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_ROOTREM(function)
- {
- mp_ptr wp, wp2;
- unsigned i;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);
- SPEED_TMP_ALLOC_LIMBS (wp2, s->size, s->align_wp2);
-
- speed_operand_src (s, s->xp, s->size);
- speed_operand_dst (s, wp, s->size);
- speed_operand_dst (s, wp2, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- function (wp, wp2, s->xp, s->size, s->r);
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- /* s->size controls the number of limbs in the input, s->r is the base, or
- decimal by default. */
- #define SPEED_ROUTINE_MPN_GET_STR(function)
- {
- unsigned char *wp;
- mp_size_t wn;
- mp_ptr xp;
- int base;
- unsigned i;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- base = s->r == 0 ? 10 : s->r;
- SPEED_RESTRICT_COND (base >= 2 && base <= 256);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (xp, s->size + 1, s->align_xp);
-
- MPN_SIZEINBASE (wn, s->xp, s->size, base);
- wp = TMP_ALLOC (wn);
-
- /* use this during development to guard against overflowing wp */
- /*
- MPN_COPY (xp, s->xp, s->size);
- ASSERT_ALWAYS (mpn_get_str (wp, base, xp, s->size) <= wn);
- */
-
- speed_operand_src (s, s->xp, s->size);
- speed_operand_dst (s, xp, s->size);
- speed_operand_dst (s, (mp_ptr) wp, wn/BYTES_PER_MP_LIMB);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- {
- MPN_COPY (xp, s->xp, s->size);
- function (wp, base, xp, s->size);
- }
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- /* s->size controls the number of digits in the input, s->r is the base, or
- decimal by default. */
- #define SPEED_ROUTINE_MPN_SET_STR_CALL(call)
- {
- unsigned char *xp;
- mp_ptr wp;
- mp_size_t wn;
- unsigned i;
- int base;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 1);
-
- base = s->r == 0 ? 10 : s->r;
- SPEED_RESTRICT_COND (base >= 2 && base <= 256);
-
- TMP_MARK;
-
- xp = TMP_ALLOC (s->size);
- for (i = 0; i < s->size; i++)
- xp[i] = s->xp[i] % base;
-
- wn = ((mp_size_t) (s->size / mp_bases[base].chars_per_bit_exactly))
- / GMP_LIMB_BITS + 2;
- SPEED_TMP_ALLOC_LIMBS (wp, wn, s->align_wp);
-
- /* use this during development to check wn is big enough */
- /*
- ASSERT_ALWAYS (mpn_set_str (wp, xp, s->size, base) <= wn);
- */
-
- speed_operand_src (s, (mp_ptr) xp, s->size/BYTES_PER_MP_LIMB);
- speed_operand_dst (s, wp, wn);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- call;
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- /* Run an accel gcd find_a() function over various data values. A set of
- values is used in case some run particularly fast or slow. The size
- parameter is ignored, the amount of data tested is fixed. */
- #define SPEED_ROUTINE_MPN_GCD_FINDA(function)
- {
- unsigned i, j;
- mp_limb_t cp[SPEED_BLOCK_SIZE][2];
- double t;
- TMP_DECL;
-
- TMP_MARK;
-
- /* low must be odd, high must be non-zero */
- for (i = 0; i < SPEED_BLOCK_SIZE; i++)
- {
- cp[i][0] = s->xp_block[i] | 1;
- cp[i][1] = s->yp_block[i] + (s->yp_block[i] == 0);
- }
-
- speed_operand_src (s, &cp[0][0], 2*SPEED_BLOCK_SIZE);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- {
- j = SPEED_BLOCK_SIZE;
- do
- {
- function (cp[j-1]);
- }
- while (--j != 0);
- }
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
-
- s->time_divisor = SPEED_BLOCK_SIZE;
- return t;
- }
- /* "call" should do "count_foo_zeros(c,n)".
- Give leading=1 if foo is leading zeros, leading=0 for trailing.
- Give zero=1 if n=0 is allowed in the call, zero=0 if not. */
- #define SPEED_ROUTINE_COUNT_ZEROS_A(leading, zero)
- {
- mp_ptr xp;
- int i, c;
- unsigned j;
- mp_limb_t n;
- double t;
- TMP_DECL;
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (xp, SPEED_BLOCK_SIZE, s->align_xp);
-
- if (! speed_routine_count_zeros_setup (s, xp, leading, zero))
- return -1.0;
- speed_operand_src (s, xp, SPEED_BLOCK_SIZE);
- speed_cache_fill (s);
-
- c = 0;
- speed_starttime ();
- j = s->reps;
- do {
- for (i = 0; i < SPEED_BLOCK_SIZE; i++)
- {
- n = xp[i];
- n ^= c;
- #define SPEED_ROUTINE_COUNT_ZEROS_B()
- }
- } while (--j != 0);
- t = speed_endtime ();
-
- /* don't let c go dead */
- noop_1 (c);
-
- s->time_divisor = SPEED_BLOCK_SIZE;
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_COUNT_ZEROS_C(call, leading, zero)
- do {
- SPEED_ROUTINE_COUNT_ZEROS_A (leading, zero);
- call;
- SPEED_ROUTINE_COUNT_ZEROS_B ();
- } while (0)
- #define SPEED_ROUTINE_COUNT_LEADING_ZEROS_C(call,zero)
- SPEED_ROUTINE_COUNT_ZEROS_C (call, 1, zero)
- #define SPEED_ROUTINE_COUNT_LEADING_ZEROS(fun)
- SPEED_ROUTINE_COUNT_ZEROS_C (fun (c, n), 1, 0)
- #define SPEED_ROUTINE_COUNT_TRAILING_ZEROS_C(call,zero)
- SPEED_ROUTINE_COUNT_ZEROS_C (call, 0, zero)
- #define SPEED_ROUTINE_COUNT_TRAILING_ZEROS(call)
- SPEED_ROUTINE_COUNT_ZEROS_C (fun (c, n), 0, 0)
- #define SPEED_ROUTINE_INVERT_LIMB_CALL(call)
- {
- unsigned i, j;
- mp_limb_t d, dinv=0;
- mp_ptr xp = s->xp_block - 1;
-
- s->time_divisor = SPEED_BLOCK_SIZE;
-
- speed_starttime ();
- i = s->reps;
- do
- {
- j = SPEED_BLOCK_SIZE;
- do
- {
- d = dinv ^ xp[j];
- d |= GMP_LIMB_HIGHBIT;
- do { call; } while (0);
- }
- while (--j != 0);
- }
- while (--i != 0);
-
- /* don't let the compiler optimize everything away */
- noop_1 (dinv);
-
- return speed_endtime();
- }
- #endif
- #define SPEED_ROUTINE_MPN_BACK_TO_BACK(function)
- {
- unsigned i;
- speed_starttime ();
- i = s->reps;
- do
- function ();
- while (--i != 0);
- return speed_endtime ();
- }
- #define SPEED_ROUTINE_MPN_ZERO_CALL(call)
- {
- mp_ptr wp;
- unsigned i;
- double t;
- TMP_DECL;
-
- SPEED_RESTRICT_COND (s->size >= 0);
-
- TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);
- speed_operand_dst (s, wp, s->size);
- speed_cache_fill (s);
-
- speed_starttime ();
- i = s->reps;
- do
- call;
- while (--i != 0);
- t = speed_endtime ();
-
- TMP_FREE;
- return t;
- }
- #define SPEED_ROUTINE_MPN_ZERO(function)
- SPEED_ROUTINE_MPN_ZERO_CALL (function (wp, s->size))