aors_n.asm
上传用户:qaz666999
上传日期:2022-08-06
资源大小:2570k
文件大小:4k
- dnl Intel Pentium mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
- dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2002 Free Software
- dnl Foundation, Inc.
- dnl
- dnl This file is part of the GNU MP Library.
- dnl
- dnl The GNU MP Library is free software; you can redistribute it and/or
- dnl modify it under the terms of the GNU Lesser General Public License as
- dnl published by the Free Software Foundation; either version 3 of the
- dnl License, or (at your option) any later version.
- dnl
- dnl The GNU MP Library is distributed in the hope that it will be useful,
- dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
- dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- dnl Lesser General Public License for more details.
- dnl
- dnl You should have received a copy of the GNU Lesser General Public License
- dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
- include(`../config.m4')
- C P5: 2.375 cycles/limb
- ifdef(`OPERATION_add_n',`
- define(M4_inst, adcl)
- define(M4_function_n, mpn_add_n)
- define(M4_function_nc, mpn_add_nc)
- ',`ifdef(`OPERATION_sub_n',`
- define(M4_inst, sbbl)
- define(M4_function_n, mpn_sub_n)
- define(M4_function_nc, mpn_sub_nc)
- ',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
- ')')')
- MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
- C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
- C mp_size_t size);
- C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
- C mp_size_t size, mp_limb_t carry);
- defframe(PARAM_CARRY,20)
- defframe(PARAM_SIZE, 16)
- defframe(PARAM_SRC2, 12)
- defframe(PARAM_SRC1, 8)
- defframe(PARAM_DST, 4)
- TEXT
- ALIGN(8)
- PROLOGUE(M4_function_nc)
- pushl %edi
- pushl %esi
- pushl %ebx
- pushl %ebp
- deflit(`FRAME',16)
- movl PARAM_DST,%edi
- movl PARAM_SRC1,%esi
- movl PARAM_SRC2,%ebp
- movl PARAM_SIZE,%ecx
- movl (%ebp),%ebx
- decl %ecx
- movl %ecx,%edx
- shrl $3,%ecx
- andl $7,%edx
- testl %ecx,%ecx C zero carry flag
- jz L(endgo)
- pushl %edx
- FRAME_pushl()
- movl PARAM_CARRY,%eax
- shrl $1,%eax C shift bit 0 into carry
- jmp L(oop)
- L(endgo):
- deflit(`FRAME',16)
- movl PARAM_CARRY,%eax
- shrl $1,%eax C shift bit 0 into carry
- jmp L(end)
- EPILOGUE()
- ALIGN(8)
- PROLOGUE(M4_function_n)
- pushl %edi
- pushl %esi
- pushl %ebx
- pushl %ebp
- deflit(`FRAME',16)
- movl PARAM_DST,%edi
- movl PARAM_SRC1,%esi
- movl PARAM_SRC2,%ebp
- movl PARAM_SIZE,%ecx
- movl (%ebp),%ebx
- decl %ecx
- movl %ecx,%edx
- shrl $3,%ecx
- andl $7,%edx
- testl %ecx,%ecx C zero carry flag
- jz L(end)
- pushl %edx
- FRAME_pushl()
- ALIGN(8)
- L(oop): movl 28(%edi),%eax C fetch destination cache line
- leal 32(%edi),%edi
- L(1): movl (%esi),%eax
- movl 4(%esi),%edx
- M4_inst %ebx,%eax
- movl 4(%ebp),%ebx
- M4_inst %ebx,%edx
- movl 8(%ebp),%ebx
- movl %eax,-32(%edi)
- movl %edx,-28(%edi)
- L(2): movl 8(%esi),%eax
- movl 12(%esi),%edx
- M4_inst %ebx,%eax
- movl 12(%ebp),%ebx
- M4_inst %ebx,%edx
- movl 16(%ebp),%ebx
- movl %eax,-24(%edi)
- movl %edx,-20(%edi)
- L(3): movl 16(%esi),%eax
- movl 20(%esi),%edx
- M4_inst %ebx,%eax
- movl 20(%ebp),%ebx
- M4_inst %ebx,%edx
- movl 24(%ebp),%ebx
- movl %eax,-16(%edi)
- movl %edx,-12(%edi)
- L(4): movl 24(%esi),%eax
- movl 28(%esi),%edx
- M4_inst %ebx,%eax
- movl 28(%ebp),%ebx
- M4_inst %ebx,%edx
- movl 32(%ebp),%ebx
- movl %eax,-8(%edi)
- movl %edx,-4(%edi)
- leal 32(%esi),%esi
- leal 32(%ebp),%ebp
- decl %ecx
- jnz L(oop)
- popl %edx
- FRAME_popl()
- L(end):
- decl %edx C test %edx w/o clobbering carry
- js L(end2)
- incl %edx
- L(oop2):
- leal 4(%edi),%edi
- movl (%esi),%eax
- M4_inst %ebx,%eax
- movl 4(%ebp),%ebx
- movl %eax,-4(%edi)
- leal 4(%esi),%esi
- leal 4(%ebp),%ebp
- decl %edx
- jnz L(oop2)
- L(end2):
- movl (%esi),%eax
- M4_inst %ebx,%eax
- movl %eax,(%edi)
- sbbl %eax,%eax
- negl %eax
- popl %ebp
- popl %ebx
- popl %esi
- popl %edi
- ret
- EPILOGUE()