string.S
上传用户:jlfgdled
上传日期:2013-04-10
资源大小:33168k
文件大小:12k
- /*
- * BK Id: %F% %I% %G% %U% %#%
- */
- /*
- * String handling functions for PowerPC.
- *
- * Copyright (C) 1996 Paul Mackerras.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
- #include <linux/config.h>
- #include <asm/processor.h>
- #include <asm/cache.h>
- #include <asm/errno.h>
- #include <asm/ppc_asm.h>
- /*
- * COPY_16_BYTES: copy four words from 4(r4)..19(r4) to 4(r6)..19(r6)
- * and advance both r4 and r6 by 16 (the last load and the last store
- * are the update forms). Uses r7-r10 as scratch.
- *
- * Every line of the definition except the last must end in a
- * backslash, otherwise the macro expands to nothing and the
- * instructions are assembled at the point of definition instead.
- */
- #define COPY_16_BYTES \
- lwz r7,4(r4); \
- lwz r8,8(r4); \
- lwz r9,12(r4); \
- lwzu r10,16(r4); \
- stw r7,4(r6); \
- stw r8,8(r6); \
- stw r9,12(r6); \
- stwu r10,16(r6)
- /*
- * COPY_16_BYTES_WITHEX(n): the same 16-byte copy as COPY_16_BYTES,
- * but with a numeric local label in front of every load and store:
- * 8<n>0 .. 8<n>3 on the four loads, 8<n>4 .. 8<n>7 on the four stores.
- * COPY_16_BYTES_EXCODE(n) refers to these labels from __ex_table.
- *
- * Every line of the definition except the last must end in a
- * backslash so that the whole body belongs to the macro.
- */
- #define COPY_16_BYTES_WITHEX(n) \
- 8 ## n ## 0: \
- lwz r7,4(r4); \
- 8 ## n ## 1: \
- lwz r8,8(r4); \
- 8 ## n ## 2: \
- lwz r9,12(r4); \
- 8 ## n ## 3: \
- lwzu r10,16(r4); \
- 8 ## n ## 4: \
- stw r7,4(r6); \
- 8 ## n ## 5: \
- stw r8,8(r6); \
- 8 ## n ## 6: \
- stw r9,12(r6); \
- 8 ## n ## 7: \
- stwu r10,16(r6)
- /*
- * COPY_16_BYTES_EXCODE(n): fixup code and __ex_table entries for the
- * n-th COPY_16_BYTES_WITHEX(n) chunk of a cache line.
- * 9<n>0: target for the four loads (8<n>0 .. 8<n>3); subtracts the
- * 16 * n bytes of this line that were already copied from r5
- * and branches to 104f.
- * 9<n>1: target for the four stores (8<n>4 .. 8<n>7); same adjustment,
- * then branches to 105f.
- * The macro ends by switching back to .text.
- *
- * Every line of the definition except the last must end in a
- * backslash so that the whole body belongs to the macro.
- */
- #define COPY_16_BYTES_EXCODE(n) \
- 9 ## n ## 0: \
- addi r5,r5,-(16 * n); \
- b 104f; \
- 9 ## n ## 1: \
- addi r5,r5,-(16 * n); \
- b 105f; \
- .section __ex_table,"a"; \
- .align 2; \
- .long 8 ## n ## 0b,9 ## n ## 0b; \
- .long 8 ## n ## 1b,9 ## n ## 0b; \
- .long 8 ## n ## 2b,9 ## n ## 0b; \
- .long 8 ## n ## 3b,9 ## n ## 0b; \
- .long 8 ## n ## 4b,9 ## n ## 1b; \
- .long 8 ## n ## 5b,9 ## n ## 1b; \
- .long 8 ## n ## 6b,9 ## n ## 1b; \
- .long 8 ## n ## 7b,9 ## n ## 1b; \
- .text
- .text
- CACHELINE_BYTES = L1_CACHE_LINE_SIZE /* bytes per L1 cache line; value comes from a header not shown here */
- LG_CACHELINE_BYTES = LG_L1_CACHE_LINE_SIZE /* used below as a shift count; presumably log2 of the line size */
- CACHELINE_MASK = (L1_CACHE_LINE_SIZE-1) /* mask selecting the byte offset within a cache line */
- /*
- * strcpy: r3 = dest, r4 = src.
- * Copies bytes from src to dest up to and including the terminating
- * zero byte. r3 is never written, so it still holds dest on return.
- * Uses r0, r4, r5 and cr0.
- */
- .globl strcpy
- strcpy:
- subi r4,r4,1 /* pre-bias both pointers: lbzu/stbu step before the access */
- subi r5,r3,1
- 2: lbzu r0,1(r4) /* r0 = *++src */
- cmpwi r0,0 /* was that the terminator? */
- stbu r0,1(r5) /* *++dest = r0, terminator included */
- bne 2b
- blr
- /*
- * strncpy: r3 = dest, r4 = src, r5 = count.
- * Copies at most count bytes from src to dest, stopping once a zero
- * byte has been copied. If src is shorter than count, the rest of
- * dest is filled with zero bytes, so exactly count bytes are always
- * written, as the C definition of strncpy requires.
- * r3 is never written, so it still holds dest on return.
- * Uses r0, r4, r5, r6, ctr and cr0.
- */
- .globl strncpy
- strncpy:
- cmpwi 0,r5,0
- beqlr /* count == 0: nothing to do */
- mtctr r5 /* ctr = bytes of dest still to be written */
- addi r6,r3,-1 /* pre-bias both pointers for lbzu/stbu */
- addi r4,r4,-1
- 1: lbzu r0,1(r4)
- cmpwi 0,r0,0
- stbu r0,1(r6)
- bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */
- bnelr /* no zero byte seen: ctr ran out, count bytes copied */
- mfctr r5 /* zero byte copied: r5 = bytes of dest not yet written */
- cmpwi 0,r5,0
- beqlr /* the terminator filled the last byte */
- 2: stbu r0,1(r6) /* r0 == 0 here: zero-fill the remainder */
- bdnz 2b
- blr
- /*
- * strcat: r3 = dest, r4 = src.
- * Finds the zero byte that ends dest and copies src there, including
- * src's own terminating zero byte. r3 is never written, so it still
- * holds dest on return. Uses r0, r4, r5 and cr0.
- */
- .globl strcat
- strcat:
- subi r4,r4,1 /* pre-bias src for lbzu */
- subi r5,r3,1 /* r5 walks dest, pre-biased as well */
- 1: lbzu r0,1(r5) /* scan dest for its terminator */
- cmpwi r0,0
- bne 1b
- subi r5,r5,1 /* step back so the first stbu overwrites the terminator */
- 2: lbzu r0,1(r4) /* r0 = *++src */
- cmpwi r0,0
- stbu r0,1(r5) /* append it, terminator included */
- bne 2b
- blr
- /*
- * strcmp: r3 = first string, r4 = second string.
- * Walks both strings one byte at a time and returns in r3 the value
- * (byte of the first string) - (byte of the second string) taken at
- * the first position where the bytes differ or where the first string
- * ends; that is 0 when both strings end together.
- * Uses r0, r4, r5, cr0 and cr1.
- */
- .globl strcmp
- strcmp:
- subi r4,r4,1 /* pre-bias both pointers for lbzu */
- subi r5,r3,1 /* r3 is needed for the result, so walk with r5 */
- 1: lbzu r3,1(r5) /* r3 = next byte of the first string */
- lbzu r0,1(r4) /* r0 = next byte of the second string */
- cmpwi 1,r3,0 /* cr1.eq = the first string has ended */
- sub. r3,r3,r0 /* r3 = difference, cr0.eq = the bytes match */
- beqlr 1 /* end of the first string: return the difference */
- beq 1b /* same byte and not at the end: keep going */
- blr /* mismatch: return the non-zero difference */
- /*
- * strlen: r3 = string.
- * Returns in r3 the number of bytes in front of the terminating zero
- * byte. Uses r0, r4 and cr0.
- */
- .globl strlen
- strlen:
- subi r4,r3,1 /* r4 = string - 1, pre-biased for lbzu */
- 1: lbzu r0,1(r4) /* r0 = *++r4 */
- cmpwi r0,0
- bne 1b
- sub r3,r4,r3 /* r4 stopped on the terminator: length = r4 - start */
- blr
- /*
- * Use dcbz on the complete cache lines in the destination
- * to set them to zero. This requires that the destination
- * area is cacheable. -- paulus
- *
- * On entry r3 = start of the area, r4 = number of bytes to clear.
- * r3 is never written, so it is unchanged on return.
- * Uses r0, r4-r9, ctr and cr0.
- */
- .globl cacheable_memzero
- cacheable_memzero:
- mr r5,r4 /* r5 = byte count */
- li r4,0 /* r4 = the zero that gets stored */
- addi r6,r3,-4 /* r6 = start - 4; the stores below use offset 4 with update */
- cmplwi 0,r5,4
- blt 7f /* fewer than 4 bytes: byte loop only */
- stwu r4,4(r6) /* clear the first word, at start even if start is unaligned */
- beqlr /* exactly 4 bytes: done (cr0 is still from the cmplwi) */
- andi. r0,r6,3 /* r0 = misalignment of the start address */
- add r5,r0,r5 /* count from the word boundary ... */
- subf r6,r0,r6 /* ... that r6 is rounded down to */
- clrlwi r7,r6,32-LG_CACHELINE_BYTES /* r7 = offset of r6 within its cache line */
- add r8,r7,r5 /* r8 = that offset plus the byte count */
- srwi r9,r8,LG_CACHELINE_BYTES
- addic. r9,r9,-1 /* total number of complete cachelines */
- ble 2f /* none: plain word loop only */
- xori r0,r7,CACHELINE_MASK & ~3 /* bytes from r6 to the last word of its cache line */
- srwi. r0,r0,2 /* ... as a word count */
- beq 3f
- mtctr r0
- 4: stwu r4,4(r6) /* clear words up to the next cache line boundary */
- bdnz 4b
- 3: mtctr r9 /* one iteration per complete cache line */
- li r7,4 /* index register: the next line starts at r6 + 4 */
- #if !defined(CONFIG_8xx)
- 10: dcbz r7,r6 /* zero the whole cache line at r6 + 4 */
- #else
- 10: stw r4, 4(r6) /* CONFIG_8xx: four word stores (16 bytes) instead of dcbz */
- stw r4, 8(r6)
- stw r4, 12(r6)
- stw r4, 16(r6)
- #endif
- addi r6,r6,CACHELINE_BYTES
- bdnz 10b
- clrlwi r5,r8,32-LG_CACHELINE_BYTES /* bytes left over in the last, partial line */
- addi r5,r5,4 /* +4 because the bdz below drops one word from the count */
- 2: srwi r0,r5,2
- mtctr r0
- bdz 6f /* ctr is one more than the words still to store: decrement first */
- 1: stwu r4,4(r6) /* clear the remaining whole words */
- bdnz 1b
- 6: andi. r5,r5,3 /* r5 = trailing bytes */
- 7: cmpwi 0,r5,0
- beqlr
- mtctr r5
- addi r6,r6,3 /* so that 1(r6) is the first byte not yet cleared */
- 8: stbu r4,1(r6)
- bdnz 8b
- blr
- /*
- * memset: r3 = dest, r4 = fill value (low byte is used), r5 = count.
- * Replicates the low byte of r4 through the whole word, stores one
- * word at dest, then whole words from the word boundary at or below
- * dest, then any trailing bytes. r3 is never written, so it still
- * holds dest on return. Uses r0, r4, r5, r6, ctr and cr0.
- */
- .globl memset
- memset:
- subi r6,r3,4 /* r6 = dest - 4, pre-biased for stwu */
- rlwimi r4,r4,8,16,23 /* copy the low byte into bits 16-23 ... */
- rlwimi r4,r4,16,0,15 /* ... then the low halfword into the top half */
- cmplwi r5,4
- blt 7f /* under 4 bytes: byte loop only */
- stwu r4,4(r6) /* first word, at dest even if dest is unaligned */
- beqlr /* exactly 4 bytes (cr0 is still from the cmplwi) */
- andi. r0,r6,3 /* r0 = dest & 3 */
- sub r6,r6,r0 /* round r6 down to a word boundary ... */
- add r5,r5,r0 /* ... and count from that boundary instead */
- srwi r0,r5,2 /* whole words, including the one already stored */
- mtctr r0
- bdz 6f /* decrement first: that word is done */
- 5: stwu r4,4(r6)
- bdnz 5b
- 6: andi. r5,r5,3 /* bytes beyond the last whole word */
- 7: cmpwi r5,0
- beqlr
- mtctr r5
- addi r6,r6,3 /* so that 1(r6) is the next byte to fill */
- 8: stbu r4,1(r6)
- bdnz 8b
- blr
- /*
- * bcopy: r3 = src, r4 = dest, r5 = count.
- * Same job as memmove with the first two arguments the other way
- * round. bcopy is specified to copy correctly even when the two areas
- * overlap, so swap the pointers and go to memmove, which picks the
- * copy direction; memmove falls through to memcpy when dest <= src.
- * Uses r6 as scratch for the swap.
- */
- .globl bcopy
- bcopy:
- mr r6,r3 /* r6 = src */
- mr r3,r4 /* r3 = dest */
- mr r4,r6 /* r4 = src */
- b memmove
- /*
- * This version uses dcbz on the complete cache lines in the
- * destination area to reduce memory traffic. This requires that
- * the destination area is cacheable.
- * We only use this version if the source and dest don't overlap.
- * -- paulus.
- *
- * On entry r3 = dest, r4 = src, r5 = byte count. r3 is never
- * written here. Uses r0, r4-r11, ctr, cr0 and cr1.
- */
- .global cacheable_memcpy
- cacheable_memcpy:
- add r7,r3,r5 /* test if the src & dst overlap */
- add r8,r4,r5
- cmplw 0,r4,r7 /* cr0.lt = src < dest + count */
- cmplw 1,r3,r8 /* cr1.lt = dest < src + count */
- crand 0,0,4 /* cr0.lt &= cr1.lt */
- blt memcpy /* if regions overlap */
- addi r4,r4,-4 /* src and dest are both kept pre-biased by 4 */
- addi r6,r3,-4
- neg r0,r3
- andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */
- beq 58f /* dest is already cache-line aligned */
- cmplw 0,r5,r0 /* is this more than total to do? */
- blt 63f /* if not much to do */
- andi. r8,r0,3 /* get it word-aligned first */
- subf r5,r0,r5 /* r5 = bytes left once dest is cache-line aligned */
- mtctr r8
- beq+ 61f
- 70: lbz r9,4(r4) /* do some bytes */
- stb r9,4(r6)
- addi r4,r4,1
- addi r6,r6,1
- bdnz 70b
- 61: srwi. r0,r0,2 /* words up to the cache line boundary */
- mtctr r0
- beq 58f
- 72: lwzu r9,4(r4) /* do some words */
- stwu r9,4(r6)
- bdnz 72b
- 58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
- clrlwi r5,r5,32-LG_CACHELINE_BYTES /* r5 = bytes left after the complete lines */
- li r11,4 /* index for dcbz: the next line starts at r6 + 4 */
- mtctr r0
- beq 63f /* no complete cache line */
- 53:
- #if !defined(CONFIG_8xx)
- dcbz r11,r6 /* zero the destination line before it is overwritten */
- #endif
- COPY_16_BYTES /* one cache line per iteration, 16 bytes per macro */
- #if L1_CACHE_LINE_SIZE >= 32
- COPY_16_BYTES
- #if L1_CACHE_LINE_SIZE >= 64
- COPY_16_BYTES
- COPY_16_BYTES
- #if L1_CACHE_LINE_SIZE >= 128
- COPY_16_BYTES
- COPY_16_BYTES
- COPY_16_BYTES
- COPY_16_BYTES
- #endif
- #endif
- #endif
- bdnz 53b
- 63: srwi. r0,r5,2 /* whole words still to copy */
- mtctr r0
- beq 64f
- 30: lwzu r0,4(r4)
- stwu r0,4(r6)
- bdnz 30b
- 64: andi. r0,r5,3 /* trailing bytes */
- mtctr r0
- beq+ 65f
- 40: lbz r0,4(r4)
- stb r0,4(r6)
- addi r4,r4,1
- addi r6,r6,1
- bdnz 40b
- 65: blr
- .globl memmove /* memmove: r3 = dest, r4 = src, r5 = count */
- memmove:
- cmplw 0,r3,r4
- bgt backwards_memcpy /* dest above src: copy from the top down instead */
- /* fall through */
- .globl memcpy /* memcpy: r3 = dest, r4 = src, r5 = count; copies upwards, r3 is never written */
- memcpy:
- srwi. r7,r5,3 /* r7 = number of 8-byte chunks */
- addi r6,r3,-4 /* dest (in r6) and src are kept pre-biased by 4 */
- addi r4,r4,-4
- beq 2f /* if less than 8 bytes to do */
- andi. r0,r6,3 /* get dest word aligned */
- mtctr r7
- bne 5f /* dest is not word aligned: copy leading bytes first */
- 1: lwz r7,4(r4) /* main loop: two words per iteration */
- lwzu r8,8(r4)
- stw r7,4(r6)
- stwu r8,8(r6)
- bdnz 1b
- andi. r5,r5,7 /* r5 = bytes left after the 8-byte chunks */
- 2: cmplwi 0,r5,4
- blt 3f
- lwzu r0,4(r4) /* one more whole word */
- addi r5,r5,-4
- stwu r0,4(r6)
- 3: cmpwi 0,r5,0
- beqlr
- mtctr r5
- addi r4,r4,3 /* so that 1(r4) / 1(r6) are the next bytes to copy */
- addi r6,r6,3
- 4: lbzu r0,1(r4) /* trailing bytes */
- stbu r0,1(r6)
- bdnz 4b
- blr
- 5: subfic r0,r0,4 /* r0 = bytes needed to word-align dest */
- mtctr r0
- 6: lbz r7,4(r4)
- addi r4,r4,1
- stb r7,4(r6)
- addi r6,r6,1
- bdnz 6b
- subf r5,r0,r5 /* take the bytes just copied off the count */
- rlwinm. r7,r5,32-3,3,31 /* r7 = r5 >> 3, the 8-byte chunks still to do */
- beq 2b
- mtctr r7
- b 1b
- .globl backwards_memcpy /* r3 = dest, r4 = src, r5 = count; copies from the last byte downwards, r3 is never written */
- backwards_memcpy:
- rlwinm. r7,r5,32-3,3,31 /* r7 = r5 >> 3 */
- add r6,r3,r5 /* r6 = one past the end of dest */
- add r4,r4,r5 /* r4 = one past the end of src */
- beq 2f /* fewer than 8 bytes to do */
- andi. r0,r6,3 /* r0 = bytes by which the end of dest is past a word boundary */
- mtctr r7
- bne 5f /* end of dest is not word aligned: copy those bytes first */
- 1: lwz r7,-4(r4) /* main loop: two words per iteration, moving down */
- lwzu r8,-8(r4)
- stw r7,-4(r6)
- stwu r8,-8(r6)
- bdnz 1b
- andi. r5,r5,7 /* r5 = bytes left after the 8-byte chunks */
- 2: cmplwi 0,r5,4
- blt 3f
- lwzu r0,-4(r4) /* one more whole word */
- subi r5,r5,4
- stwu r0,-4(r6)
- 3: cmpwi 0,r5,0
- beqlr
- mtctr r5
- 4: lbzu r0,-1(r4) /* remaining bytes */
- stbu r0,-1(r6)
- bdnz 4b
- blr
- 5: mtctr r0
- 6: lbzu r7,-1(r4)
- stbu r7,-1(r6)
- bdnz 6b
- subf r5,r0,r5 /* take the bytes just copied off the count */
- rlwinm. r7,r5,32-3,3,31 /* r7 = r5 >> 3, the 8-byte chunks still to do */
- beq 2b
- mtctr r7
- b 1b
-
- /*
- * memcmp: r3 = first area, r4 = second area, r5 = count.
- * Compares the areas one byte at a time. Returns in r3 the value
- * (byte of the first area) - (byte of the second area) at the first
- * position where they differ, or 0 if all count bytes are equal or
- * count is 0.
- * count is an unsigned size, so only count == 0 is treated as empty;
- * a signed "less than or equal" test would also skip the comparison
- * for counts of 0x80000000 and above.
- * Uses r0, r4, r6, ctr and cr0.
- */
- .globl memcmp
- memcmp:
- cmpwi 0,r5,0
- beq- 2f /* nothing to compare */
- mtctr r5
- addi r6,r3,-1 /* pre-bias both pointers for lbzu */
- addi r4,r4,-1
- 1: lbzu r3,1(r6)
- lbzu r0,1(r4)
- subf. r3,r0,r3 /* r3 = difference, cr0.eq = the bytes match */
- bdnzt 2,1b /* dec ctr, branch if ctr != 0 && cr0.eq */
- blr
- 2: li r3,0
- blr
- /*
- * memchr: r3 = area, r4 = value to look for, r5 = count.
- * Returns in r3 the address of the first of the count bytes that is
- * equal to (unsigned char)r4, or 0 if there is none or count is 0.
- * The search value is reduced to its low 8 bits first: the bytes are
- * loaded zero-extended, so a value passed sign-extended (for example
- * -1 for 0xff) could otherwise never match.
- * count is an unsigned size, so only count == 0 is treated as empty.
- * Uses r0, r4, ctr and cr0.
- */
- .globl memchr
- memchr:
- cmpwi 0,r5,0
- beq- 2f /* nothing to search */
- mtctr r5
- clrlwi r4,r4,24 /* r4 = (unsigned char)r4 */
- addi r3,r3,-1 /* pre-bias for lbzu */
- 1: lbzu r0,1(r3)
- cmpw 0,r0,r4
- bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */
- beqlr /* match: r3 points at the matching byte */
- 2: li r3,0
- blr
- .globl __copy_tofrom_user /*
- * __copy_tofrom_user: r3 = dest, r4 = src, r5 = byte count.
- * Returns 0 in r3 when everything was copied. If one of the accesses
- * listed in __ex_table faults, control continues at the matching
- * fixup label below; the routine then returns in r3 the number of
- * bytes not copied and, for a read fault, first zeroes that many
- * bytes of the destination.
- */
- __copy_tofrom_user:
- addi r4,r4,-4 /* src and dest are both kept pre-biased by 4 */
- addi r6,r3,-4 /* dest lives in r6; r3 is reused as scratch below */
- neg r0,r3
- andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */
- beq 58f /* dest is already cache-line aligned */
- cmplw 0,r5,r0 /* is this more than total to do? */
- blt 63f /* if not much to do */
- andi. r8,r0,3 /* get it word-aligned first */
- mtctr r8
- beq+ 61f
- 70: lbz r9,4(r4) /* do some bytes */
- 71: stb r9,4(r6)
- addi r4,r4,1
- addi r6,r6,1
- bdnz 70b
- 61: subf r5,r0,r5 /* r5 = bytes left once dest is cache-line aligned */
- srwi. r0,r0,2 /* words up to the cache line boundary */
- mtctr r0
- beq 58f
- 72: lwzu r9,4(r4) /* do some words */
- 73: stwu r9,4(r6)
- bdnz 72b
- 58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
- clrlwi r5,r5,32-LG_CACHELINE_BYTES /* r5 = bytes left after the complete lines */
- li r11,4 /* index for dcbz/dcbt: the next line starts at offset 4 */
- beq 63f /* no complete cache line */
- #if !defined(CONFIG_8xx)
- /* Here we decide how far ahead to prefetch the source */
- #if MAX_L1_COPY_PREFETCH > 1
- /* Heuristically, for large transfers we prefetch
- MAX_L1_COPY_PREFETCH cachelines ahead. For small transfers
- we prefetch 1 cacheline ahead. */
- cmpwi r0,MAX_L1_COPY_PREFETCH
- li r7,1 /* r7 = number of lines to prefetch up front */
- li r3,4 /* r3 = prefetch offset from r4, starting at the next source line */
- ble 111f
- li r7,MAX_L1_COPY_PREFETCH
- 111: mtctr r7
- 112: dcbt r3,r4
- addi r3,r3,CACHELINE_BYTES /* r3 ends up one line beyond the last one prefetched */
- bdnz 112b
- #else /* MAX_L1_COPY_PREFETCH == 1 */
- li r3,CACHELINE_BYTES + 4 /* offset used by the dcbt inside the loop */
- dcbt r11,r4
- #endif /* MAX_L1_COPY_PREFETCH */
- #endif /* CONFIG_8xx */
- mtctr r0 /* ctr = complete cache lines to copy */
- 53:
- #if !defined(CONFIG_8xx)
- dcbt r3,r4 /* prefetch source at r4 + r3; r4 advances one line per iteration */
- 54: dcbz r11,r6 /* zero the destination line before it is overwritten */
- #endif
- /* had to move these to keep extable in order */
- .section __ex_table,"a"
- .align 2
- .long 70b,100f /* each entry: label of an access, label of its fixup */
- .long 71b,101f
- .long 72b,102f
- .long 73b,103f
- #if !defined(CONFIG_8xx)
- .long 54b,105f
- #endif
- .text
- /* the main body of the cacheline loop */
- COPY_16_BYTES_WITHEX(0)
- #if L1_CACHE_LINE_SIZE >= 32
- COPY_16_BYTES_WITHEX(1)
- #if L1_CACHE_LINE_SIZE >= 64
- COPY_16_BYTES_WITHEX(2)
- COPY_16_BYTES_WITHEX(3)
- #if L1_CACHE_LINE_SIZE >= 128
- COPY_16_BYTES_WITHEX(4)
- COPY_16_BYTES_WITHEX(5)
- COPY_16_BYTES_WITHEX(6)
- COPY_16_BYTES_WITHEX(7)
- #endif
- #endif
- #endif
- bdnz 53b
- 63: srwi. r0,r5,2 /* whole words still to copy */
- mtctr r0
- beq 64f
- 30: lwzu r0,4(r4)
- 31: stwu r0,4(r6)
- bdnz 30b
- 64: andi. r0,r5,3 /* trailing bytes */
- mtctr r0
- beq+ 65f
- 40: lbz r0,4(r4)
- 41: stb r0,4(r6)
- addi r4,r4,1
- addi r6,r6,1
- bdnz 40b
- 65: li r3,0 /* everything copied: return 0 */
- blr
- /* read fault, initial single-byte copy */
- 100: li r4,0 /* r4 = 0 marks a read fault */
- b 90f
- /* write fault, initial single-byte copy */
- 101: li r4,1 /* r4 = 1 marks a write fault */
- 90: subf r5,r8,r5 /* r5 is still the full count here: take off the r8 leading bytes, ctr adds back those not done */
- li r3,0 /* ctr counts single bytes: shift by 0 */
- b 99f
- /* read fault, initial word copy */
- 102: li r4,0
- b 91f
- /* write fault, initial word copy */
- 103: li r4,1
- 91: li r3,2 /* ctr counts words: shift by 2 */
- b 99f
- /*
- * this stuff handles faults in the cacheline loop and branches to either
- * 104f (if in read part) or 105f (if in write part), after updating r5
- */
- COPY_16_BYTES_EXCODE(0)
- #if L1_CACHE_LINE_SIZE >= 32
- COPY_16_BYTES_EXCODE(1)
- #if L1_CACHE_LINE_SIZE >= 64
- COPY_16_BYTES_EXCODE(2)
- COPY_16_BYTES_EXCODE(3)
- #if L1_CACHE_LINE_SIZE >= 128
- COPY_16_BYTES_EXCODE(4)
- COPY_16_BYTES_EXCODE(5)
- COPY_16_BYTES_EXCODE(6)
- COPY_16_BYTES_EXCODE(7)
- #endif
- #endif
- #endif
- /* read fault in cacheline loop */
- 104: li r4,0
- b 92f
- /* fault on dcbz (effectively a write fault) */
- /* or write fault in cacheline loop */
- 105: li r4,1
- 92: li r3,LG_CACHELINE_BYTES /* ctr counts cache lines */
- b 99f
- /* read fault in final word loop */
- 108: li r4,0
- b 93f
- /* write fault in final word loop */
- 109: li r4,1
- 93: andi. r5,r5,3 /* besides the ctr words only the sub-word tail is left */
- li r3,2
- b 99f
- /* read fault in final byte loop */
- 110: li r4,0
- b 94f
- /* write fault in final byte loop */
- 111: li r4,1
- 94: li r5,0 /* ctr alone is the number of bytes left */
- li r3,0
- /*
- * At this stage the number of bytes not copied is
- * r5 + (ctr << r3), and r4 is 0 for read or 1 for write.
- */
- 99: mfctr r0
- slw r3,r0,r3
- add r3,r3,r5 /* r3 = bytes not copied, the return value */
- cmpwi 0,r4,0
- bne 120f /* write fault: just return the count */
- /* for read fault, clear out the destination: r3 bytes starting at 4(r6) */
- srwi. r0,r3,2 /* whole words to clear */
- li r9,0
- mtctr r0
- beq 113f
- 112: stwu r9,4(r6)
- bdnz 112b
- 113: andi. r0,r3,3 /* then the remaining bytes */
- mtctr r0
- beq 120f
- 114: stb r9,4(r6)
- addi r6,r6,1
- bdnz 114b
- 120: blr
- .section __ex_table,"a"
- .align 2
- .long 30b,108b
- .long 31b,109b
- .long 40b,110b
- .long 41b,111b
- .long 112b,120b /* a fault while clearing the destination simply returns */
- .long 114b,120b
- .text
- .globl __clear_user /*
- * __clear_user: r3 = start address, r4 = byte count.
- * Stores zeroes over the area. Returns 0 in r3, or, if one of the
- * stores listed in __ex_table faults, the number of bytes that were
- * not cleared (computed at fixup labels 90, 91 and 92).
- */
- __clear_user:
- addi r6,r3,-4 /* r6 = start - 4, pre-biased for stwu */
- li r3,0 /* return value when nothing faults */
- li r5,0 /* r5 = the zero that gets stored */
- cmplwi 0,r4,4
- blt 7f /* fewer than 4 bytes: byte loop only */
- /* clear a single word */
- 11: stwu r5,4(r6)
- beqlr /* exactly 4 bytes (cr0 is still from the cmplwi) */
- /* clear word sized chunks */
- andi. r0,r6,3 /* r0 = misalignment of the start address */
- add r4,r0,r4 /* count from the word boundary ... */
- subf r6,r0,r6 /* ... that r6 is rounded down to */
- srwi r0,r4,2 /* whole words, including the one already stored */
- andi. r4,r4,3 /* r4 = trailing bytes */
- mtctr r0
- bdz 7f /* decrement first: that word is done */
- 1: stwu r5,4(r6)
- bdnz 1b
- /* clear byte sized chunks */
- 7: cmpwi 0,r4,0
- beqlr
- mtctr r4
- addi r6,r6,3 /* so that 1(r6) is the next byte to clear */
- 8: stbu r5,1(r6)
- bdnz 8b
- blr
- 90: mr r3,r4 /* fault on the first word: return the full count */
- blr
- 91: mfctr r3 /* fault in the word loop: words left in ctr ... */
- slwi r3,r3,2
- add r3,r3,r4 /* ... times 4, plus the trailing bytes */
- blr
- 92: mfctr r3 /* fault in the byte loop: ctr bytes are left */
- blr
- .section __ex_table,"a"
- .align 2
- .long 11b,90b /* each entry: label of a store, label of its fixup */
- .long 1b,91b
- .long 8b,92b
- .text
- /*
- * __strncpy_from_user: r3 = dest, r4 = src, r5 = count.
- * Copies bytes from src to dest until a zero byte has been copied or
- * count bytes have been copied. Returns in r3 the number of bytes
- * copied, not counting the zero byte if there was one. The load at
- * label 1 has an __ex_table entry that redirects a fault to label 99,
- * which returns -EFAULT.
- * Uses r0, r4, r6, ctr and cr0.
- */
- .globl __strncpy_from_user
- __strncpy_from_user:
- cmpwi r5,0
- subi r4,r4,1 /* pre-bias both pointers for lbzu/stbu */
- subi r6,r3,1
- beq 2f /* count == 0: return 0 */
- mtctr r5
- 1: lbzu r0,1(r4) /* the access covered by the exception table */
- cmpwi r0,0
- stbu r0,1(r6)
- bdnzf 2,1b /* loop while --ctr != 0 and the byte was not zero */
- beq 3f /* stopped on a zero byte: r6 points at it */
- 2: addi r6,r6,1 /* count used up: r6 is on the last byte, step past it */
- 3: sub r3,r6,r3 /* bytes copied = r6 - dest */
- blr
- 99: li r3,-EFAULT /* fixup target for the load at 1 */
- blr
- .section __ex_table,"a"
- .align 2
- .long 1b,99b
- .text
- /*
- * __strnlen_user: r3 = str, r4 = len (> 0), r5 = top (highest addr).
- * Returns in r3:
- * - the number of bytes examined, counting the zero byte, if a zero
- * byte is found;
- * - len + 1 if len bytes were examined without finding a zero byte;
- * - 0 if the scan reached top before that, or if the load at label 1
- * faults (its __ex_table entry redirects to label 99).
- * Uses r0, r6, r7, ctr and cr0.
- */
- .globl __strnlen_user
- __strnlen_user:
- subi r7,r3,1 /* r7 = str - 1, pre-biased for lbzu */
- sub r6,r5,r7 /* r6 = top + 1 - str, the bytes available up to top */
- cmplw r4,r6
- bge 0f
- mr r6,r4 /* len is the smaller limit */
- 0: mtctr r6 /* ctr = min(len, top + 1 - str) */
- 1: lbzu r0,1(r7) /* get next byte */
- cmpwi r0,0
- bdnzf 2,1b /* loop if --ctr != 0 && byte != 0 */
- addi r7,r7,1 /* neither this addi nor the sub changes cr0 ... */
- sub r3,r7,r3 /* r3 = number of bytes we have looked at */
- beqlr /* ... so this still tests the last byte: zero byte found */
- cmpw r3,r4 /* did we look at all len bytes? */
- blt 99f /* if not, must have hit top */
- addi r3,r4,1 /* no zero byte within len: return len + 1 */
- blr
- 99: li r3,0 /* bad address, return 0 */
- blr
- .section __ex_table,"a"
- .align 2
- .long 1b,99b