lh | 9ed821d | 2023-04-07 01:36:19 -0700 | [diff] [blame] | 1 | /* strcmp.S |
| 2 | * Copyright (C) 2003-2007 Analog Devices Inc., All Rights Reserved. |
| 3 | * |
| 4 | * This file is subject to the terms and conditions of the GNU Library General |
| 5 | * Public License. See the file "COPYING.LIB" in the main directory of this |
| 6 | * archive for more details. |
| 7 | * |
| 8 | * Non-LGPL License also available as part of VisualDSP++ |
| 9 | * http://www.analog.com/processors/resources/crosscore/visualDspDevSoftware.html |
| 10 | */ |
| 11 | |
| 12 | #include <sysdep.h> |
| 13 | |
| 14 | /* Fast strcmp() for Blackfin. |
| 15 | * When both strings are 4-byte aligned, this processes four characters at |
| 16 | * a time. Uses a hardware loop with a "very big" count to loop "forever", |
| 17 | * until a difference or a terminating zero is found. |
| 18 | * Once the end-case word has been identified, breaks out of the |
| 19 | * loop to check more carefully (same as the unaligned case). |
| 20 | */ |
| 21 | |
| 22 | .text |
| 23 | /* _strcmp: compare the zero-terminated byte strings whose addresses arrive in R0 and R1. */ |
| 24 | .align 2 |
| 25 | /* Result is left in R0: (byte of the R0 string) - (byte of the R1 string), both zero-extended, at the first difference or terminator. */ |
| 26 | .weak _strcmp /* the symbol is emitted weak */ |
| 27 | ENTRY(_strcmp) |
| 28 | [--sp] = (R7:4); /* save R4-R7 on the stack; restored before rts */ |
| 29 | p1 = r0; /* P1 walks the first string */ |
| 30 | p2 = r1; /* P2 walks the second string */ |
| 31 | |
| 32 | p0 = -1; /* loop count for both LSETUPs below: a "very big" count so the loops run until a branch leaves them */ |
| 33 | |
| 34 | /* check if both pointers are 4-byte aligned */ |
| 35 | r0 = r0 | r1; /* check both pointers at same time */ |
| 36 | r0 <<= 30; /* dump all but last 2 bits (they end up in the top 2 bits) */ |
| 37 | cc = az; /* are they zero? */ |
| 38 | if !cc jump .Lunaligned; /* no; use unaligned code. */ |
| 39 | /* fall-thru for aligned case.. */ |
| 40 | |
| 41 | /* note that r0 is zero here (the shift above produced zero); it is the zero operand of the +|+ adds in the loop */ |
| 42 | /* p0 set to -1 */ |
| 43 | |
| 44 | LSETUP (.Lbeginloop, .Lendloop) lc0=p0; /* hardware loop from .Lbeginloop through the instruction at .Lendloop */ |
| 45 | /* pick up first words */ |
| 46 | r1 = [p1++]; |
| 47 | r2 = [p2++]; |
| 48 | /* make up mask: 0x00FF00FF */ |
| 49 | r7 = 0xFF; |
| 50 | r7.h = 0xFF; |
| 51 | /* loop : 9 cycles to check 4 characters */ |
| 52 | cc = r1 == r2; /* compare the first pair of words before entering the loop */ |
| 53 | .Lbeginloop: |
| 54 | if !cc jump .Lnotequal4; /* compare failure, exit loop */ |
| 55 | |
| 56 | /* starting with 44332211 */ |
| 57 | /* see if char 3 or char 1 is 0 */ |
| 58 | r3 = r1 & r7; /* form 00330011 */ |
| 59 | /* add to zero, and (r2 is free, reload) */ |
| 60 | r6 = r3 +|+ r0 || r2 = [p2++] || nop; |
| 61 | cc = az; /* true if either is zero */ |
| 62 | r3 = r1 ^ r3; /* form 44002200 (4321^0301 => 4020) */ |
| 63 | /* (trick, saves having another mask) */ |
| 64 | /* add to zero, and (r1 is free, reload) */ |
| 65 | r6 = r3 +|+ r0 || r1 = [p1++] || nop; |
| 66 | cc |= az; /* true if either is zero */ |
| 67 | if cc jump .Lzero4; /* leave if a zero somewhere */ |
| 68 | .Lendloop: |
| 69 | cc = r1 == r2; /* compare the words just reloaded; the result is tested at .Lbeginloop */ |
| 70 | /* NOTE(review): the reloads of r2 and r1 inside the loop happen before the zero test branches out, so the word after the one holding the terminator is also read - confirm callers can tolerate that */ |
| 71 | /* loop exits */ |
| 72 | .Lnotequal4: /* compare failure on 4-char compare */ |
| 73 | /* address pointers are one word ahead; */ |
| 74 | /* faster to use zero4 exit code */ |
| 75 | p1 += 4; /* pre-compensate so the shared -8 below nets -4 on this path */ |
| 76 | p2 += 4; |
| 77 | |
| 78 | .Lzero4: /* one of the bytes in word 1 is zero */ |
| 79 | /* but we've already fetched the next word; so */ |
| 80 | /* backup two to look at failing word again */ |
| 81 | p1 += -8; |
| 82 | p2 += -8; |
| 83 | |
| 84 | |
| 85 | |
| 86 | /* here when pointers are unaligned: checks one */ |
| 87 | /* character at a time. Also use at the end of */ |
| 88 | /* the word-check algorithm to figure out what happened */ |
| 89 | .Lunaligned: |
| 90 | /* R0 is non-zero when arriving from the alignment check (zero on fall-through from the word loop); it is cleared below. */ |
| 91 | /* p0 set to -1 */ |
| 92 | |
| 93 | r0 = 0 (Z); /* zero constant for the 0 - r2 test in the loop */ |
| 94 | r1 = B[p1++] (Z); /* first byte of the first string, zero-extended */ |
| 95 | r2 = B[p2++] (Z); /* first byte of the second string, zero-extended */ |
| 96 | LSETUP (.Lbeginloop1, .Lendloop1) lc0=p0; /* hardware loop from .Lbeginloop1 through the instruction at .Lendloop1 */ |
| 97 | |
| 98 | .Lbeginloop1: |
| 99 | cc = r1; /* first char must be non-zero */ |
| 100 | /* chars must be the same */ |
| 101 | r3 = r2 - r1 (NS) || r1 = B[p1++] (Z) || nop; /* the parallel load reads ahead the next byte of the first string */ |
| 102 | cc &= az; /* keep cc only if the difference was zero */ |
| 103 | r3 = r0 - r2; /* second char must be non-zero */ |
| 104 | cc &= an; /* 0 - r2 is negative only when the zero-extended byte r2 is non-zero */ |
| 105 | if !cc jump .Lexitloop1; |
| 106 | .Lendloop1: |
| 107 | r2 = B[p2++] (Z); /* next byte of the second string */ |
| 108 | |
| 109 | .Lexitloop1: /* here means we found a zero or a difference. */ |
| 110 | /* we have r2 = byte N, r1 = byte N+1 (read ahead), p1 at N+2 (p2 at N+1) */ |
| 111 | r1=B[p1+ -2] (Z); /* re-read byte N of the first string */ |
| 112 | r0 = r1 - r2; /* return value: difference of the two bytes at index N */ |
| 113 | (r7:4) = [sp++]; /* restore R4-R7 */ |
| 114 | rts; |
| 115 | .size _strcmp,.-_strcmp |
| 116 | |
| 117 | libc_hidden_def (strcmp) /* macro defined outside this file (presumably reached via sysdep.h) - TODO confirm its exact effect */ |
| 118 | /* When __UCLIBC_HAS_LOCALE__ is not defined, strcoll is declared through weak_alias as an alias of strcmp. */ |
| 119 | #ifndef __UCLIBC_HAS_LOCALE__ |
| 120 | weak_alias (strcmp,strcoll) |
| 121 | libc_hidden_def (strcoll) |
| 122 | #endif /* !__UCLIBC_HAS_LOCALE__ */ |