blob: ef23aa9abb9e7cd75e1ac628501e9811f99230e3 [file] [log] [blame]
lh9ed821d2023-04-07 01:36:19 -07001/* strcmp.S
2 * Copyright (C) 2003-2007 Analog Devices Inc., All Rights Reserved.
3 *
4 * This file is subject to the terms and conditions of the GNU Library General
5 * Public License. See the file "COPYING.LIB" in the main directory of this
6 * archive for more details.
7 *
8 * Non-LGPL License also available as part of VisualDSP++
9 * http://www.analog.com/processors/resources/crosscore/visualDspDevSoftware.html
10 */
11
12#include <sysdep.h>
13
14/* Fast strcmp() for Blackfin.
15 * When both strings are 4-byte (word) aligned, this processes four characters at
16 * a time. Uses a hw loop with "very big" count to loop "forever",
17 * until difference or a terminating zero is found.
18 * Once the end-case word has been identified, breaks out of the
19 * loop to check more carefully (same as the unaligned case).
20 */
21
22.text
23
24.align 2
25
26.weak _strcmp /* weak binding, so another definition of _strcmp can take precedence */
27ENTRY(_strcmp)
/*
 * _strcmp: compare two zero-terminated byte strings.
 *
 * In:   r0 = address of the first string  (copied to p1)
 *       r1 = address of the second string (copied to p2)
 * Out:  r0 = (char of first string) - (char of second string), both
 *            zero-extended, taken at the first position where the two
 *            chars differ or one of them is zero; 0 if the strings match.
 * Uses: r1-r3, r6, r7, p0-p2, cc and hardware loop 0 (lc0) as scratch.
 *       r7:4 are pushed on entry and popped before rts; of those only
 *       r6 and r7 are written in this routine.
 *
 * Structure: if both addresses have their low two bits clear, a word
 * loop skips over equal, zero-free words; the byte loop at .Lunaligned
 * then finds the exact char (it is also the whole algorithm when either
 * address is not word aligned).
 *
 * Note: both loops load the next word/char before the current one has
 * been tested, so a little memory past the terminator can be read.
 */
28 [--sp] = (R7:4); /* save r4-r7 */
29 p1 = r0; /* p1 walks the first string */
30 p2 = r1; /* p2 walks the second string */
31
32 p0 = -1; /* (need for loop counter init) all-ones count: loops end only by jumping out */
33
34 /* check if both pointers are 4-byte (word) aligned */
35 r0 = r0 | r1; /* check both pointers at same time */
36 r0 <<= 30; /* dump all but last 2 bits */
37 cc = az; /* are they zero? */
38 if !cc jump .Lunaligned; /* no; use unaligned code. */
39 /* fall-thru for aligned case.. */
40
41 /* note that r0 is zero from the shift above; it is the zero addend of the +|+ below */
42 /* p0 set to -1 */
43
44 LSETUP (.Lbeginloop, .Lendloop) lc0=p0;
45 /* pick up first words */
46 r1 = [p1++];
47 r2 = [p2++];
48 /* make up mask: 0x00FF00FF (low byte of each 16-bit half) */
49 r7 = 0xFF;
50 r7.h = 0xFF;
51 /* loop : 9 cycles to check 4 characters */
52 cc = r1 == r2; /* prime cc for the test at the top of the loop */
53.Lbeginloop:
54 if !cc jump .Lnotequal4; /* compare failure, exit loop */
55
56 /* starting with 44332211 */
57 /* see if char 3 or char 1 is 0 */
58 r3 = r1 & r7; /* form 00330011 */
59 /* add to zero, and (r2 is free, reload) */
60 r6 = r3 +|+ r0 || r2 = [p2++] || nop; /* r6 is never read; only az is wanted */
61 cc = az; /* true if either is zero */
62 r3 = r1 ^ r3; /* form 44002200 (4321^0301 => 4020) */
63 /* (trick, saves having another mask) */
64 /* add to zero, and (r1 is free, reload) */
65 r6 = r3 +|+ r0 || r1 = [p1++] || nop; /* r1/r2 now hold the NEXT word pair */
66 cc |= az; /* true if either is zero */
67 if cc jump .Lzero4; /* leave if a zero somewhere */
68.Lendloop:
69 cc = r1 == r2; /* compare the freshly loaded words; tested at .Lbeginloop */
70
71 /* loop exits */
72.Lnotequal4: /* compare failure on 4-char compare */
73 /* address pointers are one word ahead of the differing word; */
74 /* faster to use zero4 exit code: step to two words ahead first */
75 p1 += 4;
76 p2 += 4;
77
78.Lzero4: /* one of the bytes in the word just tested is zero */
79 /* but we've already fetched the next word; so */
80 /* backup two words to look at failing word again */
81 p1 += -8;
82 p2 += -8;
83
84
85
86 /* here when pointers are unaligned: checks one */
87 /* character at a time. Also use at the end of */
88 /* the word-check algorithm to figure out what happened */
89.Lunaligned:
90 /* R0 is non-zero when jumped to from the alignment check, */
91 /* zero when falling in from above; it is cleared here either way. p0 set to -1 */
92
93 r0 = 0 (Z); /* zero operand for the "0 - r2" test in the loop */
94 r1 = B[p1++] (Z); /* first char of each string, zero-extended */
95 r2 = B[p2++] (Z);
96 LSETUP (.Lbeginloop1, .Lendloop1) lc0=p0;
97
98.Lbeginloop1:
99 cc = r1; /* first char must be non-zero */
100 /* chars must be the same */
101 r3 = r2 - r1 (NS) || r1 = B[p1++] (Z) || nop; /* also preloads the next char of the first string */
102 cc &= az; /* az from r2 - r1: set when the two chars are equal */
103 r3 = r0 - r2; /* second char must be non-zero */
104 cc &= an; /* an from 0 - r2: set when r2 is non-zero */
105 if !cc jump .Lexitloop1;
106.Lendloop1:
107 r2 = B[p2++] (Z); /* next char of the second string, then loop */
108
109.Lexitloop1: /* here means we found a zero or a difference. */
110 /* we have r2(N), p2(N+1), r1(N+1), p1(N+2) */
111 r1=B[p1+ -2] (Z); /* re-read char N of the first string (r1 was overwritten by the preload) */
112 r0 = r1 - r2; /* result: difference of the two zero-extended chars */
113 (r7:4) = [sp++]; /* restore r4-r7 */
114 rts;
115.size _strcmp,.-_strcmp
116
/* NOTE(review): libc_hidden_def and weak_alias are macros defined outside */
/* this file (presumably reached through <sysdep.h>) - confirm their exact effect. */
117libc_hidden_def (strcmp)
118
/* Only when __UCLIBC_HAS_LOCALE__ is not defined: strcoll is declared */
/* through weak_alias with strcmp as its target. */
119#ifndef __UCLIBC_HAS_LOCALE__
120weak_alias (strcmp,strcoll)
121libc_hidden_def (strcoll)
122#endif /* !__UCLIBC_HAS_LOCALE__ */