/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/* This is optimized primarily for the ARC700.
   It would be possible to speed up the loops by one cycle / word
   (one cycle / byte, respectively) by forcing double source 1 alignment,
   unrolling by a factor of two, and speculatively loading the second
   word / byte of source 1; however, that would increase the overhead
   for loop setup / finish, and strcmp might often terminate early.  */

#include <linux/linkage.h>

ENTRY_CFI(strcmp)
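	/* Check the low two bits of both pointers at once; if either
	   string is not word aligned, use the byte-at-a-time loop. */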
	or	r2,r0,r1
	bmsk_s	r2,r2,1
	brne	r2,0,.Lcharloop
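	/* r12 = 0x01010101; r5 = r12 rotated right by one = 0x80808080.
	   These are the classic constants for finding a zero byte in a
	   word. */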
	mov_s	r12,0x01010101
	ror	r5,r12
.Lwordloop:
	ld.ab	r2,[r0,4]
	ld.ab	r3,[r1,4]
	nop_s
	sub	r4,r2,r12
	bic	r4,r4,r2
	and	r4,r4,r5
	brne	r4,0,.Lfound0
	breq	r2,r3,.Lwordloop
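	/* Rough C model of the zero-byte test above (illustrative sketch
	   only; 'w' stands for the word just loaded from source 1):

		unsigned long ones  = 0x01010101;	// r12
		unsigned long highs = 0x80808080;	// r5
		unsigned long zmask = (w - ones) & ~w & highs;

	   zmask has bit 7 of byte n set when byte n of w is 0x00.  A 0x01
	   byte above a less significant zero byte can be falsely flagged
	   by the borrow; the big-endian .Lfound0 path compensates for
	   this below. */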
#ifdef	__LITTLE_ENDIAN__
	xor	r0,r2,r3	; mask for difference
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
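	/* Rough C model of the masking above (illustrative sketch only):

		unsigned long diff = w1 ^ w2;
		unsigned long bit  = diff & ~(diff - 1);  // lowest difference bit
		unsigned long mask = 0x80808080 ^ (0x80808080 - bit);

	   mask covers the least significant differing byte from its lowest
	   differing bit up to bit 7, i.e. the first differing character in
	   little-endian order; the unsigned compare below then returns +1
	   or, via bset.lo, a negative value with bit 31 set. */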
#endif /* LITTLE ENDIAN */
	cmp_s	r2,r3
	mov_s	r0,1
	j_s.d	[blink]
	bset.lo	r0,r0,31

	.balign	4
#ifdef __LITTLE_ENDIAN__
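	/* Source 1 contains a zero byte.  Folding the zero indicator into
	   the difference mask makes the terminator itself count as a
	   difference, so equal strings yield r0 = 0; any falsely flagged
	   0x01 byte lies above the true terminator and is ignored by the
	   lowest-bit selection. */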
.Lfound0:
	xor	r0,r2,r3	; mask for difference
	or	r0,r0,r4	; or in zero indicator
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
	sub.f	r0,r2,r3
	mov.hi	r0,1
	j_s.d	[blink]
	bset.lo	r0,r0,31
#else /* BIG ENDIAN */
	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
	   because of carry-propagation from a less significant zero byte.
	   We can compensate for this by checking that bit 0 is zero.
	   This compensation is not necessary in the step where we
	   get a low estimate for r2, because in any affected bytes
	   we already have 0x00 or 0x01, which will remain unchanged
	   when bit 7 is cleared.  */
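	/* Rough C model of the mask adjustment below (illustrative sketch
	   only; 'w' is the word from source 1, 'zmask' the zero mask in r4):

		unsigned long below = zmask >> 8;
		unsigned long adj   = below & ~(w >> 1);

	   'below' flags each byte directly below a detected zero; masking
	   with ~(w >> 1) keeps a flag only where bit 0 of the byte above
	   is clear, i.e. where that byte is a true 0x00 rather than a
	   falsely flagged 0x01.  The or_s/bic_s pairs then bias the bytes
	   behind the terminator so that each compare is independent of
	   trailing garbage. */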
	.balign	4
.Lfound0:
	lsr	r0,r4,8
	lsr_s	r1,r2
	bic_s	r2,r2,r0	; get low estimate for r2 and get ...
	bic_s	r0,r0,r1	; <this is the adjusted mask for zeros>
	or_s	r3,r3,r0	; ... high estimate r3 so that r2 > r3 will ...
	cmp_s	r3,r2		; ... be independent of trailing garbage
	or_s	r2,r2,r0	; likewise for r3 > r2
	bic_s	r3,r3,r0
	rlc	r0,0		; r0 := r2 > r3 ? 1 : 0
	cmp_s	r2,r3
	j_s.d	[blink]
	bset.lo	r0,r0,31
#endif /* ENDIAN */

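	/* Unaligned fallback: plain byte-at-a-time compare.  Roughly, in C
	   (illustrative sketch only):

		const unsigned char *s1, *s2;	// r0, r1
		int c1, c2;
		do {
			c1 = *s1++;
			c2 = *s2++;
			if (c1 == 0)
				break;
		} while (c1 == c2);
		return c1 - c2;
	*/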
	.balign	4
.Lcharloop:
	ldb.ab	r2,[r0,1]
	ldb.ab	r3,[r1,1]
	nop_s
	breq	r2,0,.Lcmpend
	breq	r2,r3,.Lcharloop
.Lcmpend:
	j_s.d	[blink]
	sub	r0,r2,r3
END_CFI(strcmp)