/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/* This is optimized primarily for the ARC700.
   It would be possible to speed up the loops by one cycle per word
   (respectively one cycle per byte) by forcing doubleword alignment of
   source 1, unrolling by a factor of two, and speculatively loading the
   second word / byte of source 1; however, that would increase the
   overhead for loop setup / finish, and strcmp might often terminate
   early.  */

#include <linux/linkage.h>

ENTRY_CFI(strcmp)
	or	r2,r0,r1
	bmsk_s	r2,r2,1		; r2 = (r0 | r1) & 3
	brne	r2,0,.Lcharloop	; byte loop unless both strings are word-aligned
	mov_s	r12,0x01010101
	ror	r5,r12		; r5 = 0x80808080
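
/* The word loop below relies on the classic carry trick: the value
   (x - 0x01010101) & ~x & 0x80808080 is nonzero iff some byte of x is
   zero (with possible false positives on 0x01 bytes that follow a zero
   byte; the big-endian path compensates for that below).  A minimal C
   sketch of this loop, assuming word-aligned inputs and using a plain
   byte-wise tail instead of the bit tricks used here (the function
   names are illustrative, not part of the kernel API):

	#include <stdint.h>
	#include <string.h>

	// Nonzero iff some byte of x is zero; may also flag a 0x01 byte
	// that directly follows a zero byte (borrow propagation).
	static int may_have_zero_byte(uint32_t x)
	{
		return ((x - 0x01010101u) & ~x & 0x80808080u) != 0;
	}

	static int strcmp_word_sketch(const char *s1, const char *s2)
	{
		for (;;) {
			uint32_t a, b;

			memcpy(&a, s1, 4);	// word-sized loads, like ld.ab
			memcpy(&b, s2, 4);
			if (a != b || may_have_zero_byte(a))
				break;		// difference or (possible) NUL
			s1 += 4;
			s2 += 4;
		}
		while (*s1 && *s1 == *s2) {	// resolve the last word byte-wise
			s1++;
			s2++;
		}
		return (unsigned char)*s1 - (unsigned char)*s2;
	}
*/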
.Lwordloop:
	ld.ab	r2,[r0,4]	; next word of source 1, post-increment r0
	ld.ab	r3,[r1,4]	; next word of source 2, post-increment r1
	nop_s
	sub	r4,r2,r12	; r4 = r2 - 0x01010101 ...
	bic	r4,r4,r2	; ... & ~r2 ...
	and	r4,r4,r5	; ... & 0x80808080: nonzero if r2 may hold a zero byte
	brne	r4,0,.Lfound0
	breq	r2,r3,.Lwordloop
#ifdef	__LITTLE_ENDIAN__
	xor	r0,r2,r3	; mask for difference
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
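
/* x & ~(x - 1) (equivalently x & -x) isolates the least significant set
   bit; 0x80808080 ^ (0x80808080 - bit) then widens it into a mask that
   runs from that bit up to bit 7 of its byte.  Since all lower bits are
   equal in r2 and r3, comparing the masked words is equivalent to
   comparing the first differing (lowest-addressed, in little-endian)
   bytes.  A hedged C sketch of this computation (the helper name is
   hypothetical):

	#include <stdint.h>

	// Mask selecting the least significant differing byte of a and b,
	// from its lowest differing bit up to bit 7 of that byte.
	static uint32_t le_diff_byte_mask(uint32_t a, uint32_t b)
	{
		uint32_t diff = a ^ b;			// mask for difference
		uint32_t bit = diff & ~(diff - 1);	// lowest difference bit
		return 0x80808080u ^ (0x80808080u - bit);
	}
*/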
#endif /* LITTLE ENDIAN */
	cmp_s	r2,r3
	mov_s	r0,1		; assume r2 > r3; mov_s leaves the flags intact
	j_s.d	[blink]
	bset.lo	r0,r0,31	; r2 < r3: set bit 31 -> 0x80000001, i.e. negative

	.balign	4
#ifdef __LITTLE_ENDIAN__
.Lfound0:
	xor	r0,r2,r3	; mask for difference
	or	r0,r0,r4	; or in zero indicator
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
	sub.f	r0,r2,r3	; 0 if the masked words (hence strings) are equal
	mov.hi	r0,1		; positive if r2 > r3 (unsigned)
	j_s.d	[blink]
	bset.lo	r0,r0,31	; negative if r2 < r3
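
/* Or-ing in the zero-byte flags (r4) makes the NUL terminator act like
   a difference: the byte mask lands on the terminator or on the first
   real difference, whichever comes first, so garbage beyond the end of
   the shorter string cannot affect the result.  Worked example:
   r2 = 0x41410041 and r3 = 0x42410041 both encode the string "A"
   (byte 0 = 0x41, byte 1 = NUL) and differ only past the terminator.
   Then r0 = r2 ^ r3 = 0x03000000 and r4 = 0x00008000, so the least
   significant set bit of r0 | r4 lies in the NUL byte; the byte mask
   becomes 0x00008000, both masked words are 0, and we return 0.  */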
#else /* BIG ENDIAN */
	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
	   because of carry-propagation from a less significant zero byte.
	   We can compensate for this by checking that bit 0 is zero.
	   This compensation is not necessary in the step where we
	   get a low estimate for r2, because in any affected bytes
	   we already have 0x00 or 0x01, which will remain unchanged
	   when bit 7 is cleared.  */
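	/* Worked example of the mis-detection: for r2 = 0x01010000,
	   (r2 - 0x01010101) & ~r2 & 0x80808080 = 0xFFFFFEFF & 0xFEFEFFFF
	   & 0x80808080 = 0x80808080, flagging all four bytes although
	   only the low two are zero: the borrow out of the true zero
	   bytes propagated into the 0x01 bytes.  Bit 0 tells them apart,
	   since a flagged byte can only be 0x00 or 0x01.  */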
	.balign	4
.Lfound0:
	lsr	r0,r4,8
	lsr_s	r1,r2
	bic_s	r2,r2,r0	; get low estimate for r2 and get ...
	bic_s	r0,r0,r1	; <this is the adjusted mask for zeros>
	or_s	r3,r3,r0	; ... high estimate r3 so that r2 > r3 will ...
	cmp_s	r3,r2		; ... be independent of trailing garbage
	or_s	r2,r2,r0	; likewise for r3 > r2
	bic_s	r3,r3,r0
	rlc	r0,0		; r0 := r2 > r3 ? 1 : 0
	cmp_s	r2,r3
	j_s.d	[blink]
	bset.lo	r0,r0,31	; negative if r2 < r3
#endif /* ENDIAN */
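
/* Unaligned inputs fall back to a plain byte-at-a-time compare.  A
   minimal C sketch of the loop below (illustrative name; note that
   strcmp only guarantees the sign of the result):

	static int strcmp_byte_sketch(const char *s1, const char *s2)
	{
		unsigned char a, b;

		do {
			a = *s1++;	// like ldb.ab
			b = *s2++;
		} while (a && a == b);	// stop on NUL or first difference
		return a - b;
	}
*/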

	.balign	4
.Lcharloop:
	ldb.ab	r2,[r0,1]	; next byte of source 1, post-increment r0
	ldb.ab	r3,[r1,1]	; next byte of source 2, post-increment r1
	nop_s
	breq	r2,0,.Lcmpend	; end of source 1
	breq	r2,r3,.Lcharloop
.Lcmpend:
	j_s.d	[blink]
	sub	r0,r2,r3	; difference of the first non-matching bytes
END_CFI(strcmp)