|  | /* SPDX-License-Identifier: GPL-2.0 */ | 
|  | /* | 
|  | * "memset" implementation for SH4 | 
|  | * | 
|  | * Copyright (C) 1999  Niibe Yutaka | 
|  | * Copyright (c) 2009  STMicroelectronics Limited | 
|  | * Author: Stuart Menefy <stuart.menefy:st.com> | 
|  | */ | 
|  |  | 
|  | /* | 
|  | *            void *memset(void *s, int c, size_t n); | 
|  | */ | 
|  |  | 
|  | #include <linux/linkage.h> | 
|  |  | 
|  | ENTRY(memset) | 
|  | /* | 
|  | * Fill n bytes at s with the low byte of c and return s. | 
|  | * | 
|  | * Register use, as read from the code below (arguments are assumed to | 
|  | * arrive in r4/r5/r6 in prototype order - TODO confirm against the ABI): | 
|  | *   r4     s on entry; turned into the end pointer s + n straight away | 
|  | *          and then walked downwards by every store, so it is back at s | 
|  | *          when the routine returns | 
|  | *   r5     c; widened to the fill byte repeated four times at label 2 | 
|  | *   r6     number of bytes still to be written | 
|  | *   r0-r3  scratch; r0 carries the return value | 
|  | * | 
|  | * The buffer is filled from its end towards its start. Every bt/s and | 
|  | * bf/s is a delayed branch: the instruction that follows it executes | 
|  | * whether or not the branch is taken. | 
|  | */ | 
|  | ! Lengths below 12 skip all alignment work and use the byte loop at 40. | 
|  | ! NOTE(review): cmp/gt is a signed compare, so a length with bit 31 set | 
|  | ! is also sent to the byte loop - confirm that is acceptable for size_t. | 
|  | mov	#12,r0 | 
|  | add	r6,r4 | 
|  | cmp/gt	r6,r0 | 
|  | bt/s	40f		! if it's too small, set a byte at once | 
|  | mov	r4,r0 | 
|  | ! r0 = end pointer & 3, i.e. the bytes lying above the last 4-byte | 
|  | ! boundary. The delay slot takes them off r6 (a no-op when r0 is 0). | 
|  | and	#3,r0 | 
|  | cmp/eq	#0,r0 | 
|  | bt/s	2f		! It's aligned | 
|  | sub	r0,r6 | 
|  | ! Store those r0 bytes one at a time; afterwards r4 is 4-byte aligned. | 
|  | 1: | 
|  | dt	r0 | 
|  | bf/s	1b | 
|  | mov.b	r5,@-r4 | 
|  | 2:				! make VVVV | 
|  | extu.b	r5,r5 | 
|  | swap.b	r5,r0		!   V0 | 
|  | or	r0,r5		!   VV | 
|  | swap.w	r5,r0		! VV00 | 
|  | or	r0,r5		! VVVV | 
|  |  | 
|  | ! Check if enough bytes need to be set to be worth the big loop | 
|  | mov	#0x40, r0	! (MT) | 
|  | cmp/gt	r6,r0		! (MT)  64 > len => slow loop | 
|  |  | 
|  | ! The delay slot loads r0 = r6, the count that label 22 shifts down. | 
|  | bt/s	22f | 
|  | mov	r6,r0 | 
|  |  | 
|  | ! align the dst to the cache block size if necessary | 
|  | ! r1 = r4 rounded down to a multiple of 32. When r4 is not already on | 
|  | ! that boundary, r3 = r4 - r1 is the distance in bytes (a multiple of 4 | 
|  | ! because r4 is 4-byte aligned here); the shift turns it into a | 
|  | ! longword count. | 
|  | mov	r4, r3 | 
|  | mov	#~(0x1f), r1 | 
|  |  | 
|  | and	r3, r1 | 
|  | cmp/eq	r3, r1 | 
|  |  | 
|  | bt/s	11f		! dst is already aligned | 
|  | sub	r1, r3		! r3-r1 -> r3 | 
|  | shlr2	r3		! number of loops | 
|  |  | 
|  | ! Store r3 longwords, taking 4 off r6 in the delay slot on each pass. | 
|  | 10:	mov.l	r5,@-r4 | 
|  | dt	r3 | 
|  | bf/s	10b | 
|  | add	#-4, r6 | 
|  |  | 
|  | 11:	! dst is 32byte aligned | 
|  | ! r2 = r6 / 32 = number of whole 32-byte blocks. It is at least 1: r6 | 
|  | ! was 64 or more at the check above and the loop at 10 removes at most 28. | 
|  | mov	r6,r2 | 
|  | mov	#-5,r0 | 
|  | shld	r0,r2		! number of loops | 
|  |  | 
|  | ! Step r4 down to the base of the highest block. The fill value is also | 
|  | ! copied to r0 because the movca.l below takes its source from r0. | 
|  | add	#-32, r4 | 
|  | mov	r5, r0 | 
|  | ! One 32-byte block per pass: movca.l writes the first longword and | 
|  | ! seven mov.l fill the rest; r6 drops by 32 per pass. | 
|  | ! NOTE(review): movca.l is presumably used so that the cache block is | 
|  | ! allocated without first being read from memory - confirm in the SH-4 | 
|  | ! manual. | 
|  | 12: | 
|  | movca.l	r0,@r4 | 
|  | mov.l	r5,@(4, r4) | 
|  | mov.l	r5,@(8, r4) | 
|  | mov.l	r5,@(12,r4) | 
|  | mov.l	r5,@(16,r4) | 
|  | mov.l	r5,@(20,r4) | 
|  | add	#-0x20, r6 | 
|  | mov.l	r5,@(24,r4) | 
|  | dt	r2 | 
|  | mov.l	r5,@(28,r4) | 
|  | bf/s	12b | 
|  | add	#-32, r4 | 
|  |  | 
|  | ! The delay slot above also ran on loop exit, so undo that last step. | 
|  | ! With fewer than 8 bytes left only the byte loop at 40 is needed. | 
|  | add	#32, r4 | 
|  | mov	#8, r0 | 
|  | cmp/ge	r0, r6 | 
|  | bf	40f | 
|  |  | 
|  | mov	r6,r0 | 
|  | 22: | 
|  | ! r0 becomes the number of 8-byte groups. Both ways into this label | 
|  | ! arrive with r0 >= 8, so the dt loop below runs at least once. | 
|  | shlr2	r0 | 
|  | shlr	r0		! r0 = r6 >> 3 | 
|  | ! Two longword stores per pass; the second one sits in the delay slot. | 
|  | 3: | 
|  | dt	r0 | 
|  | mov.l	r5,@-r4		! set 8-byte at once | 
|  | bf/s	3b | 
|  | mov.l	r5,@-r4 | 
|  | ! | 
|  | ! r6 = r6 & 7: the bytes left over after the 8-byte groups. | 
|  | mov	#7,r0 | 
|  | and	r0,r6 | 
|  |  | 
|  | ! fill bytes (length may be zero) | 
|  | 40:	tst	r6,r6 | 
|  | bt	5f | 
|  | 4: | 
|  | dt	r6 | 
|  | bf/s	4b | 
|  | mov.b	r5,@-r4 | 
|  | 5: | 
|  | ! Return r4, which has been walked back down to s; the move into r0 | 
|  | ! happens in the delay slot of rts. | 
|  | rts | 
|  | mov	r4,r0 |