blob: 1b8812cd615db247b537785dde78819b09de8a1d [file] [log] [blame]
/* Cloned and hacked for uClibc by Paul Mundt, December 2003 */
/* Modified by SuperH, Inc. September 2003 */
!
! Fast SH memset
!
! by Toshiyasu Morita (tm@netcom.com)
!
! SH5 code by J"orn Rennecke (joern.rennecke@superh.com)
! Copyright 2002 SuperH Ltd.
!
#include <features.h>
#include <endian.h>
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define SHHI shlld
#define SHLO shlrd
#else
#define SHHI shlrd
#define SHLO shlld
#endif
.section .text..SHmedia32,"ax"
.globl memset
.type memset, @function
.align 5
memset:
pta/l multiquad, tr0
andi r2, 7, r22
ptabs r18, tr2
mshflo.b r3,r3,r3
add r4, r22, r23
mperm.w r3, r63, r3 /* Fill pattern now in every byte of r3 */
movi 8, r9
bgtu/u r23, r9, tr0 /* multiquad */
beqi/u r4, 0, tr2 /* Return with size 0 - ensures no mem accesses */
ldlo.q r2, 0, r7
shlli r4, 2, r4
movi -1, r8
SHHI r8, r4, r8
SHHI r8, r4, r8
mcmv r7, r8, r3
stlo.q r2, 0, r3
blink tr2, r63
multiquad:
pta/l lastquad, tr0
stlo.q r2, 0, r3
shlri r23, 3, r24
add r2, r4, r5
beqi/u r24, 1, tr0 /* lastquad */
pta/l loop, tr1
sub r2, r22, r25
andi r5, -8, r20 /* calculate end address and */
addi r20, -7*8, r8 /* loop end address; This might overflow, so we need
to use a different test before we start the loop
*/
bge/u r24, r9, tr1 /* loop */
st.q r25, 8, r3
st.q r20, -8, r3
shlri r24, 1, r24
beqi/u r24, 1, tr0 /* lastquad */
st.q r25, 16, r3
st.q r20, -16, r3
beqi/u r24, 2, tr0 /* lastquad */
st.q r25, 24, r3
st.q r20, -24, r3
lastquad:
sthi.q r5, -1, r3
blink tr2,r63
loop:
!!! alloco r25, 32 /* QQQ comment out for short-term fix to SHUK #3895.
QQQ commenting out is locically correct, but sub-optimal
QQQ Sean McGoogan - 4th April 2003. */
st.q r25, 8, r3
st.q r25, 16, r3
st.q r25, 24, r3
st.q r25, 32, r3
addi r25, 32, r25
bgeu/l r8, r25, tr1 /* loop */
st.q r20, -40, r3
st.q r20, -32, r3
st.q r20, -24, r3
st.q r20, -16, r3
st.q r20, -8, r3
sthi.q r5, -1, r3
blink tr2,r63
.size memset,.-memset
libc_hidden_def(memset)