/* SPDX-License-Identifier: GPL-2.0 */
/*
 * arch/alpha/lib/memmove.S
 *
 * Barely optimized memmove routine for Alpha EV5.
 *
 * This is hand-massaged output from the original memcpy.c.  We defer to
 * memcpy whenever possible; the backwards copy loops are not unrolled.
 */
#include <asm/export.h>
	.set noat
	.set noreorder
	.text

	.align 4
	.globl memmove
	.ent memmove
memmove:
	ldgp $29, 0($27)
	unop
	nop
	.prologue 1

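	/* $16 = dest, $17 = src, $18 = count; the return value in $0 is
	   dest.  If the source and destination regions do not overlap,
	   hand the whole job to memcpy.  */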
	addq $16,$18,$4
	addq $17,$18,$5
	cmpule $4,$17,$1		/*  dest + n <= src  */
	cmpule $5,$16,$2		/*  dest >= src + n  */

	bis $1,$2,$1
	mov $16,$0
	xor $16,$17,$2
	bne $1,memcpy			!samegp

	and $2,7,$2			/* Test for src/dest co-alignment.  */
	and $16,7,$1
	cmpule $16,$17,$3
	bne $3,$memmove_up		/* dest <= src */

	and $4,7,$1
	bne $2,$misaligned_dn
	unop
	beq $1,$skip_aligned_byte_loop_head_dn

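	/* Backwards copy (dest > src), co-aligned case: move single
	   bytes from the tail until the destination end pointer ($4)
	   is quadword aligned.  */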
$aligned_byte_loop_head_dn:
	lda $4,-1($4)
	lda $5,-1($5)
	unop
	ble $18,$egress

	ldq_u $3,0($5)
	ldq_u $2,0($4)
	lda $18,-1($18)
	extbl $3,$5,$1

	insbl $1,$4,$1
	mskbl $2,$4,$2
	bis $1,$2,$1
	and $4,7,$6

	stq_u $1,0($4)
	bne $6,$aligned_byte_loop_head_dn

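	/* Bias the count by -8 so the quadword loop can test with a
	   single bge; skip it entirely if fewer than 8 bytes remain.  */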
$skip_aligned_byte_loop_head_dn:
	lda $18,-8($18)
	blt $18,$skip_aligned_word_loop_dn

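	/* Copy whole quadwords backwards, 8 bytes per iteration.  */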
$aligned_word_loop_dn:
	ldq $1,-8($5)
	nop
	lda $5,-8($5)
	lda $18,-8($18)

	stq $1,-8($4)
	nop
	lda $4,-8($4)
	bge $18,$aligned_word_loop_dn

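	/* Undo the bias; finish any leftover bytes with the byte tail,
	   otherwise return.  */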
$skip_aligned_word_loop_dn:
	lda $18,8($18)
	bgt $18,$byte_loop_tail_dn
	unop
	ret $31,($26),1

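	/* src and dest are not co-aligned: copy everything backwards
	   a byte at a time.  */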
	.align 4
$misaligned_dn:
	nop
	fnop
	unop
	beq $18,$egress

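	/* Backwards byte copy using the unaligned
	   load/extract/insert/mask/store sequence.  */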
$byte_loop_tail_dn:
	ldq_u $3,-1($5)
	ldq_u $2,-1($4)
	lda $5,-1($5)
	lda $4,-1($4)

	lda $18,-1($18)
	extbl $3,$5,$1
	insbl $1,$4,$1
	mskbl $2,$4,$2

	bis $1,$2,$1
	stq_u $1,0($4)
	bgt $18,$byte_loop_tail_dn
	br $egress

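	/* dest <= src: copy forwards, from low addresses to high.  */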
$memmove_up:
	mov $16,$4
	mov $17,$5
	bne $2,$misaligned_up
	beq $1,$skip_aligned_byte_loop_head_up

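	/* Co-aligned forward copy: move single bytes until the
	   destination pointer ($4) is quadword aligned.  */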
$aligned_byte_loop_head_up:
	unop
	ble $18,$egress
	ldq_u $3,0($5)
	ldq_u $2,0($4)

	lda $18,-1($18)
	extbl $3,$5,$1
	insbl $1,$4,$1
	mskbl $2,$4,$2

	bis $1,$2,$1
	lda $5,1($5)
	stq_u $1,0($4)
	lda $4,1($4)

	and $4,7,$6
	bne $6,$aligned_byte_loop_head_up

$skip_aligned_byte_loop_head_up:
	lda $18,-8($18)
	blt $18,$skip_aligned_word_loop_up

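	/* Copy whole quadwords forwards, 8 bytes per iteration, then
	   fall through to handle the remaining tail bytes.  */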
$aligned_word_loop_up:
	ldq $1,0($5)
	nop
	lda $5,8($5)
	lda $18,-8($18)

	stq $1,0($4)
	nop
	lda $4,8($4)
	bge $18,$aligned_word_loop_up

$skip_aligned_word_loop_up:
	lda $18,8($18)
	bgt $18,$byte_loop_tail_up
	unop
	ret $31,($26),1

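	/* src and dest are not co-aligned: copy everything forwards
	   a byte at a time.  */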
	.align 4
$misaligned_up:
	nop
	fnop
	unop
	beq $18,$egress

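	/* Forward byte copy using the unaligned
	   load/extract/insert/mask/store sequence.  */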
$byte_loop_tail_up:
	ldq_u $3,0($5)
	ldq_u $2,0($4)
	lda $18,-1($18)
	extbl $3,$5,$1

	insbl $1,$4,$1
	mskbl $2,$4,$2
	bis $1,$2,$1
	stq_u $1,0($4)

	lda $5,1($5)
	lda $4,1($4)
	nop
	bgt $18,$byte_loop_tail_up

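	/* Common exit: $0 still holds the original dest.  */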
$egress:
	ret $31,($26),1
	nop
	nop
	nop

	.end memmove
	EXPORT_SYMBOL(memmove)