| /* | 
 |  * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) | 
 |  * | 
 |  * This program is free software; you can redistribute it and/or modify | 
 |  * it under the terms of the GNU General Public License version 2 as | 
 |  * published by the Free Software Foundation. | 
 |  */ | 
 |  | 
 | #include <linux/linkage.h> | 
 |  | 
 | /* | 
 |  * Endian-dependent helpers for the unaligned-source paths of memcpy. | 
 |  * | 
 |  * SHIFT_1 / SHIFT_2:     opposite-direction shifts by IMM (asl/lsr on little | 
 |  *                        endian, lsr/asl on big endian).  In the copy loops | 
 |  *                        SHIFT_1 positions a freshly loaded word next to the | 
 |  *                        carried bytes and SHIFT_2 produces the next carry. | 
 |  * MERGE_1 / MERGE_2:     position the halfword and the byte that CASE 1 loads | 
 |  *                        before its loop so they can be OR-ed into one carry | 
 |  *                        register.  MERGE_2 expands to nothing on little endian. | 
 |  * EXTRACT_1 / EXTRACT_2: pull the halfword and the byte that CASE 1 writes back | 
 |  *                        after its loop out of the carry register. | 
 |  * | 
 |  * Note: the little-endian EXTRACT_1 and the big-endian EXTRACT_2 do not use | 
 |  * their IMM argument (they use the fixed 0xFFFF mask and 0x08 shift instead). | 
 |  */ | 
 | #ifdef __LITTLE_ENDIAN__ | 
 | # define SHIFT_1(RX,RY,IMM)	asl	RX, RY, IMM	; << | 
 | # define SHIFT_2(RX,RY,IMM)	lsr	RX, RY, IMM	; >> | 
 | # define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM | 
 | # define MERGE_2(RX,RY,IMM) | 
 | # define EXTRACT_1(RX,RY,IMM)	and	RX, RY, 0xFFFF | 
 | # define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, IMM | 
 | #else | 
 | # define SHIFT_1(RX,RY,IMM)	lsr	RX, RY, IMM	; >> | 
 | # define SHIFT_2(RX,RY,IMM)	asl	RX, RY, IMM	; << | 
 | # define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM	; << | 
 | # define MERGE_2(RX,RY,IMM)	asl	RX, RY, IMM	; << | 
 | # define EXTRACT_1(RX,RY,IMM)	lsr	RX, RY, IMM | 
 | # define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, 0x08 | 
 | #endif | 
 |  | 
 | /* | 
 |  * Load/store step used by the aligned (CASE 0) copy loop of memcpy. | 
 |  * | 
 |  * With CONFIG_ARC_HAS_LL64 each LOADX/STOREX is an ldd.ab/std.ab that steps | 
 |  * the pointer by 8; otherwise it is an ld.ab/st.ab that steps it by 4. | 
 |  * | 
 |  * That loop issues four LOADX and four STOREX per iteration, i.e. 32 or 16 | 
 |  * bytes, so ZOLSHFT (5 or 4) turns a byte count into an iteration count and | 
 |  * ZOLAND (0x1F or 0xF) masks out the bytes left over for the bytewise tail. | 
 |  * These constants must be kept in step with the unroll factor of that loop. | 
 |  */ | 
 | #ifdef CONFIG_ARC_HAS_LL64 | 
 | # define LOADX(DST,RX)		ldd.ab	DST, [RX, 8] | 
 | # define STOREX(SRC,RX)		std.ab	SRC, [RX, 8] | 
 | # define ZOLSHFT		5 | 
 | # define ZOLAND			0x1F | 
 | #else | 
 | # define LOADX(DST,RX)		ld.ab	DST, [RX, 4] | 
 | # define STOREX(SRC,RX)		st.ab	SRC, [RX, 4] | 
 | # define ZOLSHFT		4 | 
 | # define ZOLAND			0xF | 
 | #endif | 
 |  | 
 | ;;; memcpy: copy r2 bytes from the buffer at r1 to the buffer at r0. | 
 | ;;; | 
 | ;;; In:   r0 = destination, r1 = source, r2 = length in bytes | 
 | ;;; Out:  r0 is only read here, so it still holds the destination on return | 
 | ;;; Uses: r3 as the running destination pointer, r1 as the running source | 
 | ;;;       pointer, r2 as the remaining length, r4-r10 as scratch, plus | 
 | ;;;       lp_count and the flags | 
 | ;;; | 
 | ;;; Lengths up to 8 are copied bytewise.  Longer copies first bring the | 
 | ;;; destination to a 32bit boundary and then take one of four paths chosen | 
 | ;;; by the low two bits of the source address (CASE 0..3 below). | 
 | ;;; | 
 | ;;; NOTE: this file goes through the C preprocessor, so keep apostrophes | 
 | ;;; out of the ; comments. | 
 | ENTRY_CFI(memcpy) | 
 | 	mov.f	0, r2 | 
 | ;;; if size is zero, return straight away | 
 | 	jz.d	[blink] | 
 | 	mov	r3, r0		; delay slot: copy dst so the ret val in r0 is not clobbered | 
 |  | 
 | ;;; if size <= 8, skip the alignment work and copy bytewise | 
 | 	cmp	r2, 8 | 
 | 	bls.d	@.Lsmallchunk | 
 | 	mov.f	lp_count, r2	; delay slot: whole length becomes the byte loop count | 
 |  | 
 | ;;; Align the destination: r4 = dst & 3, and when that is not zero copy | 
 | ;;; 4 - r4 single bytes.  The lpnz tests the flags left by the and.f. | 
 | 	and.f	r4, r0, 0x03 | 
 | 	rsub	lp_count, r4, 4 | 
 | 	lpnz	@.Laligndestination | 
 | 	;; LOOP BEGIN | 
 | 	ldb.ab	r5, [r1,1] | 
 | 	sub	r2, r2, 1 | 
 | 	stb.ab	r5, [r3,1] | 
 | .Laligndestination: | 
 |  | 
 | ;;; Check the alignment of the source (r4 = src & 3) | 
 | 	and.f	r4, r1, 0x03 | 
 | 	bnz.d	@.Lsourceunaligned | 
 |  | 
 | ;;; CASE 0: Both source and destination are 32bit aligned | 
 | ;;; Convert len to Dwords, unfold x4 | 
 | ;;; The lsr.f below sits in the delay slot of the bnz.d above, so it also | 
 | ;;; runs when that branch is taken; each unaligned case reloads lp_count. | 
 | 	lsr.f	lp_count, r2, ZOLSHFT | 
 | 	lpnz	@.Lcopy32_64bytes | 
 | 	;; LOOP START | 
 | 	LOADX (r6, r1) | 
 | 	LOADX (r8, r1) | 
 | 	LOADX (r10, r1) | 
 | 	LOADX (r4, r1) | 
 | 	STOREX (r6, r3) | 
 | 	STOREX (r8, r3) | 
 | 	STOREX (r10, r3) | 
 | 	STOREX (r4, r3) | 
 | .Lcopy32_64bytes: | 
 |  | 
 | 	and.f	lp_count, r2, ZOLAND ;Bytes left over by the loop above (at most ZOLAND) | 
 | .Lsmallchunk: | 
 | 	;; Reached by fall-through and from the size <= 8 test; in both cases | 
 | 	;; lp_count and the flags were set by the instruction just executed. | 
 | 	lpnz	@.Lcopyremainingbytes | 
 | 	;; LOOP START | 
 | 	ldb.ab	r5, [r1,1] | 
 | 	stb.ab	r5, [r3,1] | 
 | .Lcopyremainingbytes: | 
 |  | 
 | 	j	[blink] | 
 | ;;; END CASE 0 | 
 |  | 
 | .Lsourceunaligned: | 
 | 	;; r4 = src & 3, i.e. 1, 2 or 3.  Both branches below use the flags of | 
 | 	;; this cmp; the sub in between has no .f and leaves them alone. | 
 | 	cmp	r4, 2 | 
 | 	beq.d	@.LunalignedOffby2 | 
 | 	sub	r2, r2, 1	; delay slot: executed for all three cases | 
 |  | 
 | 	bhi.d	@.LunalignedOffby3 | 
 | 	ldb.ab	r5, [r1, 1]	; delay slot: executed for off by 1 and off by 3 | 
 |  | 
 | ;;; CASE 1: The source is unaligned, off by 1 | 
 | 	;; Hence I need to read 1 byte for a 16bit alignment | 
 | 	;; and 2bytes to reach 32bit alignment | 
 | 	;; The byte is already in r5 (loaded in the delay slot above). | 
 | 	ldh.ab	r6, [r1, 2] | 
 | 	sub	r2, r2, 2 | 
 | 	;; Convert to words, unfold x2 | 
 | 	lsr.f	lp_count, r2, 3 | 
 | 	;; r5 = the 3 bytes loaded so far, carried until they can be stored | 
 | 	MERGE_1 (r6, r6, 8) | 
 | 	MERGE_2 (r5, r5, 24) | 
 | 	or	r5, r5, r6 | 
 |  | 
 | 	;; Both src and dst are aligned | 
 | 	lpnz	@.Lcopy8bytes_1 | 
 | 	;; LOOP START | 
 | 	;; Each stored word is the carry in r5 combined with part of the newly | 
 | 	;; loaded word; the rest of that word becomes the next carry. | 
 | 	ld.ab	r6, [r1, 4] | 
 | 	ld.ab	r8, [r1,4] | 
 |  | 
 | 	SHIFT_1	(r7, r6, 24) | 
 | 	or	r7, r7, r5 | 
 | 	SHIFT_2	(r5, r6, 8) | 
 |  | 
 | 	SHIFT_1	(r9, r8, 24) | 
 | 	or	r9, r9, r5 | 
 | 	SHIFT_2	(r5, r8, 8) | 
 |  | 
 | 	st.ab	r7, [r3, 4] | 
 | 	st.ab	r9, [r3, 4] | 
 | .Lcopy8bytes_1: | 
 |  | 
 | 	;; Write back the remaining 16bits | 
 | 	EXTRACT_1 (r6, r5, 16) | 
 | 	sth.ab	r6, [r3, 2] | 
 | 	;; Write back the remaining 8bits | 
 | 	EXTRACT_2 (r5, r5, 16) | 
 | 	stb.ab	r5, [r3, 1] | 
 |  | 
 | 	and.f	lp_count, r2, 0x07 ;Up to 7 trailing bytes | 
 | 	lpnz	@.Lcopybytewise_1 | 
 | 	;; LOOP START | 
 | 	ldb.ab	r6, [r1,1] | 
 | 	stb.ab	r6, [r3,1] | 
 | .Lcopybytewise_1: | 
 | 	j	[blink] | 
 |  | 
 | .LunalignedOffby2: | 
 | ;;; CASE 2: The source is unaligned, off by 2 | 
 | 	;; Read one halfword to reach 32bit alignment.  r2 already dropped by 1 | 
 | 	;; in the delay slot at .Lsourceunaligned, so drop it once more here. | 
 | 	ldh.ab	r5, [r1, 2] | 
 | 	sub	r2, r2, 1 | 
 |  | 
 | 	;; Both src and dst are aligned | 
 | 	;; Convert to words, unfold x2 | 
 | 	lsr.f	lp_count, r2, 3 | 
 | #ifdef __BIG_ENDIAN__ | 
 | 	;; Only when the loop will run (flags of the lsr.f above): move the | 
 | 	;; carried halfword into the upper half, where the loop expects it. | 
 | 	asl.nz	r5, r5, 16 | 
 | #endif | 
 | 	lpnz	@.Lcopy8bytes_2 | 
 | 	;; LOOP START | 
 | 	ld.ab	r6, [r1, 4] | 
 | 	ld.ab	r8, [r1,4] | 
 |  | 
 | 	SHIFT_1	(r7, r6, 16) | 
 | 	or	r7, r7, r5 | 
 | 	SHIFT_2	(r5, r6, 16) | 
 |  | 
 | 	SHIFT_1	(r9, r8, 16) | 
 | 	or	r9, r9, r5 | 
 | 	SHIFT_2	(r5, r8, 16) | 
 |  | 
 | 	st.ab	r7, [r3, 4] | 
 | 	st.ab	r9, [r3, 4] | 
 | .Lcopy8bytes_2: | 
 |  | 
 | #ifdef __BIG_ENDIAN__ | 
 | 	;; Same condition as the asl.nz before the loop (nothing in the loop | 
 | 	;; sets flags): bring the carried halfword back down for the sth. | 
 | 	lsr.nz	r5, r5, 16 | 
 | #endif | 
 | 	;; Write back the carried 16bits | 
 | 	sth.ab	r5, [r3, 2] | 
 |  | 
 | 	and.f	lp_count, r2, 0x07 ;Up to 7 trailing bytes | 
 | 	lpnz	@.Lcopybytewise_2 | 
 | 	;; LOOP START | 
 | 	ldb.ab	r6, [r1,1] | 
 | 	stb.ab	r6, [r3,1] | 
 | .Lcopybytewise_2: | 
 | 	j	[blink] | 
 |  | 
 | .LunalignedOffby3: | 
 | ;;; CASE 3: The source is unaligned, off by 3 | 
 | ;;; Hence, I need to read 1byte to achieve the 32bit alignment | 
 | ;;; (that byte is already in r5, loaded in the delay slot of the bhi.d, | 
 | ;;; and r2 was already reduced by 1 at .Lsourceunaligned) | 
 |  | 
 | 	;; Both src and dst are aligned | 
 | 	;; Convert to words, unfold x2 | 
 | 	lsr.f	lp_count, r2, 3 | 
 | #ifdef __BIG_ENDIAN__ | 
 | 	;; Only when the loop will run: move the carried byte to the top byte. | 
 | 	;; NOTE(review): .ne is presumably the same condition as the .nz used | 
 | 	;; for the matching lsr after the loop - confirm. | 
 | 	asl.ne	r5, r5, 24 | 
 | #endif | 
 | 	lpnz	@.Lcopy8bytes_3 | 
 | 	;; LOOP START | 
 | 	ld.ab	r6, [r1, 4] | 
 | 	ld.ab	r8, [r1,4] | 
 |  | 
 | 	SHIFT_1	(r7, r6, 8) | 
 | 	or	r7, r7, r5 | 
 | 	SHIFT_2	(r5, r6, 24) | 
 |  | 
 | 	SHIFT_1	(r9, r8, 8) | 
 | 	or	r9, r9, r5 | 
 | 	SHIFT_2	(r5, r8, 24) | 
 |  | 
 | 	st.ab	r7, [r3, 4] | 
 | 	st.ab	r9, [r3, 4] | 
 | .Lcopy8bytes_3: | 
 |  | 
 | #ifdef __BIG_ENDIAN__ | 
 | 	;; Bring the carried byte back down for the stb (only if it was moved up) | 
 | 	lsr.nz	r5, r5, 24 | 
 | #endif | 
 | 	;; Write back the carried 8bits | 
 | 	stb.ab	r5, [r3, 1] | 
 |  | 
 | 	and.f	lp_count, r2, 0x07 ;Up to 7 trailing bytes | 
 | 	lpnz	@.Lcopybytewise_3 | 
 | 	;; LOOP START | 
 | 	ldb.ab	r6, [r1,1] | 
 | 	stb.ab	r6, [r3,1] | 
 | .Lcopybytewise_3: | 
 | 	j	[blink] | 
 |  | 
 | END_CFI(memcpy) | 