| /* | 
 |  * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) | 
 |  * | 
 |  * This program is free software; you can redistribute it and/or modify | 
 |  * it under the terms of the GNU General Public License version 2 as | 
 |  * published by the Free Software Foundation. | 
 |  */ | 
 |  | 
 | #include <linux/linkage.h> | 
 | #include <asm/cache.h> | 
 |  | 
 | /* | 
 |  * The memset implementation below is optimized to use prefetchw and prealloc | 
 |  * instruction in case of CPU with 64B L1 data cache line (L1_CACHE_SHIFT == 6) | 
 |  * If you want to implement optimized memset for other possible L1 data cache | 
 |  * line lengths (32B and 128B) you should rewrite code carefully checking | 
 |  * we don't call any prefetchw/prealloc instruction for L1 cache lines which | 
 |  * don't belongs to memset area. | 
 |  */ | 
 |  | 
 | #if L1_CACHE_SHIFT == 6 | 
 |  | 
 | .macro PREALLOC_INSTR	reg, off | 
 | 	prealloc	[\reg, \off] | 
 | .endm | 
 |  | 
 | .macro PREFETCHW_INSTR	reg, off | 
 | 	prefetchw	[\reg, \off] | 
 | .endm | 
 |  | 
 | #else | 
 |  | 
/* Dummy macros: must still accept the reg/off operands used at call sites */
.macro PREALLOC_INSTR	reg, off
.endm

.macro PREFETCHW_INSTR	reg, off
.endm
 |  | 
 | #endif | 
 |  | 
 | ENTRY_CFI(memset) | 
 | 	PREFETCHW_INSTR	r0, 0	; Prefetch the first write location | 
	mov.f	0, r2		; set flags on the size
;;; return right away if size is zero
	jz.d	[blink]
	mov	r3, r0		; delay slot: work copy, don't clobber ret val
 |  | 
;;; if length <= 8, just store byte by byte
	brls.d.nt	r2, 8, .Lsmallchunk
	mov.f	lp_count, r2	; delay slot: byte count for the small loop
 |  | 
	and.f	r4, r0, 0x03	; r4 = addr & 3 (Z set if already aligned)
	rsub	lp_count, r4, 4	; lp_count = 4 - r4 bytes to reach alignment
	lpnz	@.Laligndestination	; loop skipped when Z is set
	;; LOOP BEGIN
	stb.ab	r1, [r3,1]	; store one byte, post-increment r3
	sub	r2, r2, 1	; and take it off the remaining length
 | .Laligndestination: | 
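
/*
 * Equivalent C for the alignment loop above (sketch; p = r3, c = r1,
 * n = r2, and n > 8 is already known here, so consuming up to 3 bytes
 * is always safe):
 *
 *	while ((unsigned long)p & 3) {
 *		*p++ = c;
 *		n--;
 *	}
 */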
 |  | 
;;; Destination is word aligned; replicate the fill byte across a word
	and	r1, r1, 0xFF	; keep only the low byte of c
	asl	r4, r1, 8
	or	r4, r4, r1	; r4 = c:c (16-bit pattern)
	asl	r5, r4, 16
	or	r5, r5, r4	; r5 = c:c:c:c (32-bit pattern)
	mov	r4, r5		; r4:r5 is the pair stored by std
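
/*
 * Equivalent C for the byte splat above (sketch):
 *
 *	unsigned int v = (unsigned char)c * 0x01010101u;
 *
 * Both r4 and r5 hold the pattern so that std can store the 64-bit
 * register pair r4:r5 in one instruction.
 */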
 |  | 
	sub3	lp_count, r2, 8	; lp_count = len - 64
	cmp	r2, 64
	bmsk.hi	r2, r2, 5	; len > 64: tail = (len & 63) ...
	mov.ls	lp_count, 0	; len <= 64: no full 64B chunks
	add3.hi	r2, r2, 8	; ... + 64, left for the 32B/byte loops

;;; Convert the bulk length to a count of 64B chunks (8 x 8B stores each)
	lsr.f	lp_count, lp_count, 6
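
/*
 * The chunking arithmetic above as a C sketch (n = len on entry to this
 * block): the prealloc loop must stop a full cache line early, so the
 * last 64..127 bytes are left to the prealloc-free 32B and byte loops.
 *
 *	size_t chunks64 = n > 64 ? (n - 64) >> 6 : 0;	// lp_count
 *	size_t tail     = n > 64 ? (n & 63) + 64 : n;	// r2
 */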
 |  | 
 | 	lpnz	@.Lset64bytes | 
 | 	;; LOOP START | 
 | 	PREALLOC_INSTR	r3, 64	; alloc next line w/o fetching | 
 |  | 
 | #ifdef CONFIG_ARC_HAS_LL64 | 
 | 	std.ab	r4, [r3, 8] | 
 | 	std.ab	r4, [r3, 8] | 
 | 	std.ab	r4, [r3, 8] | 
 | 	std.ab	r4, [r3, 8] | 
 | 	std.ab	r4, [r3, 8] | 
 | 	std.ab	r4, [r3, 8] | 
 | 	std.ab	r4, [r3, 8] | 
 | 	std.ab	r4, [r3, 8] | 
 | #else | 
 | 	st.ab	r4, [r3, 4] | 
 | 	st.ab	r4, [r3, 4] | 
 | 	st.ab	r4, [r3, 4] | 
 | 	st.ab	r4, [r3, 4] | 
 | 	st.ab	r4, [r3, 4] | 
 | 	st.ab	r4, [r3, 4] | 
 | 	st.ab	r4, [r3, 4] | 
 | 	st.ab	r4, [r3, 4] | 
 | 	st.ab	r4, [r3, 4] | 
 | 	st.ab	r4, [r3, 4] | 
 | 	st.ab	r4, [r3, 4] | 
 | 	st.ab	r4, [r3, 4] | 
 | 	st.ab	r4, [r3, 4] | 
 | 	st.ab	r4, [r3, 4] | 
 | 	st.ab	r4, [r3, 4] | 
 | 	st.ab	r4, [r3, 4] | 
 | #endif | 
 | .Lset64bytes: | 
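
/*
 * Safety note: PREALLOC_INSTR above touches [r3 + 64], the line that the
 * *next* iteration's 64 bytes of stores will fill. Since the tail left
 * after this loop is at least 64 bytes whenever the loop runs at all,
 * that line is still inside the memset area even on the last iteration.
 */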
 |  | 
	lsr.f	lp_count, r2, 5	; 32B blocks in the tail (at most 127 bytes)
	lpnz	@.Lset32bytes
 | 	;; LOOP START | 
 | #ifdef CONFIG_ARC_HAS_LL64 | 
 | 	std.ab	r4, [r3, 8] | 
 | 	std.ab	r4, [r3, 8] | 
 | 	std.ab	r4, [r3, 8] | 
 | 	std.ab	r4, [r3, 8] | 
 | #else | 
 | 	st.ab	r4, [r3, 4] | 
 | 	st.ab	r4, [r3, 4] | 
 | 	st.ab	r4, [r3, 4] | 
 | 	st.ab	r4, [r3, 4] | 
 | 	st.ab	r4, [r3, 4] | 
 | 	st.ab	r4, [r3, 4] | 
 | 	st.ab	r4, [r3, 4] | 
 | 	st.ab	r4, [r3, 4] | 
 | #endif | 
 | .Lset32bytes: | 
 |  | 
	and.f	lp_count, r2, 0x1F ; last remaining bytes (at most 31)
.Lsmallchunk:
	lpnz	@.Lcopy3bytes
	;; LOOP START
	stb.ab	r1, [r3, 1]	; finish the tail byte by byte
.Lcopy3bytes:
 |  | 
 | 	j	[blink] | 
 |  | 
 | END_CFI(memset) | 
 |  | 
 | ENTRY_CFI(memzero) | 
	; adjust memzero args (r0 = s, r1 = n) to memset args (r0 = s, r1 = 0, r2 = n)
	mov	r2, r1
	b.d	memset		; tail call, so no need to touch blink
	mov	r1, 0		; fill value, set in the delay slot
 | END_CFI(memzero) |
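
/*
 * In C terms, memzero is simply a tail call into memset with the
 * arguments shuffled into place (sketch):
 *
 *	void memzero(void *s, size_t n)
 *	{
 *		memset(s, 0, n);
 *	}
 */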