blob: 38f2f6228f054c0cfa066def73a3514e0d9457b7 [file] [log] [blame]
yuezonghe824eb0c2024-06-27 02:32:26 -07001/*
2 * linux/arch/arm/lib/memset.S
3 *
4 * Copyright (C) 1995-2000 Russell King
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * ASM optimised string functions
11 */
/*
 * NOTE(review): FORCE_ARM is not referenced in the visible part of this
 * file; presumably a header included further down tests it - confirm.
 */
12 #define FORCE_ARM 1
/*
 * Select the pre-UAL ("divided") mnemonic syntax; the code below uses
 * old-style conditional mnemonics such as stmgeia and strneb.
 */
13 .syntax divided
14
/* Drop any earlier definitions of the helper macros redefined below. */
15#undef PSR_ISETSTATE
16#undef ARM
17#undef THUMB
18#undef W
19#undef BSYM
20
21/* The CPSR bit describing the instruction set (ARM) */
22#define PSR_ISETSTATE 0
23
/*
 * ARM-only variants of the helper macros: ARM() keeps its argument,
 * THUMB() discards it, W() and BSYM() pass their argument through.
 */
24#define ARM(x...) x
25#define THUMB(x...)
26#define W(instr) instr
27#define BSYM(sym) sym
28
/* Assemble everything that follows as 32-bit ARM instructions. */
29.arm
30
31#include <linux/linkage.h>
32#include <asm/assembler.h>
33
34 .text
35 .align 5
36
/*
 * memset - fill a region of memory with a byte value.
 *
 * In:   r0 = destination pointer
 *       r1 = fill value; only the low 8 bits are used (the value is
 *            reduced to an unsigned char on entry)
 *       r2 = number of bytes to fill
 * Out:  r0 = destination pointer, unchanged (all stores go through ip)
 * Uses: r1, r2, r3, ip and the flags are modified.  Whichever of r4-r8
 *       and lr the bulk loop uses are saved on the stack and restored.
 */
ENTRY(memset)
	and	r1, r1, #255		@ cast fill value to unsigned char
	ands	r3, r0, #3		@ 1 unaligned?
	mov	ip, r0			@ preserve r0 as return value
	bne	6f			@ 1
/*
 * we know that the pointer in ip is aligned to a word boundary.
 * Replicate the fill byte into all four bytes of r1 and r3.
 */
1:	orr	r1, r1, r1, lsl #8
	orr	r1, r1, r1, lsl #16
	mov	r3, r1
	cmp	r2, #16
	blt	4f			@ under 16 bytes (signed): skip bulk loop

#if ! CALGN(1)+0

/*
 * We need 2 extra registers for this loop - use r8 and the LR
 */
	stmfd	sp!, {r8, lr}
	mov	r8, r1
	mov	lr, r1

2:	subs	r2, r2, #64
	stmgeia	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
	stmgeia	ip!, {r1, r3, r8, lr}
	stmgeia	ip!, {r1, r3, r8, lr}
	stmgeia	ip!, {r1, r3, r8, lr}
	bgt	2b
	ldmeqfd	sp!, {r8, pc}		@ count hit exactly zero: return
/*
 * Now <64 bytes to go.
 * No need to correct the count; we're only testing bits from now on
 * (subtracting 64 leaves bits 0-5 of the count untouched).
 */
	tst	r2, #32
	stmneia	ip!, {r1, r3, r8, lr}
	stmneia	ip!, {r1, r3, r8, lr}
	tst	r2, #16
	stmneia	ip!, {r1, r3, r8, lr}
	ldmfd	sp!, {r8, lr}

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 */

	stmfd	sp!, {r4-r8, lr}
	mov	r4, r1
	mov	r5, r1
	mov	r6, r1
	mov	r7, r1
	mov	r8, r1
	mov	lr, r1

	cmp	r2, #96			@ only bother aligning when count > 96
	tstgt	ip, #31			@ ... and ip is not 32-byte aligned
	ble	3f

	and	r8, ip, #31
	rsb	r8, r8, #32		@ r8 = bytes up to next 32-byte boundary
	sub	r2, r2, r8
	movs	r8, r8, lsl #(32 - 4)	@ C = bit 4, N = bit 3, bit 30 = bit 2
	stmcsia	ip!, {r4, r5, r6, r7}	@ 16 bytes
	stmmiia	ip!, {r4, r5}		@ 8 bytes
	tst	r8, #(1 << 30)
	mov	r8, r1			@ r8 holds the fill pattern again
	strne	r1, [ip], #4		@ 4 bytes

3:	subs	r2, r2, #64
	stmgeia	ip!, {r1, r3-r8, lr}	@ 2 x 32 bytes per iteration
	stmgeia	ip!, {r1, r3-r8, lr}
	bgt	3b
	ldmeqfd	sp!, {r4-r8, pc}	@ count hit exactly zero: return

	tst	r2, #32
	stmneia	ip!, {r1, r3-r8, lr}
	tst	r2, #16
	stmneia	ip!, {r4-r7}
	ldmfd	sp!, {r4-r8, lr}

#endif

4:	tst	r2, #8
	stmneia	ip!, {r1, r3}
	tst	r2, #4
	strne	r1, [ip], #4
/*
 * When we get here, we've got less than 4 bytes to zero.  We
 * may have an unaligned pointer as well.
 */
5:	tst	r2, #2
	strneb	r1, [ip], #1
	strneb	r1, [ip], #1
	tst	r2, #1
	strneb	r1, [ip], #1
	bx	lr			@ return (was: mov pc, lr)

/*
 * Unaligned start: r3 = r0 & 3 (1..3).  Store the 4 - r3 bytes needed
 * to reach a word boundary, then rejoin the aligned path at 1.  With
 * fewer than 4 bytes in total, go straight to the byte tail at 5; the
 * low two bits of r2 are not changed by subtracting 4.
 */
6:	subs	r2, r2, #4		@ 1 do we have enough
	blt	5b			@ 1 bytes to align with?
	cmp	r3, #2			@ 1
	strltb	r1, [ip], #1		@ 1
	strleb	r1, [ip], #1		@ 1
	strb	r1, [ip], #1		@ 1
	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
	b	1b
ENDPROC(memset)