/*
 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
8
9#include <linux/linkage.h>
10
;;; Endian-dependent helpers used by the unaligned-source paths of memcpy.
;;; Each macro expands to at most one instruction: RX is the destination
;;; register, RY the source register, IMM the shift amount.
;;;   SHIFT_1   asl on little endian, lsr on big endian
;;;   SHIFT_2   the opposite shift: lsr on little endian, asl on big endian
;;;   MERGE_1, MERGE_2
;;;             shifts applied before the first source bytes are or-ed
;;;             together; MERGE_2 expands to nothing on little endian
;;;   EXTRACT_1, EXTRACT_2
;;;             pick the trailing halfword and the trailing byte that are
;;;             stored after the word loop
;;; NOTE: the little endian EXTRACT_1 and the big endian EXTRACT_2 ignore
;;; IMM and use a fixed operand (0xFFFF mask, shift by 0x08) instead.
;;; The << and >> tails after the semicolon belong to the macro text, so
;;; they travel with every expansion as a trailing assembler comment.
11#ifdef __LITTLE_ENDIAN__
12# define SHIFT_1(RX,RY,IMM) asl RX, RY, IMM ; <<
13# define SHIFT_2(RX,RY,IMM) lsr RX, RY, IMM ; >>
14# define MERGE_1(RX,RY,IMM) asl RX, RY, IMM
15# define MERGE_2(RX,RY,IMM)
16# define EXTRACT_1(RX,RY,IMM) and RX, RY, 0xFFFF
17# define EXTRACT_2(RX,RY,IMM) lsr RX, RY, IMM
18#else
19# define SHIFT_1(RX,RY,IMM) lsr RX, RY, IMM ; >>
20# define SHIFT_2(RX,RY,IMM) asl RX, RY, IMM ; <<
21# define MERGE_1(RX,RY,IMM) asl RX, RY, IMM ; <<
22# define MERGE_2(RX,RY,IMM) asl RX, RY, IMM ; <<
23# define EXTRACT_1(RX,RY,IMM) lsr RX, RY, IMM
24# define EXTRACT_2(RX,RY,IMM) lsr RX, RY, 0x08
25#endif
26
;;; Transfer unit of the aligned block-copy loop (CASE 0 of memcpy).
;;; With CONFIG_ARC_HAS_LL64 one LOADX or STOREX moves 8 bytes (ldd, std),
;;; otherwise 4 bytes (ld, st); in both variants the .ab form steps the
;;; address register RX by the same number of bytes.
;;; ZOLSHFT and ZOLAND must match that unit: the loop issues four loads and
;;; four stores per iteration, which is 32 bytes (shift 5, mask 0x1F) with
;;; 64-bit transfers and 16 bytes (shift 4, mask 0xF) without.
27#ifdef CONFIG_ARC_HAS_LL64
28# define LOADX(DST,RX) ldd.ab DST, [RX, 8]
29# define STOREX(SRC,RX) std.ab SRC, [RX, 8]
30# define ZOLSHFT 5
31# define ZOLAND 0x1F
32#else
33# define LOADX(DST,RX) ld.ab DST, [RX, 4]
34# define STOREX(SRC,RX) st.ab SRC, [RX, 4]
35# define ZOLSHFT 4
36# define ZOLAND 0xF
37#endif
38
;;; memcpy - copy r2 bytes from the buffer at r1 to the buffer at r0.
;;;
;;; Registers, as used below:
;;;   r0        destination; never written, so it is intact on return
;;;   r1        source pointer, advanced as bytes are read
;;;   r2        number of bytes still to be copied
;;;   r3        working copy of r0, advanced as bytes are written
;;;   r4-r10    scratch data registers
;;;             NOTE(review): with 64-bit loads each LOADX presumably fills
;;;             a register pair (so r11 would be written too) - confirm
;;;             against the ISA manual
;;;   lp_count  trip count of the lpnz hardware loops
;;;
;;; Flow:
;;;   - size 0 returns immediately; size <= 8 is copied byte by byte
;;;   - otherwise single bytes are copied until the destination is 32-bit
;;;     aligned, then the path is chosen from the source alignment:
;;;       CASE 0      source aligned as well: block copy, four transfer
;;;                   units per iteration
;;;       CASE 1,2,3  source off by 1, 2 or 3: aligned word loads are
;;;                   re-assembled with shifts, the left-over bytes of one
;;;                   word being carried in r5 into the next
;;;   - every path ends with a byte loop for the tail
;;;
;;; An instruction with a .d suffix is followed by a delay-slot instruction
;;; that is executed whether or not the branch is taken; several paths
;;; below depend on that. lpnz skips its loop body when Z is set.
39ENTRY_CFI(memcpy)
40 mov.f 0, r2 ; only sets the flags: Z when size is zero
41;;; if size is zero
42 jz.d [blink]
43 mov r3, r0 ; delay slot: do not clobber ret val, write through r3
44
45;;; if size <= 8
46 cmp r2, 8
47 bls.d @.Lsmallchunk
48 mov.f lp_count, r2 ; delay slot: byte count for .Lsmallchunk, Z clear since size != 0
49
;;; Copy single bytes until the destination is 32-bit aligned.
;;; When r0 is already aligned the and.f sets Z and lpnz skips the loop.
50 and.f r4, r0, 0x03
51 rsub lp_count, r4, 4 ; lp_count = 4 - (dst & 3), flags untouched
52 lpnz @.Laligndestination
53 ;; LOOP BEGIN
54 ldb.ab r5, [r1,1]
55 sub r2, r2, 1 ; one byte less to copy
56 stb.ab r5, [r3,1]
57.Laligndestination:
58
59;;; Check the alignment of the source
60 and.f r4, r1, 0x03
61 bnz.d @.Lsourceunaligned
62
63;;; CASE 0: Both source and destination are 32bit aligned
64;;; Convert len to Dwords, unfold x4
;;; NOTE: the lsr.f below is the delay slot of the bnz.d above, so it is
;;; also executed on the way to .Lsourceunaligned, where flags and
;;; lp_count are set again before they are used.
65 lsr.f lp_count, r2, ZOLSHFT
66 lpnz @.Lcopy32_64bytes
67 ;; LOOP START
68 LOADX (r6, r1)
69 LOADX (r8, r1)
70 LOADX (r10, r1)
71 LOADX (r4, r1)
72 STOREX (r6, r3)
73 STOREX (r8, r3)
74 STOREX (r10, r3)
75 STOREX (r4, r3)
76.Lcopy32_64bytes:
77
78 and.f lp_count, r2, ZOLAND ; bytes left over by the block loop: at most 31 (15 without 64-bit transfers)
79.Lsmallchunk:
;;; Byte-wise tail. Also entered directly for size <= 8, with lp_count and
;;; the flags set by the mov.f in the bls.d delay slot.
80 lpnz @.Lcopyremainingbytes
81 ;; LOOP START
82 ldb.ab r5, [r1,1]
83 stb.ab r5, [r3,1]
84.Lcopyremainingbytes:
85
86 j [blink]
87;;; END CASE 0
88
89.Lsourceunaligned:
;;; r4 = src & 3, i.e. 1, 2 or 3. The single cmp drives both branches
;;; below; the delay-slot instructions in between do not set flags.
90 cmp r4, 2
91 beq.d @.LunalignedOffby2
92 sub r2, r2, 1 ; delay slot, executed for all three cases
93
94 bhi.d @.LunalignedOffby3
95 ldb.ab r5, [r1, 1] ; delay slot, cases 1 and 3: r5 = first source byte
96
97;;; CASE 1: The source is unaligned, off by 1
98 ;; Hence I need to read 1 byte for a 16bit alignment
99 ;; and 2 bytes to reach 32bit alignment
100 ldh.ab r6, [r1, 2]
101 sub r2, r2, 2 ; r2 is now reduced by the 3 bytes held in r5 and r6
102 ;; Convert to words, unfold x2
103 lsr.f lp_count, r2, 3
104 MERGE_1 (r6, r6, 8)
105 MERGE_2 (r5, r5, 24)
106 or r5, r5, r6 ; r5 = the 3 bytes read so far, still to be stored
107
108 ;; Both src and dst are aligned
109 lpnz @.Lcopy8bytes_1
110 ;; LOOP START
111 ld.ab r6, [r1, 4]
112 ld.ab r8, [r1,4]
113
	;; r7 = 3 pending bytes from r5 + 1 byte of r6; r5 = other 3 bytes of r6
114 SHIFT_1 (r7, r6, 24)
115 or r7, r7, r5
116 SHIFT_2 (r5, r6, 8)
117
	;; same step for the second word: r9 is stored, r5 carries on
118 SHIFT_1 (r9, r8, 24)
119 or r9, r9, r5
120 SHIFT_2 (r5, r8, 8)
121
122 st.ab r7, [r3, 4]
123 st.ab r9, [r3, 4]
124.Lcopy8bytes_1:
125
	;; r5 still holds 3 bytes that were read but not yet stored
126 ;; Write back the remaining 16bits
127 EXTRACT_1 (r6, r5, 16)
128 sth.ab r6, [r3, 2]
129 ;; Write back the remaining 8bits
130 EXTRACT_2 (r5, r5, 16)
131 stb.ab r5, [r3, 1]
132
133 and.f lp_count, r2, 0x07 ; 0-7 bytes left after the 8-byte loop
134 lpnz @.Lcopybytewise_1
135 ;; LOOP START
136 ldb.ab r6, [r1,1]
137 stb.ab r6, [r3,1]
138.Lcopybytewise_1:
139 j [blink]
140
141.LunalignedOffby2:
142;;; CASE 2: The source is unaligned, off by 2
;;; One halfword brings the source to 32-bit alignment; its two bytes
;;; are carried in r5.
143 ldh.ab r5, [r1, 2]
144 sub r2, r2, 1 ; second byte of the halfword; the first was counted in the beq.d delay slot
145
146 ;; Both src and dst are aligned
147 ;; Convert to words, unfold x2
148 lsr.f lp_count, r2, 3
	;; big endian only: pre-shift r5 when the loop is going to run
	;; (Z clear from the lsr.f above)
149#ifdef __BIG_ENDIAN__
150 asl.nz r5, r5, 16
151#endif
152 lpnz @.Lcopy8bytes_2
153 ;; LOOP START
154 ld.ab r6, [r1, 4]
155 ld.ab r8, [r1,4]
156
	;; r7 = 2 pending bytes from r5 + 2 bytes of r6; r5 = other 2 bytes of r6
157 SHIFT_1 (r7, r6, 16)
158 or r7, r7, r5
159 SHIFT_2 (r5, r6, 16)
160
161 SHIFT_1 (r9, r8, 16)
162 or r9, r9, r5
163 SHIFT_2 (r5, r8, 16)
164
165 st.ab r7, [r3, 4]
166 st.ab r9, [r3, 4]
167.Lcopy8bytes_2:
168
	;; big endian only: no instruction since the lsr.f has set flags, so
	;; this runs exactly when the loop ran and moves the pending halfword
	;; back to the low end of r5 for the store
169#ifdef __BIG_ENDIAN__
170 lsr.nz r5, r5, 16
171#endif
172 sth.ab r5, [r3, 2]
173
174 and.f lp_count, r2, 0x07 ; 0-7 bytes left after the 8-byte loop
175 lpnz @.Lcopybytewise_2
176 ;; LOOP START
177 ldb.ab r6, [r1,1]
178 stb.ab r6, [r3,1]
179.Lcopybytewise_2:
180 j [blink]
181
182.LunalignedOffby3:
183;;; CASE 3: The source is unaligned, off by 3
184;;; Hence, I need to read 1 byte to achieve the 32bit alignment
;;; (that byte is already in r5: it was loaded in the bhi.d delay slot)
185
186 ;; Both src and dst are aligned
187 ;; Convert to words, unfold x2
188 lsr.f lp_count, r2, 3
	;; big endian only: pre-shift r5 when the loop is going to run
	;; NOTE(review): .ne here and .nz after the loop are presumably the
	;; same condition - confirm against the ISA manual
189#ifdef __BIG_ENDIAN__
190 asl.ne r5, r5, 24
191#endif
192 lpnz @.Lcopy8bytes_3
193 ;; LOOP START
194 ld.ab r6, [r1, 4]
195 ld.ab r8, [r1,4]
196
	;; r7 = 1 pending byte from r5 + 3 bytes of r6; r5 = last byte of r6
197 SHIFT_1 (r7, r6, 8)
198 or r7, r7, r5
199 SHIFT_2 (r5, r6, 24)
200
201 SHIFT_1 (r9, r8, 8)
202 or r9, r9, r5
203 SHIFT_2 (r5, r8, 24)
204
205 st.ab r7, [r3, 4]
206 st.ab r9, [r3, 4]
207.Lcopy8bytes_3:
208
	;; big endian only: flags are still those of the lsr.f, so this runs
	;; exactly when the loop ran and moves the pending byte back to the
	;; low end of r5 for the store
209#ifdef __BIG_ENDIAN__
210 lsr.nz r5, r5, 24
211#endif
212 stb.ab r5, [r3, 1]
213
214 and.f lp_count, r2, 0x07 ; 0-7 bytes left after the 8-byte loop
215 lpnz @.Lcopybytewise_3
216 ;; LOOP START
217 ldb.ab r6, [r1,1]
218 stb.ab r6, [r3,1]
219.Lcopybytewise_3:
220 j [blink]
221
222END_CFI(memcpy)