/*
 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
10
/* included by aes-ce.S and aes-neon.S */
12
13 .text
14 .align 4
15
/*
 * Local helper, reached with 'bl' from the mode routines in this file.
 * Expands encrypt_block4x over v0-v3, taking the round count from w22 and
 * the round-key pointer from x21 (the registers in which those routines
 * park 'rounds' and 'rk'). x8 and w7 are handed to the macro as its last
 * two operands - presumably scratch registers; confirm against the macro
 * definition in the including file.
 */
aes_encrypt_block4x:
	encrypt_block4x	v0, v1, v2, v3, w22, x21, x8, w7
	ret
ENDPROC(aes_encrypt_block4x)
20
/*
 * Local helper, reached with 'bl' from the mode routines in this file.
 * Expands decrypt_block4x over v0-v3, taking the round count from w22 and
 * the round-key pointer from x21 (the registers in which those routines
 * park 'rounds' and 'rk'). x8 and w7 are handed to the macro as its last
 * two operands - presumably scratch registers; confirm against the macro
 * definition in the including file.
 */
aes_decrypt_block4x:
	decrypt_block4x	v0, v1, v2, v3, w22, x21, x8, w7
	ret
ENDPROC(aes_decrypt_block4x)
25
/*
 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		   int blocks)
 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		   int blocks)
 *
 * aes_ecb_encrypt: encrypt 'blocks' 16-byte blocks from in[] into out[],
 * each block on its own. Four blocks are processed per iteration while at
 * least four remain; the remainder is handled one block at a time.
 *
 * Register use after the prologue:
 *   x19 = out, x20 = in, x21 = rk, w22 = rounds, w23 = blocks remaining
 */
AES_ENTRY(aes_ecb_encrypt)
	frame_push	5

	/* park the arguments; these moves are independent of each other */
	mov		x23, x4				/* blocks */
	mov		x22, x3				/* rounds */
	mov		x21, x2				/* rk */
	mov		x20, x1				/* in */
	mov		x19, x0				/* out */

	/*
	 * cond_yield_neon below is given this label, so enc_prepare is
	 * executed again whenever that macro branches here.
	 */
.Lecb_enc_setup:
	enc_prepare	w22, x21, x5

.Lecb_enc_quad:
	subs		w23, w23, #4
	bmi		.Lecb_enc_rest			/* fewer than 4 left */
	ld1		{v0.16b-v3.16b}, [x20], #64	/* fetch 4 pt blocks */
	bl		aes_encrypt_block4x
	st1		{v0.16b-v3.16b}, [x19], #64	/* emit 4 ct blocks */
	cond_yield_neon	.Lecb_enc_setup
	b		.Lecb_enc_quad

.Lecb_enc_rest:
	adds		w23, w23, #4			/* undo the failed subtract */
	beq		.Lecb_enc_done			/* nothing left over */
.Lecb_enc_single:
	ld1		{v0.16b}, [x20], #16		/* fetch 1 pt block */
	encrypt_block	v0, w22, x21, x5, w6
	st1		{v0.16b}, [x19], #16		/* emit 1 ct block */
	subs		w23, w23, #1
	bne		.Lecb_enc_single

.Lecb_enc_done:
	frame_pop
	ret
AES_ENDPROC(aes_ecb_encrypt)
66
67
/*
 * aes_ecb_decrypt: decrypt 'blocks' 16-byte blocks from in[] into out[],
 * each block on its own. Four blocks are processed per iteration while at
 * least four remain; the remainder is handled one block at a time.
 *
 * Register use after the prologue:
 *   x19 = out, x20 = in, x21 = rk, w22 = rounds, w23 = blocks remaining
 */
AES_ENTRY(aes_ecb_decrypt)
	frame_push	5

	/* park the arguments; these moves are independent of each other */
	mov		x23, x4				/* blocks */
	mov		x22, x3				/* rounds */
	mov		x21, x2				/* rk */
	mov		x20, x1				/* in */
	mov		x19, x0				/* out */

	/*
	 * cond_yield_neon below is given this label, so dec_prepare is
	 * executed again whenever that macro branches here.
	 */
.Lecb_dec_setup:
	dec_prepare	w22, x21, x5

.Lecb_dec_quad:
	subs		w23, w23, #4
	bmi		.Lecb_dec_rest			/* fewer than 4 left */
	ld1		{v0.16b-v3.16b}, [x20], #64	/* fetch 4 ct blocks */
	bl		aes_decrypt_block4x
	st1		{v0.16b-v3.16b}, [x19], #64	/* emit 4 pt blocks */
	cond_yield_neon	.Lecb_dec_setup
	b		.Lecb_dec_quad

.Lecb_dec_rest:
	adds		w23, w23, #4			/* undo the failed subtract */
	beq		.Lecb_dec_done			/* nothing left over */
.Lecb_dec_single:
	ld1		{v0.16b}, [x20], #16		/* fetch 1 ct block */
	decrypt_block	v0, w22, x21, x5, w6
	st1		{v0.16b}, [x19], #16		/* emit 1 pt block */
	subs		w23, w23, #1
	bne		.Lecb_dec_single

.Lecb_dec_done:
	frame_pop
	ret
AES_ENDPROC(aes_ecb_decrypt)
101
102
/*
 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		   int blocks, u8 iv[])
 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		   int blocks, u8 iv[])
 *
 * aes_cbc_encrypt: each plaintext block is xor'ed with the previous
 * ciphertext block (the iv for the very first one) and then encrypted.
 * The chaining forces the encryptions to run one after another, so the
 * "4x" loop only batches the loads and stores. The last ciphertext block
 * is written back to iv[].
 *
 * Register use after the prologue:
 *   x19 = out, x20 = in, x21 = rk, w22 = rounds, w23 = blocks remaining,
 *   x24 = iv, v4 = current chaining value
 */
AES_ENTRY(aes_cbc_encrypt)
	frame_push	6

	/* park the arguments; these moves are independent of each other */
	mov		x24, x5				/* iv */
	mov		x23, x4				/* blocks */
	mov		x22, x3				/* rounds */
	mov		x21, x2				/* rk */
	mov		x20, x1				/* in */
	mov		x19, x0				/* out */

	/*
	 * Also the cond_yield_neon target: the chaining value is re-read
	 * from iv[] (it is stored there just before each yield point) and
	 * enc_prepare is executed again.
	 */
.Lcbc_enc_setup:
	ld1		{v4.16b}, [x24]			/* v4 = iv */
	enc_prepare	w22, x21, x6

.Lcbc_enc_quad:
	subs		w23, w23, #4
	bmi		.Lcbc_enc_rest			/* fewer than 4 left */
	ld1		{v0.16b-v3.16b}, [x20], #64	/* fetch 4 pt blocks */
	eor		v0.16b, v0.16b, v4.16b		/* chain in the iv */
	encrypt_block	v0, w22, x21, x6, w7
	eor		v1.16b, v1.16b, v0.16b		/* chain in ct block 0 */
	encrypt_block	v1, w22, x21, x6, w7
	eor		v2.16b, v2.16b, v1.16b		/* chain in ct block 1 */
	encrypt_block	v2, w22, x21, x6, w7
	eor		v3.16b, v3.16b, v2.16b		/* chain in ct block 2 */
	encrypt_block	v3, w22, x21, x6, w7
	st1		{v0.16b-v3.16b}, [x19], #64	/* emit 4 ct blocks */
	st1		{v3.16b}, [x24]			/* last ct block => iv[] */
	mov		v4.16b, v3.16b			/* ... and next chain value */
	cond_yield_neon	.Lcbc_enc_setup
	b		.Lcbc_enc_quad

.Lcbc_enc_rest:
	adds		w23, w23, #4			/* undo the failed subtract */
	beq		.Lcbc_enc_done
.Lcbc_enc_single:
	ld1		{v0.16b}, [x20], #16		/* fetch 1 pt block */
	eor		v4.16b, v4.16b, v0.16b		/* chain value ^= pt */
	encrypt_block	v4, w22, x21, x6, w7		/* v4 = ct = next chain */
	st1		{v4.16b}, [x19], #16
	subs		w23, w23, #1
	bne		.Lcbc_enc_single

.Lcbc_enc_done:
	st1		{v4.16b}, [x24]			/* hand the iv back */
	frame_pop
	ret
AES_ENDPROC(aes_cbc_encrypt)
156
157
/*
 * aes_cbc_decrypt: every ciphertext block is decrypted and the result is
 * xor'ed with the preceding ciphertext block (the iv for the first one).
 * The four decryptions of a batch go through aes_decrypt_block4x together.
 * The last ciphertext block consumed is written back to iv[].
 *
 * Register use after the prologue:
 *   x19 = out, x20 = in, x21 = rk, w22 = rounds, w23 = blocks remaining,
 *   x24 = iv, v7 = current chaining value
 */
AES_ENTRY(aes_cbc_decrypt)
	frame_push	6

	/* park the arguments; these moves are independent of each other */
	mov		x24, x5				/* iv */
	mov		x23, x4				/* blocks */
	mov		x22, x3				/* rounds */
	mov		x21, x2				/* rk */
	mov		x20, x1				/* in */
	mov		x19, x0				/* out */

	/*
	 * Also the cond_yield_neon target: the chaining value is re-read
	 * from iv[] (it is stored there just before each yield point) and
	 * dec_prepare is executed again.
	 */
.Lcbc_dec_setup:
	ld1		{v7.16b}, [x24]			/* v7 = iv */
	dec_prepare	w22, x21, x6

.Lcbc_dec_quad:
	subs		w23, w23, #4
	bmi		.Lcbc_dec_rest			/* fewer than 4 left */
	ld1		{v0.16b-v3.16b}, [x20], #64	/* fetch 4 ct blocks */
	/* keep ct blocks 0-2: they are the chain values for pt 1-3 */
	mov		v6.16b, v2.16b
	mov		v5.16b, v1.16b
	mov		v4.16b, v0.16b
	bl		aes_decrypt_block4x
	eor		v0.16b, v0.16b, v7.16b		/* pt0 uses the old chain */
	sub		x20, x20, #16			/* step back one block */
	ld1		{v7.16b}, [x20], #16		/* re-read ct block 3 */
	eor		v1.16b, v1.16b, v4.16b
	eor		v2.16b, v2.16b, v5.16b
	eor		v3.16b, v3.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x19], #64	/* emit 4 pt blocks */
	st1		{v7.16b}, [x24]			/* ct block 3 => iv[] */
	cond_yield_neon	.Lcbc_dec_setup
	b		.Lcbc_dec_quad

.Lcbc_dec_rest:
	adds		w23, w23, #4			/* undo the failed subtract */
	beq		.Lcbc_dec_done
.Lcbc_dec_single:
	ld1		{v1.16b}, [x20], #16		/* fetch 1 ct block */
	mov		v0.16b, v1.16b			/* work on a copy */
	decrypt_block	v0, w22, x21, x6, w7
	eor		v0.16b, v0.16b, v7.16b		/* ^ chain value => pt */
	mov		v7.16b, v1.16b			/* this ct chains next */
	st1		{v0.16b}, [x19], #16
	subs		w23, w23, #1
	bne		.Lcbc_dec_single

.Lcbc_dec_done:
	st1		{v7.16b}, [x24]			/* hand the iv back */
	frame_pop
	ret
AES_ENDPROC(aes_cbc_decrypt)
207
208
/*
 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		   int blocks, u8 ctr[])
 *
 * XOR in[] with the encryption of successive counter values and write
 * the result to out[]. ctr[] holds the 16-byte counter block; it is read
 * on entry and the next unused counter value is written back on every
 * exit path, including the tail-block path.
 *
 * A negative 'blocks' requests a single "tail block": one block of raw
 * keystream is stored to out[] and in[] is not read at all.
 *
 * Register use after the prologue:
 *   x19 = out, x20 = in, x21 = rk, w22 = rounds, w23 = blocks remaining,
 *   x24 = ctr, v4 = current counter block,
 *   x6  = byte-reversed copy of the last 8 bytes of the counter block
 */
AES_ENTRY(aes_ctr_encrypt)
	frame_push	6

	mov		x19, x0				/* out */
	mov		x20, x1				/* in */
	mov		x21, x2				/* rk */
	mov		x22, x3				/* rounds */
	mov		x23, x4				/* blocks */
	mov		x24, x5				/* ctr */

	/*
	 * Also the cond_yield_neon target: enc_prepare is executed again
	 * and the counter is re-read from ctr[], where it is stored just
	 * before each yield point.
	 */
.Lctrrestart:
	enc_prepare	w22, x21, x6
	ld1		{v4.16b}, [x24]

	umov		x6, v4.d[1]		/* keep swabbed ctr in reg */
	rev		x6, x6
.LctrloopNx:
	subs		w23, w23, #4
	bmi		.Lctr1x
	/*
	 * The 4-way path only patches the last 32 bits of each counter
	 * block, so divert to the 1x path if adding 4 would carry out of
	 * those 32 bits.
	 */
	cmn		w6, #4			/* 32 bit overflow? */
	bcs		.Lctr1x
	ldr		q8, =0x30000000200000001	/* addends 1,2,3[,0] */
	dup		v7.4s, w6
	mov		v0.16b, v4.16b		/* block 0 uses ctr as is */
	add		v7.4s, v7.4s, v8.4s	/* ctr+1, ctr+2, ctr+3 */
	mov		v1.16b, v4.16b
	rev32		v8.16b, v7.16b		/* swab each 32-bit lane back */
	mov		v2.16b, v4.16b
	mov		v3.16b, v4.16b
	mov		v1.s[3], v8.s[0]
	mov		v2.s[3], v8.s[1]
	mov		v3.s[3], v8.s[2]
	ld1		{v5.16b-v7.16b}, [x20], #48	/* get 3 input blocks */
	bl		aes_encrypt_block4x
	eor		v0.16b, v5.16b, v0.16b
	ld1		{v5.16b}, [x20], #16		/* get 1 input block */
	eor		v1.16b, v6.16b, v1.16b
	eor		v2.16b, v7.16b, v2.16b
	eor		v3.16b, v5.16b, v3.16b
	st1		{v0.16b-v3.16b}, [x19], #64
	add		x6, x6, #4		/* advance ctr past the batch */
	rev		x7, x6
	ins		v4.d[1], x7
	cbz		w23, .Lctrout		/* done: .Lctrout stores ctr */
	st1		{v4.16b}, [x24]		/* return next CTR value */
	cond_yield_neon	.Lctrrestart
	b		.LctrloopNx
.Lctr1x:
	adds		w23, w23, #4		/* undo the failed subtract */
	beq		.Lctrout
.Lctrloop:
	mov		v0.16b, v4.16b
	encrypt_block	v0, w22, x21, x8, w7	/* v0 = keystream block */

	adds		x6, x6, #1		/* increment BE ctr */
	rev		x7, x6
	ins		v4.d[1], x7
	bcs		.Lctrcarry		/* overflow? */

.Lctrcarrydone:
	subs		w23, w23, #1
	bmi		.Lctrtailblock		/* blocks <0 means tail block */
	ld1		{v3.16b}, [x20], #16
	eor		v3.16b, v0.16b, v3.16b
	st1		{v3.16b}, [x19], #16
	bne		.Lctrloop		/* flags still from the subs */

.Lctrout:
	st1		{v4.16b}, [x24]		/* return next CTR value */
	frame_pop
	ret

.Lctrtailblock:
	st1		{v0.16b}, [x19]		/* raw keystream for the caller */
	/*
	 * v4 was already advanced past the counter value consumed for this
	 * keystream block, so leave through .Lctrout to write it back to
	 * ctr[]. Returning without the store would leave the caller with a
	 * stale counter.
	 */
	b		.Lctrout

.Lctrcarry:
	umov		x7, v4.d[0]		/* load upper word of ctr */
	rev		x7, x7			/* ... to handle the carry */
	add		x7, x7, #1
	rev		x7, x7
	ins		v4.d[0], x7
	b		.Lctrcarrydone
AES_ENDPROC(aes_ctr_encrypt)
	.ltorg
300
301
/*
 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
 *		   int blocks, u8 const rk2[], u8 iv[], int first)
 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
 *		   int blocks, u8 const rk2[], u8 iv[], int first)
 */
308
/*
 * next_tweak out, in, const, tmp
 *
 * Treat \in as two 64-bit lanes and compute into \out:
 *   - each lane doubled (i.e. shifted left by one bit), then
 *   - xor'ed with a correction built from the bit shifted out of the
 *     *other* lane: the top bit of each lane selects the matching lane
 *     of \const, and 'ext #8' swaps the two selected values across.
 * With \const loaded from .Lxts_mul_x below this is a 128-bit shift left
 * by one in which an overflow out of the upper half folds 0x87 into the
 * lower half.
 *
 * \tmp is clobbered. The instruction order matters: call sites in this
 * file pass the same register as \out and \in, and also as \out and
 * \const, so \in and \const must be consumed before \out is written.
 */
	.macro		next_tweak, out, in, const, tmp
	sshr		\tmp\().2d,  \in\().2d,   #63		/* lane msb -> all-ones mask */
	and		\tmp\().16b, \tmp\().16b, \const\().16b	/* keep const where msb set */
	add		\out\().2d,  \in\().2d,   \in\().2d	/* double each lane */
	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8	/* swap the two lanes */
	eor		\out\().16b, \out\().16b, \tmp\().16b
	.endm

	/*
	 * Constant operand for next_tweak, fetched with 'ldr q7'.
	 * CPU_LE/CPU_BE pick the order in which the two quads are emitted.
	 */
.Lxts_mul_x:
CPU_LE(	.quad		1, 0x87		)
CPU_BE(	.quad		0x87, 1		)
320
/*
 * aes_xts_encrypt: out[], in[], rk1[], rounds, blocks, rk2[], iv[], first
 * arrive in x0..x7 in that order.
 *
 * Each block is xor'ed with a per-block tweak, encrypted under rk1 and
 * xor'ed with the same tweak again. If 'first' is non-zero the initial
 * tweak is iv[] encrypted under rk2; otherwise iv[] holds the tweak that
 * was used last and is stepped once with next_tweak before use. The tweak
 * of the last block processed is written back to iv[].
 *
 * Register use after the prologue:
 *   x19 = out, x20 = in, x21 = rk1, w22 = rounds, w23 = blocks remaining,
 *   x24 = iv, v4 = current tweak, v7 = .Lxts_mul_x constant (overwritten
 *   with the 4th tweak inside the 4x loop and reloaded afterwards)
 */
AES_ENTRY(aes_xts_encrypt)
	frame_push	6

	/* park the arguments; these moves are independent of each other */
	mov		x24, x6				/* iv */
	mov		x23, x4				/* blocks */
	mov		x22, x3				/* rounds */
	mov		x21, x2				/* rk1 */
	mov		x20, x1				/* in */
	mov		x19, x0				/* out */

	ld1		{v4.16b}, [x24]
	cbz		w7, .Lxts_enc_setup		/* first == 0: iv is a tweak */

	/* x2, x3 and x5 still hold the incoming arguments at this point */
	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak, under rk2 */
	enc_switch_key	w3, x2, x8			/* continue with rk1 */
	ldr		q7, .Lxts_mul_x
	b		.Lxts_enc_quad			/* v4 is usable as it stands */

	/* cond_yield_neon target: pick up the tweak stored before the yield */
.Lxts_enc_reload:
	ld1		{v4.16b}, [x24]
.Lxts_enc_setup:
	enc_prepare	w22, x21, x8
.Lxts_enc_advance:
	ldr		q7, .Lxts_mul_x			/* v7 was clobbered below */
	next_tweak	v4, v4, v7, v8			/* step past the last tweak */
.Lxts_enc_quad:
	subs		w23, w23, #4
	bmi		.Lxts_enc_rest			/* fewer than 4 left */
	ld1		{v0.16b-v3.16b}, [x20], #64	/* fetch 4 pt blocks */
	next_tweak	v5, v4, v7, v8			/* tweak for block 1 */
	next_tweak	v6, v5, v7, v8			/* tweak for block 2 */
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v7, v8			/* block 3; replaces const */
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_encrypt_block4x
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	eor		v3.16b, v3.16b, v7.16b
	st1		{v0.16b-v3.16b}, [x19], #64	/* emit 4 ct blocks */
	mov		v4.16b, v7.16b			/* last tweak used */
	cbz		w23, .Lxts_enc_done
	st1		{v4.16b}, [x24]			/* save tweak across yield */
	cond_yield_neon	.Lxts_enc_reload
	b		.Lxts_enc_advance

.Lxts_enc_rest:
	adds		w23, w23, #4			/* undo the failed subtract */
	beq		.Lxts_enc_done
.Lxts_enc_single:
	ld1		{v1.16b}, [x20], #16		/* fetch 1 pt block */
	eor		v0.16b, v1.16b, v4.16b
	encrypt_block	v0, w22, x21, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	st1		{v0.16b}, [x19], #16
	subs		w23, w23, #1
	beq		.Lxts_enc_done			/* keep v4 = last tweak used */
	next_tweak	v4, v4, v7, v8
	b		.Lxts_enc_single

.Lxts_enc_done:
	st1		{v4.16b}, [x24]			/* hand the tweak back */
	frame_pop
	ret
AES_ENDPROC(aes_xts_encrypt)
387
388
/*
 * aes_xts_decrypt: out[], in[], rk1[], rounds, blocks, rk2[], iv[], first
 * arrive in x0..x7 in that order.
 *
 * Each block is xor'ed with a per-block tweak, decrypted under rk1 and
 * xor'ed with the same tweak again. If 'first' is non-zero the initial
 * tweak is iv[] *encrypted* under rk2; otherwise iv[] holds the tweak that
 * was used last and is stepped once with next_tweak before use. The tweak
 * of the last block processed is written back to iv[].
 *
 * Register use after the prologue:
 *   x19 = out, x20 = in, x21 = rk1, w22 = rounds, w23 = blocks remaining,
 *   x24 = iv, v4 = current tweak, v7 = .Lxts_mul_x constant (overwritten
 *   with the 4th tweak inside the 4x loop and reloaded afterwards)
 */
AES_ENTRY(aes_xts_decrypt)
	frame_push	6

	/* park the arguments; these moves are independent of each other */
	mov		x24, x6				/* iv */
	mov		x23, x4				/* blocks */
	mov		x22, x3				/* rounds */
	mov		x21, x2				/* rk1 */
	mov		x20, x1				/* in */
	mov		x19, x0				/* out */

	ld1		{v4.16b}, [x24]
	cbz		w7, .Lxts_dec_setup		/* first == 0: iv is a tweak */

	/* x2, x3 and x5 still hold the incoming arguments at this point */
	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak, under rk2 */
	dec_prepare	w3, x2, x8			/* continue with rk1 */
	ldr		q7, .Lxts_mul_x
	b		.Lxts_dec_quad			/* v4 is usable as it stands */

	/* cond_yield_neon target: pick up the tweak stored before the yield */
.Lxts_dec_reload:
	ld1		{v4.16b}, [x24]
.Lxts_dec_setup:
	dec_prepare	w22, x21, x8
.Lxts_dec_advance:
	ldr		q7, .Lxts_mul_x			/* v7 was clobbered below */
	next_tweak	v4, v4, v7, v8			/* step past the last tweak */
.Lxts_dec_quad:
	subs		w23, w23, #4
	bmi		.Lxts_dec_rest			/* fewer than 4 left */
	ld1		{v0.16b-v3.16b}, [x20], #64	/* fetch 4 ct blocks */
	next_tweak	v5, v4, v7, v8			/* tweak for block 1 */
	next_tweak	v6, v5, v7, v8			/* tweak for block 2 */
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v7, v8			/* block 3; replaces const */
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_decrypt_block4x
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	eor		v3.16b, v3.16b, v7.16b
	st1		{v0.16b-v3.16b}, [x19], #64	/* emit 4 pt blocks */
	mov		v4.16b, v7.16b			/* last tweak used */
	cbz		w23, .Lxts_dec_done
	st1		{v4.16b}, [x24]			/* save tweak across yield */
	cond_yield_neon	.Lxts_dec_reload
	b		.Lxts_dec_advance

.Lxts_dec_rest:
	adds		w23, w23, #4			/* undo the failed subtract */
	beq		.Lxts_dec_done
.Lxts_dec_single:
	ld1		{v1.16b}, [x20], #16		/* fetch 1 ct block */
	eor		v0.16b, v1.16b, v4.16b
	decrypt_block	v0, w22, x21, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	st1		{v0.16b}, [x19], #16
	subs		w23, w23, #1
	beq		.Lxts_dec_done			/* keep v4 = last tweak used */
	next_tweak	v4, v4, v7, v8
	b		.Lxts_dec_single

.Lxts_dec_done:
	st1		{v4.16b}, [x24]			/* hand the tweak back */
	frame_pop
	ret
AES_ENDPROC(aes_xts_decrypt)
455
/*
 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
 *		  int blocks, u8 dg[], int enc_before, int enc_after)
 *
 * Fold 'blocks' 16-byte blocks from in[] into the running digest dg[]:
 * each block is xor'ed into the digest and the digest is then encrypted.
 *   enc_before != 0: encrypt the incoming digest once before absorbing
 *		      any input.
 *   enc_after  == 0: leave the digest unencrypted after the final block
 *		      has been xor'ed in.
 * The resulting digest is stored back to dg[].
 *
 * Register use after the prologue:
 *   x19 = in, x20 = rk, w21 = rounds, w22 = blocks remaining, x23 = dg,
 *   x24 = enc_after, v0 = running digest
 */
AES_ENTRY(aes_mac_update)
	frame_push	6

	/* park the arguments; these moves are independent of each other */
	mov		x24, x6				/* enc_after */
	mov		x23, x4				/* dg */
	mov		x22, x3				/* blocks */
	mov		x21, x2				/* rounds */
	mov		x20, x1				/* rk */
	mov		x19, x0				/* in */

	ld1		{v0.16b}, [x23]			/* v0 = dg */
	enc_prepare	w2, x1, x7			/* x1/w2 not clobbered yet */
	cbz		w5, .Lmac_quad			/* enc_before == 0 */

	encrypt_block	v0, w2, x1, x7, w8

.Lmac_quad:
	subs		w22, w22, #4
	bmi		.Lmac_rest			/* fewer than 4 left */
	ld1		{v1.16b-v4.16b}, [x19], #64	/* fetch 4 input blocks */
	eor		v0.16b, v0.16b, v1.16b		/* absorb block 0 */
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v2.16b		/* absorb block 1 */
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v3.16b		/* absorb block 2 */
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v4.16b		/* absorb block 3 */
	/*
	 * x5 = enc_after if that was the final block, all-ones otherwise:
	 * the encryption is skipped only for a final block with
	 * enc_after == 0.
	 */
	cmp		w22, wzr
	csinv		x5, x24, xzr, eq
	cbz		w5, .Lmac_done
	encrypt_block	v0, w21, x20, x7, w8
	st1		{v0.16b}, [x23]			/* save dg across yield */
	cond_yield_neon	.Lmac_resume
	b		.Lmac_quad

.Lmac_rest:
	add		w22, w22, #4			/* undo the failed subtract */
.Lmac_single:
	cbz		w22, .Lmac_done
	ld1		{v1.16b}, [x19], #16		/* fetch 1 input block */
	eor		v0.16b, v0.16b, v1.16b		/* absorb it */

	subs		w22, w22, #1
	csinv		x5, x24, xzr, eq		/* same test as in the 4x loop */
	cbz		w5, .Lmac_done

.Lmac_encrypt:
	encrypt_block	v0, w21, x20, x7, w8
	b		.Lmac_single

.Lmac_done:
	st1		{v0.16b}, [x23]			/* hand dg back */
	frame_pop
	ret

	/*
	 * cond_yield_neon target: reload the digest stored before the
	 * yield and run enc_prepare again. x0 is passed as the third
	 * operand here; its original value already lives in x19.
	 */
.Lmac_resume:
	ld1		{v0.16b}, [x23]			/* v0 = dg */
	enc_prepare	w21, x20, x0
	b		.Lmac_quad
AES_ENDPROC(aes_mac_update)