From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:14 +0100
Subject: [PATCH] crypto: arm/chacha - remove dependency on generic ChaCha
 driver

commit b36d8c09e710c71f6a9690b6586fea2d1c9e1e27 upstream.

Instead of falling back to the generic ChaCha skcipher driver for
non-SIMD cases, use a fast scalar implementation for ARM authored
by Eric Biggers. This removes the module dependency on chacha-generic
altogether, which also simplifies things when we expose the ChaCha
library interface from this module.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
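[ Note: the algorithms this module registers are reached through the
  kernel's skcipher API; the crypto core resolves "chacha20" to the
  highest-priority registered driver ("chacha20-neon" at 300 when NEON
  is usable, otherwise "chacha20-arm" at 200). Below is a minimal
  sketch of such an in-kernel caller. The demo_chacha20() helper, its
  name and its error paths are illustrative assumptions, not code from
  this patch; both drivers here are synchronous, so the sketch skips
  async completion handling. ]

#include <crypto/chacha.h>
#include <crypto/skcipher.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/scatterlist.h>

/* Hypothetical helper: encrypt len bytes of buf in place with whichever
 * "chacha20" implementation currently has the highest priority. */
static int demo_chacha20(u8 *buf, unsigned int len,
			 const u8 *key, u8 *iv)
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct scatterlist sg;
	int err;

	tfm = crypto_alloc_skcipher("chacha20", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_skcipher_setkey(tfm, key, CHACHA_KEY_SIZE);
	if (err)
		goto out_tfm;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_tfm;
	}

	sg_init_one(&sg, buf, len);
	skcipher_request_set_callback(req, 0, NULL, NULL);
	/* iv is the 16-byte ChaCha IV: 4-byte block counter + 12-byte nonce */
	skcipher_request_set_crypt(req, &sg, &sg, len, iv);
	err = crypto_skcipher_encrypt(req);

	skcipher_request_free(req);
out_tfm:
	crypto_free_skcipher(tfm);
	return err;
}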
 arch/arm/crypto/Kconfig              |   4 +-
 arch/arm/crypto/Makefile             |   3 +-
 arch/arm/crypto/chacha-glue.c        | 304 +++++++++++++++++++++++++++
 arch/arm/crypto/chacha-neon-glue.c   | 202 ------------------
 arch/arm/crypto/chacha-scalar-core.S |  65 +++---
 arch/arm64/crypto/chacha-neon-glue.c |   2 +-
 6 files changed, 340 insertions(+), 240 deletions(-)
 create mode 100644 arch/arm/crypto/chacha-glue.c
 delete mode 100644 arch/arm/crypto/chacha-neon-glue.c

--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -129,10 +129,8 @@ config CRYPTO_CRC32_ARM_CE
 	select CRYPTO_HASH
 
 config CRYPTO_CHACHA20_NEON
-	tristate "NEON accelerated ChaCha stream cipher algorithms"
-	depends on KERNEL_MODE_NEON
+	tristate "NEON and scalar accelerated ChaCha stream cipher algorithms"
 	select CRYPTO_BLKCIPHER
-	select CRYPTO_CHACHA20
 
 config CRYPTO_NHPOLY1305_NEON
 	tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)"
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -33,7 +33,8 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glu
 ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
 crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
 crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
-chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o
+chacha-neon-y := chacha-scalar-core.o chacha-glue.o
+chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o
 nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
 
 ifdef REGENERATE_ARM_CRYPTO
--- /dev/null
+++ b/arch/arm/crypto/chacha-glue.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
+ * including ChaCha20 (RFC7539)
+ *
+ * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2015 Martin Willi
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/chacha.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/cputype.h>
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+
+asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
+				      int nrounds);
+asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
+				       int nrounds);
+asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
+asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
+
+asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
+			     const u32 *state, int nrounds);
+
+static inline bool neon_usable(void)
+{
+	return crypto_simd_usable();
+}
+
+static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
+			  unsigned int bytes, int nrounds)
+{
+	u8 buf[CHACHA_BLOCK_SIZE];
+
+	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
+		chacha_4block_xor_neon(state, dst, src, nrounds);
+		bytes -= CHACHA_BLOCK_SIZE * 4;
+		src += CHACHA_BLOCK_SIZE * 4;
+		dst += CHACHA_BLOCK_SIZE * 4;
+		state[12] += 4;
+	}
+	while (bytes >= CHACHA_BLOCK_SIZE) {
+		chacha_block_xor_neon(state, dst, src, nrounds);
+		bytes -= CHACHA_BLOCK_SIZE;
+		src += CHACHA_BLOCK_SIZE;
+		dst += CHACHA_BLOCK_SIZE;
+		state[12]++;
+	}
+	if (bytes) {
+		memcpy(buf, src, bytes);
+		chacha_block_xor_neon(state, buf, buf, nrounds);
+		memcpy(dst, buf, bytes);
+	}
+}
+
+static int chacha_stream_xor(struct skcipher_request *req,
+			     const struct chacha_ctx *ctx, const u8 *iv,
+			     bool neon)
+{
+	struct skcipher_walk walk;
+	u32 state[16];
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	chacha_init_generic(state, ctx->key, iv);
+
+	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
+
+		if (nbytes < walk.total)
+			nbytes = round_down(nbytes, walk.stride);
+
+		if (!neon) {
+			chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
+				     nbytes, state, ctx->nrounds);
+			state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
+		} else {
+			kernel_neon_begin();
+			chacha_doneon(state, walk.dst.virt.addr,
+				      walk.src.virt.addr, nbytes, ctx->nrounds);
+			kernel_neon_end();
+		}
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+	}
+
+	return err;
+}
+
+static int do_chacha(struct skcipher_request *req, bool neon)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	return chacha_stream_xor(req, ctx, req->iv, neon);
+}
+
+static int chacha_arm(struct skcipher_request *req)
+{
+	return do_chacha(req, false);
+}
+
+static int chacha_neon(struct skcipher_request *req)
+{
+	return do_chacha(req, neon_usable());
+}
+
+static int do_xchacha(struct skcipher_request *req, bool neon)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct chacha_ctx subctx;
+	u32 state[16];
+	u8 real_iv[16];
+
+	chacha_init_generic(state, ctx->key, req->iv);
+
+	if (!neon) {
+		hchacha_block_arm(state, subctx.key, ctx->nrounds);
+	} else {
+		kernel_neon_begin();
+		hchacha_block_neon(state, subctx.key, ctx->nrounds);
+		kernel_neon_end();
+	}
+	subctx.nrounds = ctx->nrounds;
+
+	memcpy(&real_iv[0], req->iv + 24, 8);
+	memcpy(&real_iv[8], req->iv + 16, 8);
+	return chacha_stream_xor(req, &subctx, real_iv, neon);
+}
+
+static int xchacha_arm(struct skcipher_request *req)
+{
+	return do_xchacha(req, false);
+}
+
+static int xchacha_neon(struct skcipher_request *req)
+{
+	return do_xchacha(req, neon_usable());
+}
+
+static struct skcipher_alg arm_algs[] = {
+	{
+		.base.cra_name = "chacha20",
+		.base.cra_driver_name = "chacha20-arm",
+		.base.cra_priority = 200,
+		.base.cra_blocksize = 1,
+		.base.cra_ctxsize = sizeof(struct chacha_ctx),
+		.base.cra_module = THIS_MODULE,
+
+		.min_keysize = CHACHA_KEY_SIZE,
+		.max_keysize = CHACHA_KEY_SIZE,
+		.ivsize = CHACHA_IV_SIZE,
+		.chunksize = CHACHA_BLOCK_SIZE,
+		.setkey = chacha20_setkey,
+		.encrypt = chacha_arm,
+		.decrypt = chacha_arm,
+	}, {
+		.base.cra_name = "xchacha20",
+		.base.cra_driver_name = "xchacha20-arm",
+		.base.cra_priority = 200,
+		.base.cra_blocksize = 1,
+		.base.cra_ctxsize = sizeof(struct chacha_ctx),
+		.base.cra_module = THIS_MODULE,
+
+		.min_keysize = CHACHA_KEY_SIZE,
+		.max_keysize = CHACHA_KEY_SIZE,
+		.ivsize = XCHACHA_IV_SIZE,
+		.chunksize = CHACHA_BLOCK_SIZE,
+		.setkey = chacha20_setkey,
+		.encrypt = xchacha_arm,
+		.decrypt = xchacha_arm,
+	}, {
+		.base.cra_name = "xchacha12",
+		.base.cra_driver_name = "xchacha12-arm",
+		.base.cra_priority = 200,
+		.base.cra_blocksize = 1,
+		.base.cra_ctxsize = sizeof(struct chacha_ctx),
+		.base.cra_module = THIS_MODULE,
+
+		.min_keysize = CHACHA_KEY_SIZE,
+		.max_keysize = CHACHA_KEY_SIZE,
+		.ivsize = XCHACHA_IV_SIZE,
+		.chunksize = CHACHA_BLOCK_SIZE,
+		.setkey = chacha12_setkey,
+		.encrypt = xchacha_arm,
+		.decrypt = xchacha_arm,
+	},
+};
+
+static struct skcipher_alg neon_algs[] = {
+	{
+		.base.cra_name = "chacha20",
+		.base.cra_driver_name = "chacha20-neon",
+		.base.cra_priority = 300,
+		.base.cra_blocksize = 1,
+		.base.cra_ctxsize = sizeof(struct chacha_ctx),
+		.base.cra_module = THIS_MODULE,
+
+		.min_keysize = CHACHA_KEY_SIZE,
+		.max_keysize = CHACHA_KEY_SIZE,
+		.ivsize = CHACHA_IV_SIZE,
+		.chunksize = CHACHA_BLOCK_SIZE,
+		.walksize = 4 * CHACHA_BLOCK_SIZE,
+		.setkey = chacha20_setkey,
+		.encrypt = chacha_neon,
+		.decrypt = chacha_neon,
+	}, {
+		.base.cra_name = "xchacha20",
+		.base.cra_driver_name = "xchacha20-neon",
+		.base.cra_priority = 300,
+		.base.cra_blocksize = 1,
+		.base.cra_ctxsize = sizeof(struct chacha_ctx),
+		.base.cra_module = THIS_MODULE,
+
+		.min_keysize = CHACHA_KEY_SIZE,
+		.max_keysize = CHACHA_KEY_SIZE,
+		.ivsize = XCHACHA_IV_SIZE,
+		.chunksize = CHACHA_BLOCK_SIZE,
+		.walksize = 4 * CHACHA_BLOCK_SIZE,
+		.setkey = chacha20_setkey,
+		.encrypt = xchacha_neon,
+		.decrypt = xchacha_neon,
+	}, {
+		.base.cra_name = "xchacha12",
+		.base.cra_driver_name = "xchacha12-neon",
+		.base.cra_priority = 300,
+		.base.cra_blocksize = 1,
+		.base.cra_ctxsize = sizeof(struct chacha_ctx),
+		.base.cra_module = THIS_MODULE,
+
+		.min_keysize = CHACHA_KEY_SIZE,
+		.max_keysize = CHACHA_KEY_SIZE,
+		.ivsize = XCHACHA_IV_SIZE,
+		.chunksize = CHACHA_BLOCK_SIZE,
+		.walksize = 4 * CHACHA_BLOCK_SIZE,
+		.setkey = chacha12_setkey,
+		.encrypt = xchacha_neon,
+		.decrypt = xchacha_neon,
+	}
+};
+
+static int __init chacha_simd_mod_init(void)
+{
+	int err;
+
+	err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
+	if (err)
+		return err;
+
+	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
+		int i;
+
+		switch (read_cpuid_part()) {
+		case ARM_CPU_PART_CORTEX_A7:
+		case ARM_CPU_PART_CORTEX_A5:
+			/*
+			 * The Cortex-A7 and Cortex-A5 do not perform well with
+			 * the NEON implementation but do incredibly with the
+			 * scalar one and use less power.
+			 */
+			for (i = 0; i < ARRAY_SIZE(neon_algs); i++)
+				neon_algs[i].base.cra_priority = 0;
+			break;
+		}
+
+		err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
+		if (err)
+			crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
+	}
+	return err;
+}
+
+static void __exit chacha_simd_mod_fini(void)
+{
+	crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
+	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON))
+		crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
+}
+
+module_init(chacha_simd_mod_init);
+module_exit(chacha_simd_mod_fini);
+
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-arm");
+MODULE_ALIAS_CRYPTO("xchacha20");
+MODULE_ALIAS_CRYPTO("xchacha20-arm");
+MODULE_ALIAS_CRYPTO("xchacha12");
+MODULE_ALIAS_CRYPTO("xchacha12-arm");
+#ifdef CONFIG_KERNEL_MODE_NEON
+MODULE_ALIAS_CRYPTO("chacha20-neon");
+MODULE_ALIAS_CRYPTO("xchacha20-neon");
+MODULE_ALIAS_CRYPTO("xchacha12-neon");
+#endif
--- a/arch/arm/crypto/chacha-neon-glue.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
- * including ChaCha20 (RFC7539)
- *
- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on:
- * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <crypto/algapi.h>
-#include <crypto/internal/chacha.h>
-#include <crypto/internal/simd.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-
-asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
-				      int nrounds);
-asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
-				       int nrounds);
-asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
-
-static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
-			  unsigned int bytes, int nrounds)
-{
-	u8 buf[CHACHA_BLOCK_SIZE];
-
-	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
-		chacha_4block_xor_neon(state, dst, src, nrounds);
-		bytes -= CHACHA_BLOCK_SIZE * 4;
-		src += CHACHA_BLOCK_SIZE * 4;
-		dst += CHACHA_BLOCK_SIZE * 4;
-		state[12] += 4;
-	}
-	while (bytes >= CHACHA_BLOCK_SIZE) {
-		chacha_block_xor_neon(state, dst, src, nrounds);
-		bytes -= CHACHA_BLOCK_SIZE;
-		src += CHACHA_BLOCK_SIZE;
-		dst += CHACHA_BLOCK_SIZE;
-		state[12]++;
-	}
-	if (bytes) {
-		memcpy(buf, src, bytes);
-		chacha_block_xor_neon(state, buf, buf, nrounds);
-		memcpy(dst, buf, bytes);
-	}
-}
-
-static int chacha_neon_stream_xor(struct skcipher_request *req,
-				  const struct chacha_ctx *ctx, const u8 *iv)
-{
-	struct skcipher_walk walk;
-	u32 state[16];
-	int err;
-
-	err = skcipher_walk_virt(&walk, req, false);
-
-	crypto_chacha_init(state, ctx, iv);
-
-	while (walk.nbytes > 0) {
-		unsigned int nbytes = walk.nbytes;
-
-		if (nbytes < walk.total)
-			nbytes = round_down(nbytes, walk.stride);
-
-		kernel_neon_begin();
-		chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
-			      nbytes, ctx->nrounds);
-		kernel_neon_end();
-		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
-	}
-
-	return err;
-}
-
-static int chacha_neon(struct skcipher_request *req)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
-
-	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
-		return crypto_chacha_crypt(req);
-
-	return chacha_neon_stream_xor(req, ctx, req->iv);
-}
-
-static int xchacha_neon(struct skcipher_request *req)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct chacha_ctx subctx;
-	u32 state[16];
-	u8 real_iv[16];
-
-	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
-		return crypto_xchacha_crypt(req);
-
-	crypto_chacha_init(state, ctx, req->iv);
-
-	kernel_neon_begin();
-	hchacha_block_neon(state, subctx.key, ctx->nrounds);
-	kernel_neon_end();
-	subctx.nrounds = ctx->nrounds;
-
-	memcpy(&real_iv[0], req->iv + 24, 8);
-	memcpy(&real_iv[8], req->iv + 16, 8);
-	return chacha_neon_stream_xor(req, &subctx, real_iv);
-}
-
-static struct skcipher_alg algs[] = {
-	{
-		.base.cra_name = "chacha20",
-		.base.cra_driver_name = "chacha20-neon",
-		.base.cra_priority = 300,
-		.base.cra_blocksize = 1,
-		.base.cra_ctxsize = sizeof(struct chacha_ctx),
-		.base.cra_module = THIS_MODULE,
-
-		.min_keysize = CHACHA_KEY_SIZE,
-		.max_keysize = CHACHA_KEY_SIZE,
-		.ivsize = CHACHA_IV_SIZE,
-		.chunksize = CHACHA_BLOCK_SIZE,
-		.walksize = 4 * CHACHA_BLOCK_SIZE,
-		.setkey = crypto_chacha20_setkey,
-		.encrypt = chacha_neon,
-		.decrypt = chacha_neon,
-	}, {
-		.base.cra_name = "xchacha20",
-		.base.cra_driver_name = "xchacha20-neon",
-		.base.cra_priority = 300,
-		.base.cra_blocksize = 1,
-		.base.cra_ctxsize = sizeof(struct chacha_ctx),
-		.base.cra_module = THIS_MODULE,
-
-		.min_keysize = CHACHA_KEY_SIZE,
-		.max_keysize = CHACHA_KEY_SIZE,
-		.ivsize = XCHACHA_IV_SIZE,
-		.chunksize = CHACHA_BLOCK_SIZE,
-		.walksize = 4 * CHACHA_BLOCK_SIZE,
-		.setkey = crypto_chacha20_setkey,
-		.encrypt = xchacha_neon,
-		.decrypt = xchacha_neon,
-	}, {
-		.base.cra_name = "xchacha12",
-		.base.cra_driver_name = "xchacha12-neon",
-		.base.cra_priority = 300,
-		.base.cra_blocksize = 1,
-		.base.cra_ctxsize = sizeof(struct chacha_ctx),
-		.base.cra_module = THIS_MODULE,
-
-		.min_keysize = CHACHA_KEY_SIZE,
-		.max_keysize = CHACHA_KEY_SIZE,
-		.ivsize = XCHACHA_IV_SIZE,
-		.chunksize = CHACHA_BLOCK_SIZE,
-		.walksize = 4 * CHACHA_BLOCK_SIZE,
-		.setkey = crypto_chacha12_setkey,
-		.encrypt = xchacha_neon,
-		.decrypt = xchacha_neon,
-	}
-};
-
-static int __init chacha_simd_mod_init(void)
-{
-	if (!(elf_hwcap & HWCAP_NEON))
-		return -ENODEV;
-
-	return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
-}
-
-static void __exit chacha_simd_mod_fini(void)
-{
-	crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
-}
-
-module_init(chacha_simd_mod_init);
-module_exit(chacha_simd_mod_fini);
-
-MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)");
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("chacha20");
-MODULE_ALIAS_CRYPTO("chacha20-neon");
-MODULE_ALIAS_CRYPTO("xchacha20");
-MODULE_ALIAS_CRYPTO("xchacha20-neon");
-MODULE_ALIAS_CRYPTO("xchacha12");
-MODULE_ALIAS_CRYPTO("xchacha12-neon");
--- a/arch/arm/crypto/chacha-scalar-core.S
+++ b/arch/arm/crypto/chacha-scalar-core.S
@@ -41,14 +41,6 @@
 	X14 .req r12
 	X15 .req r14
 
-.Lexpand_32byte_k:
-	// "expand 32-byte k"
-	.word 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
-
-#ifdef __thumb2__
-# define adrl adr
-#endif
-
 .macro __rev out, in, t0, t1, t2
 .if __LINUX_ARM_ARCH__ >= 6
 	rev \out, \in
@@ -391,61 +383,65 @@
 .endm // _chacha
 
 /*
- * void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8],
- *                   const u32 iv[4]);
+ * void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
+ *                   const u32 *state, int nrounds);
  */
-ENTRY(chacha20_arm)
+ENTRY(chacha_doarm)
 	cmp r2, #0 // len == 0?
 	reteq lr
 
+	ldr ip, [sp]
+	cmp ip, #12
+
 	push {r0-r2,r4-r11,lr}
 
 	// Push state x0-x15 onto stack.
 	// Also store an extra copy of x10-x11 just before the state.
 
-	ldr r4, [sp, #48] // iv
-	mov r0, sp
-	sub sp, #80
-
-	// iv: x12-x15
-	ldm r4, {X12,X13,X14,X15}
-	stmdb r0!, {X12,X13,X14,X15}
+	add X12, r3, #48
+	ldm X12, {X12,X13,X14,X15}
+	push {X12,X13,X14,X15}
+	sub sp, sp, #64
 
-	// key: x4-x11
-	__ldrd X8_X10, X9_X11, r3, 24
+	__ldrd X8_X10, X9_X11, r3, 40
 	__strd X8_X10, X9_X11, sp, 8
-	stmdb r0!, {X8_X10, X9_X11}
-	ldm r3, {X4-X9_X11}
-	stmdb r0!, {X4-X9_X11}
-
-	// constants: x0-x3
-	adrl X3, .Lexpand_32byte_k
-	ldm X3, {X0-X3}
+	__strd X8_X10, X9_X11, sp, 56
+	ldm r3, {X0-X9_X11}
 	__strd X0, X1, sp, 16
 	__strd X2, X3, sp, 24
+	__strd X4, X5, sp, 32
+	__strd X6, X7, sp, 40
+	__strd X8_X10, X9_X11, sp, 48
 
+	beq 1f
 	_chacha 20
 
-	add sp, #76
+0:	add sp, #76
 	pop {r4-r11, pc}
-ENDPROC(chacha20_arm)
+
+1:	_chacha 12
+	b 0b
+ENDPROC(chacha_doarm)
 
 /*
- * void hchacha20_arm(const u32 state[16], u32 out[8]);
+ * void hchacha_block_arm(const u32 state[16], u32 out[8], int nrounds);
  */
-ENTRY(hchacha20_arm)
+ENTRY(hchacha_block_arm)
 	push {r1,r4-r11,lr}
 
+	cmp r2, #12 // ChaCha12 ?
+
 	mov r14, r0
 	ldmia r14!, {r0-r11} // load x0-x11
 	push {r10-r11} // store x10-x11 to stack
 	ldm r14, {r10-r12,r14} // load x12-x15
 	sub sp, #8
 
+	beq 1f
 	_chacha_permute 20
 
 	// Skip over (unused0-unused1, x10-x11)
-	add sp, #16
+0:	add sp, #16
 
 	// Fix up rotations of x12-x15
 	ror X12, X12, #drot
@@ -458,4 +454,7 @@ ENTRY(hchacha20_arm)
 	stm r4, {X0,X1,X2,X3,X12,X13,X14,X15}
 
 	pop {r4-r11,pc}
-ENDPROC(hchacha20_arm)
+
+1:	_chacha_permute 12
+	b 0b
+ENDPROC(hchacha_block_arm)
--- a/arch/arm64/crypto/chacha-neon-glue.c
+++ b/arch/arm64/crypto/chacha-neon-glue.c
@@ -1,5 +1,5 @@
 /*
- * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
+ * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers,
  * including ChaCha20 (RFC7539)
  *
  * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
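
[ Usage note, not part of the upstream patch: after this module loads,
  /proc/crypto should list the "*-arm" skciphers at priority 200 and,
  when NEON is available, the "*-neon" ones at priority 300 — or 0 on
  Cortex-A5/A7, where the init code demotes them so the scalar code
  wins — and the crypto core hands "chacha20" requests to whichever
  driver ranks highest. ]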