b.liu | e958203 | 2025-04-17 19:18:16 +0800 | [diff] [blame] | 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 |
| 2 | From: Ard Biesheuvel <ardb@kernel.org> |
| 3 | Date: Fri, 8 Nov 2019 13:22:14 +0100 |
| 4 | Subject: [PATCH] crypto: arm/chacha - remove dependency on generic ChaCha |
| 5 | driver |
| 6 | |
| 7 | commit b36d8c09e710c71f6a9690b6586fea2d1c9e1e27 upstream. |
| 8 | |
| 9 | Instead of falling back to the generic ChaCha skcipher driver for |
| 10 | non-SIMD cases, use a fast scalar implementation for ARM authored |
| 11 | by Eric Biggers. This removes the module dependency on chacha-generic |
| 12 | altogether, which also simplifies things when we expose the ChaCha |
| 13 | library interface from this module. |
| 14 | |
| 15 | Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| 16 | Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| 17 | Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| 18 | --- |
| 19 | arch/arm/crypto/Kconfig | 4 +- |
| 20 | arch/arm/crypto/Makefile | 3 +- |
| 21 | arch/arm/crypto/chacha-glue.c | 304 +++++++++++++++++++++++++++ |
| 22 | arch/arm/crypto/chacha-neon-glue.c | 202 ------------------ |
| 23 | arch/arm/crypto/chacha-scalar-core.S | 65 +++--- |
| 24 | arch/arm64/crypto/chacha-neon-glue.c | 2 +- |
| 25 | 6 files changed, 340 insertions(+), 240 deletions(-) |
| 26 | create mode 100644 arch/arm/crypto/chacha-glue.c |
| 27 | delete mode 100644 arch/arm/crypto/chacha-neon-glue.c |
| 28 | |
| 29 | --- a/arch/arm/crypto/Kconfig |
| 30 | +++ b/arch/arm/crypto/Kconfig |
| 31 | @@ -129,10 +129,8 @@ config CRYPTO_CRC32_ARM_CE |
| 32 | select CRYPTO_HASH |
| 33 | |
| 34 | config CRYPTO_CHACHA20_NEON |
| 35 | - tristate "NEON accelerated ChaCha stream cipher algorithms" |
| 36 | - depends on KERNEL_MODE_NEON |
| 37 | + tristate "NEON and scalar accelerated ChaCha stream cipher algorithms" |
| 38 | select CRYPTO_BLKCIPHER |
| 39 | - select CRYPTO_CHACHA20 |
| 40 | |
| 41 | config CRYPTO_NHPOLY1305_NEON |
| 42 | tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)" |
| 43 | --- a/arch/arm/crypto/Makefile |
| 44 | +++ b/arch/arm/crypto/Makefile |
| 45 | @@ -33,7 +33,8 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glu |
| 46 | ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o |
| 47 | crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o |
| 48 | crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o |
| 49 | -chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o |
| 50 | +chacha-neon-y := chacha-scalar-core.o chacha-glue.o |
| 51 | +chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o |
| 52 | nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o |
| 53 | |
| 54 | ifdef REGENERATE_ARM_CRYPTO |
| 55 | --- /dev/null |
| 56 | +++ b/arch/arm/crypto/chacha-glue.c |
| 57 | @@ -0,0 +1,304 @@ |
| 58 | +// SPDX-License-Identifier: GPL-2.0 |
| 59 | +/* |
| 60 | + * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers, |
| 61 | + * including ChaCha20 (RFC7539) |
| 62 | + * |
| 63 | + * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org> |
| 64 | + * Copyright (C) 2015 Martin Willi |
| 65 | + */ |
| 66 | + |
| 67 | +#include <crypto/algapi.h> |
| 68 | +#include <crypto/internal/chacha.h> |
| 69 | +#include <crypto/internal/simd.h> |
| 70 | +#include <crypto/internal/skcipher.h> |
| 71 | +#include <linux/kernel.h> |
| 72 | +#include <linux/module.h> |
| 73 | + |
| 74 | +#include <asm/cputype.h> |
| 75 | +#include <asm/hwcap.h> |
| 76 | +#include <asm/neon.h> |
| 77 | +#include <asm/simd.h> |
| 78 | + |
| 79 | +asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src, |
| 80 | + int nrounds); |
| 81 | +asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src, |
| 82 | + int nrounds); |
| 83 | +asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds); |
| 84 | +asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); |
| 85 | + |
| 86 | +asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, |
| 87 | + const u32 *state, int nrounds); |
| 88 | + |
| 89 | +static inline bool neon_usable(void) |
| 90 | +{ |
| 91 | + return crypto_simd_usable(); |
| 92 | +} |
| 93 | + |
| 94 | +static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, |
| 95 | + unsigned int bytes, int nrounds) |
| 96 | +{ |
| 97 | + u8 buf[CHACHA_BLOCK_SIZE]; /* bounce buffer for a partial last block */ |
| 98 | + |
| 99 | + while (bytes >= CHACHA_BLOCK_SIZE * 4) { /* bulk: 4 blocks per call */ |
| 100 | + chacha_4block_xor_neon(state, dst, src, nrounds); |
| 101 | + bytes -= CHACHA_BLOCK_SIZE * 4; |
| 102 | + src += CHACHA_BLOCK_SIZE * 4; |
| 103 | + dst += CHACHA_BLOCK_SIZE * 4; |
| 104 | + state[12] += 4; /* state[12] advances by one per block */ |
| 105 | + } |
| 106 | + while (bytes >= CHACHA_BLOCK_SIZE) { /* remaining whole blocks */ |
| 107 | + chacha_block_xor_neon(state, dst, src, nrounds); |
| 108 | + bytes -= CHACHA_BLOCK_SIZE; |
| 109 | + src += CHACHA_BLOCK_SIZE; |
| 110 | + dst += CHACHA_BLOCK_SIZE; |
| 111 | + state[12]++; |
| 112 | + } |
| 113 | + if (bytes) { /* tail shorter than one block: go through buf */ |
| 114 | + memcpy(buf, src, bytes); |
| 115 | + chacha_block_xor_neon(state, buf, buf, nrounds); |
| 116 | + memcpy(dst, buf, bytes); |
| 117 | + } |
| 118 | +} |
| 119 | + |
| 120 | +static int chacha_stream_xor(struct skcipher_request *req, |
| 121 | + const struct chacha_ctx *ctx, const u8 *iv, |
| 122 | + bool neon) |
| 123 | +{ |
| 124 | + struct skcipher_walk walk; |
| 125 | + u32 state[16]; |
| 126 | + int err; |
| 127 | + |
| 128 | + err = skcipher_walk_virt(&walk, req, false); |
| 129 | + |
| 130 | + chacha_init_generic(state, ctx->key, iv); |
| 131 | + |
| 132 | + while (walk.nbytes > 0) { |
| 133 | + unsigned int nbytes = walk.nbytes; |
| 134 | + |
| 135 | + if (nbytes < walk.total) |
| 136 | + nbytes = round_down(nbytes, walk.stride); |
| 137 | + /* Build-time test: no NEON references without NEON support */ |
| 138 | + if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) { |
| 139 | + chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr, |
| 140 | + nbytes, state, ctx->nrounds); |
| 141 | + state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE); |
| 142 | + } else { |
| 143 | + kernel_neon_begin(); |
| 144 | + chacha_doneon(state, walk.dst.virt.addr, |
| 145 | + walk.src.virt.addr, nbytes, ctx->nrounds); |
| 146 | + kernel_neon_end(); |
| 147 | + } |
| 148 | + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); |
| 149 | + } |
| 150 | + |
| 151 | + return err; |
| 152 | +} |
| 153 | + |
| 154 | +static int do_chacha(struct skcipher_request *req, bool neon) |
| 155 | +{ |
| 156 | + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
| 157 | + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); |
| 158 | + |
| 159 | + return chacha_stream_xor(req, ctx, req->iv, neon); |
| 160 | +} |
| 161 | + |
| 162 | +static int chacha_arm(struct skcipher_request *req) |
| 163 | +{ |
| 164 | + return do_chacha(req, false); |
| 165 | +} |
| 166 | + |
| 167 | +static int chacha_neon(struct skcipher_request *req) |
| 168 | +{ |
| 169 | + return do_chacha(req, neon_usable()); |
| 170 | +} |
| 171 | + |
| 172 | +static int do_xchacha(struct skcipher_request *req, bool neon) |
| 173 | +{ |
| 174 | + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
| 175 | + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); |
| 176 | + struct chacha_ctx subctx; |
| 177 | + u32 state[16]; |
| 178 | + u8 real_iv[16]; |
| 179 | + |
| 180 | + chacha_init_generic(state, ctx->key, req->iv); |
| 181 | + /* Build-time test: no NEON reference when NEON is not built in */ |
| 182 | + if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) { |
| 183 | + hchacha_block_arm(state, subctx.key, ctx->nrounds); |
| 184 | + } else { |
| 185 | + kernel_neon_begin(); |
| 186 | + hchacha_block_neon(state, subctx.key, ctx->nrounds); |
| 187 | + kernel_neon_end(); |
| 188 | + } |
| 189 | + subctx.nrounds = ctx->nrounds; |
| 190 | + |
| 191 | + memcpy(&real_iv[0], req->iv + 24, 8); |
| 192 | + memcpy(&real_iv[8], req->iv + 16, 8); |
| 193 | + return chacha_stream_xor(req, &subctx, real_iv, neon); |
| 194 | +} |
| 195 | + |
| 196 | +static int xchacha_arm(struct skcipher_request *req) |
| 197 | +{ |
| 198 | + return do_xchacha(req, false); |
| 199 | +} |
| 200 | + |
| 201 | +static int xchacha_neon(struct skcipher_request *req) |
| 202 | +{ |
| 203 | + return do_xchacha(req, neon_usable()); |
| 204 | +} |
| 205 | + |
| 206 | +static struct skcipher_alg arm_algs[] = { |
| 207 | + { |
| 208 | + .base.cra_name = "chacha20", |
| 209 | + .base.cra_driver_name = "chacha20-arm", |
| 210 | + .base.cra_priority = 200, |
| 211 | + .base.cra_blocksize = 1, |
| 212 | + .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| 213 | + .base.cra_module = THIS_MODULE, |
| 214 | + |
| 215 | + .min_keysize = CHACHA_KEY_SIZE, |
| 216 | + .max_keysize = CHACHA_KEY_SIZE, |
| 217 | + .ivsize = CHACHA_IV_SIZE, |
| 218 | + .chunksize = CHACHA_BLOCK_SIZE, |
| 219 | + .setkey = chacha20_setkey, |
| 220 | + .encrypt = chacha_arm, |
| 221 | + .decrypt = chacha_arm, |
| 222 | + }, { |
| 223 | + .base.cra_name = "xchacha20", |
| 224 | + .base.cra_driver_name = "xchacha20-arm", |
| 225 | + .base.cra_priority = 200, |
| 226 | + .base.cra_blocksize = 1, |
| 227 | + .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| 228 | + .base.cra_module = THIS_MODULE, |
| 229 | + |
| 230 | + .min_keysize = CHACHA_KEY_SIZE, |
| 231 | + .max_keysize = CHACHA_KEY_SIZE, |
| 232 | + .ivsize = XCHACHA_IV_SIZE, |
| 233 | + .chunksize = CHACHA_BLOCK_SIZE, |
| 234 | + .setkey = chacha20_setkey, |
| 235 | + .encrypt = xchacha_arm, |
| 236 | + .decrypt = xchacha_arm, |
| 237 | + }, { |
| 238 | + .base.cra_name = "xchacha12", |
| 239 | + .base.cra_driver_name = "xchacha12-arm", |
| 240 | + .base.cra_priority = 200, |
| 241 | + .base.cra_blocksize = 1, |
| 242 | + .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| 243 | + .base.cra_module = THIS_MODULE, |
| 244 | + |
| 245 | + .min_keysize = CHACHA_KEY_SIZE, |
| 246 | + .max_keysize = CHACHA_KEY_SIZE, |
| 247 | + .ivsize = XCHACHA_IV_SIZE, |
| 248 | + .chunksize = CHACHA_BLOCK_SIZE, |
| 249 | + .setkey = chacha12_setkey, |
| 250 | + .encrypt = xchacha_arm, |
| 251 | + .decrypt = xchacha_arm, |
| 252 | + }, |
| 253 | +}; |
| 254 | + |
| 255 | +static struct skcipher_alg neon_algs[] = { |
| 256 | + { |
| 257 | + .base.cra_name = "chacha20", |
| 258 | + .base.cra_driver_name = "chacha20-neon", |
| 259 | + .base.cra_priority = 300, |
| 260 | + .base.cra_blocksize = 1, |
| 261 | + .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| 262 | + .base.cra_module = THIS_MODULE, |
| 263 | + |
| 264 | + .min_keysize = CHACHA_KEY_SIZE, |
| 265 | + .max_keysize = CHACHA_KEY_SIZE, |
| 266 | + .ivsize = CHACHA_IV_SIZE, |
| 267 | + .chunksize = CHACHA_BLOCK_SIZE, |
| 268 | + .walksize = 4 * CHACHA_BLOCK_SIZE, |
| 269 | + .setkey = chacha20_setkey, |
| 270 | + .encrypt = chacha_neon, |
| 271 | + .decrypt = chacha_neon, |
| 272 | + }, { |
| 273 | + .base.cra_name = "xchacha20", |
| 274 | + .base.cra_driver_name = "xchacha20-neon", |
| 275 | + .base.cra_priority = 300, |
| 276 | + .base.cra_blocksize = 1, |
| 277 | + .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| 278 | + .base.cra_module = THIS_MODULE, |
| 279 | + |
| 280 | + .min_keysize = CHACHA_KEY_SIZE, |
| 281 | + .max_keysize = CHACHA_KEY_SIZE, |
| 282 | + .ivsize = XCHACHA_IV_SIZE, |
| 283 | + .chunksize = CHACHA_BLOCK_SIZE, |
| 284 | + .walksize = 4 * CHACHA_BLOCK_SIZE, |
| 285 | + .setkey = chacha20_setkey, |
| 286 | + .encrypt = xchacha_neon, |
| 287 | + .decrypt = xchacha_neon, |
| 288 | + }, { |
| 289 | + .base.cra_name = "xchacha12", |
| 290 | + .base.cra_driver_name = "xchacha12-neon", |
| 291 | + .base.cra_priority = 300, |
| 292 | + .base.cra_blocksize = 1, |
| 293 | + .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| 294 | + .base.cra_module = THIS_MODULE, |
| 295 | + |
| 296 | + .min_keysize = CHACHA_KEY_SIZE, |
| 297 | + .max_keysize = CHACHA_KEY_SIZE, |
| 298 | + .ivsize = XCHACHA_IV_SIZE, |
| 299 | + .chunksize = CHACHA_BLOCK_SIZE, |
| 300 | + .walksize = 4 * CHACHA_BLOCK_SIZE, |
| 301 | + .setkey = chacha12_setkey, |
| 302 | + .encrypt = xchacha_neon, |
| 303 | + .decrypt = xchacha_neon, |
| 304 | + } |
| 305 | +}; |
| 306 | + |
| 307 | +static int __init chacha_simd_mod_init(void) |
| 308 | +{ |
| 309 | + int err; |
| 310 | + |
| 311 | + err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); |
| 312 | + if (err) |
| 313 | + return err; |
| 314 | + |
| 315 | + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) { |
| 316 | + int i; |
| 317 | + |
| 318 | + switch (read_cpuid_part()) { |
| 319 | + case ARM_CPU_PART_CORTEX_A7: |
| 320 | + case ARM_CPU_PART_CORTEX_A5: |
| 321 | + /* |
| 322 | + * The Cortex-A7 and Cortex-A5 do not perform well with |
| 323 | + * the NEON implementation but do incredibly well with |
| 324 | + * the scalar one and use less power. |
| 325 | + */ |
| 326 | + for (i = 0; i < ARRAY_SIZE(neon_algs); i++) |
| 327 | + neon_algs[i].base.cra_priority = 0; |
| 328 | + break; |
| 329 | + } |
| 330 | + |
| 331 | + err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); |
| 332 | + if (err) |
| 333 | + crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); |
| 334 | + } |
| 335 | + return err; |
| 336 | +} |
| 337 | + |
| 338 | +static void __exit chacha_simd_mod_fini(void) |
| 339 | +{ |
| 340 | + crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); |
| 341 | + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) |
| 342 | + crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); |
| 343 | +} |
| 344 | + |
| 345 | +module_init(chacha_simd_mod_init); |
| 346 | +module_exit(chacha_simd_mod_fini); |
| 347 | + |
| 348 | +MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)"); |
| 349 | +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); |
| 350 | +MODULE_LICENSE("GPL v2"); |
| 351 | +MODULE_ALIAS_CRYPTO("chacha20"); |
| 352 | +MODULE_ALIAS_CRYPTO("chacha20-arm"); |
| 353 | +MODULE_ALIAS_CRYPTO("xchacha20"); |
| 354 | +MODULE_ALIAS_CRYPTO("xchacha20-arm"); |
| 355 | +MODULE_ALIAS_CRYPTO("xchacha12"); |
| 356 | +MODULE_ALIAS_CRYPTO("xchacha12-arm"); |
| 357 | +#ifdef CONFIG_KERNEL_MODE_NEON |
| 358 | +MODULE_ALIAS_CRYPTO("chacha20-neon"); |
| 359 | +MODULE_ALIAS_CRYPTO("xchacha20-neon"); |
| 360 | +MODULE_ALIAS_CRYPTO("xchacha12-neon"); |
| 361 | +#endif |
| 362 | --- a/arch/arm/crypto/chacha-neon-glue.c |
| 363 | +++ /dev/null |
| 364 | @@ -1,202 +0,0 @@ |
| 365 | -/* |
| 366 | - * ARM NEON accelerated ChaCha and XChaCha stream ciphers, |
| 367 | - * including ChaCha20 (RFC7539) |
| 368 | - * |
| 369 | - * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org> |
| 370 | - * |
| 371 | - * This program is free software; you can redistribute it and/or modify |
| 372 | - * it under the terms of the GNU General Public License version 2 as |
| 373 | - * published by the Free Software Foundation. |
| 374 | - * |
| 375 | - * Based on: |
| 376 | - * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code |
| 377 | - * |
| 378 | - * Copyright (C) 2015 Martin Willi |
| 379 | - * |
| 380 | - * This program is free software; you can redistribute it and/or modify |
| 381 | - * it under the terms of the GNU General Public License as published by |
| 382 | - * the Free Software Foundation; either version 2 of the License, or |
| 383 | - * (at your option) any later version. |
| 384 | - */ |
| 385 | - |
| 386 | -#include <crypto/algapi.h> |
| 387 | -#include <crypto/internal/chacha.h> |
| 388 | -#include <crypto/internal/simd.h> |
| 389 | -#include <crypto/internal/skcipher.h> |
| 390 | -#include <linux/kernel.h> |
| 391 | -#include <linux/module.h> |
| 392 | - |
| 393 | -#include <asm/hwcap.h> |
| 394 | -#include <asm/neon.h> |
| 395 | -#include <asm/simd.h> |
| 396 | - |
| 397 | -asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src, |
| 398 | - int nrounds); |
| 399 | -asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src, |
| 400 | - int nrounds); |
| 401 | -asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); |
| 402 | - |
| 403 | -static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, |
| 404 | - unsigned int bytes, int nrounds) |
| 405 | -{ |
| 406 | - u8 buf[CHACHA_BLOCK_SIZE]; |
| 407 | - |
| 408 | - while (bytes >= CHACHA_BLOCK_SIZE * 4) { |
| 409 | - chacha_4block_xor_neon(state, dst, src, nrounds); |
| 410 | - bytes -= CHACHA_BLOCK_SIZE * 4; |
| 411 | - src += CHACHA_BLOCK_SIZE * 4; |
| 412 | - dst += CHACHA_BLOCK_SIZE * 4; |
| 413 | - state[12] += 4; |
| 414 | - } |
| 415 | - while (bytes >= CHACHA_BLOCK_SIZE) { |
| 416 | - chacha_block_xor_neon(state, dst, src, nrounds); |
| 417 | - bytes -= CHACHA_BLOCK_SIZE; |
| 418 | - src += CHACHA_BLOCK_SIZE; |
| 419 | - dst += CHACHA_BLOCK_SIZE; |
| 420 | - state[12]++; |
| 421 | - } |
| 422 | - if (bytes) { |
| 423 | - memcpy(buf, src, bytes); |
| 424 | - chacha_block_xor_neon(state, buf, buf, nrounds); |
| 425 | - memcpy(dst, buf, bytes); |
| 426 | - } |
| 427 | -} |
| 428 | - |
| 429 | -static int chacha_neon_stream_xor(struct skcipher_request *req, |
| 430 | - const struct chacha_ctx *ctx, const u8 *iv) |
| 431 | -{ |
| 432 | - struct skcipher_walk walk; |
| 433 | - u32 state[16]; |
| 434 | - int err; |
| 435 | - |
| 436 | - err = skcipher_walk_virt(&walk, req, false); |
| 437 | - |
| 438 | - crypto_chacha_init(state, ctx, iv); |
| 439 | - |
| 440 | - while (walk.nbytes > 0) { |
| 441 | - unsigned int nbytes = walk.nbytes; |
| 442 | - |
| 443 | - if (nbytes < walk.total) |
| 444 | - nbytes = round_down(nbytes, walk.stride); |
| 445 | - |
| 446 | - kernel_neon_begin(); |
| 447 | - chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr, |
| 448 | - nbytes, ctx->nrounds); |
| 449 | - kernel_neon_end(); |
| 450 | - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); |
| 451 | - } |
| 452 | - |
| 453 | - return err; |
| 454 | -} |
| 455 | - |
| 456 | -static int chacha_neon(struct skcipher_request *req) |
| 457 | -{ |
| 458 | - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
| 459 | - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); |
| 460 | - |
| 461 | - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) |
| 462 | - return crypto_chacha_crypt(req); |
| 463 | - |
| 464 | - return chacha_neon_stream_xor(req, ctx, req->iv); |
| 465 | -} |
| 466 | - |
| 467 | -static int xchacha_neon(struct skcipher_request *req) |
| 468 | -{ |
| 469 | - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
| 470 | - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); |
| 471 | - struct chacha_ctx subctx; |
| 472 | - u32 state[16]; |
| 473 | - u8 real_iv[16]; |
| 474 | - |
| 475 | - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) |
| 476 | - return crypto_xchacha_crypt(req); |
| 477 | - |
| 478 | - crypto_chacha_init(state, ctx, req->iv); |
| 479 | - |
| 480 | - kernel_neon_begin(); |
| 481 | - hchacha_block_neon(state, subctx.key, ctx->nrounds); |
| 482 | - kernel_neon_end(); |
| 483 | - subctx.nrounds = ctx->nrounds; |
| 484 | - |
| 485 | - memcpy(&real_iv[0], req->iv + 24, 8); |
| 486 | - memcpy(&real_iv[8], req->iv + 16, 8); |
| 487 | - return chacha_neon_stream_xor(req, &subctx, real_iv); |
| 488 | -} |
| 489 | - |
| 490 | -static struct skcipher_alg algs[] = { |
| 491 | - { |
| 492 | - .base.cra_name = "chacha20", |
| 493 | - .base.cra_driver_name = "chacha20-neon", |
| 494 | - .base.cra_priority = 300, |
| 495 | - .base.cra_blocksize = 1, |
| 496 | - .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| 497 | - .base.cra_module = THIS_MODULE, |
| 498 | - |
| 499 | - .min_keysize = CHACHA_KEY_SIZE, |
| 500 | - .max_keysize = CHACHA_KEY_SIZE, |
| 501 | - .ivsize = CHACHA_IV_SIZE, |
| 502 | - .chunksize = CHACHA_BLOCK_SIZE, |
| 503 | - .walksize = 4 * CHACHA_BLOCK_SIZE, |
| 504 | - .setkey = crypto_chacha20_setkey, |
| 505 | - .encrypt = chacha_neon, |
| 506 | - .decrypt = chacha_neon, |
| 507 | - }, { |
| 508 | - .base.cra_name = "xchacha20", |
| 509 | - .base.cra_driver_name = "xchacha20-neon", |
| 510 | - .base.cra_priority = 300, |
| 511 | - .base.cra_blocksize = 1, |
| 512 | - .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| 513 | - .base.cra_module = THIS_MODULE, |
| 514 | - |
| 515 | - .min_keysize = CHACHA_KEY_SIZE, |
| 516 | - .max_keysize = CHACHA_KEY_SIZE, |
| 517 | - .ivsize = XCHACHA_IV_SIZE, |
| 518 | - .chunksize = CHACHA_BLOCK_SIZE, |
| 519 | - .walksize = 4 * CHACHA_BLOCK_SIZE, |
| 520 | - .setkey = crypto_chacha20_setkey, |
| 521 | - .encrypt = xchacha_neon, |
| 522 | - .decrypt = xchacha_neon, |
| 523 | - }, { |
| 524 | - .base.cra_name = "xchacha12", |
| 525 | - .base.cra_driver_name = "xchacha12-neon", |
| 526 | - .base.cra_priority = 300, |
| 527 | - .base.cra_blocksize = 1, |
| 528 | - .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| 529 | - .base.cra_module = THIS_MODULE, |
| 530 | - |
| 531 | - .min_keysize = CHACHA_KEY_SIZE, |
| 532 | - .max_keysize = CHACHA_KEY_SIZE, |
| 533 | - .ivsize = XCHACHA_IV_SIZE, |
| 534 | - .chunksize = CHACHA_BLOCK_SIZE, |
| 535 | - .walksize = 4 * CHACHA_BLOCK_SIZE, |
| 536 | - .setkey = crypto_chacha12_setkey, |
| 537 | - .encrypt = xchacha_neon, |
| 538 | - .decrypt = xchacha_neon, |
| 539 | - } |
| 540 | -}; |
| 541 | - |
| 542 | -static int __init chacha_simd_mod_init(void) |
| 543 | -{ |
| 544 | - if (!(elf_hwcap & HWCAP_NEON)) |
| 545 | - return -ENODEV; |
| 546 | - |
| 547 | - return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); |
| 548 | -} |
| 549 | - |
| 550 | -static void __exit chacha_simd_mod_fini(void) |
| 551 | -{ |
| 552 | - crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); |
| 553 | -} |
| 554 | - |
| 555 | -module_init(chacha_simd_mod_init); |
| 556 | -module_exit(chacha_simd_mod_fini); |
| 557 | - |
| 558 | -MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)"); |
| 559 | -MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); |
| 560 | -MODULE_LICENSE("GPL v2"); |
| 561 | -MODULE_ALIAS_CRYPTO("chacha20"); |
| 562 | -MODULE_ALIAS_CRYPTO("chacha20-neon"); |
| 563 | -MODULE_ALIAS_CRYPTO("xchacha20"); |
| 564 | -MODULE_ALIAS_CRYPTO("xchacha20-neon"); |
| 565 | -MODULE_ALIAS_CRYPTO("xchacha12"); |
| 566 | -MODULE_ALIAS_CRYPTO("xchacha12-neon"); |
| 567 | --- a/arch/arm/crypto/chacha-scalar-core.S |
| 568 | +++ b/arch/arm/crypto/chacha-scalar-core.S |
| 569 | @@ -41,14 +41,6 @@ |
| 570 | X14 .req r12 |
| 571 | X15 .req r14 |
| 572 | |
| 573 | -.Lexpand_32byte_k: |
| 574 | - // "expand 32-byte k" |
| 575 | - .word 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 |
| 576 | - |
| 577 | -#ifdef __thumb2__ |
| 578 | -# define adrl adr |
| 579 | -#endif |
| 580 | - |
| 581 | .macro __rev out, in, t0, t1, t2 |
| 582 | .if __LINUX_ARM_ARCH__ >= 6 |
| 583 | rev \out, \in |
| 584 | @@ -391,61 +383,65 @@ |
| 585 | .endm // _chacha |
| 586 | |
| 587 | /* |
| 588 | - * void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8], |
| 589 | - * const u32 iv[4]); |
| 590 | + * void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, |
| 591 | + * const u32 *state, int nrounds); |
| 592 | */ |
| 593 | -ENTRY(chacha20_arm) |
| 594 | +ENTRY(chacha_doarm) |
| 595 | cmp r2, #0 // len == 0? |
| 596 | reteq lr |
| 597 | |
| 598 | + ldr ip, [sp] |
| 599 | + cmp ip, #12 |
| 600 | + |
| 601 | push {r0-r2,r4-r11,lr} |
| 602 | |
| 603 | // Push state x0-x15 onto stack. |
| 604 | // Also store an extra copy of x10-x11 just before the state. |
| 605 | |
| 606 | - ldr r4, [sp, #48] // iv |
| 607 | - mov r0, sp |
| 608 | - sub sp, #80 |
| 609 | - |
| 610 | - // iv: x12-x15 |
| 611 | - ldm r4, {X12,X13,X14,X15} |
| 612 | - stmdb r0!, {X12,X13,X14,X15} |
| 613 | + add X12, r3, #48 |
| 614 | + ldm X12, {X12,X13,X14,X15} |
| 615 | + push {X12,X13,X14,X15} |
| 616 | + sub sp, sp, #64 |
| 617 | |
| 618 | - // key: x4-x11 |
| 619 | - __ldrd X8_X10, X9_X11, r3, 24 |
| 620 | + __ldrd X8_X10, X9_X11, r3, 40 |
| 621 | __strd X8_X10, X9_X11, sp, 8 |
| 622 | - stmdb r0!, {X8_X10, X9_X11} |
| 623 | - ldm r3, {X4-X9_X11} |
| 624 | - stmdb r0!, {X4-X9_X11} |
| 625 | - |
| 626 | - // constants: x0-x3 |
| 627 | - adrl X3, .Lexpand_32byte_k |
| 628 | - ldm X3, {X0-X3} |
| 629 | + __strd X8_X10, X9_X11, sp, 56 |
| 630 | + ldm r3, {X0-X9_X11} |
| 631 | __strd X0, X1, sp, 16 |
| 632 | __strd X2, X3, sp, 24 |
| 633 | + __strd X4, X5, sp, 32 |
| 634 | + __strd X6, X7, sp, 40 |
| 635 | + __strd X8_X10, X9_X11, sp, 48 |
| 636 | |
| 637 | + beq 1f |
| 638 | _chacha 20 |
| 639 | |
| 640 | - add sp, #76 |
| 641 | +0: add sp, #76 |
| 642 | pop {r4-r11, pc} |
| 643 | -ENDPROC(chacha20_arm) |
| 644 | + |
| 645 | +1: _chacha 12 |
| 646 | + b 0b |
| 647 | +ENDPROC(chacha_doarm) |
| 648 | |
| 649 | /* |
| 650 | - * void hchacha20_arm(const u32 state[16], u32 out[8]); |
| 651 | + * void hchacha_block_arm(const u32 state[16], u32 out[8], int nrounds); |
| 652 | */ |
| 653 | -ENTRY(hchacha20_arm) |
| 654 | +ENTRY(hchacha_block_arm) |
| 655 | push {r1,r4-r11,lr} |
| 656 | |
| 657 | + cmp r2, #12 // ChaCha12 ? |
| 658 | + |
| 659 | mov r14, r0 |
| 660 | ldmia r14!, {r0-r11} // load x0-x11 |
| 661 | push {r10-r11} // store x10-x11 to stack |
| 662 | ldm r14, {r10-r12,r14} // load x12-x15 |
| 663 | sub sp, #8 |
| 664 | |
| 665 | + beq 1f |
| 666 | _chacha_permute 20 |
| 667 | |
| 668 | // Skip over (unused0-unused1, x10-x11) |
| 669 | - add sp, #16 |
| 670 | +0: add sp, #16 |
| 671 | |
| 672 | // Fix up rotations of x12-x15 |
| 673 | ror X12, X12, #drot |
| 674 | @@ -458,4 +454,7 @@ ENTRY(hchacha20_arm) |
| 675 | stm r4, {X0,X1,X2,X3,X12,X13,X14,X15} |
| 676 | |
| 677 | pop {r4-r11,pc} |
| 678 | -ENDPROC(hchacha20_arm) |
| 679 | + |
| 680 | +1: _chacha_permute 12 |
| 681 | + b 0b |
| 682 | +ENDPROC(hchacha_block_arm) |
| 683 | --- a/arch/arm64/crypto/chacha-neon-glue.c |
| 684 | +++ b/arch/arm64/crypto/chacha-neon-glue.c |
| 685 | @@ -1,5 +1,5 @@ |
| 686 | /* |
| 687 | - * ARM NEON accelerated ChaCha and XChaCha stream ciphers, |
| 688 | + * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers, |
| 689 | * including ChaCha20 (RFC7539) |
| 690 | * |
| 691 | * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org> |