From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 8 Nov 2019 13:22:14 +0100
Subject: [PATCH] crypto: arm/chacha - remove dependency on generic ChaCha
 driver

commit b36d8c09e710c71f6a9690b6586fea2d1c9e1e27 upstream.

Instead of falling back to the generic ChaCha skcipher driver for
non-SIMD cases, use a fast scalar implementation for ARM authored
by Eric Biggers. This removes the module dependency on chacha-generic
altogether, which also simplifies things when we expose the ChaCha
library interface from this module.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 arch/arm/crypto/Kconfig              |   4 +-
 arch/arm/crypto/Makefile             |   3 +-
 arch/arm/crypto/chacha-glue.c        | 304 +++++++++++++++++++++++++++
 arch/arm/crypto/chacha-neon-glue.c   | 202 ------------------
 arch/arm/crypto/chacha-scalar-core.S |  65 +++---
 arch/arm64/crypto/chacha-neon-glue.c |   2 +-
 6 files changed, 340 insertions(+), 240 deletions(-)
 create mode 100644 arch/arm/crypto/chacha-glue.c
 delete mode 100644 arch/arm/crypto/chacha-neon-glue.c

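The core of the change is the dispatch in the new chacha-glue.c below: the
scalar chacha_doarm() path is always available, and the NEON path is taken
only when crypto_simd_usable() allows it, so no fallback to the chacha-generic
skcipher is needed. A minimal user-space sketch of that control flow, using
hypothetical stub names in place of the kernel routines (chacha_doarm(),
chacha_doneon(), crypto_simd_usable(), kernel_neon_begin()/kernel_neon_end()):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical stand-ins for the kernel helpers; the bodies only
 * illustrate the control flow, not the actual ChaCha computation. */
static bool simd_usable(void)
{
	return false;		/* e.g. in softirq context, or NEON absent */
}

static void chacha_scalar(uint8_t *dst, const uint8_t *src, size_t len)
{
	memmove(dst, src, len);	/* placeholder for the scalar ARM core */
}

static void chacha_neon_path(uint8_t *dst, const uint8_t *src, size_t len)
{
	memmove(dst, src, len);	/* placeholder for the NEON core */
}

static void chacha_xor(uint8_t *dst, const uint8_t *src, size_t len)
{
	if (!simd_usable()) {
		/* Scalar code replaces the old chacha-generic fallback. */
		chacha_scalar(dst, src, len);
	} else {
		/* Kernel brackets this with kernel_neon_begin()/_end(). */
		chacha_neon_path(dst, src, len);
	}
}

int main(void)
{
	uint8_t buf[8] = { 0 };

	chacha_xor(buf, buf, sizeof(buf));
	return (int)buf[0];
}

Because the scalar path lives in the same module, the "select CRYPTO_CHACHA20"
in Kconfig and the module dependency on chacha-generic can be dropped, as the
hunks below do.
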
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -129,10 +129,8 @@ config CRYPTO_CRC32_ARM_CE
 	select CRYPTO_HASH
 
 config CRYPTO_CHACHA20_NEON
-	tristate "NEON accelerated ChaCha stream cipher algorithms"
-	depends on KERNEL_MODE_NEON
+	tristate "NEON and scalar accelerated ChaCha stream cipher algorithms"
 	select CRYPTO_BLKCIPHER
-	select CRYPTO_CHACHA20
 
 config CRYPTO_NHPOLY1305_NEON
 	tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)"
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -33,7 +33,8 @@ aes-arm-ce-y	:= aes-ce-core.o aes-ce-glu
 ghash-arm-ce-y	:= ghash-ce-core.o ghash-ce-glue.o
 crct10dif-arm-ce-y	:= crct10dif-ce-core.o crct10dif-ce-glue.o
 crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
-chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o
+chacha-neon-y := chacha-scalar-core.o chacha-glue.o
+chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o
 nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
 
 ifdef REGENERATE_ARM_CRYPTO
--- /dev/null
+++ b/arch/arm/crypto/chacha-glue.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
+ * including ChaCha20 (RFC7539)
+ *
+ * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2015 Martin Willi
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/chacha.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/cputype.h>
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+
+asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
+				      int nrounds);
+asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
+				       int nrounds);
+asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
+asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
+
+asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
+			     const u32 *state, int nrounds);
+
+static inline bool neon_usable(void)
+{
+	return crypto_simd_usable();
+}
+
+static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
+			  unsigned int bytes, int nrounds)
+{
+	u8 buf[CHACHA_BLOCK_SIZE];
+
+	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
+		chacha_4block_xor_neon(state, dst, src, nrounds);
+		bytes -= CHACHA_BLOCK_SIZE * 4;
+		src += CHACHA_BLOCK_SIZE * 4;
+		dst += CHACHA_BLOCK_SIZE * 4;
+		state[12] += 4;
+	}
+	while (bytes >= CHACHA_BLOCK_SIZE) {
+		chacha_block_xor_neon(state, dst, src, nrounds);
+		bytes -= CHACHA_BLOCK_SIZE;
+		src += CHACHA_BLOCK_SIZE;
+		dst += CHACHA_BLOCK_SIZE;
+		state[12]++;
+	}
+	if (bytes) {
+		memcpy(buf, src, bytes);
+		chacha_block_xor_neon(state, buf, buf, nrounds);
+		memcpy(dst, buf, bytes);
+	}
+}
+
+static int chacha_stream_xor(struct skcipher_request *req,
+			     const struct chacha_ctx *ctx, const u8 *iv,
+			     bool neon)
+{
+	struct skcipher_walk walk;
+	u32 state[16];
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	chacha_init_generic(state, ctx->key, iv);
+
+	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
+
+		if (nbytes < walk.total)
+			nbytes = round_down(nbytes, walk.stride);
+
+		if (!neon) {
+			chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
+				     nbytes, state, ctx->nrounds);
+			state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
+		} else {
+			kernel_neon_begin();
+			chacha_doneon(state, walk.dst.virt.addr,
+				      walk.src.virt.addr, nbytes, ctx->nrounds);
+			kernel_neon_end();
+		}
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+	}
+
+	return err;
+}
+
+static int do_chacha(struct skcipher_request *req, bool neon)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	return chacha_stream_xor(req, ctx, req->iv, neon);
+}
+
+static int chacha_arm(struct skcipher_request *req)
+{
+	return do_chacha(req, false);
+}
+
+static int chacha_neon(struct skcipher_request *req)
+{
+	return do_chacha(req, neon_usable());
+}
+
+static int do_xchacha(struct skcipher_request *req, bool neon)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct chacha_ctx subctx;
+	u32 state[16];
+	u8 real_iv[16];
+
+	chacha_init_generic(state, ctx->key, req->iv);
+
+	if (!neon) {
+		hchacha_block_arm(state, subctx.key, ctx->nrounds);
+	} else {
+		kernel_neon_begin();
+		hchacha_block_neon(state, subctx.key, ctx->nrounds);
+		kernel_neon_end();
+	}
+	subctx.nrounds = ctx->nrounds;
+
+	memcpy(&real_iv[0], req->iv + 24, 8);
+	memcpy(&real_iv[8], req->iv + 16, 8);
+	return chacha_stream_xor(req, &subctx, real_iv, neon);
+}
+
+static int xchacha_arm(struct skcipher_request *req)
+{
+	return do_xchacha(req, false);
+}
+
+static int xchacha_neon(struct skcipher_request *req)
+{
+	return do_xchacha(req, neon_usable());
+}
+
+static struct skcipher_alg arm_algs[] = {
+	{
+		.base.cra_name		= "chacha20",
+		.base.cra_driver_name	= "chacha20-arm",
+		.base.cra_priority	= 200,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= CHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.setkey			= chacha20_setkey,
+		.encrypt		= chacha_arm,
+		.decrypt		= chacha_arm,
+	}, {
+		.base.cra_name		= "xchacha20",
+		.base.cra_driver_name	= "xchacha20-arm",
+		.base.cra_priority	= 200,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= XCHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.setkey			= chacha20_setkey,
+		.encrypt		= xchacha_arm,
+		.decrypt		= xchacha_arm,
+	}, {
+		.base.cra_name		= "xchacha12",
+		.base.cra_driver_name	= "xchacha12-arm",
+		.base.cra_priority	= 200,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= XCHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.setkey			= chacha12_setkey,
+		.encrypt		= xchacha_arm,
+		.decrypt		= xchacha_arm,
+	},
+};
+
+static struct skcipher_alg neon_algs[] = {
+	{
+		.base.cra_name		= "chacha20",
+		.base.cra_driver_name	= "chacha20-neon",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= CHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.walksize		= 4 * CHACHA_BLOCK_SIZE,
+		.setkey			= chacha20_setkey,
+		.encrypt		= chacha_neon,
+		.decrypt		= chacha_neon,
+	}, {
+		.base.cra_name		= "xchacha20",
+		.base.cra_driver_name	= "xchacha20-neon",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= XCHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.walksize		= 4 * CHACHA_BLOCK_SIZE,
+		.setkey			= chacha20_setkey,
+		.encrypt		= xchacha_neon,
+		.decrypt		= xchacha_neon,
+	}, {
+		.base.cra_name		= "xchacha12",
+		.base.cra_driver_name	= "xchacha12-neon",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= XCHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.walksize		= 4 * CHACHA_BLOCK_SIZE,
+		.setkey			= chacha12_setkey,
+		.encrypt		= xchacha_neon,
+		.decrypt		= xchacha_neon,
+	}
+};
+
+static int __init chacha_simd_mod_init(void)
+{
+	int err;
+
+	err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
+	if (err)
+		return err;
+
+	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
+		int i;
+
+		switch (read_cpuid_part()) {
+		case ARM_CPU_PART_CORTEX_A7:
+		case ARM_CPU_PART_CORTEX_A5:
+			/*
+			 * The Cortex-A7 and Cortex-A5 do not perform well with
+			 * the NEON implementation but do incredibly with the
+			 * scalar one and use less power.
+			 */
+			for (i = 0; i < ARRAY_SIZE(neon_algs); i++)
+				neon_algs[i].base.cra_priority = 0;
+			break;
+		}
+
+		err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
+		if (err)
+			crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
+	}
+	return err;
+}
+
+static void __exit chacha_simd_mod_fini(void)
+{
+	crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
+	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON))
+		crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
+}
+
+module_init(chacha_simd_mod_init);
+module_exit(chacha_simd_mod_fini);
+
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-arm");
+MODULE_ALIAS_CRYPTO("xchacha20");
+MODULE_ALIAS_CRYPTO("xchacha20-arm");
+MODULE_ALIAS_CRYPTO("xchacha12");
+MODULE_ALIAS_CRYPTO("xchacha12-arm");
+#ifdef CONFIG_KERNEL_MODE_NEON
+MODULE_ALIAS_CRYPTO("chacha20-neon");
+MODULE_ALIAS_CRYPTO("xchacha20-neon");
+MODULE_ALIAS_CRYPTO("xchacha12-neon");
+#endif
--- a/arch/arm/crypto/chacha-neon-glue.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
- * including ChaCha20 (RFC7539)
- *
- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on:
- * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <crypto/algapi.h>
-#include <crypto/internal/chacha.h>
-#include <crypto/internal/simd.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-
-asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
-				      int nrounds);
-asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
-				       int nrounds);
-asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
-
-static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
-			  unsigned int bytes, int nrounds)
-{
-	u8 buf[CHACHA_BLOCK_SIZE];
-
-	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
-		chacha_4block_xor_neon(state, dst, src, nrounds);
-		bytes -= CHACHA_BLOCK_SIZE * 4;
-		src += CHACHA_BLOCK_SIZE * 4;
-		dst += CHACHA_BLOCK_SIZE * 4;
-		state[12] += 4;
-	}
-	while (bytes >= CHACHA_BLOCK_SIZE) {
-		chacha_block_xor_neon(state, dst, src, nrounds);
-		bytes -= CHACHA_BLOCK_SIZE;
-		src += CHACHA_BLOCK_SIZE;
-		dst += CHACHA_BLOCK_SIZE;
-		state[12]++;
-	}
-	if (bytes) {
-		memcpy(buf, src, bytes);
-		chacha_block_xor_neon(state, buf, buf, nrounds);
-		memcpy(dst, buf, bytes);
-	}
-}
-
-static int chacha_neon_stream_xor(struct skcipher_request *req,
-				  const struct chacha_ctx *ctx, const u8 *iv)
-{
-	struct skcipher_walk walk;
-	u32 state[16];
-	int err;
-
-	err = skcipher_walk_virt(&walk, req, false);
-
-	crypto_chacha_init(state, ctx, iv);
-
-	while (walk.nbytes > 0) {
-		unsigned int nbytes = walk.nbytes;
-
-		if (nbytes < walk.total)
-			nbytes = round_down(nbytes, walk.stride);
-
-		kernel_neon_begin();
-		chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
-			      nbytes, ctx->nrounds);
-		kernel_neon_end();
-		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
-	}
-
-	return err;
-}
-
-static int chacha_neon(struct skcipher_request *req)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
-
-	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
-		return crypto_chacha_crypt(req);
-
-	return chacha_neon_stream_xor(req, ctx, req->iv);
-}
-
-static int xchacha_neon(struct skcipher_request *req)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct chacha_ctx subctx;
-	u32 state[16];
-	u8 real_iv[16];
-
-	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
-		return crypto_xchacha_crypt(req);
-
-	crypto_chacha_init(state, ctx, req->iv);
-
-	kernel_neon_begin();
-	hchacha_block_neon(state, subctx.key, ctx->nrounds);
-	kernel_neon_end();
-	subctx.nrounds = ctx->nrounds;
-
-	memcpy(&real_iv[0], req->iv + 24, 8);
-	memcpy(&real_iv[8], req->iv + 16, 8);
-	return chacha_neon_stream_xor(req, &subctx, real_iv);
-}
-
-static struct skcipher_alg algs[] = {
-	{
-		.base.cra_name		= "chacha20",
-		.base.cra_driver_name	= "chacha20-neon",
-		.base.cra_priority	= 300,
-		.base.cra_blocksize	= 1,
-		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
-		.base.cra_module	= THIS_MODULE,
-
-		.min_keysize		= CHACHA_KEY_SIZE,
-		.max_keysize		= CHACHA_KEY_SIZE,
-		.ivsize			= CHACHA_IV_SIZE,
-		.chunksize		= CHACHA_BLOCK_SIZE,
-		.walksize		= 4 * CHACHA_BLOCK_SIZE,
-		.setkey			= crypto_chacha20_setkey,
-		.encrypt		= chacha_neon,
-		.decrypt		= chacha_neon,
-	}, {
-		.base.cra_name		= "xchacha20",
-		.base.cra_driver_name	= "xchacha20-neon",
-		.base.cra_priority	= 300,
-		.base.cra_blocksize	= 1,
-		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
-		.base.cra_module	= THIS_MODULE,
-
-		.min_keysize		= CHACHA_KEY_SIZE,
-		.max_keysize		= CHACHA_KEY_SIZE,
-		.ivsize			= XCHACHA_IV_SIZE,
-		.chunksize		= CHACHA_BLOCK_SIZE,
-		.walksize		= 4 * CHACHA_BLOCK_SIZE,
-		.setkey			= crypto_chacha20_setkey,
-		.encrypt		= xchacha_neon,
-		.decrypt		= xchacha_neon,
-	}, {
-		.base.cra_name		= "xchacha12",
-		.base.cra_driver_name	= "xchacha12-neon",
-		.base.cra_priority	= 300,
-		.base.cra_blocksize	= 1,
-		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
-		.base.cra_module	= THIS_MODULE,
-
-		.min_keysize		= CHACHA_KEY_SIZE,
-		.max_keysize		= CHACHA_KEY_SIZE,
-		.ivsize			= XCHACHA_IV_SIZE,
-		.chunksize		= CHACHA_BLOCK_SIZE,
-		.walksize		= 4 * CHACHA_BLOCK_SIZE,
-		.setkey			= crypto_chacha12_setkey,
-		.encrypt		= xchacha_neon,
-		.decrypt		= xchacha_neon,
-	}
-};
-
-static int __init chacha_simd_mod_init(void)
-{
-	if (!(elf_hwcap & HWCAP_NEON))
-		return -ENODEV;
-
-	return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
-}
-
-static void __exit chacha_simd_mod_fini(void)
-{
-	crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
-}
-
-module_init(chacha_simd_mod_init);
-module_exit(chacha_simd_mod_fini);
-
-MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)");
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("chacha20");
-MODULE_ALIAS_CRYPTO("chacha20-neon");
-MODULE_ALIAS_CRYPTO("xchacha20");
-MODULE_ALIAS_CRYPTO("xchacha20-neon");
-MODULE_ALIAS_CRYPTO("xchacha12");
-MODULE_ALIAS_CRYPTO("xchacha12-neon");
--- a/arch/arm/crypto/chacha-scalar-core.S
+++ b/arch/arm/crypto/chacha-scalar-core.S
@@ -41,14 +41,6 @@
 	X14	.req	r12
 	X15	.req	r14
 
-.Lexpand_32byte_k:
-	// "expand 32-byte k"
-	.word	0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
-
-#ifdef __thumb2__
-#  define adrl adr
-#endif
-
 .macro __rev		out, in,  t0, t1, t2
 .if __LINUX_ARM_ARCH__ >= 6
 	rev		\out, \in
@@ -391,61 +383,65 @@
 .endm	// _chacha
 
 /*
- * void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8],
- *		     const u32 iv[4]);
+ * void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
+ *		     const u32 *state, int nrounds);
  */
-ENTRY(chacha20_arm)
+ENTRY(chacha_doarm)
 	cmp		r2, #0			// len == 0?
 	reteq		lr
 
+	ldr		ip, [sp]
+	cmp		ip, #12
+
 	push		{r0-r2,r4-r11,lr}
 
 	// Push state x0-x15 onto stack.
 	// Also store an extra copy of x10-x11 just before the state.
 
-	ldr		r4, [sp, #48]		// iv
-	mov		r0, sp
-	sub		sp, #80
-
-	// iv: x12-x15
-	ldm		r4, {X12,X13,X14,X15}
-	stmdb		r0!, {X12,X13,X14,X15}
+	add		X12, r3, #48
+	ldm		X12, {X12,X13,X14,X15}
+	push		{X12,X13,X14,X15}
+	sub		sp, sp, #64
 
-	// key: x4-x11
-	__ldrd		X8_X10, X9_X11, r3, 24
+	__ldrd		X8_X10, X9_X11, r3, 40
 	__strd		X8_X10, X9_X11, sp, 8
-	stmdb		r0!, {X8_X10, X9_X11}
-	ldm		r3, {X4-X9_X11}
-	stmdb		r0!, {X4-X9_X11}
-
-	// constants: x0-x3
-	adrl		X3, .Lexpand_32byte_k
-	ldm		X3, {X0-X3}
+	__strd		X8_X10, X9_X11, sp, 56
+	ldm		r3, {X0-X9_X11}
 	__strd		X0, X1, sp, 16
 	__strd		X2, X3, sp, 24
+	__strd		X4, X5, sp, 32
+	__strd		X6, X7, sp, 40
+	__strd		X8_X10, X9_X11, sp, 48
 
+	beq		1f
 	_chacha		20
 
-	add		sp, #76
+0:	add		sp, #76
 	pop		{r4-r11, pc}
-ENDPROC(chacha20_arm)
+
+1:	_chacha		12
+	b		0b
+ENDPROC(chacha_doarm)
 
 /*
- * void hchacha20_arm(const u32 state[16], u32 out[8]);
+ * void hchacha_block_arm(const u32 state[16], u32 out[8], int nrounds);
  */
-ENTRY(hchacha20_arm)
+ENTRY(hchacha_block_arm)
 	push		{r1,r4-r11,lr}
 
+	cmp		r2, #12			// ChaCha12 ?
+
 	mov		r14, r0
 	ldmia		r14!, {r0-r11}		// load x0-x11
 	push		{r10-r11}		// store x10-x11 to stack
 	ldm		r14, {r10-r12,r14}	// load x12-x15
 	sub		sp, #8
 
+	beq		1f
 	_chacha_permute	20
 
 	// Skip over (unused0-unused1, x10-x11)
-	add		sp, #16
+0:	add		sp, #16
 
 	// Fix up rotations of x12-x15
 	ror		X12, X12, #drot
@@ -458,4 +454,7 @@ ENTRY(hchacha20_arm)
 	stm		r4, {X0,X1,X2,X3,X12,X13,X14,X15}
 
 	pop		{r4-r11,pc}
-ENDPROC(hchacha20_arm)
+
+1:	_chacha_permute	12
+	b		0b
+ENDPROC(hchacha_block_arm)
--- a/arch/arm64/crypto/chacha-neon-glue.c
+++ b/arch/arm64/crypto/chacha-neon-glue.c
@@ -1,5 +1,5 @@
 /*
- * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
+ * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers,
  * including ChaCha20 (RFC7539)
  *
  * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>