// SPDX-License-Identifier: GPL-2.0
/*
 * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
 * including ChaCha20 (RFC7539)
 *
 * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 * Copyright (C) 2015 Martin Willi
 */
| 9 | |
| 10 | #include <crypto/algapi.h> |
| 11 | #include <crypto/internal/chacha.h> |
| 12 | #include <crypto/internal/simd.h> |
| 13 | #include <crypto/internal/skcipher.h> |
| 14 | #include <linux/jump_label.h> |
| 15 | #include <linux/kernel.h> |
| 16 | #include <linux/module.h> |
| 17 | |
| 18 | #include <asm/cputype.h> |
| 19 | #include <asm/hwcap.h> |
| 20 | #include <asm/neon.h> |
| 21 | #include <asm/simd.h> |
| 22 | |
| 23 | asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src, |
| 24 | int nrounds); |
| 25 | asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src, |
| 26 | int nrounds, unsigned int nbytes); |
| 27 | asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds); |
| 28 | asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); |
| 29 | |
| 30 | asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, |
| 31 | const u32 *state, int nrounds); |
| 32 | |
| 33 | static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon); |
| 34 | |
| 35 | static inline bool neon_usable(void) |
| 36 | { |
| 37 | return static_branch_likely(&use_neon) && crypto_simd_usable(); |
| 38 | } |
| 39 | |
| 40 | static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, |
| 41 | unsigned int bytes, int nrounds) |
| 42 | { |
| 43 | u8 buf[CHACHA_BLOCK_SIZE]; |
| 44 | |
| 45 | while (bytes > CHACHA_BLOCK_SIZE) { |
| 46 | unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U); |
| 47 | |
| 48 | chacha_4block_xor_neon(state, dst, src, nrounds, l); |
| 49 | bytes -= l; |
| 50 | src += l; |
| 51 | dst += l; |
| 52 | state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE); |
| 53 | } |
| 54 | if (bytes) { |
| 55 | const u8 *s = src; |
| 56 | u8 *d = dst; |
| 57 | |
| 58 | if (bytes != CHACHA_BLOCK_SIZE) |
| 59 | s = d = memcpy(buf, src, bytes); |
| 60 | chacha_block_xor_neon(state, d, s, nrounds); |
| 61 | if (d != dst) |
| 62 | memcpy(dst, buf, bytes); |
| 63 | state[12]++; |
| 64 | } |
| 65 | } |
| 66 | |
| 67 | void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) |
| 68 | { |
| 69 | if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) { |
| 70 | hchacha_block_arm(state, stream, nrounds); |
| 71 | } else { |
| 72 | kernel_neon_begin(); |
| 73 | hchacha_block_neon(state, stream, nrounds); |
| 74 | kernel_neon_end(); |
| 75 | } |
| 76 | } |
| 77 | EXPORT_SYMBOL(hchacha_block_arch); |
| 78 | |
| 79 | void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) |
| 80 | { |
| 81 | chacha_init_generic(state, key, iv); |
| 82 | } |
| 83 | EXPORT_SYMBOL(chacha_init_arch); |
| 84 | |
| 85 | void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, |
| 86 | int nrounds) |
| 87 | { |
| 88 | if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() || |
| 89 | bytes <= CHACHA_BLOCK_SIZE) { |
| 90 | chacha_doarm(dst, src, bytes, state, nrounds); |
| 91 | state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE); |
| 92 | return; |
| 93 | } |
| 94 | |
| 95 | do { |
| 96 | unsigned int todo = min_t(unsigned int, bytes, SZ_4K); |
| 97 | |
| 98 | kernel_neon_begin(); |
| 99 | chacha_doneon(state, dst, src, todo, nrounds); |
| 100 | kernel_neon_end(); |
| 101 | |
| 102 | bytes -= todo; |
| 103 | src += todo; |
| 104 | dst += todo; |
| 105 | } while (bytes); |
| 106 | } |
| 107 | EXPORT_SYMBOL(chacha_crypt_arch); |
| 108 | |
| 109 | static int chacha_stream_xor(struct skcipher_request *req, |
| 110 | const struct chacha_ctx *ctx, const u8 *iv, |
| 111 | bool neon) |
| 112 | { |
| 113 | struct skcipher_walk walk; |
| 114 | u32 state[16]; |
| 115 | int err; |
| 116 | |
| 117 | err = skcipher_walk_virt(&walk, req, false); |
| 118 | |
| 119 | chacha_init_generic(state, ctx->key, iv); |
| 120 | |
| 121 | while (walk.nbytes > 0) { |
| 122 | unsigned int nbytes = walk.nbytes; |
| 123 | |
| 124 | if (nbytes < walk.total) |
| 125 | nbytes = round_down(nbytes, walk.stride); |
| 126 | |
| 127 | if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) { |
| 128 | chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr, |
| 129 | nbytes, state, ctx->nrounds); |
| 130 | state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE); |
| 131 | } else { |
| 132 | kernel_neon_begin(); |
| 133 | chacha_doneon(state, walk.dst.virt.addr, |
| 134 | walk.src.virt.addr, nbytes, ctx->nrounds); |
| 135 | kernel_neon_end(); |
| 136 | } |
| 137 | err = skcipher_walk_done(&walk, walk.nbytes - nbytes); |
| 138 | } |
| 139 | |
| 140 | return err; |
| 141 | } |
| 142 | |
| 143 | static int do_chacha(struct skcipher_request *req, bool neon) |
| 144 | { |
| 145 | struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
| 146 | struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); |
| 147 | |
| 148 | return chacha_stream_xor(req, ctx, req->iv, neon); |
| 149 | } |
| 150 | |
| 151 | static int chacha_arm(struct skcipher_request *req) |
| 152 | { |
| 153 | return do_chacha(req, false); |
| 154 | } |
| 155 | |
| 156 | static int chacha_neon(struct skcipher_request *req) |
| 157 | { |
| 158 | return do_chacha(req, neon_usable()); |
| 159 | } |
| 160 | |
| 161 | static int do_xchacha(struct skcipher_request *req, bool neon) |
| 162 | { |
| 163 | struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
| 164 | struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); |
| 165 | struct chacha_ctx subctx; |
| 166 | u32 state[16]; |
| 167 | u8 real_iv[16]; |
| 168 | |
| 169 | chacha_init_generic(state, ctx->key, req->iv); |
| 170 | |
| 171 | if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) { |
| 172 | hchacha_block_arm(state, subctx.key, ctx->nrounds); |
| 173 | } else { |
| 174 | kernel_neon_begin(); |
| 175 | hchacha_block_neon(state, subctx.key, ctx->nrounds); |
| 176 | kernel_neon_end(); |
| 177 | } |
| 178 | subctx.nrounds = ctx->nrounds; |
| 179 | |
| 180 | memcpy(&real_iv[0], req->iv + 24, 8); |
| 181 | memcpy(&real_iv[8], req->iv + 16, 8); |
| 182 | return chacha_stream_xor(req, &subctx, real_iv, neon); |
| 183 | } |
| 184 | |
| 185 | static int xchacha_arm(struct skcipher_request *req) |
| 186 | { |
| 187 | return do_xchacha(req, false); |
| 188 | } |
| 189 | |
| 190 | static int xchacha_neon(struct skcipher_request *req) |
| 191 | { |
| 192 | return do_xchacha(req, neon_usable()); |
| 193 | } |
| 194 | |
| 195 | static struct skcipher_alg arm_algs[] = { |
| 196 | { |
| 197 | .base.cra_name = "chacha20", |
| 198 | .base.cra_driver_name = "chacha20-arm", |
| 199 | .base.cra_priority = 200, |
| 200 | .base.cra_blocksize = 1, |
| 201 | .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| 202 | .base.cra_module = THIS_MODULE, |
| 203 | |
| 204 | .min_keysize = CHACHA_KEY_SIZE, |
| 205 | .max_keysize = CHACHA_KEY_SIZE, |
| 206 | .ivsize = CHACHA_IV_SIZE, |
| 207 | .chunksize = CHACHA_BLOCK_SIZE, |
| 208 | .setkey = chacha20_setkey, |
| 209 | .encrypt = chacha_arm, |
| 210 | .decrypt = chacha_arm, |
| 211 | }, { |
| 212 | .base.cra_name = "xchacha20", |
| 213 | .base.cra_driver_name = "xchacha20-arm", |
| 214 | .base.cra_priority = 200, |
| 215 | .base.cra_blocksize = 1, |
| 216 | .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| 217 | .base.cra_module = THIS_MODULE, |
| 218 | |
| 219 | .min_keysize = CHACHA_KEY_SIZE, |
| 220 | .max_keysize = CHACHA_KEY_SIZE, |
| 221 | .ivsize = XCHACHA_IV_SIZE, |
| 222 | .chunksize = CHACHA_BLOCK_SIZE, |
| 223 | .setkey = chacha20_setkey, |
| 224 | .encrypt = xchacha_arm, |
| 225 | .decrypt = xchacha_arm, |
| 226 | }, { |
| 227 | .base.cra_name = "xchacha12", |
| 228 | .base.cra_driver_name = "xchacha12-arm", |
| 229 | .base.cra_priority = 200, |
| 230 | .base.cra_blocksize = 1, |
| 231 | .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| 232 | .base.cra_module = THIS_MODULE, |
| 233 | |
| 234 | .min_keysize = CHACHA_KEY_SIZE, |
| 235 | .max_keysize = CHACHA_KEY_SIZE, |
| 236 | .ivsize = XCHACHA_IV_SIZE, |
| 237 | .chunksize = CHACHA_BLOCK_SIZE, |
| 238 | .setkey = chacha12_setkey, |
| 239 | .encrypt = xchacha_arm, |
| 240 | .decrypt = xchacha_arm, |
| 241 | }, |
| 242 | }; |
| 243 | |
| 244 | static struct skcipher_alg neon_algs[] = { |
| 245 | { |
| 246 | .base.cra_name = "chacha20", |
| 247 | .base.cra_driver_name = "chacha20-neon", |
| 248 | .base.cra_priority = 300, |
| 249 | .base.cra_blocksize = 1, |
| 250 | .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| 251 | .base.cra_module = THIS_MODULE, |
| 252 | |
| 253 | .min_keysize = CHACHA_KEY_SIZE, |
| 254 | .max_keysize = CHACHA_KEY_SIZE, |
| 255 | .ivsize = CHACHA_IV_SIZE, |
| 256 | .chunksize = CHACHA_BLOCK_SIZE, |
| 257 | .walksize = 4 * CHACHA_BLOCK_SIZE, |
| 258 | .setkey = chacha20_setkey, |
| 259 | .encrypt = chacha_neon, |
| 260 | .decrypt = chacha_neon, |
| 261 | }, { |
| 262 | .base.cra_name = "xchacha20", |
| 263 | .base.cra_driver_name = "xchacha20-neon", |
| 264 | .base.cra_priority = 300, |
| 265 | .base.cra_blocksize = 1, |
| 266 | .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| 267 | .base.cra_module = THIS_MODULE, |
| 268 | |
| 269 | .min_keysize = CHACHA_KEY_SIZE, |
| 270 | .max_keysize = CHACHA_KEY_SIZE, |
| 271 | .ivsize = XCHACHA_IV_SIZE, |
| 272 | .chunksize = CHACHA_BLOCK_SIZE, |
| 273 | .walksize = 4 * CHACHA_BLOCK_SIZE, |
| 274 | .setkey = chacha20_setkey, |
| 275 | .encrypt = xchacha_neon, |
| 276 | .decrypt = xchacha_neon, |
| 277 | }, { |
| 278 | .base.cra_name = "xchacha12", |
| 279 | .base.cra_driver_name = "xchacha12-neon", |
| 280 | .base.cra_priority = 300, |
| 281 | .base.cra_blocksize = 1, |
| 282 | .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| 283 | .base.cra_module = THIS_MODULE, |
| 284 | |
| 285 | .min_keysize = CHACHA_KEY_SIZE, |
| 286 | .max_keysize = CHACHA_KEY_SIZE, |
| 287 | .ivsize = XCHACHA_IV_SIZE, |
| 288 | .chunksize = CHACHA_BLOCK_SIZE, |
| 289 | .walksize = 4 * CHACHA_BLOCK_SIZE, |
| 290 | .setkey = chacha12_setkey, |
| 291 | .encrypt = xchacha_neon, |
| 292 | .decrypt = xchacha_neon, |
| 293 | } |
| 294 | }; |
| 295 | |
| 296 | static int __init chacha_simd_mod_init(void) |
| 297 | { |
| 298 | int err = 0; |
| 299 | |
| 300 | if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER)) { |
| 301 | err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); |
| 302 | if (err) |
| 303 | return err; |
| 304 | } |
| 305 | |
| 306 | if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) { |
| 307 | int i; |
| 308 | |
| 309 | switch (read_cpuid_part()) { |
| 310 | case ARM_CPU_PART_CORTEX_A7: |
| 311 | case ARM_CPU_PART_CORTEX_A5: |
| 312 | /* |
| 313 | * The Cortex-A7 and Cortex-A5 do not perform well with |
| 314 | * the NEON implementation but do incredibly with the |
| 315 | * scalar one and use less power. |
| 316 | */ |
| 317 | for (i = 0; i < ARRAY_SIZE(neon_algs); i++) |
| 318 | neon_algs[i].base.cra_priority = 0; |
| 319 | break; |
| 320 | default: |
| 321 | static_branch_enable(&use_neon); |
| 322 | } |
| 323 | |
| 324 | if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER)) { |
| 325 | err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); |
| 326 | if (err) |
| 327 | crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); |
| 328 | } |
| 329 | } |
| 330 | return err; |
| 331 | } |
| 332 | |
| 333 | static void __exit chacha_simd_mod_fini(void) |
| 334 | { |
| 335 | if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER)) { |
| 336 | crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); |
| 337 | if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) |
| 338 | crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); |
| 339 | } |
| 340 | } |
| 341 | |
| 342 | module_init(chacha_simd_mod_init); |
| 343 | module_exit(chacha_simd_mod_fini); |
| 344 | |
| 345 | MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)"); |
| 346 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); |
| 347 | MODULE_LICENSE("GPL v2"); |
| 348 | MODULE_ALIAS_CRYPTO("chacha20"); |
| 349 | MODULE_ALIAS_CRYPTO("chacha20-arm"); |
| 350 | MODULE_ALIAS_CRYPTO("xchacha20"); |
| 351 | MODULE_ALIAS_CRYPTO("xchacha20-arm"); |
| 352 | MODULE_ALIAS_CRYPTO("xchacha12"); |
| 353 | MODULE_ALIAS_CRYPTO("xchacha12-arm"); |
| 354 | #ifdef CONFIG_KERNEL_MODE_NEON |
| 355 | MODULE_ALIAS_CRYPTO("chacha20-neon"); |
| 356 | MODULE_ALIAS_CRYPTO("xchacha20-neon"); |
| 357 | MODULE_ALIAS_CRYPTO("xchacha12-neon"); |
| 358 | #endif |