/*
 * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions
 *
 * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Based on:
 * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
 *
 * Copyright (C) 2015 Martin Willi
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */
|  | 20 |  | 
|  | 21 | #include <crypto/algapi.h> | 
|  | 22 | #include <crypto/chacha20.h> | 
|  | 23 | #include <crypto/internal/skcipher.h> | 
|  | 24 | #include <linux/kernel.h> | 
|  | 25 | #include <linux/module.h> | 
|  | 26 |  | 
|  | 27 | #include <asm/hwcap.h> | 
|  | 28 | #include <asm/neon.h> | 
|  | 29 | #include <asm/simd.h> | 
|  | 30 |  | 
|  | 31 | asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src); | 
|  | 32 | asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src); | 
|  | 33 |  | 
|  | 34 | static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src, | 
|  | 35 | unsigned int bytes) | 
|  | 36 | { | 
|  | 37 | u8 buf[CHACHA20_BLOCK_SIZE]; | 
|  | 38 |  | 
|  | 39 | while (bytes >= CHACHA20_BLOCK_SIZE * 4) { | 
|  | 40 | chacha20_4block_xor_neon(state, dst, src); | 
|  | 41 | bytes -= CHACHA20_BLOCK_SIZE * 4; | 
|  | 42 | src += CHACHA20_BLOCK_SIZE * 4; | 
|  | 43 | dst += CHACHA20_BLOCK_SIZE * 4; | 
|  | 44 | state[12] += 4; | 
|  | 45 | } | 
|  | 46 | while (bytes >= CHACHA20_BLOCK_SIZE) { | 
|  | 47 | chacha20_block_xor_neon(state, dst, src); | 
|  | 48 | bytes -= CHACHA20_BLOCK_SIZE; | 
|  | 49 | src += CHACHA20_BLOCK_SIZE; | 
|  | 50 | dst += CHACHA20_BLOCK_SIZE; | 
|  | 51 | state[12]++; | 
|  | 52 | } | 
|  | 53 | if (bytes) { | 
|  | 54 | memcpy(buf, src, bytes); | 
|  | 55 | chacha20_block_xor_neon(state, buf, buf); | 
|  | 56 | memcpy(dst, buf, bytes); | 
|  | 57 | } | 
|  | 58 | } | 
|  | 59 |  | 
|  | 60 | static int chacha20_neon(struct skcipher_request *req) | 
|  | 61 | { | 
|  | 62 | struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); | 
|  | 63 | struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); | 
|  | 64 | struct skcipher_walk walk; | 
|  | 65 | u32 state[16]; | 
|  | 66 | int err; | 
|  | 67 |  | 
|  | 68 | if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd()) | 
|  | 69 | return crypto_chacha20_crypt(req); | 
|  | 70 |  | 
|  | 71 | err = skcipher_walk_virt(&walk, req, true); | 
|  | 72 |  | 
|  | 73 | crypto_chacha20_init(state, ctx, walk.iv); | 
|  | 74 |  | 
|  | 75 | kernel_neon_begin(); | 
|  | 76 | while (walk.nbytes > 0) { | 
|  | 77 | unsigned int nbytes = walk.nbytes; | 
|  | 78 |  | 
|  | 79 | if (nbytes < walk.total) | 
|  | 80 | nbytes = round_down(nbytes, walk.stride); | 
|  | 81 |  | 
|  | 82 | chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr, | 
|  | 83 | nbytes); | 
|  | 84 | err = skcipher_walk_done(&walk, walk.nbytes - nbytes); | 
|  | 85 | } | 
|  | 86 | kernel_neon_end(); | 
|  | 87 |  | 
|  | 88 | return err; | 
|  | 89 | } | 
|  | 90 |  | 
|  | 91 | static struct skcipher_alg alg = { | 
|  | 92 | .base.cra_name		= "chacha20", | 
|  | 93 | .base.cra_driver_name	= "chacha20-neon", | 
|  | 94 | .base.cra_priority	= 300, | 
|  | 95 | .base.cra_blocksize	= 1, | 
|  | 96 | .base.cra_ctxsize	= sizeof(struct chacha20_ctx), | 
|  | 97 | .base.cra_module	= THIS_MODULE, | 
|  | 98 |  | 
|  | 99 | .min_keysize		= CHACHA20_KEY_SIZE, | 
|  | 100 | .max_keysize		= CHACHA20_KEY_SIZE, | 
|  | 101 | .ivsize			= CHACHA20_IV_SIZE, | 
|  | 102 | .chunksize		= CHACHA20_BLOCK_SIZE, | 
|  | 103 | .walksize		= 4 * CHACHA20_BLOCK_SIZE, | 
|  | 104 | .setkey			= crypto_chacha20_setkey, | 
|  | 105 | .encrypt		= chacha20_neon, | 
|  | 106 | .decrypt		= chacha20_neon, | 
|  | 107 | }; | 
|  | 108 |  | 
|  | 109 | static int __init chacha20_simd_mod_init(void) | 
|  | 110 | { | 
|  | 111 | if (!(elf_hwcap & HWCAP_NEON)) | 
|  | 112 | return -ENODEV; | 
|  | 113 |  | 
|  | 114 | return crypto_register_skcipher(&alg); | 
|  | 115 | } | 
|  | 116 |  | 
|  | 117 | static void __exit chacha20_simd_mod_fini(void) | 
|  | 118 | { | 
|  | 119 | crypto_unregister_skcipher(&alg); | 
|  | 120 | } | 
|  | 121 |  | 
|  | 122 | module_init(chacha20_simd_mod_init); | 
|  | 123 | module_exit(chacha20_simd_mod_fini); | 
|  | 124 |  | 
|  | 125 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); | 
|  | 126 | MODULE_LICENSE("GPL v2"); | 
|  | 127 | MODULE_ALIAS_CRYPTO("chacha20"); |