b.liu | e958203 | 2025-04-17 19:18:16 +0800 | [diff] [blame] | 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 |
| 2 | From: Ard Biesheuvel <ardb@kernel.org> |
| 3 | Date: Fri, 8 Nov 2019 13:22:20 +0100 |
| 4 | Subject: [PATCH] crypto: x86/poly1305 - unify Poly1305 state struct with |
| 5 | generic code |
| 6 | |
| 7 | commit ad8f5b88383ea685f2b8df2a12ee3e08089a1287 upstream. |
| 8 | |
| 9 | In preparation for exposing a Poly1305 library interface directly from |
| 10 | the accelerated x86 driver, align the state descriptor of the x86 code |
| 11 | with the one used by the generic driver. This is needed to make the |
| 12 | library interface unified between all implementations. |
| 13 | |
| 14 | Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
| 15 | Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| 16 | Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> |
| 17 | --- |
| 18 | arch/x86/crypto/poly1305_glue.c | 88 ++++++++++-------------------- |
| 19 | crypto/poly1305_generic.c | 6 +- |
| 20 | include/crypto/internal/poly1305.h | 4 +- |
| 21 | include/crypto/poly1305.h | 18 +++--- |
| 22 | 4 files changed, 43 insertions(+), 73 deletions(-) |
| 23 | |
| 24 | --- a/arch/x86/crypto/poly1305_glue.c |
| 25 | +++ b/arch/x86/crypto/poly1305_glue.c |
| 26 | @@ -14,40 +14,14 @@ |
| 27 | #include <linux/module.h> |
| 28 | #include <asm/simd.h> |
| 29 | |
| 30 | -struct poly1305_simd_desc_ctx { |
| 31 | - struct poly1305_desc_ctx base; |
| 32 | - /* derived key u set? */ |
| 33 | - bool uset; |
| 34 | -#ifdef CONFIG_AS_AVX2 |
| 35 | - /* derived keys r^3, r^4 set? */ |
| 36 | - bool wset; |
| 37 | -#endif |
| 38 | - /* derived Poly1305 key r^2 */ |
| 39 | - u32 u[5]; |
| 40 | - /* ... silently appended r^3 and r^4 when using AVX2 */ |
| 41 | -}; |
| 42 | - |
| 43 | asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src, |
| 44 | const u32 *r, unsigned int blocks); |
| 45 | asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r, |
| 46 | unsigned int blocks, const u32 *u); |
| 47 | -#ifdef CONFIG_AS_AVX2 |
| 48 | asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r, |
| 49 | unsigned int blocks, const u32 *u); |
| 50 | -static bool poly1305_use_avx2; |
| 51 | -#endif |
| 52 | |
| 53 | -static int poly1305_simd_init(struct shash_desc *desc) |
| 54 | -{ |
| 55 | - struct poly1305_simd_desc_ctx *sctx = shash_desc_ctx(desc); |
| 56 | - |
| 57 | - sctx->uset = false; |
| 58 | -#ifdef CONFIG_AS_AVX2 |
| 59 | - sctx->wset = false; |
| 60 | -#endif |
| 61 | - |
| 62 | - return crypto_poly1305_init(desc); |
| 63 | -} |
| 64 | +static bool poly1305_use_avx2 __ro_after_init; |
| 65 | |
| 66 | static void poly1305_simd_mult(u32 *a, const u32 *b) |
| 67 | { |
| 68 | @@ -63,53 +37,49 @@ static void poly1305_simd_mult(u32 *a, c |
| 69 | static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx, |
| 70 | const u8 *src, unsigned int srclen) |
| 71 | { |
| 72 | - struct poly1305_simd_desc_ctx *sctx; |
| 73 | unsigned int blocks, datalen; |
| 74 | |
| 75 | - BUILD_BUG_ON(offsetof(struct poly1305_simd_desc_ctx, base)); |
| 76 | - sctx = container_of(dctx, struct poly1305_simd_desc_ctx, base); |
| 77 | - |
| 78 | if (unlikely(!dctx->sset)) { |
| 79 | datalen = crypto_poly1305_setdesckey(dctx, src, srclen); |
| 80 | src += srclen - datalen; |
| 81 | srclen = datalen; |
| 82 | } |
| 83 | |
| 84 | -#ifdef CONFIG_AS_AVX2 |
| 85 | - if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) { |
| 86 | - if (unlikely(!sctx->wset)) { |
| 87 | - if (!sctx->uset) { |
| 88 | - memcpy(sctx->u, dctx->r.r, sizeof(sctx->u)); |
| 89 | - poly1305_simd_mult(sctx->u, dctx->r.r); |
| 90 | - sctx->uset = true; |
| 91 | + if (IS_ENABLED(CONFIG_AS_AVX2) && |
| 92 | + poly1305_use_avx2 && |
| 93 | + srclen >= POLY1305_BLOCK_SIZE * 4) { |
| 94 | + if (unlikely(dctx->rset < 4)) { |
| 95 | + if (dctx->rset < 2) { |
| 96 | + dctx->r[1] = dctx->r[0]; |
| 97 | + poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r); |
| 98 | } |
| 99 | - memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u)); |
| 100 | - poly1305_simd_mult(sctx->u + 5, dctx->r.r); |
| 101 | - memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u)); |
| 102 | - poly1305_simd_mult(sctx->u + 10, dctx->r.r); |
| 103 | - sctx->wset = true; |
| 104 | + dctx->r[2] = dctx->r[1]; |
| 105 | + poly1305_simd_mult(dctx->r[2].r, dctx->r[0].r); |
| 106 | + dctx->r[3] = dctx->r[2]; |
| 107 | + poly1305_simd_mult(dctx->r[3].r, dctx->r[0].r); |
| 108 | + dctx->rset = 4; |
| 109 | } |
| 110 | blocks = srclen / (POLY1305_BLOCK_SIZE * 4); |
| 111 | - poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks, |
| 112 | - sctx->u); |
| 113 | + poly1305_4block_avx2(dctx->h.h, src, dctx->r[0].r, blocks, |
| 114 | + dctx->r[1].r); |
| 115 | src += POLY1305_BLOCK_SIZE * 4 * blocks; |
| 116 | srclen -= POLY1305_BLOCK_SIZE * 4 * blocks; |
| 117 | } |
| 118 | -#endif |
| 119 | + |
| 120 | if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) { |
| 121 | - if (unlikely(!sctx->uset)) { |
| 122 | - memcpy(sctx->u, dctx->r.r, sizeof(sctx->u)); |
| 123 | - poly1305_simd_mult(sctx->u, dctx->r.r); |
| 124 | - sctx->uset = true; |
| 125 | + if (unlikely(dctx->rset < 2)) { |
| 126 | + dctx->r[1] = dctx->r[0]; |
| 127 | + poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r); |
| 128 | + dctx->rset = 2; |
| 129 | } |
| 130 | blocks = srclen / (POLY1305_BLOCK_SIZE * 2); |
| 131 | - poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks, |
| 132 | - sctx->u); |
| 133 | + poly1305_2block_sse2(dctx->h.h, src, dctx->r[0].r, |
| 134 | + blocks, dctx->r[1].r); |
| 135 | src += POLY1305_BLOCK_SIZE * 2 * blocks; |
| 136 | srclen -= POLY1305_BLOCK_SIZE * 2 * blocks; |
| 137 | } |
| 138 | if (srclen >= POLY1305_BLOCK_SIZE) { |
| 139 | - poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1); |
| 140 | + poly1305_block_sse2(dctx->h.h, src, dctx->r[0].r, 1); |
| 141 | srclen -= POLY1305_BLOCK_SIZE; |
| 142 | } |
| 143 | return srclen; |
| 144 | @@ -159,10 +129,10 @@ static int poly1305_simd_update(struct s |
| 145 | |
| 146 | static struct shash_alg alg = { |
| 147 | .digestsize = POLY1305_DIGEST_SIZE, |
| 148 | - .init = poly1305_simd_init, |
| 149 | + .init = crypto_poly1305_init, |
| 150 | .update = poly1305_simd_update, |
| 151 | .final = crypto_poly1305_final, |
| 152 | - .descsize = sizeof(struct poly1305_simd_desc_ctx), |
| 153 | + .descsize = sizeof(struct poly1305_desc_ctx), |
| 154 | .base = { |
| 155 | .cra_name = "poly1305", |
| 156 | .cra_driver_name = "poly1305-simd", |
| 157 | @@ -177,14 +147,14 @@ static int __init poly1305_simd_mod_init |
| 158 | if (!boot_cpu_has(X86_FEATURE_XMM2)) |
| 159 | return -ENODEV; |
| 160 | |
| 161 | -#ifdef CONFIG_AS_AVX2 |
| 162 | - poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) && |
| 163 | + poly1305_use_avx2 = IS_ENABLED(CONFIG_AS_AVX2) && |
| 164 | + boot_cpu_has(X86_FEATURE_AVX) && |
| 165 | boot_cpu_has(X86_FEATURE_AVX2) && |
| 166 | cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); |
| 167 | - alg.descsize = sizeof(struct poly1305_simd_desc_ctx); |
| 168 | + alg.descsize = sizeof(struct poly1305_desc_ctx) + 5 * sizeof(u32); |
| 169 | if (poly1305_use_avx2) |
| 170 | alg.descsize += 10 * sizeof(u32); |
| 171 | -#endif |
| 172 | + |
| 173 | return crypto_register_shash(&alg); |
| 174 | } |
| 175 | |
| 176 | --- a/crypto/poly1305_generic.c |
| 177 | +++ b/crypto/poly1305_generic.c |
| 178 | @@ -25,7 +25,7 @@ int crypto_poly1305_init(struct shash_de |
| 179 | |
| 180 | poly1305_core_init(&dctx->h); |
| 181 | dctx->buflen = 0; |
| 182 | - dctx->rset = false; |
| 183 | + dctx->rset = 0; |
| 184 | dctx->sset = false; |
| 185 | |
| 186 | return 0; |
| 187 | @@ -43,7 +43,7 @@ static void poly1305_blocks(struct poly1 |
| 188 | srclen = datalen; |
| 189 | } |
| 190 | |
| 191 | - poly1305_core_blocks(&dctx->h, &dctx->r, src, |
| 192 | + poly1305_core_blocks(&dctx->h, dctx->r, src, |
| 193 | srclen / POLY1305_BLOCK_SIZE, 1); |
| 194 | } |
| 195 | |
| 196 | @@ -95,7 +95,7 @@ int crypto_poly1305_final(struct shash_d |
| 197 | dctx->buf[dctx->buflen++] = 1; |
| 198 | memset(dctx->buf + dctx->buflen, 0, |
| 199 | POLY1305_BLOCK_SIZE - dctx->buflen); |
| 200 | - poly1305_core_blocks(&dctx->h, &dctx->r, dctx->buf, 1, 0); |
| 201 | + poly1305_core_blocks(&dctx->h, dctx->r, dctx->buf, 1, 0); |
| 202 | } |
| 203 | |
| 204 | poly1305_core_emit(&dctx->h, digest); |
| 205 | --- a/include/crypto/internal/poly1305.h |
| 206 | +++ b/include/crypto/internal/poly1305.h |
| 207 | @@ -46,10 +46,10 @@ unsigned int crypto_poly1305_setdesckey( |
| 208 | { |
| 209 | if (!dctx->sset) { |
| 210 | if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) { |
| 211 | - poly1305_core_setkey(&dctx->r, src); |
| 212 | + poly1305_core_setkey(dctx->r, src); |
| 213 | src += POLY1305_BLOCK_SIZE; |
| 214 | srclen -= POLY1305_BLOCK_SIZE; |
| 215 | - dctx->rset = true; |
| 216 | + dctx->rset = 1; |
| 217 | } |
| 218 | if (srclen >= POLY1305_BLOCK_SIZE) { |
| 219 | dctx->s[0] = get_unaligned_le32(src + 0); |
| 220 | --- a/include/crypto/poly1305.h |
| 221 | +++ b/include/crypto/poly1305.h |
| 222 | @@ -22,20 +22,20 @@ struct poly1305_state { |
| 223 | }; |
| 224 | |
| 225 | struct poly1305_desc_ctx { |
| 226 | - /* key */ |
| 227 | - struct poly1305_key r; |
| 228 | - /* finalize key */ |
| 229 | - u32 s[4]; |
| 230 | - /* accumulator */ |
| 231 | - struct poly1305_state h; |
| 232 | /* partial buffer */ |
| 233 | u8 buf[POLY1305_BLOCK_SIZE]; |
| 234 | /* bytes used in partial buffer */ |
| 235 | unsigned int buflen; |
| 236 | - /* r key has been set */ |
| 237 | - bool rset; |
| 238 | - /* s key has been set */ |
| 239 | + /* how many keys have been set in r[] */ |
| 240 | + unsigned short rset; |
| 241 | + /* whether s[] has been set */ |
| 242 | bool sset; |
| 243 | + /* finalize key */ |
| 244 | + u32 s[4]; |
| 245 | + /* accumulator */ |
| 246 | + struct poly1305_state h; |
| 247 | + /* key */ |
| 248 | + struct poly1305_key r[1]; |
| 249 | }; |
| 250 | |
| 251 | #endif |