Blame - ap/lib/libssl/openssl-1.1.1o/crypto/ec/ec_mult.c - R306

blob: 9a1e3974ed9e298649ff2cc53abd4b582e24c057 [file] [log] [blame]

yuezonghe	824eb0c	2024-06-27 02:32:26 -0700	[diff] [blame]	1	/*
				2	* Copyright 2001-2020 The OpenSSL Project Authors. All Rights Reserved.
				3	* Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved
				4	*
				5	* Licensed under the OpenSSL license (the "License"). You may not use
				6	* this file except in compliance with the License. You can obtain a copy
				7	* in the file LICENSE in the source distribution or at
				8	* https://www.openssl.org/source/license.html
				9	*/
				10
				11	#include <string.h>
				12	#include <openssl/err.h>
				13
				14	#include "internal/cryptlib.h"
				15	#include "crypto/bn.h"
				16	#include "ec_local.h"
				17	#include "internal/refcount.h"
				18
				19	/*
				20	* This file implements the wNAF-based interleaving multi-exponentiation method
				21	* Formerly at:
				22	* http://www.informatik.tu-darmstadt.de/TI/Mitarbeiter/moeller.html#multiexp
				23	* You might now find it here:
				24	* http://link.springer.com/chapter/10.1007%2F3-540-45537-X_13
				25	* http://www.bmoeller.de/pdf/TI-01-08.multiexp.pdf
				26	* For multiplication with precomputation, we use wNAF splitting, formerly at:
				27	* http://www.informatik.tu-darmstadt.de/TI/Mitarbeiter/moeller.html#fastexp
				28	*/
				29
				30	/* structure for precomputed multiples of the generator */
				31	struct ec_pre_comp_st {
				32	const EC_GROUP group; / parent EC_GROUP object */
				33	size_t blocksize; /* block size for wNAF splitting */
				34	size_t numblocks; /* max. number of blocks for which we have
				35	* precomputation */
				36	size_t w; /* window size */
				37	EC_POINT *points; / array with pre-calculated multiples of
				38	* generator: 'num' pointers to EC_POINT
				39	* objects followed by a NULL */
				40	size_t num; /* numblocks * 2^(w-1) */
				41	CRYPTO_REF_COUNT references;
				42	CRYPTO_RWLOCK *lock;
				43	};
				44
				45	static EC_PRE_COMP ec_pre_comp_new(const EC_GROUP group)
				46	{
				47	EC_PRE_COMP *ret = NULL;
				48
				49	if (!group)
				50	return NULL;
				51
				52	ret = OPENSSL_zalloc(sizeof(*ret));
				53	if (ret == NULL) {
				54	ECerr(EC_F_EC_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
				55	return ret;
				56	}
				57
				58	ret->group = group;
				59	ret->blocksize = 8; /* default */
				60	ret->w = 4; /* default */
				61	ret->references = 1;
				62
				63	ret->lock = CRYPTO_THREAD_lock_new();
				64	if (ret->lock == NULL) {
				65	ECerr(EC_F_EC_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
				66	OPENSSL_free(ret);
				67	return NULL;
				68	}
				69	return ret;
				70	}
				71
				72	EC_PRE_COMP EC_ec_pre_comp_dup(EC_PRE_COMP pre)
				73	{
				74	int i;
				75	if (pre != NULL)
				76	CRYPTO_UP_REF(&pre->references, &i, pre->lock);
				77	return pre;
				78	}
				79
				80	void EC_ec_pre_comp_free(EC_PRE_COMP *pre)
				81	{
				82	int i;
				83
				84	if (pre == NULL)
				85	return;
				86
				87	CRYPTO_DOWN_REF(&pre->references, &i, pre->lock);
				88	REF_PRINT_COUNT("EC_ec", pre);
				89	if (i > 0)
				90	return;
				91	REF_ASSERT_ISNT(i < 0);
				92
				93	if (pre->points != NULL) {
				94	EC_POINT **pts;
				95
				96	for (pts = pre->points; *pts != NULL; pts++)
				97	EC_POINT_free(*pts);
				98	OPENSSL_free(pre->points);
				99	}
				100	CRYPTO_THREAD_lock_free(pre->lock);
				101	OPENSSL_free(pre);
				102	}
				103
				104	#define EC_POINT_BN_set_flags(P, flags) do { \
				105	BN_set_flags((P)->X, (flags)); \
				106	BN_set_flags((P)->Y, (flags)); \
				107	BN_set_flags((P)->Z, (flags)); \
				108	} while(0)
				109
				110	/*-
				111	* This functions computes a single point multiplication over the EC group,
				112	* using, at a high level, a Montgomery ladder with conditional swaps, with
				113	* various timing attack defenses.
				114	*
				115	* It performs either a fixed point multiplication
				116	* (scalar * generator)
				117	* when point is NULL, or a variable point multiplication
				118	* (scalar * point)
				119	* when point is not NULL.
				120	*
				121	* `scalar` cannot be NULL and should be in the range [0,n) otherwise all
				122	* constant time bets are off (where n is the cardinality of the EC group).
				123	*
				124	* This function expects `group->order` and `group->cardinality` to be well
				125	* defined and non-zero: it fails with an error code otherwise.
				126	*
				127	* NB: This says nothing about the constant-timeness of the ladder step
				128	* implementation (i.e., the default implementation is based on EC_POINT_add and
				129	* EC_POINT_dbl, which of course are not constant time themselves) or the
				130	* underlying multiprecision arithmetic.
				131	*
				132	* The product is stored in `r`.
				133	*
				134	* This is an internal function: callers are in charge of ensuring that the
				135	* input parameters `group`, `r`, `scalar` and `ctx` are not NULL.
				136	*
				137	* Returns 1 on success, 0 otherwise.
				138	*/
				139	int ec_scalar_mul_ladder(const EC_GROUP group, EC_POINT r,
				140	const BIGNUM scalar, const EC_POINT point,
				141	BN_CTX *ctx)
				142	{
				143	int i, cardinality_bits, group_top, kbit, pbit, Z_is_one;
				144	EC_POINT *p = NULL;
				145	EC_POINT *s = NULL;
				146	BIGNUM *k = NULL;
				147	BIGNUM *lambda = NULL;
				148	BIGNUM *cardinality = NULL;
				149	int ret = 0;
				150
				151	/* early exit if the input point is the point at infinity */
				152	if (point != NULL && EC_POINT_is_at_infinity(group, point))
				153	return EC_POINT_set_to_infinity(group, r);
				154
				155	if (BN_is_zero(group->order)) {
				156	ECerr(EC_F_EC_SCALAR_MUL_LADDER, EC_R_UNKNOWN_ORDER);
				157	return 0;
				158	}
				159	if (BN_is_zero(group->cofactor)) {
				160	ECerr(EC_F_EC_SCALAR_MUL_LADDER, EC_R_UNKNOWN_COFACTOR);
				161	return 0;
				162	}
				163
				164	BN_CTX_start(ctx);
				165
				166	if (((p = EC_POINT_new(group)) == NULL)
				167	\|\| ((s = EC_POINT_new(group)) == NULL)) {
				168	ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_MALLOC_FAILURE);
				169	goto err;
				170	}
				171
				172	if (point == NULL) {
				173	if (!EC_POINT_copy(p, group->generator)) {
				174	ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_EC_LIB);
				175	goto err;
				176	}
				177	} else {
				178	if (!EC_POINT_copy(p, point)) {
				179	ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_EC_LIB);
				180	goto err;
				181	}
				182	}
				183
				184	EC_POINT_BN_set_flags(p, BN_FLG_CONSTTIME);
				185	EC_POINT_BN_set_flags(r, BN_FLG_CONSTTIME);
				186	EC_POINT_BN_set_flags(s, BN_FLG_CONSTTIME);
				187
				188	cardinality = BN_CTX_get(ctx);
				189	lambda = BN_CTX_get(ctx);
				190	k = BN_CTX_get(ctx);
				191	if (k == NULL) {
				192	ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_MALLOC_FAILURE);
				193	goto err;
				194	}
				195
				196	if (!BN_mul(cardinality, group->order, group->cofactor, ctx)) {
				197	ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_BN_LIB);
				198	goto err;
				199	}
				200
				201	/*
				202	* Group cardinalities are often on a word boundary.
				203	* So when we pad the scalar, some timing diff might
				204	* pop if it needs to be expanded due to carries.
				205	* So expand ahead of time.
				206	*/
				207	cardinality_bits = BN_num_bits(cardinality);
				208	group_top = bn_get_top(cardinality);
				209	if ((bn_wexpand(k, group_top + 2) == NULL)
				210	\|\| (bn_wexpand(lambda, group_top + 2) == NULL)) {
				211	ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_BN_LIB);
				212	goto err;
				213	}
				214
				215	if (!BN_copy(k, scalar)) {
				216	ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_BN_LIB);
				217	goto err;
				218	}
				219
				220	BN_set_flags(k, BN_FLG_CONSTTIME);
				221
				222	if ((BN_num_bits(k) > cardinality_bits) \|\| (BN_is_negative(k))) {
				223	/*-
				224	* this is an unusual input, and we don't guarantee
				225	* constant-timeness
				226	*/
				227	if (!BN_nnmod(k, k, cardinality, ctx)) {
				228	ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_BN_LIB);
				229	goto err;
				230	}
				231	}
				232
				233	if (!BN_add(lambda, k, cardinality)) {
				234	ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_BN_LIB);
				235	goto err;
				236	}
				237	BN_set_flags(lambda, BN_FLG_CONSTTIME);
				238	if (!BN_add(k, lambda, cardinality)) {
				239	ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_BN_LIB);
				240	goto err;
				241	}
				242	/*
				243	* lambda := scalar + cardinality
				244	* k := scalar + 2*cardinality
				245	*/
				246	kbit = BN_is_bit_set(lambda, cardinality_bits);
				247	BN_consttime_swap(kbit, k, lambda, group_top + 2);
				248
				249	group_top = bn_get_top(group->field);
				250	if ((bn_wexpand(s->X, group_top) == NULL)
				251	\|\| (bn_wexpand(s->Y, group_top) == NULL)
				252	\|\| (bn_wexpand(s->Z, group_top) == NULL)
				253	\|\| (bn_wexpand(r->X, group_top) == NULL)
				254	\|\| (bn_wexpand(r->Y, group_top) == NULL)
				255	\|\| (bn_wexpand(r->Z, group_top) == NULL)
				256	\|\| (bn_wexpand(p->X, group_top) == NULL)
				257	\|\| (bn_wexpand(p->Y, group_top) == NULL)
				258	\|\| (bn_wexpand(p->Z, group_top) == NULL)) {
				259	ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_BN_LIB);
				260	goto err;
				261	}
				262
				263	/* ensure input point is in affine coords for ladder step efficiency */
				264	if (!p->Z_is_one && !EC_POINT_make_affine(group, p, ctx)) {
				265	ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_EC_LIB);
				266	goto err;
				267	}
				268
				269	/* Initialize the Montgomery ladder */
				270	if (!ec_point_ladder_pre(group, r, s, p, ctx)) {
				271	ECerr(EC_F_EC_SCALAR_MUL_LADDER, EC_R_LADDER_PRE_FAILURE);
				272	goto err;
				273	}
				274
				275	/* top bit is a 1, in a fixed pos */
				276	pbit = 1;
				277
				278	#define EC_POINT_CSWAP(c, a, b, w, t) do { \
				279	BN_consttime_swap(c, (a)->X, (b)->X, w); \
				280	BN_consttime_swap(c, (a)->Y, (b)->Y, w); \
				281	BN_consttime_swap(c, (a)->Z, (b)->Z, w); \
				282	t = ((a)->Z_is_one ^ (b)->Z_is_one) & (c); \
				283	(a)->Z_is_one ^= (t); \
				284	(b)->Z_is_one ^= (t); \
				285	} while(0)
				286
				287	/*-
				288	* The ladder step, with branches, is
				289	*
				290	* k[i] == 0: S = add(R, S), R = dbl(R)
				291	* k[i] == 1: R = add(S, R), S = dbl(S)
				292	*
				293	* Swapping R, S conditionally on k[i] leaves you with state
				294	*
				295	* k[i] == 0: T, U = R, S
				296	* k[i] == 1: T, U = S, R
				297	*
				298	* Then perform the ECC ops.
				299	*
				300	* U = add(T, U)
				301	* T = dbl(T)
				302	*
				303	* Which leaves you with state
				304	*
				305	* k[i] == 0: U = add(R, S), T = dbl(R)
				306	* k[i] == 1: U = add(S, R), T = dbl(S)
				307	*
				308	* Swapping T, U conditionally on k[i] leaves you with state
				309	*
				310	* k[i] == 0: R, S = T, U
				311	* k[i] == 1: R, S = U, T
				312	*
				313	* Which leaves you with state
				314	*
				315	* k[i] == 0: S = add(R, S), R = dbl(R)
				316	* k[i] == 1: R = add(S, R), S = dbl(S)
				317	*
				318	* So we get the same logic, but instead of a branch it's a
				319	* conditional swap, followed by ECC ops, then another conditional swap.
				320	*
				321	* Optimization: The end of iteration i and start of i-1 looks like
				322	*
				323	* ...
				324	* CSWAP(k[i], R, S)
				325	* ECC
				326	* CSWAP(k[i], R, S)
				327	* (next iteration)
				328	* CSWAP(k[i-1], R, S)
				329	* ECC
				330	* CSWAP(k[i-1], R, S)
				331	* ...
				332	*
				333	* So instead of two contiguous swaps, you can merge the condition
				334	* bits and do a single swap.
				335	*
				336	* k[i] k[i-1] Outcome
				337	* 0 0 No Swap
				338	* 0 1 Swap
				339	* 1 0 Swap
				340	* 1 1 No Swap
				341	*
				342	* This is XOR. pbit tracks the previous bit of k.
				343	*/
				344
				345	for (i = cardinality_bits - 1; i >= 0; i--) {
				346	kbit = BN_is_bit_set(k, i) ^ pbit;
				347	EC_POINT_CSWAP(kbit, r, s, group_top, Z_is_one);
				348
				349	/* Perform a single step of the Montgomery ladder */
				350	if (!ec_point_ladder_step(group, r, s, p, ctx)) {
				351	ECerr(EC_F_EC_SCALAR_MUL_LADDER, EC_R_LADDER_STEP_FAILURE);
				352	goto err;
				353	}
				354	/*
				355	* pbit logic merges this cswap with that of the
				356	* next iteration
				357	*/
				358	pbit ^= kbit;
				359	}
				360	/* one final cswap to move the right value into r */
				361	EC_POINT_CSWAP(pbit, r, s, group_top, Z_is_one);
				362	#undef EC_POINT_CSWAP
				363
				364	/* Finalize ladder (and recover full point coordinates) */
				365	if (!ec_point_ladder_post(group, r, s, p, ctx)) {
				366	ECerr(EC_F_EC_SCALAR_MUL_LADDER, EC_R_LADDER_POST_FAILURE);
				367	goto err;
				368	}
				369
				370	ret = 1;
				371
				372	err:
				373	EC_POINT_free(p);
				374	EC_POINT_clear_free(s);
				375	BN_CTX_end(ctx);
				376
				377	return ret;
				378	}
				379
				380	#undef EC_POINT_BN_set_flags
				381
				382	/*
				383	* TODO: table should be optimised for the wNAF-based implementation,
				384	* sometimes smaller windows will give better performance (thus the
				385	* boundaries should be increased)
				386	*/
				387	#define EC_window_bits_for_scalar_size(b) \
				388	((size_t) \
				389	((b) >= 2000 ? 6 : \
				390	(b) >= 800 ? 5 : \
				391	(b) >= 300 ? 4 : \
				392	(b) >= 70 ? 3 : \
				393	(b) >= 20 ? 2 : \
				394	1))
				395
				396	/*-
				397	* Compute
				398	* \sum scalars[i]*points[i],
				399	* also including
				400	* scalar*generator
				401	* in the addition if scalar != NULL
				402	*/
				403	int ec_wNAF_mul(const EC_GROUP group, EC_POINT r, const BIGNUM *scalar,
				404	size_t num, const EC_POINT points[], const BIGNUM scalars[],
				405	BN_CTX *ctx)
				406	{
				407	const EC_POINT *generator = NULL;
				408	EC_POINT *tmp = NULL;
				409	size_t totalnum;
				410	size_t blocksize = 0, numblocks = 0; /* for wNAF splitting */
				411	size_t pre_points_per_block = 0;
				412	size_t i, j;
				413	int k;
				414	int r_is_inverted = 0;
				415	int r_is_at_infinity = 1;
				416	size_t wsize = NULL; / individual window sizes */
				417	signed char *wNAF = NULL; / individual wNAFs */
				418	size_t *wNAF_len = NULL;
				419	size_t max_len = 0;
				420	size_t num_val;
				421	EC_POINT *val = NULL; / precomputation */
				422	EC_POINT **v;
				423	EC_POINT **val_sub = NULL; / pointers to sub-arrays of 'val' or
				424	* 'pre_comp->points' */
				425	const EC_PRE_COMP *pre_comp = NULL;
				426	int num_scalar = 0; /* flag: will be set to 1 if 'scalar' must be
				427	* treated like other scalars, i.e.
				428	* precomputation is not available */
				429	int ret = 0;
				430
				431	if (!BN_is_zero(group->order) && !BN_is_zero(group->cofactor)) {
				432	/*-
				433	* Handle the common cases where the scalar is secret, enforcing a
				434	* scalar multiplication implementation based on a Montgomery ladder,
				435	* with various timing attack defenses.
				436	*/
				437	if ((scalar != group->order) && (scalar != NULL) && (num == 0)) {
				438	/*-
				439	* In this case we want to compute scalar * GeneratorPoint: this
				440	* codepath is reached most prominently by (ephemeral) key
				441	* generation of EC cryptosystems (i.e. ECDSA keygen and sign setup,
				442	* ECDH keygen/first half), where the scalar is always secret. This
				443	* is why we ignore if BN_FLG_CONSTTIME is actually set and we
				444	* always call the ladder version.
				445	*/
				446	return ec_scalar_mul_ladder(group, r, scalar, NULL, ctx);
				447	}
				448	if ((scalar == NULL) && (num == 1) && (scalars[0] != group->order)) {
				449	/*-
				450	* In this case we want to compute scalar * VariablePoint: this
				451	* codepath is reached most prominently by the second half of ECDH,
				452	* where the secret scalar is multiplied by the peer's public point.
				453	* To protect the secret scalar, we ignore if BN_FLG_CONSTTIME is
				454	* actually set and we always call the ladder version.
				455	*/
				456	return ec_scalar_mul_ladder(group, r, scalars[0], points[0], ctx);
				457	}
				458	}
				459
				460	if (scalar != NULL) {
				461	generator = EC_GROUP_get0_generator(group);
				462	if (generator == NULL) {
				463	ECerr(EC_F_EC_WNAF_MUL, EC_R_UNDEFINED_GENERATOR);
				464	goto err;
				465	}
				466
				467	/* look if we can use precomputed multiples of generator */
				468
				469	pre_comp = group->pre_comp.ec;
				470	if (pre_comp && pre_comp->numblocks
				471	&& (EC_POINT_cmp(group, generator, pre_comp->points[0], ctx) ==
				472	0)) {
				473	blocksize = pre_comp->blocksize;
				474
				475	/*
				476	* determine maximum number of blocks that wNAF splitting may
				477	* yield (NB: maximum wNAF length is bit length plus one)
				478	*/
				479	numblocks = (BN_num_bits(scalar) / blocksize) + 1;
				480
				481	/*
				482	* we cannot use more blocks than we have precomputation for
				483	*/
				484	if (numblocks > pre_comp->numblocks)
				485	numblocks = pre_comp->numblocks;
				486
				487	pre_points_per_block = (size_t)1 << (pre_comp->w - 1);
				488
				489	/* check that pre_comp looks sane */
				490	if (pre_comp->num != (pre_comp->numblocks * pre_points_per_block)) {
				491	ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR);
				492	goto err;
				493	}
				494	} else {
				495	/* can't use precomputation */
				496	pre_comp = NULL;
				497	numblocks = 1;
				498	num_scalar = 1; /* treat 'scalar' like 'num'-th element of
				499	* 'scalars' */
				500	}
				501	}
				502
				503	totalnum = num + numblocks;
				504
				505	wsize = OPENSSL_malloc(totalnum * sizeof(wsize[0]));
				506	wNAF_len = OPENSSL_malloc(totalnum * sizeof(wNAF_len[0]));
				507	/* include space for pivot */
				508	wNAF = OPENSSL_malloc((totalnum + 1) * sizeof(wNAF[0]));
				509	val_sub = OPENSSL_malloc(totalnum * sizeof(val_sub[0]));
				510
				511	/* Ensure wNAF is initialised in case we end up going to err */
				512	if (wNAF != NULL)
				513	wNAF[0] = NULL; /* preliminary pivot */
				514
				515	if (wsize == NULL \|\| wNAF_len == NULL \|\| wNAF == NULL \|\| val_sub == NULL) {
				516	ECerr(EC_F_EC_WNAF_MUL, ERR_R_MALLOC_FAILURE);
				517	goto err;
				518	}
				519
				520	/*
				521	* num_val will be the total number of temporarily precomputed points
				522	*/
				523	num_val = 0;
				524
				525	for (i = 0; i < num + num_scalar; i++) {
				526	size_t bits;
				527
				528	bits = i < num ? BN_num_bits(scalars[i]) : BN_num_bits(scalar);
				529	wsize[i] = EC_window_bits_for_scalar_size(bits);
				530	num_val += (size_t)1 << (wsize[i] - 1);
				531	wNAF[i + 1] = NULL; /* make sure we always have a pivot */
				532	wNAF[i] =
				533	bn_compute_wNAF((i < num ? scalars[i] : scalar), wsize[i],
				534	&wNAF_len[i]);
				535	if (wNAF[i] == NULL)
				536	goto err;
				537	if (wNAF_len[i] > max_len)
				538	max_len = wNAF_len[i];
				539	}
				540
				541	if (numblocks) {
				542	/* we go here iff scalar != NULL */
				543
				544	if (pre_comp == NULL) {
				545	if (num_scalar != 1) {
				546	ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR);
				547	goto err;
				548	}
				549	/* we have already generated a wNAF for 'scalar' */
				550	} else {
				551	signed char *tmp_wNAF = NULL;
				552	size_t tmp_len = 0;
				553
				554	if (num_scalar != 0) {
				555	ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR);
				556	goto err;
				557	}
				558
				559	/*
				560	* use the window size for which we have precomputation
				561	*/
				562	wsize[num] = pre_comp->w;
				563	tmp_wNAF = bn_compute_wNAF(scalar, wsize[num], &tmp_len);
				564	if (!tmp_wNAF)
				565	goto err;
				566
				567	if (tmp_len <= max_len) {
				568	/*
				569	* One of the other wNAFs is at least as long as the wNAF
				570	* belonging to the generator, so wNAF splitting will not buy
				571	* us anything.
				572	*/
				573
				574	numblocks = 1;
				575	totalnum = num + 1; /* don't use wNAF splitting */
				576	wNAF[num] = tmp_wNAF;
				577	wNAF[num + 1] = NULL;
				578	wNAF_len[num] = tmp_len;
				579	/*
				580	* pre_comp->points starts with the points that we need here:
				581	*/
				582	val_sub[num] = pre_comp->points;
				583	} else {
				584	/*
				585	* don't include tmp_wNAF directly into wNAF array - use wNAF
				586	* splitting and include the blocks
				587	*/
				588
				589	signed char *pp;
				590	EC_POINT **tmp_points;
				591
				592	if (tmp_len < numblocks * blocksize) {
				593	/*
				594	* possibly we can do with fewer blocks than estimated
				595	*/
				596	numblocks = (tmp_len + blocksize - 1) / blocksize;
				597	if (numblocks > pre_comp->numblocks) {
				598	ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR);
				599	OPENSSL_free(tmp_wNAF);
				600	goto err;
				601	}
				602	totalnum = num + numblocks;
				603	}
				604
				605	/* split wNAF in 'numblocks' parts */
				606	pp = tmp_wNAF;
				607	tmp_points = pre_comp->points;
				608
				609	for (i = num; i < totalnum; i++) {
				610	if (i < totalnum - 1) {
				611	wNAF_len[i] = blocksize;
				612	if (tmp_len < blocksize) {
				613	ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR);
				614	OPENSSL_free(tmp_wNAF);
				615	goto err;
				616	}
				617	tmp_len -= blocksize;
				618	} else
				619	/*
				620	* last block gets whatever is left (this could be
				621	* more or less than 'blocksize'!)
				622	*/
				623	wNAF_len[i] = tmp_len;
				624
				625	wNAF[i + 1] = NULL;
				626	wNAF[i] = OPENSSL_malloc(wNAF_len[i]);
				627	if (wNAF[i] == NULL) {
				628	ECerr(EC_F_EC_WNAF_MUL, ERR_R_MALLOC_FAILURE);
				629	OPENSSL_free(tmp_wNAF);
				630	goto err;
				631	}
				632	memcpy(wNAF[i], pp, wNAF_len[i]);
				633	if (wNAF_len[i] > max_len)
				634	max_len = wNAF_len[i];
				635
				636	if (*tmp_points == NULL) {
				637	ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR);
				638	OPENSSL_free(tmp_wNAF);
				639	goto err;
				640	}
				641	val_sub[i] = tmp_points;
				642	tmp_points += pre_points_per_block;
				643	pp += blocksize;
				644	}
				645	OPENSSL_free(tmp_wNAF);
				646	}
				647	}
				648	}
				649
				650	/*
				651	* All points we precompute now go into a single array 'val'.
				652	* 'val_sub[i]' is a pointer to the subarray for the i-th point, or to a
				653	* subarray of 'pre_comp->points' if we already have precomputation.
				654	*/
				655	val = OPENSSL_malloc((num_val + 1) * sizeof(val[0]));
				656	if (val == NULL) {
				657	ECerr(EC_F_EC_WNAF_MUL, ERR_R_MALLOC_FAILURE);
				658	goto err;
				659	}
				660	val[num_val] = NULL; /* pivot element */
				661
				662	/* allocate points for precomputation */
				663	v = val;
				664	for (i = 0; i < num + num_scalar; i++) {
				665	val_sub[i] = v;
				666	for (j = 0; j < ((size_t)1 << (wsize[i] - 1)); j++) {
				667	*v = EC_POINT_new(group);
				668	if (*v == NULL)
				669	goto err;
				670	v++;
				671	}
				672	}
				673	if (!(v == val + num_val)) {
				674	ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR);
				675	goto err;
				676	}
				677
				678	if ((tmp = EC_POINT_new(group)) == NULL)
				679	goto err;
				680
				681	/*-
				682	* prepare precomputed values:
				683	* val_sub[i][0] := points[i]
				684	* val_sub[i][1] := 3 * points[i]
				685	* val_sub[i][2] := 5 * points[i]
				686	* ...
				687	*/
				688	for (i = 0; i < num + num_scalar; i++) {
				689	if (i < num) {
				690	if (!EC_POINT_copy(val_sub[i][0], points[i]))
				691	goto err;
				692	} else {
				693	if (!EC_POINT_copy(val_sub[i][0], generator))
				694	goto err;
				695	}
				696
				697	if (wsize[i] > 1) {
				698	if (!EC_POINT_dbl(group, tmp, val_sub[i][0], ctx))
				699	goto err;
				700	for (j = 1; j < ((size_t)1 << (wsize[i] - 1)); j++) {
				701	if (!EC_POINT_add
				702	(group, val_sub[i][j], val_sub[i][j - 1], tmp, ctx))
				703	goto err;
				704	}
				705	}
				706	}
				707
				708	if (!EC_POINTs_make_affine(group, num_val, val, ctx))
				709	goto err;
				710
				711	r_is_at_infinity = 1;
				712
				713	for (k = max_len - 1; k >= 0; k--) {
				714	if (!r_is_at_infinity) {
				715	if (!EC_POINT_dbl(group, r, r, ctx))
				716	goto err;
				717	}
				718
				719	for (i = 0; i < totalnum; i++) {
				720	if (wNAF_len[i] > (size_t)k) {
				721	int digit = wNAF[i][k];
				722	int is_neg;
				723
				724	if (digit) {
				725	is_neg = digit < 0;
				726
				727	if (is_neg)
				728	digit = -digit;
				729
				730	if (is_neg != r_is_inverted) {
				731	if (!r_is_at_infinity) {
				732	if (!EC_POINT_invert(group, r, ctx))
				733	goto err;
				734	}
				735	r_is_inverted = !r_is_inverted;
				736	}
				737
				738	/* digit > 0 */
				739
				740	if (r_is_at_infinity) {
				741	if (!EC_POINT_copy(r, val_sub[i][digit >> 1]))
				742	goto err;
				743
				744	/*-
				745	* Apply coordinate blinding for EC_POINT.
				746	*
				747	* The underlying EC_METHOD can optionally implement this function:
				748	* ec_point_blind_coordinates() returns 0 in case of errors or 1 on
				749	* success or if coordinate blinding is not implemented for this
				750	* group.
				751	*/
				752	if (!ec_point_blind_coordinates(group, r, ctx)) {
				753	ECerr(EC_F_EC_WNAF_MUL, EC_R_POINT_COORDINATES_BLIND_FAILURE);
				754	goto err;
				755	}
				756
				757	r_is_at_infinity = 0;
				758	} else {
				759	if (!EC_POINT_add
				760	(group, r, r, val_sub[i][digit >> 1], ctx))
				761	goto err;
				762	}
				763	}
				764	}
				765	}
				766	}
				767
				768	if (r_is_at_infinity) {
				769	if (!EC_POINT_set_to_infinity(group, r))
				770	goto err;
				771	} else {
				772	if (r_is_inverted)
				773	if (!EC_POINT_invert(group, r, ctx))
				774	goto err;
				775	}
				776
				777	ret = 1;
				778
				779	err:
				780	EC_POINT_free(tmp);
				781	OPENSSL_free(wsize);
				782	OPENSSL_free(wNAF_len);
				783	if (wNAF != NULL) {
				784	signed char **w;
				785
				786	for (w = wNAF; *w != NULL; w++)
				787	OPENSSL_free(*w);
				788
				789	OPENSSL_free(wNAF);
				790	}
				791	if (val != NULL) {
				792	for (v = val; *v != NULL; v++)
				793	EC_POINT_clear_free(*v);
				794
				795	OPENSSL_free(val);
				796	}
				797	OPENSSL_free(val_sub);
				798	return ret;
				799	}
				800
				801	/*-
				802	* ec_wNAF_precompute_mult()
				803	* creates an EC_PRE_COMP object with preprecomputed multiples of the generator
				804	* for use with wNAF splitting as implemented in ec_wNAF_mul().
				805	*
				806	* 'pre_comp->points' is an array of multiples of the generator
				807	* of the following form:
				808	* points[0] = generator;
				809	* points[1] = 3 * generator;
				810	* ...
				811	* points[2^(w-1)-1] = (2^(w-1)-1) * generator;
				812	* points[2^(w-1)] = 2^blocksize * generator;
				813	* points[2^(w-1)+1] = 3 * 2^blocksize * generator;
				814	* ...
				815	* points[2^(w-1)(numblocks-1)-1] = (2^(w-1)) 2^(blocksize(numblocks-2)) generator
				816	* points[2^(w-1)(numblocks-1)] = 2^(blocksize(numblocks-1)) * generator
				817	* ...
				818	* points[2^(w-1)numblocks-1] = (2^(w-1)) 2^(blocksize(numblocks-1)) generator
				819	* points[2^(w-1)*numblocks] = NULL
				820	*/
				821	int ec_wNAF_precompute_mult(EC_GROUP group, BN_CTX ctx)
				822	{
				823	const EC_POINT *generator;
				824	EC_POINT tmp_point = NULL, base = NULL, **var;
				825	BN_CTX *new_ctx = NULL;
				826	const BIGNUM *order;
				827	size_t i, bits, w, pre_points_per_block, blocksize, numblocks, num;
				828	EC_POINT **points = NULL;
				829	EC_PRE_COMP *pre_comp;
				830	int ret = 0;
				831
				832	/* if there is an old EC_PRE_COMP object, throw it away */
				833	EC_pre_comp_free(group);
				834	if ((pre_comp = ec_pre_comp_new(group)) == NULL)
				835	return 0;
				836
				837	generator = EC_GROUP_get0_generator(group);
				838	if (generator == NULL) {
				839	ECerr(EC_F_EC_WNAF_PRECOMPUTE_MULT, EC_R_UNDEFINED_GENERATOR);
				840	goto err;
				841	}
				842
				843	if (ctx == NULL) {
				844	ctx = new_ctx = BN_CTX_new();
				845	if (ctx == NULL)
				846	goto err;
				847	}
				848
				849	BN_CTX_start(ctx);
				850
				851	order = EC_GROUP_get0_order(group);
				852	if (order == NULL)
				853	goto err;
				854	if (BN_is_zero(order)) {
				855	ECerr(EC_F_EC_WNAF_PRECOMPUTE_MULT, EC_R_UNKNOWN_ORDER);
				856	goto err;
				857	}
				858
				859	bits = BN_num_bits(order);
				860	/*
				861	* The following parameters mean we precompute (approximately) one point
				862	* per bit. TBD: The combination 8, 4 is perfect for 160 bits; for other
				863	* bit lengths, other parameter combinations might provide better
				864	* efficiency.
				865	*/
				866	blocksize = 8;
				867	w = 4;
				868	if (EC_window_bits_for_scalar_size(bits) > w) {
				869	/* let's not make the window too small ... */
				870	w = EC_window_bits_for_scalar_size(bits);
				871	}
				872
				873	numblocks = (bits + blocksize - 1) / blocksize; /* max. number of blocks
				874	* to use for wNAF
				875	* splitting */
				876
				877	pre_points_per_block = (size_t)1 << (w - 1);
				878	num = pre_points_per_block * numblocks; /* number of points to compute
				879	* and store */
				880
				881	points = OPENSSL_malloc(sizeof(points) (num + 1));
				882	if (points == NULL) {
				883	ECerr(EC_F_EC_WNAF_PRECOMPUTE_MULT, ERR_R_MALLOC_FAILURE);
				884	goto err;
				885	}
				886
				887	var = points;
				888	var[num] = NULL; /* pivot */
				889	for (i = 0; i < num; i++) {
				890	if ((var[i] = EC_POINT_new(group)) == NULL) {
				891	ECerr(EC_F_EC_WNAF_PRECOMPUTE_MULT, ERR_R_MALLOC_FAILURE);
				892	goto err;
				893	}
				894	}
				895
				896	if ((tmp_point = EC_POINT_new(group)) == NULL
				897	\|\| (base = EC_POINT_new(group)) == NULL) {
				898	ECerr(EC_F_EC_WNAF_PRECOMPUTE_MULT, ERR_R_MALLOC_FAILURE);
				899	goto err;
				900	}
				901
				902	if (!EC_POINT_copy(base, generator))
				903	goto err;
				904
				905	/* do the precomputation */
				906	for (i = 0; i < numblocks; i++) {
				907	size_t j;
				908
				909	if (!EC_POINT_dbl(group, tmp_point, base, ctx))
				910	goto err;
				911
				912	if (!EC_POINT_copy(*var++, base))
				913	goto err;
				914
				915	for (j = 1; j < pre_points_per_block; j++, var++) {
				916	/*
				917	* calculate odd multiples of the current base point
				918	*/
				919	if (!EC_POINT_add(group, var, tmp_point, (var - 1), ctx))
				920	goto err;
				921	}
				922
				923	if (i < numblocks - 1) {
				924	/*
				925	* get the next base (multiply current one by 2^blocksize)
				926	*/
				927	size_t k;
				928
				929	if (blocksize <= 2) {
				930	ECerr(EC_F_EC_WNAF_PRECOMPUTE_MULT, ERR_R_INTERNAL_ERROR);
				931	goto err;
				932	}
				933
				934	if (!EC_POINT_dbl(group, base, tmp_point, ctx))
				935	goto err;
				936	for (k = 2; k < blocksize; k++) {
				937	if (!EC_POINT_dbl(group, base, base, ctx))
				938	goto err;
				939	}
				940	}
				941	}
				942
				943	if (!EC_POINTs_make_affine(group, num, points, ctx))
				944	goto err;
				945
				946	pre_comp->group = group;
				947	pre_comp->blocksize = blocksize;
				948	pre_comp->numblocks = numblocks;
				949	pre_comp->w = w;
				950	pre_comp->points = points;
				951	points = NULL;
				952	pre_comp->num = num;
				953	SETPRECOMP(group, ec, pre_comp);
				954	pre_comp = NULL;
				955	ret = 1;
				956
				957	err:
				958	BN_CTX_end(ctx);
				959	BN_CTX_free(new_ctx);
				960	EC_ec_pre_comp_free(pre_comp);
				961	if (points) {
				962	EC_POINT **p;
				963
				964	for (p = points; *p != NULL; p++)
				965	EC_POINT_free(*p);
				966	OPENSSL_free(points);
				967	}
				968	EC_POINT_free(tmp_point);
				969	EC_POINT_free(base);
				970	return ret;
				971	}
				972
				973	int ec_wNAF_have_precompute_mult(const EC_GROUP *group)
				974	{
				975	return HAVEPRECOMP(group, ec);
				976	}