Blame - ap/lib/libssl/openssl-1.1.1o/crypto/bn/bn_div.c - R306

blob: 4273618825cc165771993b677123426c5887941b [file] [log] [blame]

yuezonghe	824eb0c	2024-06-27 02:32:26 -0700	[diff] [blame]	1	/*
				2	* Copyright 1995-2022 The OpenSSL Project Authors. All Rights Reserved.
				3	*
				4	* Licensed under the OpenSSL license (the "License"). You may not use
				5	* this file except in compliance with the License. You can obtain a copy
				6	* in the file LICENSE in the source distribution or at
				7	* https://www.openssl.org/source/license.html
				8	*/
				9
				10	#include <assert.h>
				11	#include <openssl/bn.h>
				12	#include "internal/cryptlib.h"
				13	#include "bn_local.h"
				14
				15	/* The old slow way */
				16	#if 0
				17	int BN_div(BIGNUM dv, BIGNUM rem, const BIGNUM m, const BIGNUM d,
				18	BN_CTX *ctx)
				19	{
				20	int i, nm, nd;
				21	int ret = 0;
				22	BIGNUM *D;
				23
				24	bn_check_top(m);
				25	bn_check_top(d);
				26	if (BN_is_zero(d)) {
				27	BNerr(BN_F_BN_DIV, BN_R_DIV_BY_ZERO);
				28	return 0;
				29	}
				30
				31	if (BN_ucmp(m, d) < 0) {
				32	if (rem != NULL) {
				33	if (BN_copy(rem, m) == NULL)
				34	return 0;
				35	}
				36	if (dv != NULL)
				37	BN_zero(dv);
				38	return 1;
				39	}
				40
				41	BN_CTX_start(ctx);
				42	D = BN_CTX_get(ctx);
				43	if (dv == NULL)
				44	dv = BN_CTX_get(ctx);
				45	if (rem == NULL)
				46	rem = BN_CTX_get(ctx);
				47	if (D == NULL \|\| dv == NULL \|\| rem == NULL)
				48	goto end;
				49
				50	nd = BN_num_bits(d);
				51	nm = BN_num_bits(m);
				52	if (BN_copy(D, d) == NULL)
				53	goto end;
				54	if (BN_copy(rem, m) == NULL)
				55	goto end;
				56
				57	/*
				58	* The next 2 are needed so we can do a dv->d[0]\|=1 later since
				59	* BN_lshift1 will only work once there is a value :-)
				60	*/
				61	BN_zero(dv);
				62	if (bn_wexpand(dv, 1) == NULL)
				63	goto end;
				64	dv->top = 1;
				65
				66	if (!BN_lshift(D, D, nm - nd))
				67	goto end;
				68	for (i = nm - nd; i >= 0; i--) {
				69	if (!BN_lshift1(dv, dv))
				70	goto end;
				71	if (BN_ucmp(rem, D) >= 0) {
				72	dv->d[0] \|= 1;
				73	if (!BN_usub(rem, rem, D))
				74	goto end;
				75	}
				76	/* CAN IMPROVE (and have now :=) */
				77	if (!BN_rshift1(D, D))
				78	goto end;
				79	}
				80	rem->neg = BN_is_zero(rem) ? 0 : m->neg;
				81	dv->neg = m->neg ^ d->neg;
				82	ret = 1;
				83	end:
				84	BN_CTX_end(ctx);
				85	return ret;
				86	}
				87
				88	#else
				89
				90	# if defined(BN_DIV3W)
				91	BN_ULONG bn_div_3_words(const BN_ULONG *m, BN_ULONG d1, BN_ULONG d0);
				92	# elif 0
				93	/*
				94	* This is #if-ed away, because it's a reference for assembly implementations,
				95	* where it can and should be made constant-time. But if you want to test it,
				96	* just replace 0 with 1.
				97	*/
				98	# if BN_BITS2 == 64 && defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
				99	# undef BN_ULLONG
				100	# define BN_ULLONG __uint128_t
				101	# define BN_LLONG
				102	# endif
				103
				104	# ifdef BN_LLONG
				105	# define BN_DIV3W
				106	/*
				107	* Interface is somewhat quirky, \|m\| is pointer to most significant limb,
				108	* and less significant limb is referred at \|m[-1]\|. This means that caller
				109	* is responsible for ensuring that \|m[-1]\| is valid. Second condition that
				110	* has to be met is that \|d0\|'s most significant bit has to be set. Or in
				111	* other words divisor has to be "bit-aligned to the left." bn_div_fixed_top
				112	* does all this. The subroutine considers four limbs, two of which are
				113	* "overlapping," hence the name...
				114	*/
				115	static BN_ULONG bn_div_3_words(const BN_ULONG *m, BN_ULONG d1, BN_ULONG d0)
				116	{
				117	BN_ULLONG R = ((BN_ULLONG)m[0] << BN_BITS2) \| m[-1];
				118	BN_ULLONG D = ((BN_ULLONG)d0 << BN_BITS2) \| d1;
				119	BN_ULONG Q = 0, mask;
				120	int i;
				121
				122	for (i = 0; i < BN_BITS2; i++) {
				123	Q <<= 1;
				124	if (R >= D) {
				125	Q \|= 1;
				126	R -= D;
				127	}
				128	D >>= 1;
				129	}
				130
				131	mask = 0 - (Q >> (BN_BITS2 - 1)); /* does it overflow? */
				132
				133	Q <<= 1;
				134	Q \|= (R >= D);
				135
				136	return (Q \| mask) & BN_MASK2;
				137	}
				138	# endif
				139	# endif
				140
				141	static int bn_left_align(BIGNUM *num)
				142	{
				143	BN_ULONG *d = num->d, n, m, rmask;
				144	int top = num->top;
				145	int rshift = BN_num_bits_word(d[top - 1]), lshift, i;
				146
				147	lshift = BN_BITS2 - rshift;
				148	rshift %= BN_BITS2; /* say no to undefined behaviour */
				149	rmask = (BN_ULONG)0 - rshift; /* rmask = 0 - (rshift != 0) */
				150	rmask \|= rmask >> 8;
				151
				152	for (i = 0, m = 0; i < top; i++) {
				153	n = d[i];
				154	d[i] = ((n << lshift) \| m) & BN_MASK2;
				155	m = (n >> rshift) & rmask;
				156	}
				157
				158	return lshift;
				159	}
				160
				161	# if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) \
				162	&& !defined(PEDANTIC) && !defined(BN_DIV3W)
				163	# if defined(__GNUC__) && __GNUC__>=2
				164	# if defined(__i386) \|\| defined (__i386__)
				165	/*-
				166	* There were two reasons for implementing this template:
				167	* - GNU C generates a call to a function (__udivdi3 to be exact)
				168	* in reply to ((((BN_ULLONG)n0)<<BN_BITS2)\|n1)/d0 (I fail to
				169	* understand why...);
				170	* - divl doesn't only calculate quotient, but also leaves
				171	* remainder in %edx which we can definitely use here:-)
				172	*/
				173	# undef bn_div_words
				174	# define bn_div_words(n0,n1,d0) \
				175	({ asm volatile ( \
				176	"divl %4" \
				177	: "=a"(q), "=d"(rem) \
				178	: "a"(n1), "d"(n0), "r"(d0) \
				179	: "cc"); \
				180	q; \
				181	})
				182	# define REMAINDER_IS_ALREADY_CALCULATED
				183	# elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG)
				184	/*
				185	* Same story here, but it's 128-bit by 64-bit division. Wow!
				186	*/
				187	# undef bn_div_words
				188	# define bn_div_words(n0,n1,d0) \
				189	({ asm volatile ( \
				190	"divq %4" \
				191	: "=a"(q), "=d"(rem) \
				192	: "a"(n1), "d"(n0), "r"(d0) \
				193	: "cc"); \
				194	q; \
				195	})
				196	# define REMAINDER_IS_ALREADY_CALCULATED
				197	# endif /* __<cpu> */
				198	# endif /* __GNUC__ */
				199	# endif /* OPENSSL_NO_ASM */
				200
				201	/*-
				202	* BN_div computes dv := num / divisor, rounding towards
				203	* zero, and sets up rm such that dv*divisor + rm = num holds.
				204	* Thus:
				205	* dv->neg == num->neg ^ divisor->neg (unless the result is zero)
				206	* rm->neg == num->neg (unless the remainder is zero)
				207	* If 'dv' or 'rm' is NULL, the respective value is not returned.
				208	*/
				209	int BN_div(BIGNUM dv, BIGNUM rm, const BIGNUM num, const BIGNUM divisor,
				210	BN_CTX *ctx)
				211	{
				212	int ret;
				213
				214	if (BN_is_zero(divisor)) {
				215	BNerr(BN_F_BN_DIV, BN_R_DIV_BY_ZERO);
				216	return 0;
				217	}
				218
				219	/*
				220	* Invalid zero-padding would have particularly bad consequences so don't
				221	* just rely on bn_check_top() here (bn_check_top() works only for
				222	* BN_DEBUG builds)
				223	*/
				224	if (divisor->d[divisor->top - 1] == 0) {
				225	BNerr(BN_F_BN_DIV, BN_R_NOT_INITIALIZED);
				226	return 0;
				227	}
				228
				229	ret = bn_div_fixed_top(dv, rm, num, divisor, ctx);
				230
				231	if (ret) {
				232	if (dv != NULL)
				233	bn_correct_top(dv);
				234	if (rm != NULL)
				235	bn_correct_top(rm);
				236	}
				237
				238	return ret;
				239	}
				240
				241	/*
				242	* It's argued that length of significant part of divisor is public.
				243	* Even if it's private modulus that is. Again, length is assumed
				244	* public, but not value. Former is likely to be pre-defined by
				245	* algorithm with bit granularity, though below subroutine is invariant
				246	* of limb length. Thanks to this assumption we can require that \|divisor\|
				247	* may not be zero-padded, yet claim this subroutine "constant-time"(*).
				248	* This is because zero-padded dividend, \|num\|, is tolerated, so that
				249	* caller can pass dividend of public length(*), but with smaller amount
				250	* of significant limbs. This naturally means that quotient, \|dv\|, would
				251	* contain correspongly less significant limbs as well, and will be zero-
				252	* padded accordingly. Returned remainder, \|rm\|, will have same bit length
				253	* as divisor, also zero-padded if needed. These actually leave sign bits
				254	* in ambiguous state. In sense that we try to avoid negative zeros, while
				255	* zero-padded zeros would retain sign.
				256	*
				257	* (*) "Constant-time-ness" has two pre-conditions:
				258	*
				259	* - availability of constant-time bn_div_3_words;
				260	* - dividend is at least as "wide" as divisor, limb-wise, zero-padded
				261	* if so required, which shouldn't be a privacy problem, because
				262	* divisor's length is considered public;
				263	*/
				264	int bn_div_fixed_top(BIGNUM dv, BIGNUM rm, const BIGNUM *num,
				265	const BIGNUM divisor, BN_CTX ctx)
				266	{
				267	int norm_shift, i, j, loop;
				268	BIGNUM tmp, snum, sdiv, res;
				269	BN_ULONG resp, wnum, *wnumtop;
				270	BN_ULONG d0, d1;
				271	int num_n, div_n, num_neg;
				272
				273	assert(divisor->top > 0 && divisor->d[divisor->top - 1] != 0);
				274
				275	bn_check_top(num);
				276	bn_check_top(divisor);
				277	bn_check_top(dv);
				278	bn_check_top(rm);
				279
				280	BN_CTX_start(ctx);
				281	res = (dv == NULL) ? BN_CTX_get(ctx) : dv;
				282	tmp = BN_CTX_get(ctx);
				283	snum = BN_CTX_get(ctx);
				284	sdiv = BN_CTX_get(ctx);
				285	if (sdiv == NULL)
				286	goto err;
				287
				288	/* First we normalise the numbers */
				289	if (!BN_copy(sdiv, divisor))
				290	goto err;
				291	norm_shift = bn_left_align(sdiv);
				292	sdiv->neg = 0;
				293	/*
				294	* Note that bn_lshift_fixed_top's output is always one limb longer
				295	* than input, even when norm_shift is zero. This means that amount of
				296	* inner loop iterations is invariant of dividend value, and that one
				297	* doesn't need to compare dividend and divisor if they were originally
				298	* of the same bit length.
				299	*/
				300	if (!(bn_lshift_fixed_top(snum, num, norm_shift)))
				301	goto err;
				302
				303	div_n = sdiv->top;
				304	num_n = snum->top;
				305
				306	if (num_n <= div_n) {
				307	/* caller didn't pad dividend -> no constant-time guarantee... */
				308	if (bn_wexpand(snum, div_n + 1) == NULL)
				309	goto err;
				310	memset(&(snum->d[num_n]), 0, (div_n - num_n + 1) * sizeof(BN_ULONG));
				311	snum->top = num_n = div_n + 1;
				312	}
				313
				314	loop = num_n - div_n;
				315	/*
				316	* Lets setup a 'window' into snum This is the part that corresponds to
				317	* the current 'area' being divided
				318	*/
				319	wnum = &(snum->d[loop]);
				320	wnumtop = &(snum->d[num_n - 1]);
				321
				322	/* Get the top 2 words of sdiv */
				323	d0 = sdiv->d[div_n - 1];
				324	d1 = (div_n == 1) ? 0 : sdiv->d[div_n - 2];
				325
				326	/* Setup quotient */
				327	if (!bn_wexpand(res, loop))
				328	goto err;
				329	num_neg = num->neg;
				330	res->neg = (num_neg ^ divisor->neg);
				331	res->top = loop;
				332	res->flags \|= BN_FLG_FIXED_TOP;
				333	resp = &(res->d[loop]);
				334
				335	/* space for temp */
				336	if (!bn_wexpand(tmp, (div_n + 1)))
				337	goto err;
				338
				339	for (i = 0; i < loop; i++, wnumtop--) {
				340	BN_ULONG q, l0;
				341	/*
				342	* the first part of the loop uses the top two words of snum and sdiv
				343	* to calculate a BN_ULONG q such that \| wnum - sdiv * q \| < sdiv
				344	*/
				345	# if defined(BN_DIV3W)
				346	q = bn_div_3_words(wnumtop, d1, d0);
				347	# else
				348	BN_ULONG n0, n1, rem = 0;
				349
				350	n0 = wnumtop[0];
				351	n1 = wnumtop[-1];
				352	if (n0 == d0)
				353	q = BN_MASK2;
				354	else { /* n0 < d0 */
				355	BN_ULONG n2 = (wnumtop == wnum) ? 0 : wnumtop[-2];
				356	# ifdef BN_LLONG
				357	BN_ULLONG t2;
				358
				359	# if defined(BN_LLONG) && defined(BN_DIV2W) && !defined(bn_div_words)
				360	q = (BN_ULONG)(((((BN_ULLONG) n0) << BN_BITS2) \| n1) / d0);
				361	# else
				362	q = bn_div_words(n0, n1, d0);
				363	# endif
				364
				365	# ifndef REMAINDER_IS_ALREADY_CALCULATED
				366	/*
				367	* rem doesn't have to be BN_ULLONG. The least we
				368	* know it's less that d0, isn't it?
				369	*/
				370	rem = (n1 - q * d0) & BN_MASK2;
				371	# endif
				372	t2 = (BN_ULLONG) d1 *q;
				373
				374	for (;;) {
				375	if (t2 <= ((((BN_ULLONG) rem) << BN_BITS2) \| n2))
				376	break;
				377	q--;
				378	rem += d0;
				379	if (rem < d0)
				380	break; /* don't let rem overflow */
				381	t2 -= d1;
				382	}
				383	# else /* !BN_LLONG */
				384	BN_ULONG t2l, t2h;
				385
				386	q = bn_div_words(n0, n1, d0);
				387	# ifndef REMAINDER_IS_ALREADY_CALCULATED
				388	rem = (n1 - q * d0) & BN_MASK2;
				389	# endif
				390
				391	# if defined(BN_UMULT_LOHI)
				392	BN_UMULT_LOHI(t2l, t2h, d1, q);
				393	# elif defined(BN_UMULT_HIGH)
				394	t2l = d1 * q;
				395	t2h = BN_UMULT_HIGH(d1, q);
				396	# else
				397	{
				398	BN_ULONG ql, qh;
				399	t2l = LBITS(d1);
				400	t2h = HBITS(d1);
				401	ql = LBITS(q);
				402	qh = HBITS(q);
				403	mul64(t2l, t2h, ql, qh); /* t2=(BN_ULLONG)d1q; /
				404	}
				405	# endif
				406
				407	for (;;) {
				408	if ((t2h < rem) \|\| ((t2h == rem) && (t2l <= n2)))
				409	break;
				410	q--;
				411	rem += d0;
				412	if (rem < d0)
				413	break; /* don't let rem overflow */
				414	if (t2l < d1)
				415	t2h--;
				416	t2l -= d1;
				417	}
				418	# endif /* !BN_LLONG */
				419	}
				420	# endif /* !BN_DIV3W */
				421
				422	l0 = bn_mul_words(tmp->d, sdiv->d, div_n, q);
				423	tmp->d[div_n] = l0;
				424	wnum--;
				425	/*
				426	* ignore top values of the bignums just sub the two BN_ULONG arrays
				427	* with bn_sub_words
				428	*/
				429	l0 = bn_sub_words(wnum, wnum, tmp->d, div_n + 1);
				430	q -= l0;
				431	/*
				432	* Note: As we have considered only the leading two BN_ULONGs in
				433	* the calculation of q, sdiv * q might be greater than wnum (but
				434	* then (q-1) * sdiv is less or equal than wnum)
				435	*/
				436	for (l0 = 0 - l0, j = 0; j < div_n; j++)
				437	tmp->d[j] = sdiv->d[j] & l0;
				438	l0 = bn_add_words(wnum, wnum, tmp->d, div_n);
				439	(*wnumtop) += l0;
				440	assert((*wnumtop) == 0);
				441
				442	/* store part of the result */
				443	*--resp = q;
				444	}
				445	/* snum holds remainder, it's as wide as divisor */
				446	snum->neg = num_neg;
				447	snum->top = div_n;
				448	snum->flags \|= BN_FLG_FIXED_TOP;
				449
				450	if (rm != NULL && bn_rshift_fixed_top(rm, snum, norm_shift) == 0)
				451	goto err;
				452
				453	BN_CTX_end(ctx);
				454	return 1;
				455	err:
				456	bn_check_top(rm);
				457	BN_CTX_end(ctx);
				458	return 0;
				459	}
				460	#endif