xf.li | bdd93d5 | 2023-05-12 07:10:14 -0700 | [diff] [blame^] | 1 | /* mpn_mul -- Multiply two natural numbers. |
| 2 | |
| 3 | Copyright (C) 1991-2016 Free Software Foundation, Inc. |
| 4 | |
| 5 | This file is part of the GNU MP Library. |
| 6 | |
| 7 | The GNU MP Library is free software; you can redistribute it and/or modify |
| 8 | it under the terms of the GNU Lesser General Public License as published by |
| 9 | the Free Software Foundation; either version 2.1 of the License, or (at your |
| 10 | option) any later version. |
| 11 | |
| 12 | The GNU MP Library is distributed in the hope that it will be useful, but |
| 13 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| 14 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
| 15 | License for more details. |
| 16 | |
| 17 | You should have received a copy of the GNU Lesser General Public License |
| 18 | along with the GNU MP Library; see the file COPYING.LIB. If not, see |
| 19 | <http://www.gnu.org/licenses/>. */ |
| 20 | |
| 21 | #include <gmp.h> |
| 22 | #include "gmp-impl.h" |
| 23 | |
| 24 | /* Multiply the natural numbers u (pointed to by UP, with USIZE limbs) |
| 25 | and v (pointed to by VP, with VSIZE limbs), and store the result at |
| 26 | PRODP. USIZE + VSIZE limbs are always stored, but if the input |
| 27 | operands are normalized. Return the most significant limb of the |
| 28 | result. |
| 29 | |
| 30 | NOTE: The space pointed to by PRODP is overwritten before finished |
| 31 | with U and V, so overlap is an error. |
| 32 | |
| 33 | Argument constraints: |
| 34 | 1. USIZE >= VSIZE. |
| 35 | 2. PRODP != UP and PRODP != VP, i.e. the destination |
| 36 | must be distinct from the multiplier and the multiplicand. */ |
| 37 | |
| 38 | /* If KARATSUBA_THRESHOLD is not already defined, define it to a |
| 39 | value which is good on most machines. */ |
| 40 | #ifndef KARATSUBA_THRESHOLD |
| 41 | #define KARATSUBA_THRESHOLD 32 |
| 42 | #endif |
| 43 | |
| 44 | mp_limb_t |
| 45 | mpn_mul (mp_ptr prodp, |
| 46 | mp_srcptr up, mp_size_t usize, |
| 47 | mp_srcptr vp, mp_size_t vsize) |
| 48 | { |
| 49 | mp_ptr prod_endp = prodp + usize + vsize - 1; |
| 50 | mp_limb_t cy; |
| 51 | mp_ptr tspace; |
| 52 | TMP_DECL (marker); |
| 53 | |
| 54 | if (vsize < KARATSUBA_THRESHOLD) |
| 55 | { |
| 56 | /* Handle simple cases with traditional multiplication. |
| 57 | |
| 58 | This is the most critical code of the entire function. All |
| 59 | multiplies rely on this, both small and huge. Small ones arrive |
| 60 | here immediately. Huge ones arrive here as this is the base case |
| 61 | for Karatsuba's recursive algorithm below. */ |
| 62 | mp_size_t i; |
| 63 | mp_limb_t cy_limb; |
| 64 | mp_limb_t v_limb; |
| 65 | |
| 66 | if (vsize == 0) |
| 67 | return 0; |
| 68 | |
| 69 | /* Multiply by the first limb in V separately, as the result can be |
| 70 | stored (not added) to PROD. We also avoid a loop for zeroing. */ |
| 71 | v_limb = vp[0]; |
| 72 | if (v_limb <= 1) |
| 73 | { |
| 74 | if (v_limb == 1) |
| 75 | MPN_COPY (prodp, up, usize); |
| 76 | else |
| 77 | MPN_ZERO (prodp, usize); |
| 78 | cy_limb = 0; |
| 79 | } |
| 80 | else |
| 81 | cy_limb = mpn_mul_1 (prodp, up, usize, v_limb); |
| 82 | |
| 83 | prodp[usize] = cy_limb; |
| 84 | prodp++; |
| 85 | |
| 86 | /* For each iteration in the outer loop, multiply one limb from |
| 87 | U with one limb from V, and add it to PROD. */ |
| 88 | for (i = 1; i < vsize; i++) |
| 89 | { |
| 90 | v_limb = vp[i]; |
| 91 | if (v_limb <= 1) |
| 92 | { |
| 93 | cy_limb = 0; |
| 94 | if (v_limb == 1) |
| 95 | cy_limb = mpn_add_n (prodp, prodp, up, usize); |
| 96 | } |
| 97 | else |
| 98 | cy_limb = mpn_addmul_1 (prodp, up, usize, v_limb); |
| 99 | |
| 100 | prodp[usize] = cy_limb; |
| 101 | prodp++; |
| 102 | } |
| 103 | return cy_limb; |
| 104 | } |
| 105 | |
| 106 | TMP_MARK (marker); |
| 107 | |
| 108 | tspace = (mp_ptr) TMP_ALLOC (2 * vsize * BYTES_PER_MP_LIMB); |
| 109 | MPN_MUL_N_RECURSE (prodp, up, vp, vsize, tspace); |
| 110 | |
| 111 | prodp += vsize; |
| 112 | up += vsize; |
| 113 | usize -= vsize; |
| 114 | if (usize >= vsize) |
| 115 | { |
| 116 | mp_ptr tp = (mp_ptr) TMP_ALLOC (2 * vsize * BYTES_PER_MP_LIMB); |
| 117 | do |
| 118 | { |
| 119 | MPN_MUL_N_RECURSE (tp, up, vp, vsize, tspace); |
| 120 | cy = mpn_add_n (prodp, prodp, tp, vsize); |
| 121 | mpn_add_1 (prodp + vsize, tp + vsize, vsize, cy); |
| 122 | prodp += vsize; |
| 123 | up += vsize; |
| 124 | usize -= vsize; |
| 125 | } |
| 126 | while (usize >= vsize); |
| 127 | } |
| 128 | |
| 129 | /* True: usize < vsize. */ |
| 130 | |
| 131 | /* Make life simple: Recurse. */ |
| 132 | |
| 133 | if (usize != 0) |
| 134 | { |
| 135 | mpn_mul (tspace, vp, vsize, up, usize); |
| 136 | cy = mpn_add_n (prodp, prodp, tspace, vsize); |
| 137 | mpn_add_1 (prodp + vsize, tspace + vsize, usize, cy); |
| 138 | } |
| 139 | |
| 140 | TMP_FREE (marker); |
| 141 | return *prod_endp; |
| 142 | } |