blob: f6906fd1582394d716737a4f7179b1c9c93b38f7 [file] [log] [blame]
rjw1f884582022-01-06 17:20:42 +08001From 8cbd1d71caf56d45c54b1d8d073b330c07c66d12 Mon Sep 17 00:00:00 2001
2From: Armin Kuster <akuster@mvista.com>
3Date: Thu, 2 Mar 2017 12:24:31 +0000
4Subject: [PATCH] Create meta-gplv2 from files from OE-Core
5
6Upstream-Status: Backport
7 https://git.lysator.liu.se/nettle/nettle/commit/fa269b6ad06dd13c901dbd84a12e52b918a09cd7
8
9CVE: CVE-2015-8804
10Signed-off-by: Armin Kuster <akuster@mvista.com>
11
12---
13 ChangeLog | 8 +++
14 x86_64/ecc-384-modp.asm | 169 +++++++++++++++++++++---------------------------
15 2 files changed, 83 insertions(+), 94 deletions(-)
16
17diff --git a/ChangeLog b/ChangeLog
18index abdd974..c81168b 100644
19--- a/ChangeLog
20+++ b/ChangeLog
21@@ -1,3 +1,11 @@
22+2015-12-15 Niels Möller <nisse@lysator.liu.se>
23+
24+ * x86_64/ecc-384-modp.asm: Fixed carry propagation bug. Problem
25+ reported by Hanno Böck. Simplified the folding to always use
26+ non-negative carry, the old code attempted to add in a carry which
27+ could be either positive or negative, but didn't get that case
28+ right.
29+
30 2015-12-10 Niels Möller <nisse@lysator.liu.se>
31
32 * ecc-256.c (ecc_256_modp): Fixed carry propagation bug. Problem
33diff --git a/x86_64/ecc-384-modp.asm b/x86_64/ecc-384-modp.asm
34index 698838f..31b739e 100644
35--- a/x86_64/ecc-384-modp.asm
36+++ b/x86_64/ecc-384-modp.asm
37@@ -20,7 +20,7 @@ C MA 02111-1301, USA.
38 .file "ecc-384-modp.asm"
39
40 define(<RP>, <%rsi>)
41-define(<D4>, <%rax>)
42+define(<D5>, <%rax>)
43 define(<T0>, <%rbx>)
44 define(<T1>, <%rcx>)
45 define(<T2>, <%rdx>)
46@@ -35,8 +35,8 @@ define(<H4>, <%r13>)
47 define(<H5>, <%r14>)
48 define(<C2>, <%r15>)
49 define(<C0>, H5) C Overlap
50-define(<D0>, RP) C Overlap
51-define(<TMP>, H4) C Overlap
52+define(<TMP>, RP) C Overlap
53+
54
55 PROLOGUE(nettle_ecc_384_modp)
56 W64_ENTRY(2, 0)
57@@ -48,34 +48,38 @@ PROLOGUE(nettle_ecc_384_modp)
58 push %r14
59 push %r15
60
61- C First get top 2 limbs, which need folding twice
62+ C First get top 2 limbs, which need folding twice.
63+ C B^10 = B^6 + B^4 + 2^32 (B-1)B^4.
64+ C We handle the terms as follows:
65 C
66- C H5 H4
67- C -H5
68- C ------
69- C H0 D4
70+ C B^6: Folded immediately.
71 C
72- C Then shift right, (H1,H0,D4) <-- (H0,D4) << 32
73- C and add
74+ C B^4: Delayed, added in in the next folding.
75 C
76- C H5 H4
77- C H1 H0
78- C ----------
79- C C2 H1 H0
80-
81- mov 80(RP), D4
82- mov 88(RP), H0
83- mov D4, H4
84- mov H0, H5
85- sub H0, D4
86- sbb $0, H0
87-
88- mov D4, T2
89- mov H0, H1
90- shl $32, H0
91- shr $32, T2
92+ C 2^32(B-1) B^4: Low half limb delayed until the next
93+ C folding. Top 1.5 limbs subtracted and shifted now, resulting
94+ C in 2.5 limbs. The low limb saved in D5, high 1.5 limbs added
95+ C in.
96+
97+ mov 80(RP), H4
98+ mov 88(RP), H5
99+ C Shift right 32 bits, into H1, H0
100+ mov H4, H0
101+ mov H5, H1
102+ mov H5, D5
103 shr $32, H1
104- or T2, H0
105+ shl $32, D5
106+ shr $32, H0
107+ or D5, H0
108+
109+ C H1 H0
110+ C - H1 H0
111+ C --------
112+ C H1 H0 D5
113+ mov H0, D5
114+ neg D5
115+ sbb H1, H0
116+ sbb $0, H1
117
118 xor C2, C2
119 add H4, H0
120@@ -114,118 +118,95 @@ PROLOGUE(nettle_ecc_384_modp)
121 adc H3, T5
122 adc $0, C0
123
124- C H3 H2 H1 H0 0
125- C - H4 H3 H2 H1 H0
126- C ---------------
127- C H3 H2 H1 H0 D0
128-
129- mov XREG(D4), XREG(D4)
130- mov H0, D0
131- neg D0
132- sbb H1, H0
133- sbb H2, H1
134- sbb H3, H2
135- sbb H4, H3
136- sbb $0, D4
137-
138- C Shift right. High bits are sign, to be added to C0.
139- mov D4, TMP
140- sar $32, TMP
141- shl $32, D4
142- add TMP, C0
143-
144+ C Shift left, including low half of H4
145 mov H3, TMP
146+ shl $32, H4
147 shr $32, TMP
148- shl $32, H3
149- or TMP, D4
150+ or TMP, H4
151
152 mov H2, TMP
153+ shl $32, H3
154 shr $32, TMP
155- shl $32, H2
156 or TMP, H3
157
158 mov H1, TMP
159+ shl $32, H2
160 shr $32, TMP
161- shl $32, H1
162 or TMP, H2
163
164 mov H0, TMP
165+ shl $32, H1
166 shr $32, TMP
167- shl $32, H0
168 or TMP, H1
169
170- mov D0, TMP
171- shr $32, TMP
172- shl $32, D0
173- or TMP, H0
174+ shl $32, H0
175+
176+ C H4 H3 H2 H1 H0 0
177+ C - H4 H3 H2 H1 H0
178+ C ---------------
179+ C H4 H3 H2 H1 H0 TMP
180
181- add D0, T0
182+ mov H0, TMP
183+ neg TMP
184+ sbb H1, H0
185+ sbb H2, H1
186+ sbb H3, H2
187+ sbb H4, H3
188+ sbb $0, H4
189+
190+ add TMP, T0
191 adc H0, T1
192 adc H1, T2
193 adc H2, T3
194 adc H3, T4
195- adc D4, T5
196+ adc H4, T5
197 adc $0, C0
198
199 C Remains to add in C2 and C0
200- C C0 C0<<32 (-2^32+1)C0
201- C C2 C2<<32 (-2^32+1)C2
202- C where C2 is always positive, while C0 may be -1.
203+ C Set H1, H0 = (2^96 - 2^32 + 1) C0
204 mov C0, H0
205 mov C0, H1
206- mov C0, H2
207- sar $63, C0 C Get sign
208 shl $32, H1
209- sub H1, H0 C Gives borrow iff C0 > 0
210+ sub H1, H0
211 sbb $0, H1
212- add C0, H2
213
214+ C Set H3, H2 = (2^96 - 2^32 + 1) C2
215+ mov C2, H2
216+ mov C2, H3
217+ shl $32, H3
218+ sub H3, H2
219+ sbb $0, H3
220+ add C0, H2 C No carry. Could use lea trick
221+
222+ xor C0, C0
223 add H0, T0
224 adc H1, T1
225- adc $0, H2
226- adc $0, C0
227-
228- C Set (H1 H0) <-- C2 << 96 - C2 << 32 + 1
229- mov C2, H0
230- mov C2, H1
231- shl $32, H1
232- sub H1, H0
233- sbb $0, H1
234-
235- add H2, H0
236- adc C0, H1
237- adc C2, C0
238- mov C0, H2
239- sar $63, C0
240- add H0, T2
241- adc H1, T3
242- adc H2, T4
243- adc C0, T5
244- sbb C0, C0
245+ adc H2, T2
246+ adc H3, T3
247+ adc C2, T4
248+ adc D5, T5 C Value delayed from initial folding
249+ adc $0, C0 C Use sbb and switch sign?
250
251 C Final unlikely carry
252 mov C0, H0
253 mov C0, H1
254- mov C0, H2
255- sar $63, C0
256 shl $32, H1
257 sub H1, H0
258 sbb $0, H1
259- add C0, H2
260
261 pop RP
262
263- sub H0, T0
264+ add H0, T0
265 mov T0, (RP)
266- sbb H1, T1
267+ adc H1, T1
268 mov T1, 8(RP)
269- sbb H2, T2
270+ adc C0, T2
271 mov T2, 16(RP)
272- sbb C0, T3
273+ adc $0, T3
274 mov T3, 24(RP)
275- sbb C0, T4
276+ adc $0, T4
277 mov T4, 32(RP)
278- sbb C0, T5
279+ adc $0, T5
280 mov T5, 40(RP)
281
282 pop %r15