blob: 1834065362c72fbfd0df023224c25e6181a24ed0 [file] [log] [blame]
xjb04a4022021-11-25 15:01:52 +08001/*
2 * Fast MD5 implementation for PPC
3 *
4 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation; either version 2 of the License, or (at your option)
9 * any later version.
10 *
11 */
12#include <asm/ppc_asm.h>
13#include <asm/asm-offsets.h>
14#include <asm/asm-compat.h>
15
/*
 * Register roles, as used by the code in this file:
 *
 *   rHP        - base address of the four 32-bit hash state words; they are
 *                loaded from and stored back to offsets 0, 4, 8 and 12
 *   rWP        - pointer to the input data fetched by LOAD_DATA
 *   rH0..rH3   - the four working state words
 *   rW00..rW15 - the sixteen data words of the block being processed
 *   rT0, rT1   - scratch registers for the round functions
 *
 * rH0 is r0. That is safe here because rH0 is only used as an operand of
 * register-register instructions and as the data register of lwz/stw; it is
 * never the base operand of addi/addis or of a load/store, where r0 would be
 * read as the literal value 0.
 *
 * rH3 is r5, the same register that carries the loop count on entry; the
 * count is copied to CTR before rH3 is loaded.
 *
 * r13 is skipped in the rW sequence (presumably because it is reserved by
 * the ABI - confirm before reusing it).
 */
16#define rHP r3
17#define rWP r4
18
19#define rH0 r0
20#define rH1 r6
21#define rH2 r7
22#define rH3 r5
23
24#define rW00 r8
25#define rW01 r9
26#define rW02 r10
27#define rW03 r11
28#define rW04 r12
29#define rW05 r14
30#define rW06 r15
31#define rW07 r16
32#define rW08 r17
33#define rW09 r18
34#define rW10 r19
35#define rW11 r20
36#define rW12 r21
37#define rW13 r22
38#define rW14 r23
39#define rW15 r24
40
41#define rT0 r25
42#define rT1 r26
43
/*
 * INITIALIZE: function prologue.
 *
 * Creates a stack frame of INT_FRAME_SIZE bytes below r1 and saves r14-r26
 * into it. Those are exactly the registers above r12 that this file aliases
 * (rW05..rW15, rT0, rT1).
 *
 * NOTE(review): PPC_STLU and the SAVE_*GPR(S) helpers come from the included
 * headers; the register ranges stated here (14-21, 22-25, 26) are read off
 * the macro names and arguments - confirm against their definitions.
 */
44#define INITIALIZE \
45 PPC_STLU r1,-INT_FRAME_SIZE(r1); \
46 SAVE_8GPRS(14, r1); /* push registers onto stack: r14.. (8 regs) */ \
47 SAVE_4GPRS(22, r1); /* r22.. (4 regs) */ \
48 SAVE_GPR(26, r1) /* r26 */
49
/*
 * FINALIZE: function epilogue, the mirror image of INITIALIZE.
 *
 * Restores the registers saved by INITIALIZE (same helper arguments: 14, 22
 * and 26) and then releases the frame by adding INT_FRAME_SIZE back to r1.
 * It does not return; the caller of this macro issues the blr itself.
 */
50#define FINALIZE \
51 REST_8GPRS(14, r1); /* pop registers from stack */ \
52 REST_4GPRS(22, r1); \
53 REST_GPR(26, r1); \
54 addi r1,r1,INT_FRAME_SIZE; /* drop the stack frame */
55
/*
 * Data access helpers, selected by target endianness.
 *
 *   LOAD_DATA(reg, off) - load one 32-bit data word into reg
 *   INC_PTR             - pointer adjustment issued after each load
 *   NEXT_BLOCK          - pointer adjustment issued once per block
 *
 * Big endian: the word is fetched with lwbrx (byte-reversed load) from the
 * address in rWP. lwbrx is used in its indexed form with no displacement, so
 * `off` is ignored and INC_PTR advances rWP by 4 after every word instead.
 * After sixteen words rWP already points at the next block, which is why
 * NEXT_BLOCK is empty.
 *
 * Otherwise: the word is fetched with lwz from off(rWP). rWP stays fixed
 * during a block (INC_PTR is empty) and NEXT_BLOCK advances it by 64 bytes.
 */
56#ifdef __BIG_ENDIAN__
57#define LOAD_DATA(reg, off) \
58 lwbrx reg,0,rWP; /* load data, byte-reversed; off is unused */
59#define INC_PTR \
60 addi rWP,rWP,4; /* increment per word */
61#define NEXT_BLOCK /* nothing to do */
62#else
63#define LOAD_DATA(reg, off) \
64 lwz reg,off(rWP); /* load data at fixed offset */
65#define INC_PTR /* nothing to do */
66#define NEXT_BLOCK \
67 addi rWP,rWP,64; /* increment per block */
68#endif
69
/*
 * R_00_15: two interleaved steps of rounds 0-15, including the data loads.
 *
 * Parameters:
 *   a, b, c, d - state registers; step 1 updates a, step 2 updates d
 *   w0, w1     - registers that receive the two data words
 *   p, q       - rotate-RIGHT amounts for step 1 and step 2
 *   off        - byte offset of the first word (the second is at off+4)
 *   k0h, k0l   - constant for step 1: k0l is added with addi (sign-extended
 *                16-bit immediate), k0h with addis (immediate shifted left
 *                by 16)
 *   k1h, k1l   - same for step 2
 *
 * Computation:
 *   step 1: a = rotr(a + ((b & c) | (d & ~b)) + w0 + k0, p) + b
 *   step 2: d = rotr(d + ((a & b) | (c & ~a)) + w1 + k1, q) + a
 * where step 2 uses the value of a produced by step 1.
 *
 * The sums w0 + k0 and w1 + k1 are left in w0 and w1; later rounds that
 * reuse these registers build on those values.
 *
 * Clobbers rT0 and rT1. The instructions of the two steps are interleaved;
 * the "1:" / "2:" prefix in each comment says which step a line belongs to.
 */
70#define R_00_15(a, b, c, d, w0, w1, p, q, off, k0h, k0l, k1h, k1l) \
71 LOAD_DATA(w0, off) /* w0 = data word for step 1 */ \
72 and rT0,b,c; /* 1: f = b and c */ \
73 INC_PTR /* ptr++ (big endian only) */ \
74 andc rT1,d,b; /* 1: f' = d and ~b */ \
75 LOAD_DATA(w1, off+4) /* w1 = data word for step 2 */ \
76 or rT0,rT0,rT1; /* 1: f = f or f' */ \
77 addi w0,w0,k0l; /* 1: w0 += low half of k0 */ \
78 add a,a,rT0; /* 1: a = a + f */ \
79 addis w0,w0,k0h; /* 1: w0 += high half of k0 */ \
80 addis w1,w1,k1h; /* 2: w1 += high half of k1 */ \
81 add a,a,w0; /* 1: a = a + w0 */ \
82 addi w1,w1,k1l; /* 2: w1 += low half of k1 */ \
83 rotrwi a,a,p; /* 1: a = a rotr p */ \
84 add d,d,w1; /* 2: d = d + w1 */ \
85 add a,a,b; /* 1: a = a + b */ \
86 and rT0,a,b; /* 2: f = a and b */ \
87 andc rT1,c,a; /* 2: f' = c and ~a */ \
88 or rT0,rT0,rT1; /* 2: f = f or f' */ \
89 add d,d,rT0; /* 2: d = d + f */ \
90 INC_PTR /* ptr++ (big endian only) */ \
91 rotrwi d,d,q; /* 2: d = d rotr q */ \
92 add d,d,a; /* 2: d = d + a */
93
/*
 * R_16_31: two interleaved steps of rounds 16-31.
 *
 * Parameters are as for R_00_15 minus `off`: nothing is loaded here, w0 and
 * w1 must already hold the values left behind by the earlier use of those
 * registers, and k0/k1 are added on top of them.
 *
 * Computation:
 *   step 1: a = rotr(a + ((c & ~d) | (b & d)) + w0 + k0, p) + b
 *   step 2: d = rotr(d + ((b & ~c) | (a & c)) + w1 + k1, q) + a
 * where step 2 uses the value of a produced by step 1.
 *
 * Clobbers rT0 and rT1; w0 and w1 are left holding w + k.
 */
94#define R_16_31(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
95 andc rT0,c,d; /* 1: f = c and ~d */ \
96 and rT1,b,d; /* 1: f' = b and d */ \
97 addi w0,w0,k0l; /* 1: w0 += low half of k0 */ \
98 or rT0,rT0,rT1; /* 1: f = f or f' */ \
99 addis w0,w0,k0h; /* 1: w0 += high half of k0 */ \
100 add a,a,rT0; /* 1: a = a + f */ \
101 addi w1,w1,k1l; /* 2: w1 += low half of k1 */ \
102 add a,a,w0; /* 1: a = a + w0 */ \
103 addis w1,w1,k1h; /* 2: w1 += high half of k1 */ \
104 andc rT0,b,c; /* 2: f = b and ~c */ \
105 rotrwi a,a,p; /* 1: a = a rotr p */ \
106 add a,a,b; /* 1: a = a + b */ \
107 add d,d,w1; /* 2: d = d + w1 */ \
108 and rT1,a,c; /* 2: f' = a and c */ \
109 or rT0,rT0,rT1; /* 2: f = f or f' */ \
110 add d,d,rT0; /* 2: d = d + f */ \
111 rotrwi d,d,q; /* 2: d = d rotr q */ \
112 add d,d,a; /* 2: d = d + a */
113
/*
 * R_32_47: two interleaved steps of rounds 32-47.
 *
 * Parameters are as for R_16_31 (no load; w0/w1 carry over from earlier
 * rounds and k0/k1 are added on top).
 *
 * Computation:
 *   step 1: a = rotr(a + (b ^ c ^ d) + w0 + k0, p) + b
 *   step 2: d = rotr(d + (a ^ b ^ c) + w1 + k1, q) + a
 * where step 2 uses the value of a produced by step 1. The partial result
 * b ^ c is kept in rT0 and shared by both steps, so rT0 must not be touched
 * between them.
 *
 * Clobbers rT0 and rT1; w0 and w1 are left holding w + k.
 */
114#define R_32_47(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
115 xor rT0,b,c; /* 1: f' = b xor c (reused in step 2) */ \
116 addi w0,w0,k0l; /* 1: w0 += low half of k0 */ \
117 xor rT1,rT0,d; /* 1: f = f' xor d */ \
118 addis w0,w0,k0h; /* 1: w0 += high half of k0 */ \
119 add a,a,rT1; /* 1: a = a + f */ \
120 addi w1,w1,k1l; /* 2: w1 += low half of k1 */ \
121 add a,a,w0; /* 1: a = a + w0 */ \
122 addis w1,w1,k1h; /* 2: w1 += high half of k1 */ \
123 rotrwi a,a,p; /* 1: a = a rotr p */ \
124 add d,d,w1; /* 2: d = d + w1 */ \
125 add a,a,b; /* 1: a = a + b */ \
126 xor rT1,rT0,a; /* 2: f = f' xor a */ \
127 add d,d,rT1; /* 2: d = d + f */ \
128 rotrwi d,d,q; /* 2: d = d rotr q */ \
129 add d,d,a; /* 2: d = d + a */
130
/*
 * R_48_63: two interleaved steps of rounds 48-63.
 *
 * Parameters are as for R_16_31 (no load; w0/w1 carry over from earlier
 * rounds and k0/k1 are added on top).
 *
 * Computation:
 *   step 1: a = rotr(a + ((b | ~d) ^ c) + w0 + k0, p) + b
 *   step 2: d = rotr(d + ((a | ~c) ^ b) + w1 + k1, q) + a
 * where step 2 uses the value of a produced by step 1.
 *
 * Clobbers rT0 only; w0 and w1 are left holding w + k.
 */
131#define R_48_63(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
132 addi w0,w0,k0l; /* 1: w0 += low half of k0 */ \
133 orc rT0,b,d; /* 1: f = b or ~d */ \
134 addis w0,w0,k0h; /* 1: w0 += high half of k0 */ \
135 xor rT0,rT0,c; /* 1: f = f xor c */ \
136 add a,a,w0; /* 1: a = a + w0 */ \
137 addi w1,w1,k1l; /* 2: w1 += low half of k1 */ \
138 add a,a,rT0; /* 1: a = a + f */ \
139 addis w1,w1,k1h; /* 2: w1 += high half of k1 */ \
140 rotrwi a,a,p; /* 1: a = a rotr p */ \
141 add a,a,b; /* 1: a = a + b */ \
142 orc rT0,a,c; /* 2: f = a or ~c */ \
143 add d,d,w1; /* 2: d = d + w1 */ \
144 xor rT0,rT0,b; /* 2: f = f xor b */ \
145 add d,d,rT0; /* 2: d = d + f */ \
146 rotrwi d,d,q; /* 2: d = d rotr q */ \
147 add d,d,a; /* 2: d = d + a */
148
/*
 * ppc_md5_transform
 *
 * Register contract, as established by the code below:
 *   r3 (rHP) - address of four consecutive 32-bit state words; read once
 *              before the loop and written back after every block
 *   r4 (rWP) - address of the input data; 64 bytes are consumed per loop
 *              iteration
 *   r5       - iteration (block) count, moved to CTR
 * NOTE(review): presumably called from C as (state, data, blocks) - confirm
 * the prototype against the caller.
 *
 * The count must be non-zero: the loop is closed with bdnz, which decrements
 * CTR before testing it, so a count of 0 would wrap around instead of
 * skipping the loop.
 *
 * Constant encoding: every constant is passed as a (high, low) pair that is
 * applied with addis/addi. addi sign-extends its immediate, so a low half
 * with bit 15 set is written as a negative number and the high half is one
 * larger to compensate (e.g. 0xd76b, -23432 yields 0xd76aa478).
 *
 * The round macros leave w + k in the w register. The data words are loaded
 * only in rounds 0-15, so the constants passed in rounds 16-63 are deltas
 * relative to what has already been added to the same register, not
 * absolute round constants. Changing the order in which the w registers are
 * used therefore requires recomputing the constants.
 *
 * Rotation amounts are rotate-right counts (rotrwi), i.e. 32 minus the
 * corresponding rotate-left count.
 */
149_GLOBAL(ppc_md5_transform)
150 INITIALIZE
151
152 mtctr r5 /* loop count -> CTR; r5 is reused as rH3 below */
153 lwz rH0,0(rHP) /* load the current state into rH0..rH3 */
154 lwz rH1,4(rHP)
155 lwz rH2,8(rHP)
156 lwz rH3,12(rHP)
157
158ppc_md5_main: /* one iteration per 64-byte block; rH0..rH3 hold the state */
/* rounds 0-15: load the sixteen data words and add the first constants */
159 R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0,
160 0xd76b, -23432, 0xe8c8, -18602)
161 R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8,
162 0x2420, 0x70db, 0xc1be, -12562)
163 R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16,
164 0xf57c, 0x0faf, 0x4788, -14806)
165 R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24,
166 0xa830, 0x4613, 0xfd47, -27391)
167 R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32,
168 0x6981, -26408, 0x8b45, -2129)
169 R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40,
170 0xffff, 0x5bb1, 0x895d, -10306)
171 R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48,
172 0x6b90, 0x1122, 0xfd98, 0x7193)
173 R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56,
174 0xa679, 0x438e, 0x49b4, 0x0821)
175
/* rounds 16-31: constants are deltas on top of the round 0-15 values */
176 R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23,
177 0x0d56, 0x6e0c, 0x1810, 0x6d2d)
178 R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12,
179 0x9d02, -32109, 0x124c, 0x2332)
180 R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23,
181 0x8ea7, 0x4a33, 0x0245, -18270)
182 R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12,
183 0x8eee, -8608, 0xf258, -5095)
184 R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23,
185 0x969d, -10697, 0x1cbe, -15288)
186 R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12,
187 0x3317, 0x3e99, 0xdbd9, 0x7c15)
188 R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23,
189 0xac4b, 0x7772, 0xd8cf, 0x331d)
190 R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12,
191 0x6a28, 0x6dd8, 0x219a, 0x3b68)
192
/* rounds 32-47: constants are deltas on top of the round 16-31 values */
193 R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 21,
194 0x29cb, 0x28e5, 0x4218, -7788)
195 R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16, 9,
196 0x473f, 0x06d1, 0x3aae, 0x3036)
197 R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21,
198 0xaea1, -15134, 0x640b, -11295)
199 R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16, 9,
200 0x8f4c, 0x4887, 0xbc7c, -22499)
201 R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21,
202 0x7eb8, -27199, 0x00ea, 0x6050)
203 R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16, 9,
204 0xe01a, 0x22fe, 0x4447, 0x69c5)
205 R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21,
206 0xb7f3, 0x0253, 0x59b1, 0x4d5b)
207 R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16, 9,
208 0x4701, -27017, 0xc7bd, -19859)
209
/*
 * rounds 48-63: constants are deltas on top of the round 32-47 values.
 * rW00, rW14, rW12 and rW10 are consumed by the first four invocations and
 * are free afterwards, so the old state words are reloaded into them in
 * between the remaining invocations, ready for the final additions.
 */
210 R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22,
211 0x0988, -1462, 0x4c70, -19401)
212 R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11,
213 0xadaf, -5221, 0xfc99, 0x66f7)
214 R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22,
215 0x7e80, -16418, 0xba1e, -25587)
216 R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11,
217 0x4130, 0x380d, 0xe0c5, 0x738d)
218 lwz rW00,0(rHP) /* rW00 = old state word 0 */
219 R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22,
220 0xe837, -30770, 0xde8a, 0x69e8)
221 lwz rW14,4(rHP) /* rW14 = old state word 1 */
222 R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11,
223 0x9e79, 0x260f, 0x256d, -27941)
224 lwz rW12,8(rHP) /* rW12 = old state word 2 */
225 R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22,
226 0xab75, -20775, 0x4f9e, -28397)
227 lwz rW10,12(rHP) /* rW10 = old state word 3 */
228 R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11,
229 0x662b, 0x7c56, 0x11b2, 0x0358)
230
/* new state = old state + result of the 64 steps; store it back */
231 add rH0,rH0,rW00
232 stw rH0,0(rHP)
233 add rH1,rH1,rW14
234 stw rH1,4(rHP)
235 add rH2,rH2,rW12
236 stw rH2,8(rHP)
237 add rH3,rH3,rW10
238 stw rH3,12(rHP)
239 NEXT_BLOCK /* advance rWP to the next block where needed */
240
241 bdnz ppc_md5_main /* --CTR != 0: process the next block */
242
243 FINALIZE
244 blr