// Copyright 2004-2017 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License").  You may not use
// this file except in compliance with the License.  You can obtain a copy
// in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html

// Works on all IA-64 platforms: Linux, HP-UX, Win64i...
// On Win64i compile with ias.exe.
.text

// ADDP is the instruction applied to incoming pointer arguments
// (as "ADDP rX=0,rX") before they are dereferenced.  HP-UX builds
// without _LP64 select addp4; every other configuration uses add,
// which leaves the register value unchanged.
#if !defined(_HPUX_SOURCE) || defined(_LP64)
#define	ADDP	add
#else
#define	ADDP	addp4
#endif
|  | 16 |  | 
// OPENSSL_cpuid_setup
//   Reads no registers and writes none: the whole body is a single
//   return through b0.
.global	OPENSSL_cpuid_setup#
.proc	OPENSSL_cpuid_setup#
OPENSSL_cpuid_setup:
{ .mib
	br.ret.sptk.many	b0
};;
.endp	OPENSSL_cpuid_setup#
|  | 22 |  | 
// OPENSSL_rdtsc
//   Out: r8 = current value of the ar.itc application register.
.global	OPENSSL_rdtsc#
.proc	OPENSSL_rdtsc#
OPENSSL_rdtsc:
{ .mib
	mov			r8=ar.itc
	br.ret.sptk.many	b0
};;
.endp   OPENSSL_rdtsc#
|  | 29 |  | 
// OPENSSL_atomic_add
//   Atomically adds r33 to the 32-bit word at [r32] with a
//   load / cmpxchg4 retry loop.
//   In:      r32 = address of the 32-bit word
//            r33 = amount to add
//   Out:     r8  = the value that was stored, sign-extended by sxt4
//   Scratch: r2, r3, p6, ar.ccv
//   The pointer is passed through ADDP before its first use, exactly
//   as the other pointer-taking entry points in this file do; with
//   ADDP defined as add this leaves r32 unchanged.
.global	OPENSSL_atomic_add#
.proc	OPENSSL_atomic_add#
.align	32
OPENSSL_atomic_add:
{ .mmi
	ADDP		r32=0,r32;;		// must complete before the load below
	ld4		r2=[r32]		// r2 = current contents of the word
	nop.i		0
};;
.Latomic_retry:
{ .mii
	mov		ar.ccv=r2		// value the word is expected to hold
	add		r8=r2,r33		// candidate new value
	mov		r3=r2			// copy of the expectation for the test below
};;
{ .mmi
	mf;;
	cmpxchg4.acq	r2=[r32],r8,ar.ccv	// r2 = value found in memory
	nop.i		0
};;
{ .mib
	cmp.ne		p6,p0=r2,r3		// found != expected: nothing was stored
	nop.i		0
(p6)	br.dpnt		.Latomic_retry		// retry with the value just observed
};;
{ .mib
	nop.m		0
	sxt4		r8=r8
	br.ret.sptk.many	b0
};;
.endp	OPENSSL_atomic_add#
|  | 51 |  | 
// OPENSSL_wipe_cpu
//   Returns a structure comprising a pointer to the top of the stack
//   of the caller (r8 = sp) and a pointer beyond the backing storage
//   for the current register frame (r9).  The latter is required
//   because it might be insufficient to wipe the backing storage for
//   the current frame (as this procedure does); one might have to go
//   further, toward higher addresses, to reach the whole
//   "retroactively" saved context...
//
//   Cleared here: r11, r14-r31, f6-f31 by explicit moves; r127 and
//   f127 once per pass of a rotating loop; and the 8-byte slots that
//   the loop walks downward from ar.bsp+96*8-8.
//   Saved and restored: ar.lc (via r3) and the predicates (via r8).
//   ar.pfs is copied to r2 by alloc.
.global	OPENSSL_wipe_cpu#
.proc	OPENSSL_wipe_cpu#
.align	32
OPENSSL_wipe_cpu:
	.prologue
	.fframe	0
	.save	ar.pfs,r2
	.save	ar.lc,r3
{ .mib
	alloc		r2=ar.pfs,0,96,0,96	// 96 locals, all of them rotating
	mov		r3=ar.lc		// put back before returning
	brp.loop.imp	.Lwipe_sweep,.Lwipe_sweep_end-16
};;
{ .mii
	mov		r9=ar.bsp
	mov		r8=pr			// put back before returning
	mov		ar.lc=96
};;
	.body
{ .mii
	add		r9=96*8-8,r9		// first slot to zero; the loop moves down
	mov		ar.ec=1
};;

// One can sweep double as fast, but then we can not guarantee
// that backing storage is wiped...
// NOTE(review): with ar.lc=96 and ar.ec=1 the body looks like it runs
// 97 times (LC+1), which would place the last st8 at ar.bsp-8 --
// confirm against the br.ctop definition whether 95 was intended.
.Lwipe_sweep:
{ .mfi
	st8		[r9]=r0,-8
	mov		f127=f0
	mov		r127=r0
}
{ .mfb
	nop.m		0
	nop.f		0
	br.ctop.sptk	.Lwipe_sweep
};;
.Lwipe_sweep_end:

// Explicit clearing of r11, r14-r31 and f6-f31.  There are no stops
// between these bundles.
{ .mfi
	mov		r11=r0
	mov		f6=f0
	mov		r14=r0
}
{ .mfi
	mov		r15=r0
	mov		f7=f0
	mov		r16=r0
}
{ .mfi
	mov		r17=r0
	mov		f8=f0
	mov		r18=r0
}
{ .mfi
	mov		r19=r0
	mov		f9=f0
	mov		r20=r0
}
{ .mfi
	mov		r21=r0
	mov		f10=f0
	mov		r22=r0
}
{ .mfi
	mov		r23=r0
	mov		f11=f0
	mov		r24=r0
}
{ .mfi
	mov		r25=r0
	mov		f12=f0
	mov		r26=r0
}
{ .mfi
	mov		r27=r0
	mov		f13=f0
	mov		r28=r0
}
{ .mfi
	mov		r29=r0
	mov		f14=f0
	mov		r30=r0
}
{ .mfi
	mov		r31=r0
	mov		f15=f0
	nop.i		0
}
{ .mfi;	mov		f16=f0			}
{ .mfi;	mov		f17=f0			}
{ .mfi;	mov		f18=f0			}
{ .mfi;	mov		f19=f0			}
{ .mfi;	mov		f20=f0			}
{ .mfi;	mov		f21=f0			}
{ .mfi;	mov		f22=f0			}
{ .mfi;	mov		f23=f0			}
{ .mfi;	mov		f24=f0			}
{ .mfi;	mov		f25=f0			}
{ .mfi;	mov		f26=f0			}
{ .mfi;	mov		f27=f0			}
{ .mfi;	mov		f28=f0			}
{ .mfi;	mov		f29=f0			}
{ .mfi;	mov		f30=f0			}
{ .mfi
	add		r9=96*8+8,r9		// second member of the returned structure
	mov		f31=f0
	mov		pr=r8,0x1ffff		// predicates back from the copy in r8
}
{ .mib
	mov		r8=sp			// first member of the returned structure
	mov		ar.lc=r3
	br.ret.sptk	b0
};;
.endp	OPENSSL_wipe_cpu#
|  | 141 |  | 
// OPENSSL_cleanse
//   Overwrites a buffer with zero bytes.
//   In:      r32 = start of the buffer (passed through ADDP)
//            r33 = length in bytes
//   Scratch: r2, p6, p7; r32 and r33 are consumed.
//   A length of 0 returns at once.  Fewer than 15 bytes are cleared
//   one byte at a time.  Otherwise bytes are cleared until the address
//   is a multiple of 8, then 8 bytes per store, and whatever is left
//   is finished one byte at a time.
.global	OPENSSL_cleanse#
.proc	OPENSSL_cleanse#
OPENSSL_cleanse:
{ .mib
	cmp.eq		p6,p0=0,r33		// len==0
	ADDP		r32=0,r32
(p6)	br.ret.spnt	b0
};;
{ .mib
	and		r2=7,r32		// misalignment of the start address
	cmp.leu		p6,p0=15,r33		// len>=15
(p6)	br.cond.dptk	.Lcleanse_align
};;

// Bytewise path: entered with len>0, runs until len reaches 0.
.Lcleanse_bytes:
{ .mib
	st1		[r32]=r0,1
	cmp.ltu		p6,p7=1,r33		// len>1
}
{ .mbb
	add		r33=-1,r33		// len--
(p6)	br.cond.dptk	.Lcleanse_bytes
(p7)	br.ret.sptk.many	b0
};;

// Clear single bytes until the address is 8-byte aligned.
.Lcleanse_align:
{ .mib
	cmp.eq		p6,p0=0,r2
(p6)	br.cond.dptk	.Lcleanse_words
};;
{ .mmi
	st1		[r32]=r0,1;;
	and		r2=7,r32		// misalignment of the advanced address
}
{ .mib
	add		r33=-1,r33
	br		.Lcleanse_align
};;

// Aligned path: one 8-byte store per pass.
.Lcleanse_words:
{ .mmi
	st8		[r32]=r0,8
	and		r2=-8,r33		// len&~7
	add		r33=-8,r33		// len-=8
};;
{ .mib
	cmp.ltu		p6,p0=8,r2		// ((len+8)&~7)>8
(p6)	br.cond.dptk	.Lcleanse_words
};;

{ .mbb
	cmp.eq		p6,p7=r0,r33		// anything left over?
(p7)	br.cond.dpnt	.Lcleanse_bytes
(p6)	br.ret.sptk.many	b0
};;
.endp	OPENSSL_cleanse#
|  | 178 |  | 
// CRYPTO_memcmp
//   In:      r32, r33 = the two buffers (each passed through ADDP)
//            r34      = number of bytes to compare
//   Out:     r8 = 0 when the length is 0 or every byte pair is equal,
//                 1 otherwise
//   Saved and restored: ar.lc (via r3) and the predicates (via r9).
//   The trip count comes from the length alone (ar.lc=len-1) and the
//   loop has no data-dependent branch: byte differences are xor-ed
//   and or-ed into r8, which is tested once after the loop.
.global	CRYPTO_memcmp#
.proc	CRYPTO_memcmp#
.align	32
.skip	16
CRYPTO_memcmp:
	.prologue
{ .mib
	mov		r8=0
	cmp.eq		p6,p0=0,r34		// len==0?
(p6)	br.ret.spnt	b0
};;
	.save		ar.pfs,r2
{ .mib
	alloc		r2=ar.pfs,3,5,0,8
	.save		ar.lc,r3
	mov		r3=ar.lc
	brp.loop.imp	.Lmemcmp_pipe,.Lmemcmp_pipe_end-16
}
{ .mib
	sub		r10=r34,r0,1		// len-1, loaded into ar.lc below
	.save		pr,r9
	mov		r9=pr
};;
{ .mii
	ADDP		r16=0,r32
	mov		ar.lc=r10
	mov		ar.ec=4
}
{ .mib
	ADDP		r17=0,r33
	mov		pr.rot=1<<16		// only p16 set on entry to the loop
};;

// Software pipeline over rotating registers: p16 loads one byte from
// each buffer, p18 xors the pair loaded two passes earlier, p19 folds
// that xor into r8.
.Lmemcmp_pipe:
{ .mib
(p16)	ld1		r32=[r16],1
(p18)	xor		r34=r34,r38
}
{ .mib
(p16)	ld1		r36=[r17],1
(p19)	or		r8=r8,r35
	br.ctop.sptk	.Lmemcmp_pipe
};;
.Lmemcmp_pipe_end:

{ .mib
	cmp.ne		p6,p0=0,r8		// did any pair differ?
	mov		ar.lc=r3
};;
{ .mib
(p6)	mov		r8=1			// collapse any non-zero result to 1
	mov		pr=r9,0x1ffff
	br.ret.sptk.many	b0
};;
.endp	CRYPTO_memcmp#
|  | 218 |  | 
// OPENSSL_instrument_bus
//   In:      r32 = address of an array of 32-bit words (passed through ADDP)
//            r33 = cnt, number of words to visit
//   Out:     r8  = cnt minus the value r33 has when the loop exits
//   Scratch: r2, r3, r9, r10, p6, ar.ccv
//   Per pass: read ar.itc, take the difference from the previous
//   reading, flush the line at [r32] with fc, reload the word, and add
//   the difference to it with cmpxchg4.  The value cmpxchg4 returns in
//   r3 is not examined, so there is no retry.
.global	OPENSSL_instrument_bus#
.proc	OPENSSL_instrument_bus#
OPENSSL_instrument_bus:
{ .mmi
	mov		r2=r33			// put aside cnt
	ADDP		r32=0,r32
}
{ .mmi
	mov		r8=ar.itc;;
	mov		r10=r0			// diff=0
	mov		r9=r8			// lasttick=tick
};;

// One flush/load/cmpxchg round on the first word before the loop,
// with diff still 0 and the pointer not advanced.
{ .mmi
	fc		r32;;
	ld4		r8=[r32]
};;
{ .mmi
	mf
	mov		ar.ccv=r8
	add		r8=r8,r10
};;
{ .mmi
	cmpxchg4.acq	r3=[r32],r8,ar.ccv
};;
.Lbus_loop:
{ .mmi
	mov		r8=ar.itc;;
	sub		r10=r8,r9		// diff=tick-lasttick
	mov		r9=r8			// lasttick=tick
};;
{ .mmi
	fc		r32;;
	ld4		r8=[r32]
};;
{ .mmi
	mf
	mov		ar.ccv=r8
	add		r8=r8,r10		// word+diff
};;
{ .mmi
	cmpxchg4.acq	r3=[r32],r8,ar.ccv
	add		r33=-1,r33		// --cnt
	add		r32=4,r32		// ++out
};;
{ .mib
	cmp4.ne		p6,p0=0,r33		// 32-bit test of cnt
(p6)	br.cond.dptk	.Lbus_loop
};;

{ .mib
	sub		r8=r2,r33
	br.ret.sptk.many	b0
};;
.endp	OPENSSL_instrument_bus#
|  | 253 |  | 
// OPENSSL_instrument_bus2
//   In:      r32 = address of an array of 32-bit words (passed through ADDP)
//            r33 = cnt
//            r34 = max, upper bound on the number of passes
//   Out:     r8  = cnt minus the value r33 has on exit
//   Scratch: r2, r3, r9, r10, r11, p6, p7, ar.ccv
//   Like OPENSSL_instrument_bus, but cnt is decremented and the
//   pointer advanced only on passes where the tick difference is not
//   equal to the previous one.  The loop also ends when max, which is
//   decremented every pass, reaches 0.
.global	OPENSSL_instrument_bus2#
.proc	OPENSSL_instrument_bus2#
OPENSSL_instrument_bus2:
{ .mmi
	mov		r2=r33			// put aside cnt
	ADDP		r32=0,r32
}
{ .mmi
	mov		r8=ar.itc;;
	mov		r10=r0			// diff=0
	mov		r9=r8			// lasttick=tick
};;

// One flush/load/cmpxchg round before the loop, with diff still 0.
{ .mmi
	fc		r32;;
	ld4		r8=[r32]
};;
{ .mmi
	mf
	mov		ar.ccv=r8
	add		r8=r8,r10
};;
{ .mmi
	cmpxchg4.acq	r3=[r32],r8,ar.ccv
};;

{ .mmi
	mov		r8=ar.itc;;
	sub		r10=r8,r9		// diff=tick-lasttick
	mov		r9=r8			// lasttick=tick
};;
.Lbus2_loop:
{ .mmi
	mov		r11=r10			// lastdiff=diff
	add		r34=-1,r34		// --max
};;
{ .mmi
	fc		r32;;
	ld4		r8=[r32]
	cmp4.eq		p6,p0=0,r34		// max used up?
};;
{ .mmi
	mf
	mov		ar.ccv=r8
	add		r8=r8,r10		// word+diff
};;
{ .mmb
	cmpxchg4.acq	r3=[r32],r8,ar.ccv
(p6)	br.cond.spnt	.Lbus2_done
};;

{ .mmi
	mov		r8=ar.itc;;
	sub		r10=r8,r9		// diff=tick-lasttick
	mov		r9=r8			// lasttick=tick
};;
{ .mmi
	cmp.ne		p6,p0=r10,r11;;		// diff!=lastdiff
(p6)	add		r33=-1,r33		// conditional --cnt
};;
{ .mib
	cmp4.ne		p7,p0=0,r33
(p6)	add		r32=4,r32		// conditional ++out
(p7)	br.cond.dptk	.Lbus2_loop
};;
.Lbus2_done:
{ .mib
	sub		r8=r2,r33
	br.ret.sptk.many	b0
};;
.endp	OPENSSL_instrument_bus2#