#! /usr/bin/env perl
# Copyright 2005-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


 | 10 | $flavour = shift; | 
 | 11 | $output  = shift; | 
 | 12 | if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } | 
 | 13 |  | 
 | 14 | $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); | 
 | 15 |  | 
 | 16 | $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; | 
 | 17 | ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or | 
 | 18 | ( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or | 
 | 19 | die "can't locate x86_64-xlate.pl"; | 
 | 20 |  | 
 | 21 | open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; | 
 | 22 | *STDOUT=*OUT; | 
 | 23 |  | 
 | 24 | ($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order | 
 | 25 | 				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order | 
 | 26 |  | 
 | 27 | print<<___; | 
 | 28 | .extern		OPENSSL_cpuid_setup | 
 | 29 | .hidden		OPENSSL_cpuid_setup | 
 | 30 | .section	.init | 
 | 31 | 	call	OPENSSL_cpuid_setup | 
 | 32 |  | 
 | 33 | .hidden	OPENSSL_ia32cap_P | 
 | 34 | .comm	OPENSSL_ia32cap_P,16,4 | 
 | 35 |  | 
 | 36 | .text | 
 | 37 |  | 
 | 38 | .globl	OPENSSL_atomic_add | 
 | 39 | .type	OPENSSL_atomic_add,\@abi-omnipotent | 
 | 40 | .align	16 | 
 | 41 | OPENSSL_atomic_add: | 
 | 42 | .cfi_startproc | 
 | 43 | 	movl	($arg1),%eax | 
 | 44 | .Lspin:	leaq	($arg2,%rax),%r8 | 
 | 45 | 	.byte	0xf0		# lock | 
 | 46 | 	cmpxchgl	%r8d,($arg1) | 
 | 47 | 	jne	.Lspin | 
 | 48 | 	movl	%r8d,%eax | 
 | 49 | 	.byte	0x48,0x98	# cltq/cdqe | 
 | 50 | 	ret | 
 | 51 | .cfi_endproc | 
 | 52 | .size	OPENSSL_atomic_add,.-OPENSSL_atomic_add | 
 | 53 |  | 
 | 54 | .globl	OPENSSL_rdtsc | 
 | 55 | .type	OPENSSL_rdtsc,\@abi-omnipotent | 
 | 56 | .align	16 | 
 | 57 | OPENSSL_rdtsc: | 
 | 58 | .cfi_startproc | 
 | 59 | 	rdtsc | 
 | 60 | 	shl	\$32,%rdx | 
 | 61 | 	or	%rdx,%rax | 
 | 62 | 	ret | 
 | 63 | .cfi_endproc | 
 | 64 | .size	OPENSSL_rdtsc,.-OPENSSL_rdtsc | 
 | 65 |  | 
 | 66 | .globl	OPENSSL_ia32_cpuid | 
 | 67 | .type	OPENSSL_ia32_cpuid,\@function,1 | 
 | 68 | .align	16 | 
 | 69 | OPENSSL_ia32_cpuid: | 
 | 70 | .cfi_startproc | 
 | 71 | 	mov	%rbx,%r8		# save %rbx | 
 | 72 | .cfi_register	%rbx,%r8 | 
 | 73 |  | 
 | 74 | 	xor	%eax,%eax | 
 | 75 | 	mov	%rax,8(%rdi)		# clear extended feature flags | 
 | 76 | 	cpuid | 
 | 77 | 	mov	%eax,%r11d		# max value for standard query level | 
 | 78 |  | 
 | 79 | 	xor	%eax,%eax | 
 | 80 | 	cmp	\$0x756e6547,%ebx	# "Genu" | 
 | 81 | 	setne	%al | 
 | 82 | 	mov	%eax,%r9d | 
 | 83 | 	cmp	\$0x49656e69,%edx	# "ineI" | 
 | 84 | 	setne	%al | 
 | 85 | 	or	%eax,%r9d | 
 | 86 | 	cmp	\$0x6c65746e,%ecx	# "ntel" | 
 | 87 | 	setne	%al | 
 | 88 | 	or	%eax,%r9d		# 0 indicates Intel CPU | 
 | 89 | 	jz	.Lintel | 
 | 90 |  | 
 | 91 | 	cmp	\$0x68747541,%ebx	# "Auth" | 
 | 92 | 	setne	%al | 
 | 93 | 	mov	%eax,%r10d | 
 | 94 | 	cmp	\$0x69746E65,%edx	# "enti" | 
 | 95 | 	setne	%al | 
 | 96 | 	or	%eax,%r10d | 
 | 97 | 	cmp	\$0x444D4163,%ecx	# "cAMD" | 
 | 98 | 	setne	%al | 
 | 99 | 	or	%eax,%r10d		# 0 indicates AMD CPU | 
 | 100 | 	jnz	.Lintel | 
 | 101 |  | 
 | 102 | 	# AMD specific | 
 | 103 | 	mov	\$0x80000000,%eax | 
 | 104 | 	cpuid | 
 | 105 | 	cmp	\$0x80000001,%eax | 
 | 106 | 	jb	.Lintel | 
 | 107 | 	mov	%eax,%r10d | 
 | 108 | 	mov	\$0x80000001,%eax | 
 | 109 | 	cpuid | 
 | 110 | 	or	%ecx,%r9d | 
 | 111 | 	and	\$0x00000801,%r9d	# isolate AMD XOP bit, 1<<11 | 
 | 112 |  | 
 | 113 | 	cmp	\$0x80000008,%r10d | 
 | 114 | 	jb	.Lintel | 
 | 115 |  | 
 | 116 | 	mov	\$0x80000008,%eax | 
 | 117 | 	cpuid | 
 | 118 | 	movzb	%cl,%r10		# number of cores - 1 | 
 | 119 | 	inc	%r10			# number of cores | 
 | 120 |  | 
 | 121 | 	mov	\$1,%eax | 
 | 122 | 	cpuid | 
 | 123 | 	bt	\$28,%edx		# test hyper-threading bit | 
 | 124 | 	jnc	.Lgeneric | 
 | 125 | 	shr	\$16,%ebx		# number of logical processors | 
 | 126 | 	cmp	%r10b,%bl | 
 | 127 | 	ja	.Lgeneric | 
 | 128 | 	and	\$0xefffffff,%edx	# ~(1<<28) | 
 | 129 | 	jmp	.Lgeneric | 
 | 130 |  | 
 | 131 | .Lintel: | 
 | 132 | 	cmp	\$4,%r11d | 
 | 133 | 	mov	\$-1,%r10d | 
 | 134 | 	jb	.Lnocacheinfo | 
 | 135 |  | 
 | 136 | 	mov	\$4,%eax | 
 | 137 | 	mov	\$0,%ecx		# query L1D | 
 | 138 | 	cpuid | 
 | 139 | 	mov	%eax,%r10d | 
 | 140 | 	shr	\$14,%r10d | 
 | 141 | 	and	\$0xfff,%r10d		# number of cores -1 per L1D | 
 | 142 |  | 
 | 143 | .Lnocacheinfo: | 
 | 144 | 	mov	\$1,%eax | 
 | 145 | 	cpuid | 
 | 146 | 	movd	%eax,%xmm0		# put aside processor id | 
 | 147 | 	and	\$0xbfefffff,%edx	# force reserved bits to 0 | 
 | 148 | 	cmp	\$0,%r9d | 
 | 149 | 	jne	.Lnotintel | 
 | 150 | 	or	\$0x40000000,%edx	# set reserved bit#30 on Intel CPUs | 
 | 151 | 	and	\$15,%ah | 
 | 152 | 	cmp	\$15,%ah		# examine Family ID | 
 | 153 | 	jne	.LnotP4 | 
 | 154 | 	or	\$0x00100000,%edx	# set reserved bit#20 to engage RC4_CHAR | 
 | 155 | .LnotP4: | 
 | 156 | 	cmp	\$6,%ah | 
 | 157 | 	jne	.Lnotintel | 
 | 158 | 	and	\$0x0fff0ff0,%eax | 
 | 159 | 	cmp	\$0x00050670,%eax	# Knights Landing | 
 | 160 | 	je	.Lknights | 
 | 161 | 	cmp	\$0x00080650,%eax	# Knights Mill (according to sde) | 
 | 162 | 	jne	.Lnotintel | 
 | 163 | .Lknights: | 
 | 164 | 	and	\$0xfbffffff,%ecx	# clear XSAVE flag to mimic Silvermont | 
 | 165 |  | 
 | 166 | .Lnotintel: | 
 | 167 | 	bt	\$28,%edx		# test hyper-threading bit | 
 | 168 | 	jnc	.Lgeneric | 
 | 169 | 	and	\$0xefffffff,%edx	# ~(1<<28) | 
 | 170 | 	cmp	\$0,%r10d | 
 | 171 | 	je	.Lgeneric | 
 | 172 |  | 
 | 173 | 	or	\$0x10000000,%edx	# 1<<28 | 
 | 174 | 	shr	\$16,%ebx | 
 | 175 | 	cmp	\$1,%bl			# see if cache is shared | 
 | 176 | 	ja	.Lgeneric | 
 | 177 | 	and	\$0xefffffff,%edx	# ~(1<<28) | 
 | 178 | .Lgeneric: | 
 | 179 | 	and	\$0x00000800,%r9d	# isolate AMD XOP flag | 
 | 180 | 	and	\$0xfffff7ff,%ecx | 
 | 181 | 	or	%ecx,%r9d		# merge AMD XOP flag | 
 | 182 |  | 
 | 183 | 	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx | 
 | 184 |  | 
 | 185 | 	cmp	\$7,%r11d | 
 | 186 | 	jb	.Lno_extended_info | 
 | 187 | 	mov	\$7,%eax | 
 | 188 | 	xor	%ecx,%ecx | 
 | 189 | 	cpuid | 
 | 190 | 	bt	\$26,%r9d		# check XSAVE bit, cleared on Knights | 
 | 191 | 	jc	.Lnotknights | 
 | 192 | 	and	\$0xfff7ffff,%ebx	# clear ADCX/ADOX flag | 
 | 193 | .Lnotknights: | 
 | 194 | 	movd	%xmm0,%eax		# restore processor id | 
 | 195 | 	and	\$0x0fff0ff0,%eax | 
 | 196 | 	cmp	\$0x00050650,%eax	# Skylake-X | 
 | 197 | 	jne	.Lnotskylakex | 
 | 198 | 	and	\$0xfffeffff,%ebx	# ~(1<<16) | 
 | 199 | 					# suppress AVX512F flag on Skylake-X | 
 | 200 | .Lnotskylakex: | 
 | 201 | 	mov	%ebx,8(%rdi)		# save extended feature flags | 
 | 202 | 	mov	%ecx,12(%rdi) | 
 | 203 | .Lno_extended_info: | 
 | 204 |  | 
 | 205 | 	bt	\$27,%r9d		# check OSXSAVE bit | 
 | 206 | 	jnc	.Lclear_avx | 
 | 207 | 	xor	%ecx,%ecx		# XCR0 | 
 | 208 | 	.byte	0x0f,0x01,0xd0		# xgetbv | 
 | 209 | 	and	\$0xe6,%eax		# isolate XMM, YMM and ZMM state support | 
 | 210 | 	cmp	\$0xe6,%eax | 
 | 211 | 	je	.Ldone | 
 | 212 | 	andl	\$0x3fdeffff,8(%rdi)	# ~(1<<31|1<<30|1<<21|1<<16) | 
 | 213 | 					# clear AVX512F+BW+VL+FIMA, all of | 
 | 214 | 					# them are EVEX-encoded, which requires | 
 | 215 | 					# ZMM state support even if one uses | 
 | 216 | 					# only XMM and YMM :-( | 
 | 217 | 	and	\$6,%eax		# isolate XMM and YMM state support | 
 | 218 | 	cmp	\$6,%eax | 
 | 219 | 	je	.Ldone | 
 | 220 | .Lclear_avx: | 
 | 221 | 	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11) | 
 | 222 | 	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits | 
 | 223 | 	mov	\$0x3fdeffdf,%eax	# ~(1<<31|1<<30|1<<21|1<<16|1<<5) | 
 | 224 | 	and	%eax,8(%rdi)		# clear AVX2 and AVX512* bits | 
 | 225 | .Ldone: | 
 | 226 | 	shl	\$32,%r9 | 
 | 227 | 	mov	%r10d,%eax | 
 | 228 | 	mov	%r8,%rbx		# restore %rbx | 
 | 229 | .cfi_restore	%rbx | 
 | 230 | 	or	%r9,%rax | 
 | 231 | 	ret | 
 | 232 | .cfi_endproc | 
 | 233 | .size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid | 
 | 234 |  | 
 | 235 | .globl  OPENSSL_cleanse | 
 | 236 | .type   OPENSSL_cleanse,\@abi-omnipotent | 
 | 237 | .align  16 | 
 | 238 | OPENSSL_cleanse: | 
 | 239 | .cfi_startproc | 
 | 240 | 	xor	%rax,%rax | 
 | 241 | 	cmp	\$15,$arg2 | 
 | 242 | 	jae	.Lot | 
 | 243 | 	cmp	\$0,$arg2 | 
 | 244 | 	je	.Lret | 
 | 245 | .Little: | 
 | 246 | 	mov	%al,($arg1) | 
 | 247 | 	sub	\$1,$arg2 | 
 | 248 | 	lea	1($arg1),$arg1 | 
 | 249 | 	jnz	.Little | 
 | 250 | .Lret: | 
 | 251 | 	ret | 
 | 252 | .align	16 | 
 | 253 | .Lot: | 
 | 254 | 	test	\$7,$arg1 | 
 | 255 | 	jz	.Laligned | 
 | 256 | 	mov	%al,($arg1) | 
 | 257 | 	lea	-1($arg2),$arg2 | 
 | 258 | 	lea	1($arg1),$arg1 | 
 | 259 | 	jmp	.Lot | 
 | 260 | .Laligned: | 
 | 261 | 	mov	%rax,($arg1) | 
 | 262 | 	lea	-8($arg2),$arg2 | 
 | 263 | 	test	\$-8,$arg2 | 
 | 264 | 	lea	8($arg1),$arg1 | 
 | 265 | 	jnz	.Laligned | 
 | 266 | 	cmp	\$0,$arg2 | 
 | 267 | 	jne	.Little | 
 | 268 | 	ret | 
 | 269 | .cfi_endproc | 
 | 270 | .size	OPENSSL_cleanse,.-OPENSSL_cleanse | 
 | 271 |  | 
 | 272 | .globl  CRYPTO_memcmp | 
 | 273 | .type   CRYPTO_memcmp,\@abi-omnipotent | 
 | 274 | .align  16 | 
 | 275 | CRYPTO_memcmp: | 
 | 276 | .cfi_startproc | 
 | 277 | 	xor	%rax,%rax | 
 | 278 | 	xor	%r10,%r10 | 
 | 279 | 	cmp	\$0,$arg3 | 
 | 280 | 	je	.Lno_data | 
 | 281 | 	cmp	\$16,$arg3 | 
 | 282 | 	jne	.Loop_cmp | 
 | 283 | 	mov	($arg1),%r10 | 
 | 284 | 	mov	8($arg1),%r11 | 
 | 285 | 	mov	\$1,$arg3 | 
 | 286 | 	xor	($arg2),%r10 | 
 | 287 | 	xor	8($arg2),%r11 | 
 | 288 | 	or	%r11,%r10 | 
 | 289 | 	cmovnz	$arg3,%rax | 
 | 290 | 	ret | 
 | 291 |  | 
 | 292 | .align	16 | 
 | 293 | .Loop_cmp: | 
 | 294 | 	mov	($arg1),%r10b | 
 | 295 | 	lea	1($arg1),$arg1 | 
 | 296 | 	xor	($arg2),%r10b | 
 | 297 | 	lea	1($arg2),$arg2 | 
 | 298 | 	or	%r10b,%al | 
 | 299 | 	dec	$arg3 | 
 | 300 | 	jnz	.Loop_cmp | 
 | 301 | 	neg	%rax | 
 | 302 | 	shr	\$63,%rax | 
 | 303 | .Lno_data: | 
 | 304 | 	ret | 
 | 305 | .cfi_endproc | 
 | 306 | .size	CRYPTO_memcmp,.-CRYPTO_memcmp | 
 | 307 | ___ | 
 | 308 |  | 
 | 309 | print<<___ if (!$win64); | 
 | 310 | .globl	OPENSSL_wipe_cpu | 
 | 311 | .type	OPENSSL_wipe_cpu,\@abi-omnipotent | 
 | 312 | .align	16 | 
 | 313 | OPENSSL_wipe_cpu: | 
 | 314 | .cfi_startproc | 
 | 315 | 	pxor	%xmm0,%xmm0 | 
 | 316 | 	pxor	%xmm1,%xmm1 | 
 | 317 | 	pxor	%xmm2,%xmm2 | 
 | 318 | 	pxor	%xmm3,%xmm3 | 
 | 319 | 	pxor	%xmm4,%xmm4 | 
 | 320 | 	pxor	%xmm5,%xmm5 | 
 | 321 | 	pxor	%xmm6,%xmm6 | 
 | 322 | 	pxor	%xmm7,%xmm7 | 
 | 323 | 	pxor	%xmm8,%xmm8 | 
 | 324 | 	pxor	%xmm9,%xmm9 | 
 | 325 | 	pxor	%xmm10,%xmm10 | 
 | 326 | 	pxor	%xmm11,%xmm11 | 
 | 327 | 	pxor	%xmm12,%xmm12 | 
 | 328 | 	pxor	%xmm13,%xmm13 | 
 | 329 | 	pxor	%xmm14,%xmm14 | 
 | 330 | 	pxor	%xmm15,%xmm15 | 
 | 331 | 	xorq	%rcx,%rcx | 
 | 332 | 	xorq	%rdx,%rdx | 
 | 333 | 	xorq	%rsi,%rsi | 
 | 334 | 	xorq	%rdi,%rdi | 
 | 335 | 	xorq	%r8,%r8 | 
 | 336 | 	xorq	%r9,%r9 | 
 | 337 | 	xorq	%r10,%r10 | 
 | 338 | 	xorq	%r11,%r11 | 
 | 339 | 	leaq	8(%rsp),%rax | 
 | 340 | 	ret | 
 | 341 | .cfi_endproc | 
 | 342 | .size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu | 
 | 343 | ___ | 
 | 344 | print<<___ if ($win64); | 
 | 345 | .globl	OPENSSL_wipe_cpu | 
 | 346 | .type	OPENSSL_wipe_cpu,\@abi-omnipotent | 
 | 347 | .align	16 | 
 | 348 | OPENSSL_wipe_cpu: | 
 | 349 | 	pxor	%xmm0,%xmm0 | 
 | 350 | 	pxor	%xmm1,%xmm1 | 
 | 351 | 	pxor	%xmm2,%xmm2 | 
 | 352 | 	pxor	%xmm3,%xmm3 | 
 | 353 | 	pxor	%xmm4,%xmm4 | 
 | 354 | 	pxor	%xmm5,%xmm5 | 
 | 355 | 	xorq	%rcx,%rcx | 
 | 356 | 	xorq	%rdx,%rdx | 
 | 357 | 	xorq	%r8,%r8 | 
 | 358 | 	xorq	%r9,%r9 | 
 | 359 | 	xorq	%r10,%r10 | 
 | 360 | 	xorq	%r11,%r11 | 
 | 361 | 	leaq	8(%rsp),%rax | 
 | 362 | 	ret | 
 | 363 | .size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu | 
 | 364 | ___ | 
 | 365 | { | 
 | 366 | my $out="%r10"; | 
 | 367 | my $cnt="%rcx"; | 
 | 368 | my $max="%r11"; | 
 | 369 | my $lasttick="%r8d"; | 
 | 370 | my $lastdiff="%r9d"; | 
 | 371 | my $redzone=win64?8:-8; | 
 | 372 |  | 
 | 373 | print<<___; | 
 | 374 | .globl	OPENSSL_instrument_bus | 
 | 375 | .type	OPENSSL_instrument_bus,\@abi-omnipotent | 
 | 376 | .align	16 | 
 | 377 | OPENSSL_instrument_bus: | 
 | 378 | .cfi_startproc | 
 | 379 | 	mov	$arg1,$out	# tribute to Win64 | 
 | 380 | 	mov	$arg2,$cnt | 
 | 381 | 	mov	$arg2,$max | 
 | 382 |  | 
 | 383 | 	rdtsc			# collect 1st tick | 
 | 384 | 	mov	%eax,$lasttick	# lasttick = tick | 
 | 385 | 	mov	\$0,$lastdiff	# lastdiff = 0 | 
 | 386 | 	clflush	($out) | 
 | 387 | 	.byte	0xf0		# lock | 
 | 388 | 	add	$lastdiff,($out) | 
 | 389 | 	jmp	.Loop | 
 | 390 | .align	16 | 
 | 391 | .Loop:	rdtsc | 
 | 392 | 	mov	%eax,%edx | 
 | 393 | 	sub	$lasttick,%eax | 
 | 394 | 	mov	%edx,$lasttick | 
 | 395 | 	mov	%eax,$lastdiff | 
 | 396 | 	clflush	($out) | 
 | 397 | 	.byte	0xf0		# lock | 
 | 398 | 	add	%eax,($out) | 
 | 399 | 	lea	4($out),$out | 
 | 400 | 	sub	\$1,$cnt | 
 | 401 | 	jnz	.Loop | 
 | 402 |  | 
 | 403 | 	mov	$max,%rax | 
 | 404 | 	ret | 
 | 405 | .cfi_endproc | 
 | 406 | .size	OPENSSL_instrument_bus,.-OPENSSL_instrument_bus | 
 | 407 |  | 
 | 408 | .globl	OPENSSL_instrument_bus2 | 
 | 409 | .type	OPENSSL_instrument_bus2,\@abi-omnipotent | 
 | 410 | .align	16 | 
 | 411 | OPENSSL_instrument_bus2: | 
 | 412 | .cfi_startproc | 
 | 413 | 	mov	$arg1,$out	# tribute to Win64 | 
 | 414 | 	mov	$arg2,$cnt | 
 | 415 | 	mov	$arg3,$max | 
 | 416 | 	mov	$cnt,$redzone(%rsp) | 
 | 417 |  | 
 | 418 | 	rdtsc			# collect 1st tick | 
 | 419 | 	mov	%eax,$lasttick	# lasttick = tick | 
 | 420 | 	mov	\$0,$lastdiff	# lastdiff = 0 | 
 | 421 |  | 
 | 422 | 	clflush	($out) | 
 | 423 | 	.byte	0xf0		# lock | 
 | 424 | 	add	$lastdiff,($out) | 
 | 425 |  | 
 | 426 | 	rdtsc			# collect 1st diff | 
 | 427 | 	mov	%eax,%edx | 
 | 428 | 	sub	$lasttick,%eax	# diff | 
 | 429 | 	mov	%edx,$lasttick	# lasttick = tick | 
 | 430 | 	mov	%eax,$lastdiff	# lastdiff = diff | 
 | 431 | .Loop2: | 
 | 432 | 	clflush	($out) | 
 | 433 | 	.byte	0xf0		# lock | 
 | 434 | 	add	%eax,($out)	# accumulate diff | 
 | 435 |  | 
 | 436 | 	sub	\$1,$max | 
 | 437 | 	jz	.Ldone2 | 
 | 438 |  | 
 | 439 | 	rdtsc | 
 | 440 | 	mov	%eax,%edx | 
 | 441 | 	sub	$lasttick,%eax	# diff | 
 | 442 | 	mov	%edx,$lasttick	# lasttick = tick | 
 | 443 | 	cmp	$lastdiff,%eax | 
 | 444 | 	mov	%eax,$lastdiff	# lastdiff = diff | 
 | 445 | 	mov	\$0,%edx | 
 | 446 | 	setne	%dl | 
 | 447 | 	sub	%rdx,$cnt	# conditional --$cnt | 
 | 448 | 	lea	($out,%rdx,4),$out	# conditional ++$out | 
 | 449 | 	jnz	.Loop2 | 
 | 450 |  | 
 | 451 | .Ldone2: | 
 | 452 | 	mov	$redzone(%rsp),%rax | 
 | 453 | 	sub	$cnt,%rax | 
 | 454 | 	ret | 
 | 455 | .cfi_endproc | 
 | 456 | .size	OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2 | 
 | 457 | ___ | 
 | 458 | } | 
 | 459 |  | 
 | 460 | sub gen_random { | 
 | 461 | my $rdop = shift; | 
 | 462 | print<<___; | 
 | 463 | .globl	OPENSSL_ia32_${rdop}_bytes | 
 | 464 | .type	OPENSSL_ia32_${rdop}_bytes,\@abi-omnipotent | 
 | 465 | .align	16 | 
 | 466 | OPENSSL_ia32_${rdop}_bytes: | 
 | 467 | .cfi_startproc | 
 | 468 | 	xor	%rax, %rax	# return value | 
 | 469 | 	cmp	\$0,$arg2 | 
 | 470 | 	je	.Ldone_${rdop}_bytes | 
 | 471 |  | 
 | 472 | 	mov	\$8,%r11 | 
 | 473 | .Loop_${rdop}_bytes: | 
 | 474 | 	${rdop}	%r10 | 
 | 475 | 	jc	.Lbreak_${rdop}_bytes | 
 | 476 | 	dec	%r11 | 
 | 477 | 	jnz	.Loop_${rdop}_bytes | 
 | 478 | 	jmp	.Ldone_${rdop}_bytes | 
 | 479 |  | 
 | 480 | .align	16 | 
 | 481 | .Lbreak_${rdop}_bytes: | 
 | 482 | 	cmp	\$8,$arg2 | 
 | 483 | 	jb	.Ltail_${rdop}_bytes | 
 | 484 | 	mov	%r10,($arg1) | 
 | 485 | 	lea	8($arg1),$arg1 | 
 | 486 | 	add	\$8,%rax | 
 | 487 | 	sub	\$8,$arg2 | 
 | 488 | 	jz	.Ldone_${rdop}_bytes | 
 | 489 | 	mov	\$8,%r11 | 
 | 490 | 	jmp	.Loop_${rdop}_bytes | 
 | 491 |  | 
 | 492 | .align	16 | 
 | 493 | .Ltail_${rdop}_bytes: | 
 | 494 | 	mov	%r10b,($arg1) | 
 | 495 | 	lea	1($arg1),$arg1 | 
 | 496 | 	inc	%rax | 
 | 497 | 	shr	\$8,%r10 | 
 | 498 | 	dec	$arg2 | 
 | 499 | 	jnz	.Ltail_${rdop}_bytes | 
 | 500 |  | 
 | 501 | .Ldone_${rdop}_bytes: | 
 | 502 | 	xor	%r10,%r10	# Clear sensitive data from register | 
 | 503 | 	ret | 
 | 504 | .cfi_endproc | 
 | 505 | .size	OPENSSL_ia32_${rdop}_bytes,.-OPENSSL_ia32_${rdop}_bytes | 
 | 506 | ___ | 
 | 507 | } | 
 | 508 | gen_random("rdrand"); | 
 | 509 | gen_random("rdseed"); | 
 | 510 |  | 
 | 511 | close STDOUT or die "error closing STDOUT: $!";	# flush |