| xf.li | bdd93d5 | 2023-05-12 07:10:14 -0700 | [diff] [blame] | 1 | /* Copyright (C) 1996-2016 Free Software Foundation, Inc. | 
|  | 2 | Contributed by Richard Henderson (rth@tamu.edu) | 
|  | 3 | This file is part of the GNU C Library. | 
|  | 4 |  | 
|  | 5 | The GNU C Library is free software; you can redistribute it and/or | 
|  | 6 | modify it under the terms of the GNU Lesser General Public | 
|  | 7 | License as published by the Free Software Foundation; either | 
|  | 8 | version 2.1 of the License, or (at your option) any later version. | 
|  | 9 |  | 
|  | 10 | The GNU C Library is distributed in the hope that it will be useful, | 
|  | 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
|  | 13 | Lesser General Public License for more details. | 
|  | 14 |  | 
|  | 15 | You should have received a copy of the GNU Lesser General Public | 
|  | 16 | License along with the GNU C Library.  If not, see | 
|  | 17 | <http://www.gnu.org/licenses/>.  */ | 
|  | 18 |  | 
|  | 19 | /* Fill a block of memory with zeros.  Optimized for the Alpha architecture: | 
|  | 20 |  | 
|  | 21 | - memory accessed as aligned quadwords only | 
|  | 22 | - destination memory not read unless needed for good cache behaviour | 
|  | 23 | - basic blocks arranged to optimize branch prediction for full-quadword | 
|  | 24 | aligned memory blocks. | 
|  | 25 | - partial head and tail quadwords constructed with byte-mask instructions | 
|  | 26 |  | 
|  | 27 | This is generally scheduled for the EV5 (got to look out for my own | 
|  | 28 | interests :-), but with EV4 needs in mind.  There *should* be no more | 
|  | 29 | stalls for the EV4 than there are for the EV5. | 
|  | 30 | */ | 
|  | 31 |  | 
|  | 32 |  | 
|  | 33 | #include <sysdep.h> | 
|  | 34 |  | 
|  | 35 | .set noat	# assembler macros may not clobber AT; the PROF path below uses AT itself | 
|  | 36 | .set noreorder	# ask the assembler to keep the hand-scheduled instruction order | 
|  | 37 |  | 
|  | 38 | .text | 
|  | 39 | .type	__bzero, @function	# __bzero is a function symbol ... | 
|  | 40 | .globl	__bzero	# ... visible to other object files | 
|  | 41 | .usepv	__bzero, USEPV_PROF	# declares how pv is used; USEPV_PROF is defined outside this file, presumably in sysdep.h.  pv is read only under PROF | 
|  | 42 |  | 
|  | 43 | cfi_startproc	# open the unwind-info region; the matching cfi_endproc follows the last instruction | 
|  | 44 |  | 
|  | 45 | /* bzero_loop: zero t3 whole quadwords, then clear the partial final one.  On entry to this basic block: | 
|  | 46 | t3 == loop counter (whole quadwords still to be zeroed) | 
|  | 47 | t4 == bytes in partial final word (0 means there is no tail) | 
|  | 48 | a0 == possibly misaligned destination pointer (stq_u/ldq_u ignore its low 3 bits)  */ | 
|  | 49 |  | 
|  | 50 | .align 3	# 8-byte align the loop entry | 
|  | 51 | bzero_loop: | 
|  | 52 | beq	t3, $tail	# no whole quadwords: go straight to the tail | 
|  | 53 | blbc	t3, 0f		# skip single store if count even, enter the 2x loop | 
|  | 54 |  | 
|  | 55 | stq_u	zero, 0(a0)	# e0    : odd count: store one word | 
|  | 56 | subq	t3, 1, t3	# .. e1 : count -= 1, now even | 
|  | 57 | addq	a0, 8, a0	# e0    : advance dest by one quadword | 
|  | 58 | beq	t3, $tail	# .. e1 : nothing left for the 2x loop | 
|  | 59 |  | 
|  | 60 | 0:	stq_u	zero, 0(a0)	# e0    : 2x unrolled body: store two words | 
|  | 61 | subq	t3, 2, t3	# .. e1 : count -= 2 | 
|  | 62 | stq_u	zero, 8(a0)	# e0    : second quadword of the pair | 
|  | 63 | addq	a0, 16, a0	# .. e1 : advance dest by two quadwords | 
|  | 64 | bne	t3, 0b		# e1    : loop while quadwords remain | 
|  | 65 |  | 
|  | 66 | $tail:	bne	t4, 1f		# is there a tail to do? | 
|  | 67 | ret			# no, all done | 
|  | 68 |  | 
|  | 69 | 1:	ldq_u	t0, 0(a0)	# yes, load original data of the final quadword | 
|  | 70 | mskqh	t0, t4, t0	# zero its low t4 bytes, keep the bytes past the end | 
|  | 71 | stq_u	t0, 0(a0)	# write the merged quadword back | 
|  | 72 | ret			# done | 
|  | 73 |  | 
|  | 74 | __bzero:	# zero a1 bytes starting at a0; the original a0 is also left in v0 | 
|  | 75 | #ifdef PROF | 
|  | 76 | ldgp	gp, 0(pv)	# profiling build: establish gp from pv | 
|  | 77 | lda	AT, _mcount	# AT = address of _mcount | 
|  | 78 | jsr	AT, (AT), _mcount	# call _mcount with the return address in AT | 
|  | 79 | #endif | 
|  | 80 |  | 
|  | 81 | mov	a0, v0		# e0    : move return value in place: v0 = original dest | 
|  | 82 | beq	a1, $done	# .. e1 : early exit for zero-length store | 
|  | 83 | and	a0, 7, t1	# e0    : t1 = dest misalignment within its quadword | 
|  | 84 | addq	a1, t1, a1	# e1    : add dest misalignment to count | 
|  | 85 | srl	a1, 3, t3	# e0    : loop = count >> 3, using the adjusted count | 
|  | 86 | and	a1, 7, t4	# .. e1 : find number of bytes in tail: adjusted count & 7 | 
|  | 87 | unop			#       : no-op, presumably issue-slot padding | 
|  | 88 | beq	t1, bzero_loop	# e1    : aligned head, jump right in | 
|  | 89 |  | 
|  | 90 | ldq_u	t0, 0(a0)	# e0    : load original data to mask into | 
|  | 91 | cmpult	a1, 8, t2	# .. e1 : is this a sub-word set?  t2 = 1 if it ends inside the first quadword | 
|  | 92 | bne	t2, $oneq	# e1    : yes, finish it with a single read-modify-write | 
|  | 93 |  | 
|  | 94 | mskql	t0, a0, t0	# e0    : we span words.  finish this partial: keep bytes below dest, zero the rest | 
|  | 95 | subq	t3, 1, t3	# e0    : the head quadword is now accounted for | 
|  | 96 | addq	a0, 8, a0	# .. e1 : step dest into the next quadword | 
|  | 97 | stq_u	t0, -8(a0)	# e0    : store the masked head at the old a0 | 
|  | 98 | br 	bzero_loop	# .. e1 : continue with whole quadwords and the tail | 
|  | 99 |  | 
|  | 100 | .align 3	# 8-byte align the sub-word path | 
|  | 101 | $oneq:	# start and end lie in the same quadword; t0 holds its original contents | 
|  | 102 | mskql	t0, a0, t2	# e0    : t2 = original bytes below the start offset | 
|  | 103 | mskqh	t0, a1, t3	# e0    : t3 = original bytes at and above the end offset | 
|  | 104 | or	t2, t3, t0	# e1    : merge: only the bytes in between are zero | 
|  | 105 | stq_u	t0, 0(a0)	# e0    : write the quadword back | 
|  | 106 |  | 
|  | 107 | $done:	ret	# exit shared by the zero-length and sub-word paths | 
|  | 108 |  | 
|  | 109 | cfi_endproc	# close the unwind-info region opened by cfi_startproc | 
|  | 110 | weak_alias (__bzero, bzero)	/* make bzero a weak alias for __bzero */ |