| /* memset.S: optimised assembly memset |
| * |
| * Copyright (C) 2003, 2004 Red Hat, Inc. All Rights Reserved. |
| * Written by David Howells (dhowells@redhat.com) |
| * |
| * This library is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Library General Public |
| * License as published by the Free Software Foundation; either |
| * version 2 of the License, or (at your option) any later version. |
| * |
| * This library is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Library General Public License for more details. |
| * |
| * You should have received a copy of the GNU Library General Public |
| * License along with this library; if not, write to the Free |
| * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
| */ |
| |
| #include <features.h> |
| |
| .text |
| .p2align 4 |
| |
| ############################################################################### |
| # |
| # void *memset(void *p, char ch, size_t count) |
| # |
| # Fill count bytes starting at p with the low 8 bits of ch. |
| # |
| # - NOTE: must not use any stack. exception detection performs function return |
| # to caller's fixup routine, aborting the remainder of the set |
| # GR4, GR7, GR8, and GR11 must be managed |
| # |
| # - NOTE(review): GR11 is not referenced anywhere in this routine, and the |
| #   fixup convention the note above refers to is defined outside this file; |
| #   confirm that the note still applies here. |
| # |
| # Register roles, as established by the code below: |
| #   GR8        - p on entry; never written, so it still holds p at every return |
| #   GR9        - ch on entry; masked to its low 8 bits |
| #   GR10       - count on entry |
| #   GR4        - running store address |
| #   GR5        - number of bytes still to be written |
| #   GR6        - size in bytes of the step currently being considered |
| #   GR7        - index register for the store-with-update instructions |
| #   GR12, GR13 - fill byte replicated into every byte of each register |
| #   ICC0, ICC1 - results of the alignment tests and "enough bytes left" tests |
| #   CC7, CC5   - predicates derived from ICC0/ICC1 that gate the stores |
| #   ICC3       - set by each size decrement; "beqlr icc3,#0" returns to the |
| #                caller as soon as the remaining count reaches zero |
| # |
| # Strategy: write 1, 2 and 4 bytes as needed to bring the address up to 8-byte |
| # alignment, loop storing 64 bytes per iteration, then mop up remnants of 32, |
| # 16, 8, 4, 2 and 1 bytes. Apart from the loop branch and the early returns |
| # everything is predicated rather than branched. |
| # |
| # Instructions carrying a ".p" suffix are packed with the instruction that |
| # follows them, so the statement order and pairing below are significant. |
| # |
| ############################################################################### |
| .globl memset |
| .type memset,@function |
| memset: |
| # copy the arguments into working registers and return at once if count == 0 |
| orcc.p gr10,gr0,gr5,icc3 ; GR5 = count |
| andi gr9,#0xff,gr9 |
| or.p gr8,gr0,gr4 ; GR4 = address |
| beqlr icc3,#0 |
| |
| # conditionally write a byte to 2-byte-align the address |
| # CC7 = address is odd; count is known to be non-zero at this point |
| setlos.p #1,gr6 |
| andicc gr4,#1,gr0,icc0 |
| ckne icc0,cc7 |
| cstb.p gr9,@(gr4,gr0) ,cc7,#1 |
| csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3 |
| cadd.p gr4,gr6,gr4 ,cc7,#1 |
| beqlr icc3,#0 |
| |
| # conditionally write a 2-byte halfword to 4-byte-align the address |
| # CC7 = (address & 2) != 0 and at least 2 bytes remain (CC5) |
| # GR12 is built up as the fill byte repeated twice |
| andicc.p gr4,#2,gr0,icc0 |
| subicc gr5,#2,gr0,icc1 |
| setlos.p #2,gr6 |
| ckne icc0,cc7 |
| slli.p gr9,#8,gr12 ; need to double up the pattern |
| cknc icc1,cc5 |
| or.p gr9,gr12,gr12 |
| andcr cc7,cc5,cc7 |
| |
| csth.p gr12,@(gr4,gr0) ,cc7,#1 |
| csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3 |
| cadd.p gr4,gr6,gr4 ,cc7,#1 |
| beqlr icc3,#0 |
| |
| # conditionally write a 4-byte word to 8-byte-align the address |
| # CC7 = (address & 4) != 0 and at least 4 bytes remain (CC5) |
| # GR12 is extended to the fill byte repeated four times |
| andicc.p gr4,#4,gr0,icc0 |
| subicc gr5,#4,gr0,icc1 |
| setlos.p #4,gr6 |
| ckne icc0,cc7 |
| slli.p gr12,#16,gr13 ; need to quadruple-up the pattern |
| cknc icc1,cc5 |
| or.p gr13,gr12,gr12 |
| andcr cc7,cc5,cc7 |
| |
| cst.p gr12,@(gr4,gr0) ,cc7,#1 |
| csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3 |
| cadd.p gr4,gr6,gr4 ,cc7,#1 |
| beqlr icc3,#0 |
| |
| # copy GR12 into GR13 so that both registers hold the 4-byte pattern for |
| # the 8-byte stores below |
| or.p gr12,gr12,gr13 ; need to octuple-up the pattern |
| |
| # loop around writing 64-byte chunks |
| # (the address is 8-byte aligned here unless fewer bytes remained than an |
| # alignment step needed, in which case the 8-byte stores below are all |
| # skipped by their size checks) |
| # GR4 is biased to 8 below the next store address because, as used here, each |
| # cstdu stores at GR4+GR7 and leaves that address in GR4 |
| setlos #8,gr7 |
| subi.p gr4,#8,gr4 ; store with update index does weird stuff |
| setlos #64,gr6 |
| |
| # CC7 = at least 64 bytes remain; when it is false none of the eight stores |
| # nor the decrement execute, and the loop branch falls through |
| subicc gr5,#64,gr0,icc0 |
| 0: cknc icc0,cc7 |
| cstdu gr12,@(gr4,gr7) ,cc7,#1 |
| cstdu gr12,@(gr4,gr7) ,cc7,#1 |
| cstdu gr12,@(gr4,gr7) ,cc7,#1 |
| cstdu gr12,@(gr4,gr7) ,cc7,#1 |
| cstdu gr12,@(gr4,gr7) ,cc7,#1 |
| cstdu.p gr12,@(gr4,gr7) ,cc7,#1 |
| csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3 |
| cstdu.p gr12,@(gr4,gr7) ,cc7,#1 |
| subicc gr5,#64,gr0,icc0 |
| cstdu.p gr12,@(gr4,gr7) ,cc7,#1 |
| beqlr icc3,#0 |
| bnc icc0,#2,0b |
| |
| # now do 32-byte remnant |
| # four 8-byte stores gated on "at least 32 bytes remain"; ICC0 is re-tested |
| # against 16 and GR6 reloaded with 16 ready for the next section |
| subicc.p gr5,#32,gr0,icc0 |
| setlos #32,gr6 |
| cknc icc0,cc7 |
| cstdu.p gr12,@(gr4,gr7) ,cc7,#1 |
| csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3 |
| cstdu.p gr12,@(gr4,gr7) ,cc7,#1 |
| setlos #16,gr6 |
| cstdu.p gr12,@(gr4,gr7) ,cc7,#1 |
| subicc gr5,#16,gr0,icc0 |
| cstdu.p gr12,@(gr4,gr7) ,cc7,#1 |
| beqlr icc3,#0 |
| |
| # now do 16-byte remnant |
| # two 8-byte stores gated on the ICC0 test made in the previous section |
| cknc icc0,cc7 |
| cstdu.p gr12,@(gr4,gr7) ,cc7,#1 |
| csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3 |
| cstdu.p gr12,@(gr4,gr7) ,cc7,#1 |
| beqlr icc3,#0 |
| |
| # now do 8-byte remnant |
| # GR7 (still 8) doubles as the decrement, then becomes 4 for the next section |
| subicc gr5,#8,gr0,icc1 |
| cknc icc1,cc7 |
| cstdu.p gr12,@(gr4,gr7) ,cc7,#1 |
| csubcc gr5,gr7,gr5 ,cc7,#1 ; also set ICC3 |
| setlos.p #4,gr7 |
| beqlr icc3,#0 |
| |
| # now do 4-byte remnant |
| # GR4 is advanced by 4 so that it sits 4 (the new GR7) below the next store |
| # address; ICC1 is pre-computed for the 2-byte section |
| subicc gr5,#4,gr0,icc0 |
| addi.p gr4,#4,gr4 |
| cknc icc0,cc7 |
| cstu.p gr12,@(gr4,gr7) ,cc7,#1 |
| csubcc gr5,gr7,gr5 ,cc7,#1 ; also set ICC3 |
| subicc.p gr5,#2,gr0,icc1 |
| beqlr icc3,#0 |
| |
| # now do 2-byte remnant |
| # GR4 is advanced by 2 so that it sits 2 (the new GR7) below the next store |
| # address; ICC0 is pre-computed for the 1-byte section |
| setlos #2,gr7 |
| addi.p gr4,#2,gr4 |
| cknc icc1,cc7 |
| csthu.p gr12,@(gr4,gr7) ,cc7,#1 |
| csubcc gr5,gr7,gr5 ,cc7,#1 ; also set ICC3 |
| subicc.p gr5,#1,gr0,icc0 |
| beqlr icc3,#0 |
| |
| # now do 1-byte remnant |
| # GR4 is advanced by 2 to remove the remaining bias, so the final byte is |
| # stored at GR4 itself (indexed with GR0) |
| # NOTE(review): GR7 is set to 0 here but is not read again in this routine; |
| # presumably this is part of the "must be managed" convention - confirm |
| setlos #0,gr7 |
| addi.p gr4,#2,gr4 |
| cknc icc0,cc7 |
| cstb.p gr12,@(gr4,gr0) ,cc7,#1 |
| bralr |
| .size memset, .-memset |
| |
| # NOTE(review): libc_hidden_def is not defined in this file (presumably it |
| # comes in via <features.h>); confirm what symbol or alias it emits |
| libc_hidden_def(memset) |