yuezonghe | 824eb0c | 2024-06-27 02:32:26 -0700 | [diff] [blame] | 1 | /* Set a block of memory to some byte value. |
| 2 | For UltraSPARC. |
| 3 | Copyright (C) 1996, 97, 98, 99, 2003 Free Software Foundation, Inc. |
| 4 | This file is part of the GNU C Library. |
| 5 | Contributed by David S. Miller (davem@caip.rutgers.edu) and |
| 6 | Jakub Jelinek (jj@ultra.linux.cz). |
| 7 | |
| 8 | The GNU C Library is free software; you can redistribute it and/or |
| 9 | modify it under the terms of the GNU Lesser General Public |
| 10 | License as published by the Free Software Foundation; either |
| 11 | version 2.1 of the License, or (at your option) any later version. |
| 12 | |
| 13 | The GNU C Library is distributed in the hope that it will be useful, |
| 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 16 | Lesser General Public License for more details. |
| 17 | |
| 18 | You should have received a copy of the GNU Lesser General Public |
| 19 | License along with the GNU C Library; if not, write to the Free |
| 20 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
| 21 | 02111-1307 USA. */ |
| 22 | |
| 23 | #include <features.h> |
| 24 | #include <asm/asi.h> |
/* Build-time configuration.
   XCC is the condition-code operand used by the length test at the top
   of memset. When nothing has predefined it, it defaults to xcc and
   USE_BPR is defined as well. In this file USE_BPR only selects how the
   length argument is copied: a plain mov when defined, or an srl by 0
   (which clears the upper 32 bits) when not defined.  */
| 25 | #ifndef XCC |
| 26 | #define XCC xcc |
| 27 | #define USE_BPR |
| 28 | #endif |
/* Value written to the fprs register after the block-store loops.
   Per the SPARC V9 FPRS layout, 4 is the FEF (FPU enable) bit alone,
   so the two dirty bits are written as zero.  */
| 29 | #define FPRS_FEF 4 |
| 30 | |
/* SET_BLOCKS (base, offset, source)
   Expands to four 8-byte stores of source covering the 32 bytes that
   end at base - offset + 8, written from the lowest address upwards
   (base - offset - 0x18 through base - offset - 0x00).
   memset enters a table built from this macro through a computed jump
   that lands part-way into it, so the expansion must stay exactly four
   stx instructions in this address order.  */
| 31 | #define SET_BLOCKS(base, offset, source) \ |
| 32 | stx source, [base - offset - 0x18]; \ |
| 33 | stx source, [base - offset - 0x10]; \ |
| 34 | stx source, [base - offset - 0x08]; \ |
| 35 | stx source, [base - offset - 0x00]; |
| 36 | |
| 37 | /* Well, memset is a lot easier to get right than bcopy... */ |
/* memset: fill a block of memory with one byte value.

   Registers as read by the code below (this matches the usual
   memset (dst, c, n) argument order):
     o0 = destination pointer
     o1 = fill value, only the low 8 bits are used
     o2 = byte count
   Result: o0 = the original destination pointer (kept in o5).
   Also written: g1, g5, o3, o4, o5, f0 to f14, asi, fprs and the
   integer condition codes.

   Outline:
     - a zero fill value is handed to the zeroing code at label 50,
       which lives after the bzero entry point further down
     - counts of 7 or less use the byte loop at label 17
     - otherwise dst is aligned to 4, then 8 bytes while the fill byte
       is replicated into a 64-bit pattern
     - with at least 128 bytes left, dst is aligned to 64 bytes and the
       bulk is written with 64-byte block stores from f0 to f14
     - the remainder is written by jumping into an unrolled stx table
       and finally by word, halfword and byte stores.

   Reading notes: the instruction after each branch is its delay slot.
   It executes whether or not the branch is taken, except for the
   branches marked with ",a", whose delay slot runs only when the
   branch is taken. Numeric labels are local and several numbers are
   reused, so "2f" or "1f" always means the nearest one below.  */
| 38 | .text |
| 39 | .align 32 |
| 40 | ENTRY(memset) |
/* Keep only the low byte of the fill value. Z is set when it is 0.  */
| 41 | andcc %o1, 0xff, %o1 |
/* Remember the original destination for the return value.  */
| 42 | mov %o0, %o5 |
/* Zero fill: continue at label 50. The annulled delay slot runs only
   on that path and moves the count into o1, which is where the code
   at label 50 reads it from.  */
| 43 | be,a,pt %icc, 50f |
| 44 | #ifndef USE_BPR |
| 45 | srl %o2, 0, %o1 |
| 46 | #else |
| 47 | mov %o2, %o1 |
| 48 | #endif |
/* Counts of 7 or less go to the byte loop at 17.  */
| 49 | cmp %o2, 7 |
| 50 | #ifndef USE_BPR |
| 51 | srl %o2, 0, %o2 |
| 52 | #endif |
/* NOTE(review): this is the only branch that uses the XCC macro, all
   other 64-bit tests in this file hardcode xcc. Confirm that this is
   intended if XCC is ever predefined to something other than xcc.
   Delay slot: g5 = dst mod 4, and its flags feed the next branch.  */
| 53 | bleu,pn %XCC, 17f |
| 54 | andcc %o0, 3, %g5 |
/* dst already 4-byte aligned: skip the leading byte stores. The delay
   slot masks the fill value again (it was already masked at entry).  */
| 55 | be,pt %xcc, 4f |
| 56 | and %o1, 0xff, %o1 |
/* Store 1, 2 or 3 leading bytes. The first two stb sit in delay slots
   and therefore always execute: misalignment 3 stores one byte,
   2 stores two bytes, 1 stores all three.  */
| 57 | cmp %g5, 3 |
| 58 | be,pn %xcc, 2f |
| 59 | stb %o1, [%o0 + 0x00] |
| 60 | cmp %g5, 2 |
| 61 | be,pt %xcc, 2f |
| 62 | stb %o1, [%o0 + 0x01] |
| 63 | stb %o1, [%o0 + 0x02] |
/* g5 = misalignment - 4, which is minus the number of bytes just
   stored. Advance dst and reduce the count by that many bytes.  */
| 64 | 2: sub %g5, 4, %g5 |
| 65 | sub %o0, %g5, %o0 |
| 66 | add %o2, %g5, %o2 |
/* Replicate the fill byte to 16 and then 32 bits in o1. The andcc in
   the middle tests bit 2 of dst, and its flags survive until the branch
   because the shifts and ors do not modify the condition codes.  */
| 67 | 4: sllx %o1, 8, %g1 |
| 68 | andcc %o0, 4, %g0 |
| 69 | or %o1, %g1, %o1 |
| 70 | sllx %o1, 16, %g1 |
| 71 | or %o1, %g1, %o1 |
/* dst already 8-byte aligned: skip the word store. Delay slot: g1 = the
   32-bit pattern shifted into the upper half, merged into o1 below.  */
| 72 | be,pt %xcc, 2f |
| 73 | sllx %o1, 32, %g1 |
/* Store one 32-bit word to reach 8-byte alignment.  */
| 74 | stw %o1, [%o0] |
| 75 | sub %o2, 4, %o2 |
| 76 | add %o0, 4, %o0 |
/* o1 becomes the full 64-bit pattern. With fewer than 128 bytes left,
   skip the block-store path and go to the tail at 9.
   Delay slot: g5 = dst and 0x38, the offset of dst inside its 64-byte
   line in steps of 8.  */
| 77 | 2: cmp %o2, 128 |
| 78 | or %o1, %g1, %o1 |
| 79 | blu,pn %xcc, 9f |
| 80 | andcc %o0, 0x38, %g5 |
/* dst already 64-byte aligned: go to 6. Delay slot preloads 64.  */
| 81 | be,pn %icc, 6f |
| 82 | mov 64, %o4 |
/* o4 = 64 - g5 = bytes needed to reach the next 64-byte boundary
   (computed in the delay slot). Store 8 bytes if bit 3 of dst is set.  */
| 83 | andcc %o0, 8, %g0 |
| 84 | be,pn %icc, 1f |
| 85 | sub %o4, %g5, %o4 |
| 86 | stx %o1, [%o0] |
| 87 | add %o0, 8, %o0 |
/* The whole alignment amount is taken off the count here, in the delay
   slot. Store 16 bytes if bit 4 of o4 is set.  */
| 88 | 1: andcc %o4, 16, %g0 |
| 89 | be,pn %icc, 1f |
| 90 | sub %o2, %o4, %o2 |
| 91 | stx %o1, [%o0] |
| 92 | stx %o1, [%o0 + 8] |
| 93 | add %o0, 16, %o0 |
/* Delay slot: o3 = count rounded down to a multiple of 64. Its flags are
   the ones tested at label 7, since the stores and the add that follow
   leave the condition codes alone. Store 32 bytes, as eight 32-bit
   words, if bit 5 of o4 is set.  */
| 94 | 1: andcc %o4, 32, %g0 |
| 95 | be,pn %icc, 7f |
| 96 | andncc %o2, 0x3f, %o3 |
| 97 | stw %o1, [%o0] |
| 98 | stw %o1, [%o0 + 4] |
| 99 | stw %o1, [%o0 + 8] |
| 100 | stw %o1, [%o0 + 12] |
| 101 | stw %o1, [%o0 + 16] |
| 102 | stw %o1, [%o0 + 20] |
| 103 | stw %o1, [%o0 + 24] |
| 104 | stw %o1, [%o0 + 28] |
| 105 | add %o0, 32, %o0 |
/* No whole 64-byte block left (o3 is 0): go to the tail at 9.  */
| 106 | 7: be,pn %xcc, 9f |
| 107 | nop |
/* Load the 8 pattern bytes that were just stored right below dst into
   the floating-point register f0.  */
| 108 | ldd [%o0 - 8], %f0 |
/* Block-store setup. ASI_BLK_P comes from asm/asi.h. As used here each
   stda through asi covers 0x40 bytes and the pattern is replicated into
   f0 to f14 (8 doubles), which matches the UltraSPARC 64-byte block
   store. The membar orders earlier loads and stores before the block
   stores that follow.
     g5 = o3 and 0xc0 : 0, 64, 128 or 192 bytes stored before the loop
     o2 = o2 and 0x3f : bytes left for the tail after all block stores
     o3 = o3 with the low 8 bits cleared : bytes for the 256-byte loop
   The cmp sets the flags for the be after the brz. The brz itself tests
   the register g5 directly and leaves the flags alone.  */
| 109 | 18: wr %g0, ASI_BLK_P, %asi |
| 110 | membar #StoreStore | #LoadStore |
| 111 | andcc %o3, 0xc0, %g5 |
| 112 | and %o2, 0x3f, %o2 |
| 113 | fmovd %f0, %f2 |
| 114 | fmovd %f0, %f4 |
| 115 | andn %o3, 0xff, %o3 |
| 116 | fmovd %f0, %f6 |
| 117 | cmp %g5, 64 |
| 118 | fmovd %f0, %f8 |
| 119 | fmovd %f0, %f10 |
| 120 | fmovd %f0, %f12 |
/* g5 is 0: nothing to store ahead of the loop, enter it directly.  */
| 121 | brz,pn %g5, 10f |
| 122 | fmovd %f0, %f14 |
/* Store the 1, 2 or 3 leading blocks for g5 = 64, 128 or 192. The first
   two stda sit in delay slots and always execute.  */
| 123 | be,pn %icc, 2f |
| 124 | stda %f0, [%o0 + 0x00] %asi |
| 125 | cmp %g5, 128 |
| 126 | be,pn %icc, 2f |
| 127 | stda %f0, [%o0 + 0x40] %asi |
| 128 | stda %f0, [%o0 + 0x80] %asi |
/* Step dst past the leading blocks (delay slot). If nothing is left in
   units of 256 bytes, finish at the nearest label 12 below.  */
| 129 | 2: brz,pn %o3, 12f |
| 130 | add %o0, %g5, %o0 |
/* Main loop: 256 bytes per iteration as four block stores. Label 11 is
   not referenced anywhere in this file.  */
| 131 | 10: stda %f0, [%o0 + 0x00] %asi |
| 132 | stda %f0, [%o0 + 0x40] %asi |
| 133 | stda %f0, [%o0 + 0x80] %asi |
| 134 | stda %f0, [%o0 + 0xc0] %asi |
| 135 | 11: subcc %o3, 256, %o3 |
| 136 | bne,pt %xcc, 10b |
| 137 | add %o0, 256, %o0 |
/* Block stores done: write FPRS_FEF to fprs, then order the block
   stores before any later load or store.  */
| 138 | 12: wr %g0, FPRS_FEF, %fprs |
| 139 | membar #StoreLoad | #StoreStore |
/* Tail. g5 = count and 0x78 = bytes to write as whole 8-byte words
   (0 to 120). None: go to 13. Delay slot: o2 = final 0 to 7 bytes, and
   its flags are still valid at label 13 because nothing in between
   modifies the condition codes.  */
| 140 | 9: andcc %o2, 0x78, %g5 |
| 141 | be,pn %xcc, 13f |
| 142 | andcc %o2, 7, %o2 |
/* Computed jump into the stx table that ends at label 13. Each stx is
   one 4-byte instruction that writes 8 bytes, so g5 / 2 is the number
   of code bytes to back up from label 13. o4 holds the address of
   label 14 and the assembler supplies the distance from 14 to 13.
   Delay slot: dst is advanced past the words first, because the table
   addresses them with negative offsets from o0.  */
| 143 | 14: rd %pc, %o4 |
| 144 | srl %g5, 1, %o3 |
| 145 | sub %o4, %o3, %o4 |
| 146 | jmpl %o4 + (13f - 14b), %g0 |
| 147 | add %o0, %g5, %o0 |
/* 16 stx instructions covering o0 - 0x80 up to o0 - 0x08 in ascending
   order. The instructions directly in front of label 13 must remain
   exactly this table. This second label 12 is not the target of the
   brz above, which resolves to the earlier one.  */
| 148 | 12: SET_BLOCKS (%o0, 0x68, %o1) |
| 149 | SET_BLOCKS (%o0, 0x48, %o1) |
| 150 | SET_BLOCKS (%o0, 0x28, %o1) |
| 151 | SET_BLOCKS (%o0, 0x08, %o1) |
/* Last 0 to 7 bytes. Flags on entry say whether o2 is 0. Each delay
   slot tests the next lower bit of o2 for the branch that follows.  */
| 152 | 13: be,pn %xcc, 8f |
| 153 | andcc %o2, 4, %g0 |
/* Bit 2 set: store 4 bytes.  */
| 154 | be,pn %xcc, 1f |
| 155 | andcc %o2, 2, %g0 |
| 156 | stw %o1, [%o0] |
| 157 | add %o0, 4, %o0 |
/* Bit 1 set: store 2 bytes.  */
| 158 | 1: be,pn %xcc, 1f |
| 159 | andcc %o2, 1, %g0 |
| 160 | sth %o1, [%o0] |
| 161 | add %o0, 2, %o0 |
/* Bit 0 set: the annulled delay slot stores the last byte, and it runs
   only when the branch is taken. Both outcomes end up at label 8.  */
| 162 | 1: bne,a,pn %xcc, 8f |
| 163 | stb %o1, [%o0] |
/* Return the original destination pointer.  */
| 164 | 8: retl |
| 165 | mov %o5, %o0 |
/* Short fill, count of 7 or less: simple byte loop. A zero count
   returns at once. The add at label 8 is both the delay slot of the brz
   and the top of the loop. The stb in the delay slot of the bne writes
   the byte that the add just stepped over.  */
| 166 | 17: brz,pn %o2, 0f |
| 167 | 8: add %o0, 1, %o0 |
| 168 | subcc %o2, 1, %o2 |
| 169 | bne,pt %xcc, 8b |
| 170 | stb %o1, [%o0 - 1] |
| 171 | 0: retl |
| 172 | mov %o5, %o0 |
| 173 | |
/* dst was already 64-byte aligned, so none of the alignment stores ran.
   Store the pattern once at dst so that it can be loaded into f0, and
   compute o3 = count rounded down to a multiple of 64. If that is 0 go
   to the tail at 9, otherwise join the block-store setup at 18 with the
   ldd executed in the delay slot.  */
| 174 | 6: stx %o1, [%o0] |
| 175 | andncc %o2, 0x3f, %o3 |
| 176 | be,pn %xcc, 9b |
| 177 | nop |
| 178 | ba,pt %xcc, 18b |
| 179 | ldd [%o0], %f0 |
| 180 | END(memset) |
| 181 | libc_hidden_def(memset) |
| 182 | |
/* ZERO_BLOCKS (base, offset, source)
   Expands to eight 8-byte stores of source covering the 64 bytes that
   end at base - offset + 8, written from the lowest address upwards
   (base - offset - 0x38 through base - offset - 0x00).
   The zeroing code enters a table built from this macro through a
   computed jump that lands part-way into it, so the expansion must stay
   exactly eight stx instructions in this address order.  */
| 183 | #define ZERO_BLOCKS(base, offset, source) \ |
| 184 | stx source, [base - offset - 0x38]; \ |
| 185 | stx source, [base - offset - 0x30]; \ |
| 186 | stx source, [base - offset - 0x28]; \ |
| 187 | stx source, [base - offset - 0x20]; \ |
| 188 | stx source, [base - offset - 0x18]; \ |
| 189 | stx source, [base - offset - 0x10]; \ |
| 190 | stx source, [base - offset - 0x08]; \ |
| 191 | stx source, [base - offset - 0x00]; |
| 192 | |
/* bzero and the shared zero-fill body.

   The bzero entry point is only assembled when
   __UCLIBC_SUSV3_LEGACY__ is defined. Label 50 and everything after it
   is always assembled, because memset branches to 50 when its fill
   value is zero.

   Registers at label 50:
     o0 = destination pointer
     o1 = byte count
     o5 = value to hand back in o0 on return (the original destination,
          saved by the bzero entry below or by memset before it
          branches here)
   Also written: o2, o3, o4, f0 to f14, asi, fprs and the integer
   condition codes.

   The structure mirrors memset: align to 4, 8 and then 64 bytes, write
   the bulk with 64-byte block stores of zeroed floating-point
   registers, then finish through an unrolled stx table and word,
   halfword and byte stores. All integer stores use g0 as the zero
   source.

   Reading notes: the instruction after each branch is its delay slot.
   It executes whether or not the branch is taken, except for the
   branches marked with ",a", whose delay slot runs only when the
   branch is taken. Numeric labels are local and several numbers are
   reused, so "2f" or "1f" always means the nearest one below.  */
| 193 | .text |
| 194 | .align 32 |
| 195 | #ifdef __UCLIBC_SUSV3_LEGACY__ |
| 196 | ENTRY(bzero) |
/* Without USE_BPR the count is cut down to its low 32 bits.  */
| 197 | #ifndef USE_BPR |
| 198 | srl %o1, 0, %o1 |
| 199 | #endif |
/* Remember the original destination, it is moved back to o0 on return.  */
| 200 | mov %o0, %o5 |
| 201 | #endif |
/* Counts of 7 or less go to label 17. Delay slot: o2 = dst mod 4, and
   its flags are used by the next branch and again at label 17.  */
| 202 | 50: cmp %o1, 7 |
| 203 | bleu,pn %xcc, 17f |
| 204 | andcc %o0, 3, %o2 |
/* dst already 4-byte aligned: go to 4. The annulled delay slot runs only
   on that path and tests bit 2 of dst for the branch at label 4.  */
| 205 | be,a,pt %xcc, 4f |
| 206 | andcc %o0, 4, %g0 |
/* Zero 1, 2 or 3 leading bytes. The first two stb sit in delay slots
   and always execute: misalignment 3 stores one byte, 2 stores two
   bytes, 1 stores all three.  */
| 207 | cmp %o2, 3 |
| 208 | be,pn %xcc, 2f |
| 209 | stb %g0, [%o0 + 0x00] |
| 210 | cmp %o2, 2 |
| 211 | be,pt %xcc, 2f |
| 212 | stb %g0, [%o0 + 0x01] |
| 213 | stb %g0, [%o0 + 0x02] |
/* o2 = misalignment - 4, which is minus the number of bytes just
   stored. Advance dst, reduce the count, then test bit 2 of dst.  */
| 214 | 2: sub %o2, 4, %o2 |
| 215 | sub %o0, %o2, %o0 |
| 216 | add %o1, %o2, %o1 |
| 217 | andcc %o0, 4, %g0 |
/* dst already 8-byte aligned: skip the word store. The delay slot always
   runs and compares the count with 128 for the blu at the next label 2.
   When the word store below is executed, that comparison was made
   before the count dropped by 4, so the blu can still fall through with
   124 to 127 bytes left.  */
| 218 | 4: be,pt %xcc, 2f |
| 219 | cmp %o1, 128 |
| 220 | stw %g0, [%o0] |
| 221 | sub %o1, 4, %o1 |
| 222 | add %o0, 4, %o0 |
/* Fewer than 128 bytes: skip the block-store path and go to the tail at
   9. Delay slot: o2 = dst and 0x38, the offset of dst inside its 64-byte
   line in steps of 8.  */
| 223 | 2: blu,pn %xcc, 9f |
| 224 | andcc %o0, 0x38, %o2 |
/* dst already 64-byte aligned: go to 6. Delay slot preloads 64.  */
| 225 | be,pn %icc, 6f |
| 226 | mov 64, %o4 |
/* o4 = 64 - o2 = bytes needed to reach the next 64-byte boundary
   (computed in the delay slot). Zero 8 bytes if bit 3 of dst is set.  */
| 227 | andcc %o0, 8, %g0 |
| 228 | be,pn %icc, 1f |
| 229 | sub %o4, %o2, %o4 |
| 230 | stx %g0, [%o0] |
| 231 | add %o0, 8, %o0 |
/* The whole alignment amount is taken off the count here, in the delay
   slot. Zero 16 bytes if bit 4 of o4 is set.  */
| 232 | 1: andcc %o4, 16, %g0 |
| 233 | be,pn %icc, 1f |
| 234 | sub %o1, %o4, %o1 |
| 235 | stx %g0, [%o0] |
| 236 | stx %g0, [%o0 + 8] |
| 237 | add %o0, 16, %o0 |
/* Delay slot: o3 = count rounded down to a multiple of 64, with the
   flags tested at label 7. Zero 32 bytes if bit 5 of o4 is set.  */
| 238 | 1: andcc %o4, 32, %g0 |
| 239 | be,pn %icc, 7f |
| 240 | andncc %o1, 0x3f, %o3 |
| 241 | stx %g0, [%o0] |
| 242 | stx %g0, [%o0 + 8] |
| 243 | stx %g0, [%o0 + 16] |
| 244 | stx %g0, [%o0 + 24] |
| 245 | add %o0, 32, %o0 |
/* Label 6 is the entry for a dst that was already 64-byte aligned. The
   path above falls through it and recomputes the same o3.  */
| 246 | 6: andncc %o1, 0x3f, %o3 |
/* No whole 64-byte block left: go to the tail at 9. The delay slot
   selects ASI_BLK_P in asi on both outcomes.  */
| 247 | 7: be,pn %xcc, 9f |
| 248 | wr %g0, ASI_BLK_P, %asi |
/* Block-store setup. ASI_BLK_P comes from asm/asi.h. As used here each
   stda through asi covers 0x40 bytes, which matches the UltraSPARC
   64-byte block store from f0 to f14. The membar orders earlier loads
   and stores before the block stores.
   f0 and f2 are cleared with fzero. f4 to f14 are produced as 0 + 0 or
   0 * 0, so all eight doubles are zero. Presumably add and multiply are
   alternated to spread the work over separate FP units, TODO confirm.
     o2 = o3 and 0xc0 : 0, 64, 128 or 192 bytes stored before the loop
     o1 = o1 and 0x3f : bytes left for the tail after all block stores
     o3 = o3 with the low 8 bits cleared : bytes for the 256-byte loop
   The cmp sets the flags for the be after the brz. The brz itself tests
   the register o2 directly and leaves the flags alone.  */
| 249 | membar #StoreLoad | #StoreStore | #LoadStore |
| 250 | fzero %f0 |
| 251 | andcc %o3, 0xc0, %o2 |
| 252 | and %o1, 0x3f, %o1 |
| 253 | fzero %f2 |
| 254 | andn %o3, 0xff, %o3 |
| 255 | faddd %f0, %f2, %f4 |
| 256 | fmuld %f0, %f2, %f6 |
| 257 | cmp %o2, 64 |
| 258 | faddd %f0, %f2, %f8 |
| 259 | fmuld %f0, %f2, %f10 |
| 260 | faddd %f0, %f2, %f12 |
/* o2 is 0: nothing to store ahead of the loop, enter it directly.  */
| 261 | brz,pn %o2, 10f |
| 262 | fmuld %f0, %f2, %f14 |
/* Store the 1, 2 or 3 leading blocks for o2 = 64, 128 or 192. The first
   two stda sit in delay slots and always execute.  */
| 263 | be,pn %icc, 2f |
| 264 | stda %f0, [%o0 + 0x00] %asi |
| 265 | cmp %o2, 128 |
| 266 | be,pn %icc, 2f |
| 267 | stda %f0, [%o0 + 0x40] %asi |
| 268 | stda %f0, [%o0 + 0x80] %asi |
/* Step dst past the leading blocks (delay slot). If nothing is left in
   units of 256 bytes, finish at the nearest label 12 below.  */
| 269 | 2: brz,pn %o3, 12f |
| 270 | add %o0, %o2, %o0 |
/* Main loop: 256 bytes per iteration as four block stores. Label 11 is
   not referenced anywhere in this file.  */
| 271 | 10: stda %f0, [%o0 + 0x00] %asi |
| 272 | stda %f0, [%o0 + 0x40] %asi |
| 273 | stda %f0, [%o0 + 0x80] %asi |
| 274 | stda %f0, [%o0 + 0xc0] %asi |
| 275 | 11: subcc %o3, 256, %o3 |
| 276 | bne,pt %xcc, 10b |
| 277 | add %o0, 256, %o0 |
/* Block stores done: write FPRS_FEF to fprs, then order the block
   stores before any later load or store.  */
| 278 | 12: wr %g0, FPRS_FEF, %fprs |
| 279 | membar #StoreLoad | #StoreStore |
/* Tail. o2 = count and 0xf8 = bytes to write as whole 8-byte words
   (0 to 248). None: go to 13. Delay slot: o1 = final 0 to 7 bytes, and
   its flags are still valid at label 13 because nothing in between
   modifies the condition codes.  */
| 280 | 9: andcc %o1, 0xf8, %o2 |
| 281 | be,pn %xcc, 13f |
| 282 | andcc %o1, 7, %o1 |
/* Computed jump into the stx table that ends at label 13. Each stx is
   one 4-byte instruction that writes 8 bytes, so o2 / 2 is the number
   of code bytes to back up from label 13. o4 holds the address of
   label 14 and the assembler supplies the distance from 14 to 13.
   Delay slot: dst is advanced past the words first, because the table
   addresses them with negative offsets from o0.  */
| 283 | 14: rd %pc, %o4 |
| 284 | srl %o2, 1, %o3 |
| 285 | sub %o4, %o3, %o4 |
| 286 | jmpl %o4 + (13f - 14b), %g0 |
| 287 | add %o0, %o2, %o0 |
/* 32 stx instructions covering o0 - 0x100 up to o0 - 0x08 in ascending
   order. The instructions directly in front of label 13 must remain
   exactly this table. This second label 12 is not the target of the
   brz above, which resolves to the earlier one.  */
| 288 | 12: ZERO_BLOCKS (%o0, 0xc8, %g0) |
| 289 | ZERO_BLOCKS (%o0, 0x88, %g0) |
| 290 | ZERO_BLOCKS (%o0, 0x48, %g0) |
| 291 | ZERO_BLOCKS (%o0, 0x08, %g0) |
/* Last 0 to 7 bytes. Flags on entry say whether o1 is 0. Each delay
   slot tests the next lower bit of o1 for the branch that follows.  */
| 292 | 13: be,pn %xcc, 8f |
| 293 | andcc %o1, 4, %g0 |
/* Bit 2 set: zero 4 bytes.  */
| 294 | be,pn %xcc, 1f |
| 295 | andcc %o1, 2, %g0 |
| 296 | stw %g0, [%o0] |
| 297 | add %o0, 4, %o0 |
/* Bit 1 set: zero 2 bytes.  */
| 298 | 1: be,pn %xcc, 1f |
| 299 | andcc %o1, 1, %g0 |
| 300 | sth %g0, [%o0] |
| 301 | add %o0, 2, %o0 |
/* Bit 0 set: the annulled delay slot zeroes the last byte, and it runs
   only when the branch is taken. Both outcomes end up at label 8.  */
| 302 | 1: bne,a,pn %xcc, 8f |
| 303 | stb %g0, [%o0] |
/* Return with the saved pointer from o5 in o0.  */
| 304 | 8: retl |
| 305 | mov %o5, %o0 |
/* Short fill, count of 7 or less. The flags on entry still come from
   the test of dst mod 4 at label 50. If dst is 4-byte aligned, reuse the
   word, halfword and byte tail at 13. The delay slot sets the flags
   from the count, which is what label 13 expects.  */
| 306 | 17: be,pn %xcc, 13b |
| 307 | orcc %o1, 0, %g0 |
/* Unaligned dst: byte loop. A zero count returns at once. The add at
   label 8 is both the delay slot of the be and the top of the loop. The
   stb in the delay slot of the bne writes the byte that the add just
   stepped over.  */
| 308 | be,pn %xcc, 0f |
| 309 | 8: add %o0, 1, %o0 |
| 310 | subcc %o1, 1, %o1 |
| 311 | bne,pt %xcc, 8b |
| 312 | stb %g0, [%o0 - 1] |
| 313 | 0: retl |
| 314 | mov %o5, %o0 |
| 315 | #ifdef __UCLIBC_SUSV3_LEGACY__ |
| 316 | END(bzero) |
| 317 | #endif |