xf.li | bfc6e71 | 2025-02-07 01:54:34 -0800 | [diff] [blame^] | 1 | /* Set a block of memory to some byte value. |
| 2 | For UltraSPARC. |
| 3 | Copyright (C) 1996-2016 Free Software Foundation, Inc. |
| 4 | This file is part of the GNU C Library. |
| 5 | Contributed by David S. Miller (davem@caip.rutgers.edu) and |
| 6 | Jakub Jelinek (jj@ultra.linux.cz). |
| 7 | |
| 8 | The GNU C Library is free software; you can redistribute it and/or |
| 9 | modify it under the terms of the GNU Lesser General Public |
| 10 | License as published by the Free Software Foundation; either |
| 11 | version 2.1 of the License, or (at your option) any later version. |
| 12 | |
| 13 | The GNU C Library is distributed in the hope that it will be useful, |
| 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 16 | Lesser General Public License for more details. |
| 17 | |
| 18 | You should have received a copy of the GNU Lesser General Public |
| 19 | License along with the GNU C Library; if not, see |
| 20 | <http://www.gnu.org/licenses/>. */ |
| 21 | |
| 22 | #include <sysdep.h> |
| 23 | #include <asm/asi.h> |
/* Build-time configuration.
   Unless XCC is already defined when this file is preprocessed, it
   defaults to xcc and USE_BPR is defined as well.  XCC is substituted
   into the "%XCC" branch on the byte count in memset.  When USE_BPR is
   not defined, the code zero-extends the byte count with
   "srl reg, 0, reg".
   NOTE(review): presumably a wrapper file predefines XCC (for example
   to select 32-bit condition codes) before including this one --
   confirm against the build.
   FPRS_FEF is the value written to %fprs after the block-store loops.  */
| 24 | #ifndef XCC |
| 25 | #define XCC xcc |
| 26 | #define USE_BPR |
| 27 | #endif |
| 28 | #define FPRS_FEF 4 |
| 29 | |
/* SET_BLOCKS(base, offset, source): four 8-byte stores (stx) of SOURCE
   to base - offset - 0x18, - 0x10, - 0x08 and - 0x00, i.e. in ascending
   address order.  memset performs a computed jump into the middle of a
   run of these expansions, so each expansion has to remain exactly four
   stx instructions in this order.  */
| 30 | #define SET_BLOCKS(base, offset, source) \ |
| 31 | stx source, [base - offset - 0x18]; \ |
| 32 | stx source, [base - offset - 0x10]; \ |
| 33 | stx source, [base - offset - 0x08]; \ |
| 34 | stx source, [base - offset - 0x00]; |
| 35 | |
| 36 | /* Well, memset is a lot easier to get right than bcopy... */ |
/* memset
   In:  %o0 = start of the region, %o1 = fill value (only the low byte
        is used), %o2 = byte count.
   Out: %o0 = the original start address (kept in %o5 meanwhile).
   Registers written: %g1, %g5, %o1-%o5 and, on the block-store path,
   the double registers %f0-%f14 plus %asi and %fprs.
   Every branch is followed by a delay-slot instruction; with the
   conditional ",a" (annulled) forms used here that instruction only
   executes when the branch is taken.  */
| 37 | .text |
| 38 | .align 32 |
| 39 | ENTRY(memset) |
/* Reduce the fill value to its low byte and remember the return value.
   A zero fill byte is handed over to label 50 inside __bzero; the
   annulled delay slot moves the count into %o1, the register that code
   uses for it.  */
| 40 | andcc %o1, 0xff, %o1 |
| 41 | mov %o0, %o5 |
| 42 | be,a,pt %icc, 50f |
| 43 | #ifndef USE_BPR |
| 44 | srl %o2, 0, %o1 |
| 45 | #else |
| 46 | mov %o2, %o1 |
| 47 | #endif |
/* Counts of 7 or less go to the byte loop at 17.  The delay slot sets
   %g5 = address & 3; when that is zero the pointer is already 4-byte
   aligned and the leading byte stores are skipped (label 4).  */
| 48 | cmp %o2, 7 |
| 49 | #ifndef USE_BPR |
| 50 | srl %o2, 0, %o2 |
| 51 | #endif |
| 52 | bleu,pn %XCC, 17f |
| 53 | andcc %o0, 3, %g5 |
| 54 | be,pt %xcc, 4f |
| 55 | and %o1, 0xff, %o1 |
/* Store 1, 2 or 3 leading bytes (for %g5 = 3, 2, 1) to reach 4-byte
   alignment.  The first two stb instructions are delay slots of
   non-annulled branches, so they execute whether or not the branch is
   taken.  */
| 56 | cmp %g5, 3 |
| 57 | be,pn %xcc, 2f |
| 58 | stb %o1, [%o0 + 0x00] |
| 59 | cmp %g5, 2 |
| 60 | be,pt %xcc, 2f |
| 61 | stb %o1, [%o0 + 0x01] |
| 62 | stb %o1, [%o0 + 0x02] |
/* %g5 - 4 is minus the number of bytes just stored: advance the
   pointer and shrink the count accordingly.  */
| 63 | 2: sub %g5, 4, %g5 |
| 64 | sub %o0, %g5, %o0 |
| 65 | add %o2, %g5, %o2 |
/* Replicate the fill byte through %o1: 8 -> 16 -> 32 bits here, and to
   64 bits by the "or" after the next label 2.  The andcc tests address
   bit 2; if it is set, one 4-byte store brings the pointer to 8-byte
   alignment.  */
| 66 | 4: sllx %o1, 8, %g1 |
| 67 | andcc %o0, 4, %g0 |
| 68 | or %o1, %g1, %o1 |
| 69 | sllx %o1, 16, %g1 |
| 70 | or %o1, %g1, %o1 |
| 71 | be,pt %xcc, 2f |
| 72 | sllx %o1, 32, %g1 |
| 73 | stw %o1, [%o0] |
| 74 | sub %o2, 4, %o2 |
| 75 | add %o0, 4, %o0 |
/* Fewer than 128 bytes left: go straight to the 8-byte-store tail at 9.
   Otherwise %g5 = address & 0x38 is the offset inside a 64-byte block;
   zero means the pointer is already 64-byte aligned (label 6).  */
| 76 | 2: cmp %o2, 128 |
| 77 | or %o1, %g1, %o1 |
| 78 | blu,pn %xcc, 9f |
| 79 | andcc %o0, 0x38, %g5 |
| 80 | be,pn %icc, 6f |
| 81 | mov 64, %o4 |
/* %o4 = 64 - %g5 = bytes needed to reach the next 64-byte boundary.
   Write them as an optional 8-, 16- and 32-byte step and subtract %o4
   from the count.  */
| 82 | andcc %o0, 8, %g0 |
| 83 | be,pn %icc, 1f |
| 84 | sub %o4, %g5, %o4 |
| 85 | stx %o1, [%o0] |
| 86 | add %o0, 8, %o0 |
| 87 | 1: andcc %o4, 16, %g0 |
| 88 | be,pn %icc, 1f |
| 89 | sub %o2, %o4, %o2 |
| 90 | stx %o1, [%o0] |
| 91 | stx %o1, [%o0 + 8] |
| 92 | add %o0, 16, %o0 |
/* The andncc in the delay slot sets %o3 = count with the low six bits
   cleared, i.e. the bytes to be written as whole 64-byte blocks.  Its
   zero flag is tested at 7; the stw/add instructions in between do not
   touch the condition codes.  */
| 93 | 1: andcc %o4, 32, %g0 |
| 94 | be,pn %icc, 7f |
| 95 | andncc %o2, 0x3f, %o3 |
| 96 | stw %o1, [%o0] |
| 97 | stw %o1, [%o0 + 4] |
| 98 | stw %o1, [%o0 + 8] |
| 99 | stw %o1, [%o0 + 12] |
| 100 | stw %o1, [%o0 + 16] |
| 101 | stw %o1, [%o0 + 20] |
| 102 | stw %o1, [%o0 + 24] |
| 103 | stw %o1, [%o0 + 28] |
| 104 | add %o0, 32, %o0 |
/* No whole 64-byte block: finish in the tail at 9.  Otherwise load the
   8 bytes stored just below %o0 into %f0 to obtain the pattern in a
   floating-point register.  */
| 105 | 7: be,pn %xcc, 9f |
| 106 | nop |
| 107 | ldd [%o0 - 8], %f0 |
/* Block-store path.  Put ASI_BLK_P (from <asm/asi.h>) into %asi for the
   stda instructions and copy %f0 into %f2..%f14 so that %f0-%f14 hold
   64 bytes of pattern.
     %g5 = block bytes modulo 256 (0, 64, 128 or 192)
     %o3 = bytes in whole 256-byte groups
     %o2 = leftover count below 64  */
| 108 | 18: wr %g0, ASI_BLK_P, %asi |
| 109 | membar #StoreStore | #LoadStore |
| 110 | andcc %o3, 0xc0, %g5 |
| 111 | and %o2, 0x3f, %o2 |
| 112 | fsrc2 %f0, %f2 |
| 113 | fsrc2 %f0, %f4 |
| 114 | andn %o3, 0xff, %o3 |
| 115 | fsrc2 %f0, %f6 |
| 116 | cmp %g5, 64 |
| 117 | fsrc2 %f0, %f8 |
| 118 | fsrc2 %f0, %f10 |
| 119 | fsrc2 %f0, %f12 |
/* %g5 == 0: only 256-byte groups remain, enter the loop at 10.
   Otherwise write one, two or three blocks at offsets 0x00, 0x40 and
   0x80 (for %g5 = 64, 128, 192); the stda instructions in delay slots
   execute regardless of the branch outcome.  */
| 120 | brz,pn %g5, 10f |
| 121 | fsrc2 %f0, %f14 |
| 122 | be,pn %icc, 2f |
| 123 | stda %f0, [%o0 + 0x00] %asi |
| 124 | cmp %g5, 128 |
| 125 | be,pn %icc, 2f |
| 126 | stda %f0, [%o0 + 0x40] %asi |
| 127 | stda %f0, [%o0 + 0x80] %asi |
/* Skip the loop when no whole 256-byte group is left; the delay slot
   steps %o0 past the blocks written above.  */
| 128 | 2: brz,pn %o3, 12f |
| 129 | add %o0, %g5, %o0 |
/* Main loop: four block stores, 256 bytes, per iteration.  */
| 130 | 10: stda %f0, [%o0 + 0x00] %asi |
| 131 | stda %f0, [%o0 + 0x40] %asi |
| 132 | stda %f0, [%o0 + 0x80] %asi |
| 133 | stda %f0, [%o0 + 0xc0] %asi |
| 134 | 11: subcc %o3, 256, %o3 |
| 135 | bne,pt %xcc, 10b |
| 136 | add %o0, 256, %o0 |
/* Write FPRS_FEF to %fprs and order the stores above before any later
   loads and stores.  */
| 137 | 12: wr %g0, FPRS_FEF, %fprs |
| 138 | membar #StoreLoad | #StoreStore |
/* Tail.  %g5 = bytes to write as whole 8-byte words (a multiple of 8,
   at most 0x78).  The delay slot leaves the final 0-7 bytes in %o2 and
   sets the zero flag that label 13 tests; nothing in between changes
   the condition codes.  */
| 139 | 9: andcc %o2, 0x78, %g5 |
| 140 | be,pn %xcc, 13f |
| 141 | andcc %o2, 7, %o2 |
/* Computed jump into the stx table below.  Each stx occupies 4 bytes of
   code and stores 8 bytes of data, so jumping to (label 13) - %g5 / 2
   executes exactly %g5 / 8 stores.  %o0 is advanced first (delay slot)
   because the table addresses memory at negative offsets from it.  */
| 142 | 14: rd %pc, %o4 |
| 143 | srl %g5, 1, %o3 |
| 144 | sub %o4, %o3, %o4 |
| 145 | jmpl %o4 + (13f - 14b), %g0 |
| 146 | add %o0, %g5, %o0 |
| 147 | 12: SET_BLOCKS (%o0, 0x68, %o1) |
| 148 | SET_BLOCKS (%o0, 0x48, %o1) |
| 149 | SET_BLOCKS (%o0, 0x28, %o1) |
| 150 | SET_BLOCKS (%o0, 0x08, %o1) |
/* Final 0-7 bytes: return at once if %o2 is zero, otherwise issue a
   stw, sth and stb as selected by bits 2, 1 and 0 of %o2.  The last stb
   sits in an annulled delay slot and is only done when bit 0 is set.  */
| 151 | 13: be,pn %xcc, 8f |
| 152 | andcc %o2, 4, %g0 |
| 153 | be,pn %xcc, 1f |
| 154 | andcc %o2, 2, %g0 |
| 155 | stw %o1, [%o0] |
| 156 | add %o0, 4, %o0 |
| 157 | 1: be,pn %xcc, 1f |
| 158 | andcc %o2, 1, %g0 |
| 159 | sth %o1, [%o0] |
| 160 | add %o0, 2, %o0 |
| 161 | 1: bne,a,pn %xcc, 8f |
| 162 | stb %o1, [%o0] |
/* Return with the original start address in %o0.  */
| 163 | 8: retl |
| 164 | mov %o5, %o0 |
/* Short case (count <= 7): plain byte loop.  The add at label 8 is also
   the delay slot of the brz, so it runs once even for a zero count;
   that is harmless because %o0 is reloaded from %o5 on return.  */
| 165 | 17: brz,pn %o2, 0f |
| 166 | 8: add %o0, 1, %o0 |
| 167 | subcc %o2, 1, %o2 |
| 168 | bne,pt %xcc, 8b |
| 169 | stb %o1, [%o0 - 1] |
| 170 | 0: retl |
| 171 | mov %o5, %o0 |
| 172 | |
/* Pointer already 64-byte aligned: store one 8-byte word at %o0 so the
   pattern can be loaded from memory into %f0, compute %o3 as above and
   join the block path at 18 (or the tail at 9 when %o3 is zero).  %o0
   is not advanced here, so the block stores start at the same
   address.  */
| 173 | 6: stx %o1, [%o0] |
| 174 | andncc %o2, 0x3f, %o3 |
| 175 | be,pn %xcc, 9b |
| 176 | nop |
| 177 | ba,pt %xcc, 18b |
| 178 | ldd [%o0], %f0 |
| 179 | END(memset) |
| 180 | libc_hidden_builtin_def (memset) |
| 181 | |
/* ZERO_BLOCKS(base, offset, source): eight 8-byte stores (stx) of SOURCE
   to base - offset - 0x38 up to base - offset - 0x00, in ascending
   address order.  __bzero performs a computed jump into the middle of a
   run of these expansions, so each expansion has to remain exactly
   eight stx instructions in this order.  */
| 182 | #define ZERO_BLOCKS(base, offset, source) \ |
| 183 | stx source, [base - offset - 0x38]; \ |
| 184 | stx source, [base - offset - 0x30]; \ |
| 185 | stx source, [base - offset - 0x28]; \ |
| 186 | stx source, [base - offset - 0x20]; \ |
| 187 | stx source, [base - offset - 0x18]; \ |
| 188 | stx source, [base - offset - 0x10]; \ |
| 189 | stx source, [base - offset - 0x08]; \ |
| 190 | stx source, [base - offset - 0x00]; |
| 191 | |
/* __bzero
   In:  %o0 = start of the region, %o1 = byte count.
   Out: %o0 = the original start address (kept in %o5 meanwhile).
   Registers written: %o1-%o5 and, on the block-store path, the double
   registers %f0-%f14 plus %asi and %fprs.
   Label 50 is additionally entered from memset when its fill byte is
   zero; at that point %o1 already holds the count and %o5 the original
   start address.
   Every branch is followed by a delay-slot instruction; with the
   conditional ",a" (annulled) forms used here that instruction only
   executes when the branch is taken.  */
| 192 | .text |
| 193 | .align 32 |
| 194 | ENTRY(__bzero) |
/* Without USE_BPR, clear the upper 32 bits of the count.  */
| 195 | #ifndef USE_BPR |
| 196 | srl %o1, 0, %o1 |
| 197 | #endif |
| 198 | mov %o0, %o5 |
/* Counts of 7 or less go to 17.  The delay slot sets %o2 = address & 3.
   If the pointer is already 4-byte aligned, branch to 4; the annulled
   delay slot then tests address bit 2 for the branch at 4.  */
| 199 | 50: cmp %o1, 7 |
| 200 | bleu,pn %xcc, 17f |
| 201 | andcc %o0, 3, %o2 |
| 202 | be,a,pt %xcc, 4f |
| 203 | andcc %o0, 4, %g0 |
/* Store 1, 2 or 3 leading zero bytes (for %o2 = 3, 2, 1) to reach
   4-byte alignment.  The first two stb instructions are delay slots of
   non-annulled branches and therefore always execute.  */
| 204 | cmp %o2, 3 |
| 205 | be,pn %xcc, 2f |
| 206 | stb %g0, [%o0 + 0x00] |
| 207 | cmp %o2, 2 |
| 208 | be,pt %xcc, 2f |
| 209 | stb %g0, [%o0 + 0x01] |
| 210 | stb %g0, [%o0 + 0x02] |
/* %o2 - 4 is minus the number of bytes just stored: advance the pointer,
   shrink the count, then test address bit 2.  */
| 211 | 2: sub %o2, 4, %o2 |
| 212 | sub %o0, %o2, %o0 |
| 213 | add %o1, %o2, %o1 |
| 214 | andcc %o0, 4, %g0 |
/* If bit 2 is set, one 4-byte store reaches 8-byte alignment.  The cmp
   in the delay slot supplies the flags for the blu at the next label 2;
   it therefore compares the count as it was before the optional 4-byte
   store is subtracted (sub/add/stw leave the condition codes alone).  */
| 215 | 4: be,pt %xcc, 2f |
| 216 | cmp %o1, 128 |
| 217 | stw %g0, [%o0] |
| 218 | sub %o1, 4, %o1 |
| 219 | add %o0, 4, %o0 |
/* Below 128 bytes: go to the 8-byte-store tail at 9.  Otherwise
   %o2 = address & 0x38 is the offset inside a 64-byte block; zero means
   the pointer is already 64-byte aligned (label 6).  */
| 220 | 2: blu,pn %xcc, 9f |
| 221 | andcc %o0, 0x38, %o2 |
| 222 | be,pn %icc, 6f |
| 223 | mov 64, %o4 |
/* %o4 = 64 - %o2 = bytes needed to reach the next 64-byte boundary.
   Write them as an optional 8-, 16- and 32-byte step and subtract %o4
   from the count.  */
| 224 | andcc %o0, 8, %g0 |
| 225 | be,pn %icc, 1f |
| 226 | sub %o4, %o2, %o4 |
| 227 | stx %g0, [%o0] |
| 228 | add %o0, 8, %o0 |
| 229 | 1: andcc %o4, 16, %g0 |
| 230 | be,pn %icc, 1f |
| 231 | sub %o1, %o4, %o1 |
| 232 | stx %g0, [%o0] |
| 233 | stx %g0, [%o0 + 8] |
| 234 | add %o0, 16, %o0 |
| 235 | 1: andcc %o4, 32, %g0 |
| 236 | be,pn %icc, 7f |
| 237 | andncc %o1, 0x3f, %o3 |
| 238 | stx %g0, [%o0] |
| 239 | stx %g0, [%o0 + 8] |
| 240 | stx %g0, [%o0 + 16] |
| 241 | stx %g0, [%o0 + 24] |
| 242 | add %o0, 32, %o0 |
/* %o3 = count with the low six bits cleared, i.e. the bytes to be
   written as whole 64-byte blocks.  Label 6 is the entry for an already
   aligned pointer; the fall-through path merely recomputes the same
   value.  If %o3 is zero, continue in the tail at 9.  The wr in the
   delay slot (ASI_BLK_P into %asi) executes on both paths.  */
| 243 | 6: andncc %o1, 0x3f, %o3 |
| 244 | 7: be,pn %xcc, 9f |
| 245 | wr %g0, ASI_BLK_P, %asi |
/* Block-store path.  Zero %f0 and %f2 with fzero, then fill %f4..%f14
   with the sum or product of those two zero registers, so that %f0-%f14
   hold 64 zero bytes.
     %o2 = block bytes modulo 256 (0, 64, 128 or 192)
     %o3 = bytes in whole 256-byte groups
     %o1 = leftover count below 64  */
| 246 | membar #StoreLoad | #StoreStore | #LoadStore |
| 247 | fzero %f0 |
| 248 | andcc %o3, 0xc0, %o2 |
| 249 | and %o1, 0x3f, %o1 |
| 250 | fzero %f2 |
| 251 | andn %o3, 0xff, %o3 |
| 252 | faddd %f0, %f2, %f4 |
| 253 | fmuld %f0, %f2, %f6 |
| 254 | cmp %o2, 64 |
| 255 | faddd %f0, %f2, %f8 |
| 256 | fmuld %f0, %f2, %f10 |
| 257 | faddd %f0, %f2, %f12 |
/* %o2 == 0: only 256-byte groups remain, enter the loop at 10.
   Otherwise write one, two or three blocks at offsets 0x00, 0x40 and
   0x80 (for %o2 = 64, 128, 192); the stda instructions in delay slots
   execute regardless of the branch outcome.  */
| 258 | brz,pn %o2, 10f |
| 259 | fmuld %f0, %f2, %f14 |
| 260 | be,pn %icc, 2f |
| 261 | stda %f0, [%o0 + 0x00] %asi |
| 262 | cmp %o2, 128 |
| 263 | be,pn %icc, 2f |
| 264 | stda %f0, [%o0 + 0x40] %asi |
| 265 | stda %f0, [%o0 + 0x80] %asi |
/* Skip the loop when no whole 256-byte group is left; the delay slot
   steps %o0 past the blocks written above.  */
| 266 | 2: brz,pn %o3, 12f |
| 267 | add %o0, %o2, %o0 |
/* Main loop: four block stores, 256 bytes, per iteration.  */
| 268 | 10: stda %f0, [%o0 + 0x00] %asi |
| 269 | stda %f0, [%o0 + 0x40] %asi |
| 270 | stda %f0, [%o0 + 0x80] %asi |
| 271 | stda %f0, [%o0 + 0xc0] %asi |
| 272 | 11: subcc %o3, 256, %o3 |
| 273 | bne,pt %xcc, 10b |
| 274 | add %o0, 256, %o0 |
/* Write FPRS_FEF to %fprs and order the stores above before any later
   loads and stores.  */
| 275 | 12: wr %g0, FPRS_FEF, %fprs |
| 276 | membar #StoreLoad | #StoreStore |
/* Tail.  %o2 = bytes to write as whole 8-byte words (count & 0xf8).
   The delay slot leaves the final 0-7 bytes in %o1 and sets the zero
   flag that label 13 tests; nothing in between changes the condition
   codes.  */
| 277 | 9: andcc %o1, 0xf8, %o2 |
| 278 | be,pn %xcc, 13f |
| 279 | andcc %o1, 7, %o1 |
/* Computed jump into the stx table below.  Each stx occupies 4 bytes of
   code and stores 8 bytes of data, so jumping to (label 13) - %o2 / 2
   executes exactly %o2 / 8 stores.  %o0 is advanced first (delay slot)
   because the table addresses memory at negative offsets from it.  */
| 280 | 14: rd %pc, %o4 |
| 281 | srl %o2, 1, %o3 |
| 282 | sub %o4, %o3, %o4 |
| 283 | jmpl %o4 + (13f - 14b), %g0 |
| 284 | add %o0, %o2, %o0 |
| 285 | 12: ZERO_BLOCKS (%o0, 0xc8, %g0) |
| 286 | ZERO_BLOCKS (%o0, 0x88, %g0) |
| 287 | ZERO_BLOCKS (%o0, 0x48, %g0) |
| 288 | ZERO_BLOCKS (%o0, 0x08, %g0) |
/* Final 0-7 bytes: return at once if %o1 is zero, otherwise issue a
   stw, sth and stb as selected by bits 2, 1 and 0 of %o1.  The last stb
   sits in an annulled delay slot and is only done when bit 0 is set.  */
| 289 | 13: be,pn %xcc, 8f |
| 290 | andcc %o1, 4, %g0 |
| 291 | be,pn %xcc, 1f |
| 292 | andcc %o1, 2, %g0 |
| 293 | stw %g0, [%o0] |
| 294 | add %o0, 4, %o0 |
| 295 | 1: be,pn %xcc, 1f |
| 296 | andcc %o1, 1, %g0 |
| 297 | sth %g0, [%o0] |
| 298 | add %o0, 2, %o0 |
| 299 | 1: bne,a,pn %xcc, 8f |
| 300 | stb %g0, [%o0] |
/* Return with the original start address in %o0.  */
| 301 | 8: retl |
| 302 | mov %o5, %o0 |
/* Short case (count <= 7).  The condition codes still come from
   "andcc %o0, 3, %o2" above: a 4-byte-aligned pointer reuses the
   stw/sth/stb tail at 13, and the orcc in the delay slot sets the zero
   flag from the count for the test there.  Otherwise fall into a byte
   loop, which is skipped for a zero count.  The add at label 8 is also
   the delay slot of that skip branch and so runs once even then; %o0 is
   reloaded from %o5 on return.  */
| 303 | 17: be,pn %xcc, 13b |
| 304 | orcc %o1, 0, %g0 |
| 305 | be,pn %xcc, 0f |
| 306 | 8: add %o0, 1, %o0 |
| 307 | subcc %o1, 1, %o1 |
| 308 | bne,pt %xcc, 8b |
| 309 | stb %g0, [%o0 - 1] |
| 310 | 0: retl |
| 311 | mov %o5, %o0 |
| 312 | END(__bzero) |
| 313 | |
| 314 | weak_alias (__bzero, bzero) |