yuezonghe | 824eb0c | 2024-06-27 02:32:26 -0700 | [diff] [blame] | 1 | /* Copy SIZE bytes from SRC to DEST. |
| 2 | For UltraSPARC. |
| 3 | Copyright (C) 1996, 97, 98, 99, 2003 Free Software Foundation, Inc. |
| 4 | This file is part of the GNU C Library. |
| 5 | Contributed by David S. Miller (davem@caip.rutgers.edu) and |
| 6 | Jakub Jelinek (jakub@redhat.com). |
| 7 | |
| 8 | The GNU C Library is free software; you can redistribute it and/or |
| 9 | modify it under the terms of the GNU Lesser General Public |
| 10 | License as published by the Free Software Foundation; either |
| 11 | version 2.1 of the License, or (at your option) any later version. |
| 12 | |
| 13 | The GNU C Library is distributed in the hope that it will be useful, |
| 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 16 | Lesser General Public License for more details. |
| 17 | |
| 18 | You should have received a copy of the GNU Lesser General Public |
| 19 | License along with the GNU C Library; if not, write to the Free |
| 20 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
| 21 | 02111-1307 USA. */ |
| 22 | |
/* Build configuration.  <features.h> supplies uClibc feature-test macros
   (e.g. __UCLIBC_SUSV3_LEGACY__ used below to gate bcopy); <asm/asi.h>
   supplies ASI_BLK_P, the block-transfer address-space identifier used by
   the ldda/stda 64-byte block copies.  */
| 23 | #include <features.h> |
| 24 | #include <asm/asi.h> |
/* If XCC was not predefined by an including wrapper, we are assembled as
   pure 64-bit v9 code: enable 64-bit branch-on-register (USE_BPR), tell
   the assembler that %g2/%g3/%g6 are scratch, and use the 64-bit %xcc
   condition codes for the XCC macro.  */
| 25 | #ifndef XCC |
| 26 | #define USE_BPR |
| 27 | .register %g2, #scratch |
| 28 | .register %g3, #scratch |
| 29 | .register %g6, #scratch |
| 30 | #define XCC xcc |
| 31 | #endif |
/* FPRS_FEF = floating-point enable bit of %fprs; written back with
   "wr %g0, FPRS_FEF, %fprs" before returning from the FPU copy paths.  */
| 32 | #define FPRS_FEF 4 |
| 33 | |
/* FREG_FROB: eight faligndata steps over nine consecutive double regs
   (f1..f9), producing one re-aligned 64-byte output block in %f48-%f62.  */
| 34 | #define FREG_FROB(f1, f2, f3, f4, f5, f6, f7, f8, f9) \
| 35 | faligndata %f1, %f2, %f48; \
| 36 | faligndata %f2, %f3, %f50; \
| 37 | faligndata %f3, %f4, %f52; \
| 38 | faligndata %f4, %f5, %f54; \
| 39 | faligndata %f5, %f6, %f56; \
| 40 | faligndata %f6, %f7, %f58; \
| 41 | faligndata %f7, %f8, %f60; \
| 42 | faligndata %f8, %f9, %f62; |
| 43 | |
/* MAIN_LOOP_CHUNK: block-load the next 64 bytes from src into fdest via
   the %asi block ASI, advance both pointers by 0x40, decrement len, and
   branch to jmptgt when len reaches zero.  The stda of the previously
   aligned block (fsrc = f48 block) sits in the delay slot so it executes
   on both the taken and fall-through paths.  */
| 44 | #define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \
| 45 | ldda [%src] %asi, %fdest; \
| 46 | add %src, 0x40, %src; \
| 47 | add %dest, 0x40, %dest; \
| 48 | subcc %len, 0x40, %len; \
| 49 | be,pn %xcc, jmptgt; \
| 50 | stda %fsrc, [%dest - 0x40] %asi; |
| 51 | |
/* LOOP_CHUNK1/2/3: the three rotations of the main loop, loading into
   the f0, f16 and f32 register banks respectively.  */
| 52 | #define LOOP_CHUNK1(src, dest, len, branch_dest) \
| 53 | MAIN_LOOP_CHUNK(src, dest, f0, f48, len, branch_dest) |
| 54 | #define LOOP_CHUNK2(src, dest, len, branch_dest) \
| 55 | MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest) |
| 56 | #define LOOP_CHUNK3(src, dest, len, branch_dest) \
| 57 | MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) |
| 58 | |
/* STORE_SYNC: flush the pending aligned block in f48..f62 and advance
   dest (caller follows it with membar #Sync).  */
| 59 | #define STORE_SYNC(dest, fsrc) \
| 60 | stda %fsrc, [%dest] %asi; \
| 61 | add %dest, 0x40, %dest; |
| 62 | |
/* STORE_JUMP: as STORE_SYNC, then branch to the finish-table entry for
   this register rotation (membar #Sync follows in the delay slot).  */
| 63 | #define STORE_JUMP(dest, fsrc, target) \
| 64 | stda %fsrc, [%dest] %asi; \
| 65 | add %dest, 0x40, %dest; \
| 66 | ba,pt %xcc, target; |
| 67 | |
/* VISLOOP_PAD: 15 nops of padding after each loop variant; together with
   the .align 512 of the table this keeps the variants at a fixed stride
   so the computed jmpl at 203 can index them.  */
| 68 | #define VISLOOP_PAD nop; nop; nop; nop; \
| 69 | nop; nop; nop; nop; \
| 70 | nop; nop; nop; nop; \
| 71 | nop; nop; nop; |
| 72 | |
/* FINISH_VISCHUNK: drain one 8-byte piece of the remainder: if fewer
   than 8 bytes are left (left goes negative) branch to the byte tail at
   205 (the faligndata in the delay slot is harmless), otherwise store
   one aligned doubleword and advance dest.  */
| 73 | #define FINISH_VISCHUNK(dest, f0, f1, left) \
| 74 | subcc %left, 8, %left; \
| 75 | bl,pn %xcc, 205f; \
| 76 | faligndata %f0, %f1, %f48; \
| 77 | std %f48, [%dest]; \
| 78 | add %dest, 8, %dest; |
| 79 | |
/* UNEVEN_VISCHUNK: variant for the rotations whose pair wraps around the
   register file — copy f0 into f1 with fsrc1 (in the delay slot) and
   continue in the generic FP drain loop at 204.  */
| 80 | #define UNEVEN_VISCHUNK(dest, f0, f1, left) \
| 81 | subcc %left, 8, %left; \
| 82 | bl,pn %xcc, 205f; \
| 83 | fsrc1 %f0, %f1; \
| 84 | ba,a,pt %xcc, 204f; |
| 85 | |
| 86 | /* Macros for non-VIS memcpy code. */ |
/* MOVE_BIGCHUNK: copy 32 bytes when dst is 4- but not 8-byte aligned:
   load four doublewords, then store each as two 32-bit halves — the low
   half at offset+4 and the high half (after srlx 32) at offset+0, i.e.
   big-endian word order.  */
| 87 | #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \
| 88 | ldx [%src + offset + 0x00], %t0; \
| 89 | ldx [%src + offset + 0x08], %t1; \
| 90 | ldx [%src + offset + 0x10], %t2; \
| 91 | ldx [%src + offset + 0x18], %t3; \
| 92 | stw %t0, [%dst + offset + 0x04]; \
| 93 | srlx %t0, 32, %t0; \
| 94 | stw %t0, [%dst + offset + 0x00]; \
| 95 | stw %t1, [%dst + offset + 0x0c]; \
| 96 | srlx %t1, 32, %t1; \
| 97 | stw %t1, [%dst + offset + 0x08]; \
| 98 | stw %t2, [%dst + offset + 0x14]; \
| 99 | srlx %t2, 32, %t2; \
| 100 | stw %t2, [%dst + offset + 0x10]; \
| 101 | stw %t3, [%dst + offset + 0x1c]; \
| 102 | srlx %t3, 32, %t3; \
| 103 | stw %t3, [%dst + offset + 0x18]; |
| 104 | |
/* MOVE_BIGALIGNCHUNK: copy 64 bytes with full 8-byte alignment using
   straight ldx/stx pairs (two 32-byte halves).  */
| 105 | #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
| 106 | ldx [%src + offset + 0x00], %t0; \
| 107 | ldx [%src + offset + 0x08], %t1; \
| 108 | ldx [%src + offset + 0x10], %t2; \
| 109 | ldx [%src + offset + 0x18], %t3; \
| 110 | stx %t0, [%dst + offset + 0x00]; \
| 111 | stx %t1, [%dst + offset + 0x08]; \
| 112 | stx %t2, [%dst + offset + 0x10]; \
| 113 | stx %t3, [%dst + offset + 0x18]; \
| 114 | ldx [%src + offset + 0x20], %t0; \
| 115 | ldx [%src + offset + 0x28], %t1; \
| 116 | ldx [%src + offset + 0x30], %t2; \
| 117 | ldx [%src + offset + 0x38], %t3; \
| 118 | stx %t0, [%dst + offset + 0x20]; \
| 119 | stx %t1, [%dst + offset + 0x28]; \
| 120 | stx %t2, [%dst + offset + 0x30]; \
| 121 | stx %t3, [%dst + offset + 0x38]; |
| 122 | |
/* MOVE_LASTCHUNK: 16-byte tail piece, addressed downward from the end
   (pointers were pre-advanced); 8 instructions, so each expansion is 32
   bytes of code — the computed jump at 79: relies on that.  */
| 123 | #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
| 124 | ldx [%src - offset - 0x10], %t0; \
| 125 | ldx [%src - offset - 0x08], %t1; \
| 126 | stw %t0, [%dst - offset - 0x0c]; \
| 127 | srlx %t0, 32, %t2; \
| 128 | stw %t2, [%dst - offset - 0x10]; \
| 129 | stw %t1, [%dst - offset - 0x04]; \
| 130 | srlx %t1, 32, %t3; \
| 131 | stw %t3, [%dst - offset - 0x08]; |
| 132 | |
/* MOVE_LASTALIGNCHUNK: 16-byte aligned tail piece; 4 instructions = 16
   bytes of code per expansion, matching the computed jump at 83:.  */
| 133 | #define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \
| 134 | ldx [%src - offset - 0x10], %t0; \
| 135 | ldx [%src - offset - 0x08], %t1; \
| 136 | stx %t0, [%dst - offset - 0x10]; \
| 137 | stx %t1, [%dst - offset - 0x08]; |
| 138 | |
| 139 | /* Macros for non-VIS memmove code. */ |
/* RMOVE_BIGCHUNK: backward-copy mirror of MOVE_BIGCHUNK (32 bytes,
   descending addresses, 64-bit loads split into word stores).  */
| 140 | #define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \
| 141 | ldx [%src - offset - 0x20], %t0; \
| 142 | ldx [%src - offset - 0x18], %t1; \
| 143 | ldx [%src - offset - 0x10], %t2; \
| 144 | ldx [%src - offset - 0x08], %t3; \
| 145 | stw %t0, [%dst - offset - 0x1c]; \
| 146 | srlx %t0, 32, %t0; \
| 147 | stw %t0, [%dst - offset - 0x20]; \
| 148 | stw %t1, [%dst - offset - 0x14]; \
| 149 | srlx %t1, 32, %t1; \
| 150 | stw %t1, [%dst - offset - 0x18]; \
| 151 | stw %t2, [%dst - offset - 0x0c]; \
| 152 | srlx %t2, 32, %t2; \
| 153 | stw %t2, [%dst - offset - 0x10]; \
| 154 | stw %t3, [%dst - offset - 0x04]; \
| 155 | srlx %t3, 32, %t3; \
| 156 | stw %t3, [%dst - offset - 0x08]; |
| 157 | |
/* RMOVE_BIGALIGNCHUNK: backward-copy mirror of MOVE_BIGALIGNCHUNK
   (64 bytes, descending, pure ldx/stx).  */
| 158 | #define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
| 159 | ldx [%src - offset - 0x20], %t0; \
| 160 | ldx [%src - offset - 0x18], %t1; \
| 161 | ldx [%src - offset - 0x10], %t2; \
| 162 | ldx [%src - offset - 0x08], %t3; \
| 163 | stx %t0, [%dst - offset - 0x20]; \
| 164 | stx %t1, [%dst - offset - 0x18]; \
| 165 | stx %t2, [%dst - offset - 0x10]; \
| 166 | stx %t3, [%dst - offset - 0x08]; \
| 167 | ldx [%src - offset - 0x40], %t0; \
| 168 | ldx [%src - offset - 0x38], %t1; \
| 169 | ldx [%src - offset - 0x30], %t2; \
| 170 | ldx [%src - offset - 0x28], %t3; \
| 171 | stx %t0, [%dst - offset - 0x40]; \
| 172 | stx %t1, [%dst - offset - 0x38]; \
| 173 | stx %t2, [%dst - offset - 0x30]; \
| 174 | stx %t3, [%dst - offset - 0x28]; |
| 175 | |
/* RMOVE_LASTCHUNK: backward mirror of MOVE_LASTCHUNK (16 bytes).  */
| 176 | #define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
| 177 | ldx [%src + offset + 0x00], %t0; \
| 178 | ldx [%src + offset + 0x08], %t1; \
| 179 | stw %t0, [%dst + offset + 0x04]; \
| 180 | srlx %t0, 32, %t2; \
| 181 | stw %t2, [%dst + offset + 0x00]; \
| 182 | stw %t1, [%dst + offset + 0x0c]; \
| 183 | srlx %t1, 32, %t3; \
| 184 | stw %t3, [%dst + offset + 0x08]; |
| 185 | |
/* RMOVE_LASTALIGNCHUNK: backward mirror of MOVE_LASTALIGNCHUNK.  */
| 186 | #define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \
| 187 | ldx [%src + offset + 0x00], %t0; \
| 188 | ldx [%src + offset + 0x08], %t1; \
| 189 | stx %t0, [%dst + offset + 0x00]; \
| 190 | stx %t1, [%dst + offset + 0x08]; |
| 191 | |
| 192 | .text |
| 193 | .align 32 |
| 194 | |
/* bcopy(src, dst, n): legacy SUSv3 entry point.  Arguments arrive in
   (src, dst, n) order and are swapped into the memcpy/memmove (dst, src,
   n) order via %g3/%o4.  If dst - src >= n (unsigned) a forward copy is
   safe, so jump into memcpy at 210f; otherwise, for non-zero n, jump
   into the backward-copy entry of memmove at 220f with %o0 pre-advanced
   to dst + n, as that path expects.  n == 0 simply returns.  */
| 195 | #ifdef __UCLIBC_SUSV3_LEGACY__ |
| 196 | ENTRY(bcopy) |
| 197 | sub %o1, %o0, %o4 /* IEU0 Group */ |
| 198 | mov %o0, %g3 /* IEU1 */ |
| 199 | cmp %o4, %o2 /* IEU1 Group */ |
| 200 | mov %o1, %o0 /* IEU0 */ |
| 201 | bgeu,pt %XCC, 210f /* CTI */ |
| 202 | mov %g3, %o1 /* IEU0 Group */ |
/* Without 64-bit branch-on-register support, clear the upper half of
   the 32-bit size argument first.  */
| 203 | #ifndef USE_BPR |
| 204 | srl %o2, 0, %o2 /* IEU1 */ |
| 205 | #endif |
| 206 | brnz,pn %o2, 220f /* CTI Group */ |
| 207 | add %o0, %o2, %o0 /* IEU0 */ |
| 208 | retl |
| 209 | nop |
| 210 | END(bcopy) |
| 211 | #endif |
| 212 | |
| 213 | .align 32 |
/* 200: entry for large copies (n >= 64*6, reached from memcpy and
   __align_cpy_4).  Condition codes on entry reflect "andcc %o0, 7":
   if dst is already 8-byte aligned, skip straight to 201.  Otherwise
   copy 8 - (dst & 7) leading bytes one or two at a time to 8-align dst.  */
| 214 | 200: be,pt %xcc, 201f /* CTI */ |
| 215 | andcc %o0, 0x38, %g5 /* IEU1 Group */ |
| 216 | mov 8, %g1 /* IEU0 */ |
| 217 | sub %g1, %g2, %g2 /* IEU0 Group */ |
| 218 | andcc %o0, 1, %g0 /* IEU1 */ |
| 219 | be,pt %icc, 2f /* CTI */ |
| 220 | sub %o2, %g2, %o2 /* IEU0 Group */ |
/* 1: copy a single odd leading byte, then fall into the pair loop.  */
| 221 | 1: ldub [%o1], %o5 /* Load Group */ |
| 222 | add %o1, 1, %o1 /* IEU0 */ |
| 223 | add %o0, 1, %o0 /* IEU1 */ |
| 224 | subcc %g2, 1, %g2 /* IEU1 Group */ |
| 225 | be,pn %xcc, 3f /* CTI */ |
| 226 | stb %o5, [%o0 - 1] /* Store */ |
/* 2: copy the remaining alignment bytes two at a time.  */
| 227 | 2: ldub [%o1], %o5 /* Load Group */ |
| 228 | add %o0, 2, %o0 /* IEU0 */ |
| 229 | ldub [%o1 + 1], %g3 /* Load Group */ |
| 230 | subcc %g2, 2, %g2 /* IEU1 Group */ |
| 231 | stb %o5, [%o0 - 2] /* Store */ |
| 232 | add %o1, 2, %o1 /* IEU0 */ |
| 233 | bne,pt %xcc, 2b /* CTI Group */ |
| 234 | stb %g3, [%o0 - 1] /* Store */ |
| 235 | 3: andcc %o0, 0x38, %g5 /* IEU1 Group */ |
/* 201: dst is 8-byte aligned; %g5 = dst & 0x38.  If non-zero, copy
   (64 - %g5) bytes via the FP faligndata loop below to bring dst to a
   64-byte boundary before the block-copy phase at 202.  alignaddr sets
   up %gsr and rounds the source pointer for faligndata.  */
| 236 | 201: be,pt %icc, 202f /* CTI */ |
| 237 | mov 64, %g1 /* IEU0 */ |
| 238 | fmovd %f0, %f2 /* FPU */ |
| 239 | sub %g1, %g5, %g5 /* IEU0 Group */ |
| 240 | alignaddr %o1, %g0, %g1 /* GRU Group */ |
| 241 | ldd [%g1], %f4 /* Load Group */ |
| 242 | sub %o2, %g5, %o2 /* IEU0 */ |
/* Software-pipelined 8-bytes-per-iteration alignment loop, alternating
   the roles of %f4 and %f6.  */
| 243 | 1: ldd [%g1 + 0x8], %f6 /* Load Group */ |
| 244 | add %g1, 0x8, %g1 /* IEU0 Group */ |
| 245 | subcc %g5, 8, %g5 /* IEU1 */ |
| 246 | faligndata %f4, %f6, %f0 /* GRU Group */ |
| 247 | std %f0, [%o0] /* Store */ |
| 248 | add %o1, 8, %o1 /* IEU0 Group */ |
| 249 | be,pn %xcc, 202f /* CTI */ |
| 250 | add %o0, 8, %o0 /* IEU1 */ |
| 251 | ldd [%g1 + 0x8], %f4 /* Load Group */ |
| 252 | add %g1, 8, %g1 /* IEU0 */ |
| 253 | subcc %g5, 8, %g5 /* IEU1 */ |
| 254 | faligndata %f6, %f4, %f0 /* GRU Group */ |
| 255 | std %f0, [%o0] /* Store */ |
| 256 | add %o1, 8, %o1 /* IEU0 */ |
| 257 | bne,pt %xcc, 1b /* CTI Group */ |
| 258 | add %o0, 8, %o0 /* IEU0 */ |
/* 202: dst is now 64-byte aligned.  Set up the block-copy phase:
   %asi = ASI_BLK_P for ldda/stda block transfers, %g6 = number of bytes
   for the main loop (rounded to 64, minus a pipeline margin), %g3 =
   doubleword remainder for the finish tables, %o2 = final byte
   remainder, %g2 = (src >> 3) & 7 selects which of the 8 loop variants
   matches the source alignment.  Three 64-byte blocks are preloaded
   into f0/f16/f32 to prime the software pipeline.  */
| 259 | 202: membar #LoadStore | #StoreStore | #StoreLoad /* LSU Group */ |
| 260 | wr %g0, ASI_BLK_P, %asi /* LSU Group */ |
| 261 | subcc %o2, 0x40, %g6 /* IEU1 Group */ |
| 262 | mov %o1, %g1 /* IEU0 */ |
| 263 | andncc %g6, (0x40 - 1), %g6 /* IEU1 Group */ |
| 264 | srl %g1, 3, %g2 /* IEU0 */ |
| 265 | sub %o2, %g6, %g3 /* IEU0 Group */ |
| 266 | andn %o1, (0x40 - 1), %o1 /* IEU1 */ |
| 267 | and %g2, 7, %g2 /* IEU0 Group */ |
| 268 | andncc %g3, 0x7, %g3 /* IEU1 */ |
| 269 | fmovd %f0, %f2 /* FPU */ |
| 270 | sub %g3, 0x10, %g3 /* IEU0 Group */ |
| 271 | sub %o2, %g6, %o2 /* IEU1 */ |
| 272 | alignaddr %g1, %g0, %g0 /* GRU Group */ |
| 273 | add %g1, %g6, %g1 /* IEU0 Group */ |
| 274 | subcc %o2, %g3, %o2 /* IEU1 */ |
| 275 | ldda [%o1 + 0x00] %asi, %f0 /* LSU Group */ |
| 276 | add %g1, %g3, %g1 /* IEU0 */ |
| 277 | ldda [%o1 + 0x40] %asi, %f16 /* LSU Group */ |
| 278 | sub %g6, 0x80, %g6 /* IEU0 */ |
| 279 | ldda [%o1 + 0x80] %asi, %f32 /* LSU Group */ |
| 280 | /* Clk1 Group 8-( */ |
| 281 | /* Clk2 Group 8-( */ |
| 282 | /* Clk3 Group 8-( */ |
| 283 | /* Clk4 Group 8-( */ |
/* 203: computed dispatch into the 512-byte-strided loop table at 300:.
   %g2 << 9 indexes variant number %g2; rd %pc + lo(300f-203b) forms the
   table base.  The delay slot advances %o1 past the three preloaded
   blocks.  */
| 284 | 203: rd %pc, %g5 /* PDU Group 8-( */ |
| 285 | addcc %g5, %lo(300f - 203b), %g5 /* IEU1 Group */ |
| 286 | sll %g2, 9, %g2 /* IEU0 */ |
| 287 | jmpl %g5 + %g2, %g0 /* CTI Group brk forced*/ |
| 288 | addcc %o1, 0xc0, %o1 /* IEU1 Group */ |
| 289 | |
/* The eight main-loop variants (300, 310, ..., 370), one per source
   alignment residue selected at 203:.  Each variant occupies exactly 512
   bytes (.align 512 plus VISLOOP_PAD) so the computed jmpl with
   %g2 = residue << 9 lands on the right one.  Each iteration re-aligns
   and stores 64 bytes while block-loading the next 64, rotating through
   the f0/f16/f32 register banks; on exhaustion it branches to a 301/302/
   303-style epilogue that drains the two in-flight blocks and jumps into
   the finish table (400..446).  Comment-only lines below add no bytes,
   preserving the 512-byte stride.  */
| 290 | .align 512 /* OK, here comes the fun part... */ |
| 291 | 300: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) LOOP_CHUNK1(o1, o0, g6, 301f) |
| 292 | FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) LOOP_CHUNK2(o1, o0, g6, 302f) |
| 293 | FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) LOOP_CHUNK3(o1, o0, g6, 303f) |
/* Loop back to 300+4: the faligndata in the delay slot already performed
   the first instruction of the FREG_FROB at 300, so it is skipped.  */
| 294 | b,pt %xcc, 300b+4; faligndata %f0, %f2, %f48 |
| 295 | 301: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) STORE_SYNC(o0, f48) membar #Sync |
| 296 | FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) STORE_JUMP(o0, f48, 400f) membar #Sync |
| 297 | 302: FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) STORE_SYNC(o0, f48) membar #Sync |
| 298 | FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) STORE_JUMP(o0, f48, 416f) membar #Sync |
| 299 | 303: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) STORE_SYNC(o0, f48) membar #Sync |
| 300 | FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) STORE_JUMP(o0, f48, 432f) membar #Sync |
| 301 | VISLOOP_PAD |
| 302 | 310: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) LOOP_CHUNK1(o1, o0, g6, 311f) |
| 303 | FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) LOOP_CHUNK2(o1, o0, g6, 312f) |
| 304 | FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) LOOP_CHUNK3(o1, o0, g6, 313f) |
| 305 | b,pt %xcc, 310b+4; faligndata %f2, %f4, %f48 |
| 306 | 311: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) STORE_SYNC(o0, f48) membar #Sync |
| 307 | FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) STORE_JUMP(o0, f48, 402f) membar #Sync |
| 308 | 312: FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) STORE_SYNC(o0, f48) membar #Sync |
| 309 | FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) STORE_JUMP(o0, f48, 418f) membar #Sync |
| 310 | 313: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) STORE_SYNC(o0, f48) membar #Sync |
| 311 | FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) STORE_JUMP(o0, f48, 434f) membar #Sync |
| 312 | VISLOOP_PAD |
| 313 | 320: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) LOOP_CHUNK1(o1, o0, g6, 321f) |
| 314 | FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) LOOP_CHUNK2(o1, o0, g6, 322f) |
| 315 | FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) LOOP_CHUNK3(o1, o0, g6, 323f) |
| 316 | b,pt %xcc, 320b+4; faligndata %f4, %f6, %f48 |
| 317 | 321: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) STORE_SYNC(o0, f48) membar #Sync |
| 318 | FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) STORE_JUMP(o0, f48, 404f) membar #Sync |
| 319 | 322: FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) STORE_SYNC(o0, f48) membar #Sync |
| 320 | FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) STORE_JUMP(o0, f48, 420f) membar #Sync |
| 321 | 323: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) STORE_SYNC(o0, f48) membar #Sync |
| 322 | FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) STORE_JUMP(o0, f48, 436f) membar #Sync |
| 323 | VISLOOP_PAD |
| 324 | 330: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) LOOP_CHUNK1(o1, o0, g6, 331f) |
| 325 | FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) LOOP_CHUNK2(o1, o0, g6, 332f) |
| 326 | FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) LOOP_CHUNK3(o1, o0, g6, 333f) |
| 327 | b,pt %xcc, 330b+4; faligndata %f6, %f8, %f48 |
| 328 | 331: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) STORE_SYNC(o0, f48) membar #Sync |
| 329 | FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) STORE_JUMP(o0, f48, 406f) membar #Sync |
| 330 | 332: FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) STORE_SYNC(o0, f48) membar #Sync |
| 331 | FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) STORE_JUMP(o0, f48, 422f) membar #Sync |
| 332 | 333: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) STORE_SYNC(o0, f48) membar #Sync |
| 333 | FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) STORE_JUMP(o0, f48, 438f) membar #Sync |
| 334 | VISLOOP_PAD |
| 335 | 340: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) LOOP_CHUNK1(o1, o0, g6, 341f) |
| 336 | FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) LOOP_CHUNK2(o1, o0, g6, 342f) |
| 337 | FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) LOOP_CHUNK3(o1, o0, g6, 343f) |
| 338 | b,pt %xcc, 340b+4; faligndata %f8, %f10, %f48 |
| 339 | 341: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) STORE_SYNC(o0, f48) membar #Sync |
| 340 | FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) STORE_JUMP(o0, f48, 408f) membar #Sync |
| 341 | 342: FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) STORE_SYNC(o0, f48) membar #Sync |
| 342 | FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) STORE_JUMP(o0, f48, 424f) membar #Sync |
| 343 | 343: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) STORE_SYNC(o0, f48) membar #Sync |
| 344 | FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) STORE_JUMP(o0, f48, 440f) membar #Sync |
| 345 | VISLOOP_PAD |
| 346 | 350: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) LOOP_CHUNK1(o1, o0, g6, 351f) |
| 347 | FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) LOOP_CHUNK2(o1, o0, g6, 352f) |
| 348 | FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) LOOP_CHUNK3(o1, o0, g6, 353f) |
| 349 | b,pt %xcc, 350b+4; faligndata %f10, %f12, %f48 |
| 350 | 351: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) STORE_SYNC(o0, f48) membar #Sync |
| 351 | FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) STORE_JUMP(o0, f48, 410f) membar #Sync |
| 352 | 352: FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) STORE_SYNC(o0, f48) membar #Sync |
| 353 | FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) STORE_JUMP(o0, f48, 426f) membar #Sync |
| 354 | 353: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) STORE_SYNC(o0, f48) membar #Sync |
| 355 | FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) STORE_JUMP(o0, f48, 442f) membar #Sync |
| 356 | VISLOOP_PAD |
| 357 | 360: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) LOOP_CHUNK1(o1, o0, g6, 361f) |
| 358 | FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) LOOP_CHUNK2(o1, o0, g6, 362f) |
| 359 | FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) LOOP_CHUNK3(o1, o0, g6, 363f) |
| 360 | b,pt %xcc, 360b+4; faligndata %f12, %f14, %f48 |
| 361 | 361: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) STORE_SYNC(o0, f48) membar #Sync |
| 362 | FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) STORE_JUMP(o0, f48, 412f) membar #Sync |
| 363 | 362: FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) STORE_SYNC(o0, f48) membar #Sync |
| 364 | FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) STORE_JUMP(o0, f48, 428f) membar #Sync |
| 365 | 363: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) STORE_SYNC(o0, f48) membar #Sync |
| 366 | FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) STORE_JUMP(o0, f48, 444f) membar #Sync |
| 367 | VISLOOP_PAD |
| 368 | 370: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) LOOP_CHUNK1(o1, o0, g6, 371f) |
| 369 | FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) LOOP_CHUNK2(o1, o0, g6, 372f) |
| 370 | FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) LOOP_CHUNK3(o1, o0, g6, 373f) |
| 371 | b,pt %xcc, 370b+4; faligndata %f14, %f16, %f48 |
| 372 | 371: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) STORE_SYNC(o0, f48) membar #Sync |
| 373 | FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) STORE_JUMP(o0, f48, 414f) membar #Sync |
| 374 | 372: FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) STORE_SYNC(o0, f48) membar #Sync |
| 375 | FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) STORE_JUMP(o0, f48, 430f) membar #Sync |
| 376 | 373: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) STORE_SYNC(o0, f48) membar #Sync |
| 377 | FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) STORE_JUMP(o0, f48, 446f) membar #Sync |
| 378 | VISLOOP_PAD |
/* Finish table: drains the remaining doublewords (%g3 bytes) left in
   the FP registers after the main loop, 8 bytes per entry, falling
   through entry to entry; UNEVEN_VISCHUNK entries hand off to the
   generic drain loop at 204, and all exit to 205 when %g3 underflows.  */
| 379 | 400: FINISH_VISCHUNK(o0, f0, f2, g3) |
| 380 | 402: FINISH_VISCHUNK(o0, f2, f4, g3) |
| 381 | 404: FINISH_VISCHUNK(o0, f4, f6, g3) |
| 382 | 406: FINISH_VISCHUNK(o0, f6, f8, g3) |
| 383 | 408: FINISH_VISCHUNK(o0, f8, f10, g3) |
| 384 | 410: FINISH_VISCHUNK(o0, f10, f12, g3) |
| 385 | 412: FINISH_VISCHUNK(o0, f12, f14, g3) |
| 386 | 414: UNEVEN_VISCHUNK(o0, f14, f0, g3) |
| 387 | 416: FINISH_VISCHUNK(o0, f16, f18, g3) |
| 388 | 418: FINISH_VISCHUNK(o0, f18, f20, g3) |
| 389 | 420: FINISH_VISCHUNK(o0, f20, f22, g3) |
| 390 | 422: FINISH_VISCHUNK(o0, f22, f24, g3) |
| 391 | 424: FINISH_VISCHUNK(o0, f24, f26, g3) |
| 392 | 426: FINISH_VISCHUNK(o0, f26, f28, g3) |
| 393 | 428: FINISH_VISCHUNK(o0, f28, f30, g3) |
| 394 | 430: UNEVEN_VISCHUNK(o0, f30, f0, g3) |
| 395 | 432: FINISH_VISCHUNK(o0, f32, f34, g3) |
| 396 | 434: FINISH_VISCHUNK(o0, f34, f36, g3) |
| 397 | 436: FINISH_VISCHUNK(o0, f36, f38, g3) |
| 398 | 438: FINISH_VISCHUNK(o0, f38, f40, g3) |
| 399 | 440: FINISH_VISCHUNK(o0, f40, f42, g3) |
| 400 | 442: FINISH_VISCHUNK(o0, f42, f44, g3) |
| 401 | 444: FINISH_VISCHUNK(o0, f44, f46, g3) |
| 402 | 446: UNEVEN_VISCHUNK(o0, f46, f0, g3) |
/* 204: generic FP drain loop — copy remaining doublewords (%g3 counts
   them) with faligndata, software-pipelined over %f0/%f2, until %g3
   underflows to 205.  */
| 403 | 204: ldd [%o1], %f2 /* Load Group */ |
| 404 | add %o1, 8, %o1 /* IEU0 */ |
| 405 | subcc %g3, 8, %g3 /* IEU1 */ |
| 406 | faligndata %f0, %f2, %f8 /* GRU Group */ |
| 407 | std %f8, [%o0] /* Store */ |
| 408 | bl,pn %xcc, 205f /* CTI */ |
| 409 | add %o0, 8, %o0 /* IEU0 Group */ |
| 410 | ldd [%o1], %f0 /* Load Group */ |
| 411 | add %o1, 8, %o1 /* IEU0 */ |
| 412 | subcc %g3, 8, %g3 /* IEU1 */ |
| 413 | faligndata %f2, %f0, %f8 /* GRU Group */ |
| 414 | std %f8, [%o0] /* Store */ |
| 415 | bge,pt %xcc, 204b /* CTI */ |
| 416 | add %o0, 8, %o0 /* IEU0 Group */ |
/* 205: byte tail — %g1 holds the true (unaligned) source position for
   the last %o2 bytes; copy them one at a time at 206.  */
| 417 | 205: brz,pt %o2, 207f /* CTI Group */ |
| 418 | mov %g1, %o1 /* IEU0 */ |
| 419 | 206: ldub [%o1], %g5 /* LOAD */ |
| 420 | add %o1, 1, %o1 /* IEU0 */ |
| 421 | add %o0, 1, %o0 /* IEU1 */ |
| 422 | subcc %o2, 1, %o2 /* IEU1 */ |
| 423 | bne,pt %xcc, 206b /* CTI */ |
| 424 | stb %g5, [%o0 - 1] /* Store */ |
/* 207: order the block stores before returning, restore %fprs to just
   the FP-enable bit, and return the original dst saved in %g4.  */
| 425 | 207: membar #StoreLoad | #StoreStore /* LSU Group */ |
| 426 | wr %g0, FPRS_FEF, %fprs |
| 427 | retl |
| 428 | mov %g4, %o0 |
| 429 | |
/* 208: small-copy path (n <= 15, n != 0): copy one byte if n is odd,
   then the rest two bytes at a time; return the saved dst from %g4.
   The "2f+4" target skips the first instruction of 2:, which was
   already executed in the annulled delay slot.  */
| 430 | 208: andcc %o2, 1, %g0 /* IEU1 Group */ |
| 431 | be,pt %icc, 2f+4 /* CTI */ |
| 432 | 1: ldub [%o1], %g5 /* LOAD Group */ |
| 433 | add %o1, 1, %o1 /* IEU0 */ |
| 434 | add %o0, 1, %o0 /* IEU1 */ |
| 435 | subcc %o2, 1, %o2 /* IEU1 Group */ |
| 436 | be,pn %xcc, 209f /* CTI */ |
| 437 | stb %g5, [%o0 - 1] /* Store */ |
| 438 | 2: ldub [%o1], %g5 /* LOAD Group */ |
| 439 | add %o0, 2, %o0 /* IEU0 */ |
| 440 | ldub [%o1 + 1], %o5 /* LOAD Group */ |
| 441 | add %o1, 2, %o1 /* IEU0 */ |
| 442 | subcc %o2, 2, %o2 /* IEU1 Group */ |
| 443 | stb %g5, [%o0 - 2] /* Store */ |
| 444 | bne,pt %xcc, 2b /* CTI */ |
| 445 | stb %o5, [%o0 - 1] /* Store */ |
| 446 | 209: retl |
| 447 | mov %g4, %o0 |
| 448 | |
/* The SPARC v9 SYSV ABI __align_cpy_* entry points: memcpy variants
   that may assume dst, src and n share the stated alignment, letting
   them skip the alignment probing done by the generic memcpy.  Only
   built for the 64-bit (USE_BPR) configuration.  */
| 449 | #ifdef USE_BPR |
| 450 | |
| 451 | /* void *__align_cpy_4(void *dest, void *src, size_t n) |
| 452 | * SPARC v9 SYSV ABI |
| 453 | * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 3)) |
| 454 | */ |
| 455 | |
| 456 | .align 32 |
/* 4-byte aligned: small sizes go to 208 (byte copier), huge sizes to
   the VIS path at 200; otherwise enter the word-copy path at 216 with
   the condition codes it expects (%o0 & 7 in %g2, %o1 & 4 tested).
   %g4 preserves dst as the return value throughout.  */
| 457 | ENTRY(__align_cpy_4) |
| 458 | mov %o0, %g4 /* IEU0 Group */ |
| 459 | cmp %o2, 15 /* IEU1 */ |
| 460 | bleu,pn %xcc, 208b /* CTI */ |
| 461 | cmp %o2, (64 * 6) /* IEU1 Group */ |
| 462 | bgeu,pn %xcc, 200b /* CTI */ |
| 463 | andcc %o0, 7, %g2 /* IEU1 Group */ |
| 464 | ba,pt %xcc, 216f /* CTI */ |
| 465 | andcc %o1, 4, %g0 /* IEU1 Group */ |
| 466 | END(__align_cpy_4) |
| 467 | |
| 468 | /* void *__align_cpy_8(void *dest, void *src, size_t n) |
| 469 | * SPARC v9 SYSV ABI |
| 470 | * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 7)) |
| 471 | */ |
| 472 | |
| 473 | .align 32 |
/* 8-byte aligned: small -> 208, huge -> 201 (skips the byte alignment
   step of 200).  Otherwise pick the 128-byte aligned-chunk loop at
   82+4 when n >= 128 (pre-loading the first doubleword in the annulled
   delay slot), else the 16-byte tail dispatcher at 41.  */
| 474 | ENTRY(__align_cpy_8) |
| 475 | mov %o0, %g4 /* IEU0 Group */ |
| 476 | cmp %o2, 15 /* IEU1 */ |
| 477 | bleu,pn %xcc, 208b /* CTI */ |
| 478 | cmp %o2, (64 * 6) /* IEU1 Group */ |
| 479 | bgeu,pn %xcc, 201b /* CTI */ |
| 480 | andcc %o0, 0x38, %g5 /* IEU1 Group */ |
| 481 | andcc %o2, -128, %g6 /* IEU1 Group */ |
| 482 | bne,a,pt %xcc, 82f + 4 /* CTI */ |
| 483 | ldx [%o1], %g1 /* Load */ |
| 484 | ba,pt %xcc, 41f /* CTI Group */ |
| 485 | andcc %o2, 0x70, %g6 /* IEU1 */ |
| 486 | END(__align_cpy_8) |
| 487 | |
| 488 | /* void *__align_cpy_16(void *dest, void *src, size_t n) |
| 489 | * SPARC v9 SYSV ABI |
| 490 | * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 15)) |
| 491 | */ |
| 492 | |
| 493 | .align 32 |
/* 16-byte aligned: same dispatch as __align_cpy_8 but without the
   small-size check (n is a non-zero multiple of 16, so n >= 16).  */
| 494 | ENTRY(__align_cpy_16) |
| 495 | mov %o0, %g4 /* IEU0 Group */ |
| 496 | cmp %o2, (64 * 6) /* IEU1 */ |
| 497 | bgeu,pn %xcc, 201b /* CTI */ |
| 498 | andcc %o0, 0x38, %g5 /* IEU1 Group */ |
| 499 | andcc %o2, -128, %g6 /* IEU1 Group */ |
| 500 | bne,a,pt %xcc, 82f + 4 /* CTI */ |
| 501 | ldx [%o1], %g1 /* Load */ |
| 502 | ba,pt %xcc, 41f /* CTI Group */ |
| 503 | andcc %o2, 0x70, %g6 /* IEU1 */ |
| 504 | END(__align_cpy_16) |
| 505 | |
| 506 | #endif |
| 507 | |
| 508 | .align 32 |
/* memcpy(dst=%o0, src=%o1, n=%o2) -> original dst.
   Dispatch: n == 0 returns via 209; n <= 15 takes the byte copier at
   208; n >= 64*6 takes the VIS block path at 200.  Otherwise %g4 keeps
   the return value while the integer-register paths below run.
   218 is also the entry used by memmove for non-overlapping copies.  */
| 509 | ENTRY(memcpy) |
| 510 | 210: |
| 511 | #ifndef USE_BPR |
| 512 | srl %o2, 0, %o2 /* IEU1 Group */ |
| 513 | #endif |
| 514 | brz,pn %o2, 209b /* CTI Group */ |
| 515 | mov %o0, %g4 /* IEU0 */ |
| 516 | 218: cmp %o2, 15 /* IEU1 Group */ |
| 517 | bleu,pn %xcc, 208b /* CTI */ |
| 518 | cmp %o2, (64 * 6) /* IEU1 Group */ |
| 519 | bgeu,pn %xcc, 200b /* CTI */ |
| 520 | andcc %o0, 7, %g2 /* IEU1 Group */ |
/* If dst and src differ modulo 4, word copies are impossible: take the
   FP faligndata path at 212.  If src is already word-aligned, jump to
   216; otherwise copy 1 and/or 2 leading bytes to word-align src.  */
| 521 | sub %o0, %o1, %g5 /* IEU0 */ |
| 522 | andcc %g5, 3, %o5 /* IEU1 Group */ |
| 523 | bne,pn %xcc, 212f /* CTI */ |
| 524 | andcc %o1, 3, %g0 /* IEU1 Group */ |
| 525 | be,a,pt %xcc, 216f /* CTI */ |
| 526 | andcc %o1, 4, %g0 /* IEU1 Group */ |
| 527 | andcc %o1, 1, %g0 /* IEU1 Group */ |
| 528 | be,pn %xcc, 4f /* CTI */ |
| 529 | andcc %o1, 2, %g0 /* IEU1 Group */ |
| 530 | ldub [%o1], %g2 /* Load Group */ |
| 531 | add %o1, 1, %o1 /* IEU0 */ |
| 532 | add %o0, 1, %o0 /* IEU1 */ |
| 533 | sub %o2, 1, %o2 /* IEU0 Group */ |
| 534 | bne,pn %xcc, 5f /* CTI Group */ |
| 535 | stb %g2, [%o0 - 1] /* Store */ |
| 536 | 4: lduh [%o1], %g2 /* Load Group */ |
| 537 | add %o1, 2, %o1 /* IEU0 */ |
| 538 | add %o0, 2, %o0 /* IEU1 */ |
| 539 | sub %o2, 2, %o2 /* IEU0 */ |
| 540 | sth %g2, [%o0 - 2] /* Store Group + bubble */ |
/* 216: src is word-aligned.  Copy one word if src is not yet 8-byte
   aligned, then %g6 = n & ~127 selects the 128-byte main loops: 5:/35
   (MOVE_BIGCHUNK, dst 4-but-not-8 aligned) or 82/37 (aligned).  */
| 541 | 5: andcc %o1, 4, %g0 /* IEU1 */ |
| 542 | 216: be,a,pn %xcc, 2f /* CTI */ |
| 543 | andcc %o2, -128, %g6 /* IEU1 Group */ |
| 544 | lduw [%o1], %g5 /* Load Group */ |
| 545 | add %o1, 4, %o1 /* IEU0 */ |
| 546 | add %o0, 4, %o0 /* IEU1 */ |
| 547 | sub %o2, 4, %o2 /* IEU0 Group */ |
| 548 | stw %g5, [%o0 - 4] /* Store */ |
| 549 | andcc %o2, -128, %g6 /* IEU1 Group */ |
| 550 | 2: be,pn %xcc, 215f /* CTI */ |
| 551 | andcc %o0, 4, %g0 /* IEU1 Group */ |
| 552 | be,pn %xcc, 82f + 4 /* CTI Group */ |
| 553 | 5: MOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5) |
| 554 | MOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5) |
| 555 | MOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5) |
| 556 | MOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5) |
| 557 | 35: subcc %g6, 128, %g6 /* IEU1 Group */ |
| 558 | add %o1, 128, %o1 /* IEU0 */ |
| 559 | bne,pt %xcc, 5b /* CTI */ |
| 560 | add %o0, 128, %o0 /* IEU0 Group */ |
/* 215/41: handle the n & 0x70 remainder (0..7 sixteen-byte pieces) by
   computing a jump backward into the MOVE_LASTCHUNK run: each 16 data
   bytes correspond to 32 code bytes, hence %g5 = %g6 << 1.  Pointers
   are pre-advanced; the chunks index downward from them.  */
| 561 | 215: andcc %o2, 0x70, %g6 /* IEU1 Group */ |
| 562 | 41: be,pn %xcc, 80f /* CTI */ |
| 563 | andcc %o2, 8, %g0 /* IEU1 Group */ |
| 564 | /* Clk1 8-( */ |
| 565 | /* Clk2 8-( */ |
| 566 | /* Clk3 8-( */ |
| 567 | /* Clk4 8-( */ |
| 568 | 79: rd %pc, %o5 /* PDU Group */ |
| 569 | sll %g6, 1, %g5 /* IEU0 Group */ |
| 570 | add %o1, %g6, %o1 /* IEU1 */ |
| 571 | sub %o5, %g5, %o5 /* IEU0 Group */ |
| 572 | jmpl %o5 + %lo(80f - 79b), %g0 /* CTI Group brk forced*/ |
| 573 | add %o0, %g6, %o0 /* IEU0 Group */ |
| 574 | 36: MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5) |
| 575 | MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5) |
| 576 | MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5) |
| 577 | MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5) |
| 578 | MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5) |
| 579 | MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5) |
| 580 | MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5) |
/* 80/81/1: final 8-, 4-, 2- and 1-byte tails, each gated by the
   corresponding bit of n tested in the preceding delay slot.  */
| 581 | 80: be,pt %xcc, 81f /* CTI */ |
| 582 | andcc %o2, 4, %g0 /* IEU1 */ |
| 583 | ldx [%o1], %g2 /* Load Group */ |
| 584 | add %o0, 8, %o0 /* IEU0 */ |
| 585 | stw %g2, [%o0 - 0x4] /* Store Group */ |
| 586 | add %o1, 8, %o1 /* IEU1 */ |
| 587 | srlx %g2, 32, %g2 /* IEU0 Group */ |
| 588 | stw %g2, [%o0 - 0x8] /* Store */ |
| 589 | 81: be,pt %xcc, 1f /* CTI */ |
| 590 | andcc %o2, 2, %g0 /* IEU1 Group */ |
| 591 | lduw [%o1], %g2 /* Load Group */ |
| 592 | add %o1, 4, %o1 /* IEU0 */ |
| 593 | stw %g2, [%o0] /* Store Group */ |
| 594 | add %o0, 4, %o0 /* IEU0 */ |
| 595 | 1: be,pt %xcc, 1f /* CTI */ |
| 596 | andcc %o2, 1, %g0 /* IEU1 Group */ |
| 597 | lduh [%o1], %g2 /* Load Group */ |
| 598 | add %o1, 2, %o1 /* IEU0 */ |
| 599 | sth %g2, [%o0] /* Store Group */ |
| 600 | add %o0, 2, %o0 /* IEU0 */ |
| 601 | 1: be,pt %xcc, 211f /* CTI */ |
| 602 | nop /* IEU1 */ |
| 603 | ldub [%o1], %g2 /* Load Group */ |
| 604 | stb %g2, [%o0] /* Store Group + bubble */ |
| 605 | 211: retl |
| 606 | mov %g4, %o0 |
| 607 | |
/* 82/37: fully 8-byte-aligned 128-byte main loop (entered at 82+4 when
   the first ldx was issued in an annulled delay slot), followed by the
   same 0x70-remainder trick as above — here each 16 data bytes are 16
   code bytes of MOVE_LASTALIGNCHUNK, so no shift is needed.  */
| 608 | 82: MOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5) |
| 609 | MOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5) |
| 610 | 37: subcc %g6, 128, %g6 /* IEU1 Group */ |
| 611 | add %o1, 128, %o1 /* IEU0 */ |
| 612 | bne,pt %xcc, 82b /* CTI */ |
| 613 | add %o0, 128, %o0 /* IEU0 Group */ |
| 614 | andcc %o2, 0x70, %g6 /* IEU1 */ |
| 615 | be,pn %xcc, 84f /* CTI */ |
| 616 | andcc %o2, 8, %g0 /* IEU1 Group */ |
| 617 | /* Clk1 8-( */ |
| 618 | /* Clk2 8-( */ |
| 619 | /* Clk3 8-( */ |
| 620 | /* Clk4 8-( */ |
| 621 | 83: rd %pc, %o5 /* PDU Group */ |
| 622 | add %o1, %g6, %o1 /* IEU0 Group */ |
| 623 | sub %o5, %g6, %o5 /* IEU1 */ |
| 624 | jmpl %o5 + %lo(84f - 83b), %g0 /* CTI Group brk forced*/ |
| 625 | add %o0, %g6, %o0 /* IEU0 Group */ |
| 626 | 38: MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3) |
| 627 | MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3) |
| 628 | MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3) |
| 629 | MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3) |
| 630 | MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3) |
| 631 | MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3) |
| 632 | MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3) |
/* 84/85/1: aligned final 8-, 4-, 2- and 1-byte tails.  */
| 633 | 84: be,pt %xcc, 85f /* CTI Group */ |
| 634 | andcc %o2, 4, %g0 /* IEU1 */ |
| 635 | ldx [%o1], %g2 /* Load Group */ |
| 636 | add %o0, 8, %o0 /* IEU0 */ |
| 637 | add %o1, 8, %o1 /* IEU0 Group */ |
| 638 | stx %g2, [%o0 - 0x8] /* Store */ |
| 639 | 85: be,pt %xcc, 1f /* CTI */ |
| 640 | andcc %o2, 2, %g0 /* IEU1 Group */ |
| 641 | lduw [%o1], %g2 /* Load Group */ |
| 642 | add %o0, 4, %o0 /* IEU0 */ |
| 643 | add %o1, 4, %o1 /* IEU0 Group */ |
| 644 | stw %g2, [%o0 - 0x4] /* Store */ |
| 645 | 1: be,pt %xcc, 1f /* CTI */ |
| 646 | andcc %o2, 1, %g0 /* IEU1 Group */ |
| 647 | lduh [%o1], %g2 /* Load Group */ |
| 648 | add %o0, 2, %o0 /* IEU0 */ |
| 649 | add %o1, 2, %o1 /* IEU0 Group */ |
| 650 | sth %g2, [%o0 - 0x2] /* Store */ |
| 651 | 1: be,pt %xcc, 1f /* CTI */ |
| 652 | nop /* IEU0 Group */ |
| 653 | ldub [%o1], %g2 /* Load Group */ |
| 654 | stb %g2, [%o0] /* Store Group + bubble */ |
| 655 | 1: retl |
| 656 | mov %g4, %o0 |
| 657 | |
/* 212: src and dst differ modulo 4 — copy (8 - (dst & 7)) leading
   bytes to 8-align dst (%g2 holds dst & 7 from 218), then use the FP
   faligndata loop to copy %g5 = n & ~7 bytes; byte tail at 213.  */
| 658 | 212: brz,pt %g2, 2f /* CTI Group */ |
| 659 | mov 8, %g1 /* IEU0 */ |
| 660 | sub %g1, %g2, %g2 /* IEU0 Group */ |
| 661 | sub %o2, %g2, %o2 /* IEU0 Group */ |
| 662 | 1: ldub [%o1], %g5 /* Load Group */ |
| 663 | add %o1, 1, %o1 /* IEU0 */ |
| 664 | add %o0, 1, %o0 /* IEU1 */ |
| 665 | subcc %g2, 1, %g2 /* IEU1 Group */ |
| 666 | bne,pt %xcc, 1b /* CTI */ |
| 667 | stb %g5, [%o0 - 1] /* Store */ |
| 668 | 2: andn %o2, 7, %g5 /* IEU0 Group */ |
| 669 | and %o2, 7, %o2 /* IEU1 */ |
| 670 | fmovd %f0, %f2 /* FPU */ |
| 671 | alignaddr %o1, %g0, %g1 /* GRU Group */ |
| 672 | ldd [%g1], %f4 /* Load Group */ |
/* Software-pipelined 8-bytes-per-iteration faligndata loop, same shape
   as the one at 201.  */
| 673 | 1: ldd [%g1 + 0x8], %f6 /* Load Group */ |
| 674 | add %g1, 0x8, %g1 /* IEU0 Group */ |
| 675 | subcc %g5, 8, %g5 /* IEU1 */ |
| 676 | faligndata %f4, %f6, %f0 /* GRU Group */ |
| 677 | std %f0, [%o0] /* Store */ |
| 678 | add %o1, 8, %o1 /* IEU0 Group */ |
| 679 | be,pn %xcc, 213f /* CTI */ |
| 680 | add %o0, 8, %o0 /* IEU1 */ |
| 681 | ldd [%g1 + 0x8], %f4 /* Load Group */ |
| 682 | add %g1, 8, %g1 /* IEU0 */ |
| 683 | subcc %g5, 8, %g5 /* IEU1 */ |
| 684 | faligndata %f6, %f4, %f0 /* GRU Group */ |
| 685 | std %f0, [%o0] /* Store */ |
| 686 | add %o1, 8, %o1 /* IEU0 */ |
| 687 | bne,pn %xcc, 1b /* CTI Group */ |
| 688 | add %o0, 8, %o0 /* IEU0 */ |
/* 213: copy the final 0..7 bytes (loop continues at 206 in the VIS
   tail code), then restore %fprs and return the saved dst.  */
| 689 | 213: brz,pn %o2, 214f /* CTI Group */ |
| 690 | nop /* IEU0 */ |
| 691 | ldub [%o1], %g5 /* LOAD */ |
| 692 | add %o1, 1, %o1 /* IEU0 */ |
| 693 | add %o0, 1, %o0 /* IEU1 */ |
| 694 | subcc %o2, 1, %o2 /* IEU1 */ |
| 695 | bne,pt %xcc, 206b /* CTI */ |
| 696 | stb %g5, [%o0 - 1] /* Store Group */ |
| 697 | 214: wr %g0, FPRS_FEF, %fprs |
| 698 | retl |
| 699 | mov %g4, %o0 |
| 700 | END(memcpy) |
| 701 | libc_hidden_def(memcpy) |
| 702 | |
	.align	32
	/* Backward (descending-address) small-copy tail, reached only
	   via the "228b" branch in memmove below.  On entry %o0/%o1
	   point one byte PAST the end of dest/src, %o2 = remaining
	   length (1..15), %g4 = original dest to return.  */
228:	andcc	%o2, 1, %g0				/* IEU1	Group */
	/* Even length: the (non-annulled) delay-slot ldub below has
	   already fetched the first byte, so enter the 2-byte loop one
	   instruction (4 bytes) past its label to skip the duplicate
	   load at "2:".  */
	be,pt	%icc, 2f+4				/* CTI */
1:	ldub	[%o1 - 1], %o5				/* LOAD Group */
	sub	%o1, 1, %o1				/* IEU0 */
	sub	%o0, 1, %o0				/* IEU1 */
	subcc	%o2, 1, %o2				/* IEU1	Group */
	be,pn	%xcc, 229f				/* CTI */
	stb	%o5, [%o0]				/* Store */
	/* Remaining count is even: move two bytes per iteration,
	   highest addresses first.  */
2:	ldub	[%o1 - 1], %o5				/* LOAD Group */
	sub	%o0, 2, %o0				/* IEU0 */
	ldub	[%o1 - 2], %g5				/* LOAD Group */
	sub	%o1, 2, %o1				/* IEU0 */
	subcc	%o2, 2, %o2				/* IEU1	Group */
	stb	%o5, [%o0 + 1]				/* Store */
	bne,pt	%xcc, 2b				/* CTI */
	stb	%g5, [%o0]				/* Store */
229:	retl
	mov	%g4, %o0				/* Return original dest.  */
	/* Zero-length memmove: return immediately, %o0 still holds the
	   caller's dest.  */
219:	retl
	nop
| 724 | |
	.align	32
/* void *memmove (void *dest, const void *src, size_t n)
   %o0 = dest, %o1 = src, %o2 = n; returns the original dest, which is
   kept in %g4 throughout.  When an ascending copy is safe, control
   transfers to the forward memcpy path at label 218 earlier in this
   file (not visible in this chunk).  Everything below copies in
   DESCENDING order, with %o0/%o1 advanced one byte past the end of
   their regions.  */
ENTRY(memmove)
#ifndef USE_BPR
	/* 32-bit configuration: zero-extend the size to 64 bits.  */
	srl	%o2, 0, %o2				/* IEU1	Group */
#endif
	brz,pn	%o2, 219b				/* CTI Group */
	sub	%o0, %o1, %o4				/* IEU0 */
	/* If dest - src >= n (unsigned), a forward copy never clobbers
	   source bytes that are still unread: use memcpy's path.  */
	cmp	%o4, %o2				/* IEU1	Group */
	bgeu,pt	%XCC, 218b				/* CTI */
	mov	%o0, %g4				/* IEU0 */
	/* Overlap with dest above src: copy backwards.  Point both
	   pointers one byte past the end of their buffers.  */
	add	%o0, %o2, %o0				/* IEU0	Group */
220:	add	%o1, %o2, %o1				/* IEU1 */
	cmp	%o2, 15					/* IEU1	Group */
	bleu,pn	%xcc, 228b				/* CTI */
	andcc	%o0, 7, %g2				/* IEU1	Group */
	sub	%o0, %o1, %g5				/* IEU0 */
	/* dest and src not congruent mod 4: integer loads/stores can
	   never be mutually aligned, so take the VIS faligndata path.  */
	andcc	%g5, 3, %o5				/* IEU1	Group */
	bne,pn	%xcc, 232f				/* CTI */
	andcc	%o1, 3, %g0				/* IEU1	Group */
	/* Align src (and therefore dest) down to an 8-byte boundary:
	   one byte, then a halfword, then a word as needed.  */
	be,a,pt	%xcc, 236f				/* CTI */
	andcc	%o1, 4, %g0				/* IEU1	Group */
	andcc	%o1, 1, %g0				/* IEU1	Group */
	be,pn	%xcc, 4f				/* CTI */
	andcc	%o1, 2, %g0				/* IEU1	Group */
	ldub	[%o1 - 1], %g2				/* Load Group */
	sub	%o1, 1, %o1				/* IEU0 */
	sub	%o0, 1, %o0				/* IEU1 */
	sub	%o2, 1, %o2				/* IEU0	Group */
	be,pn	%xcc, 5f				/* CTI Group */
	stb	%g2, [%o0]				/* Store */
4:	lduh	[%o1 - 2], %g2				/* Load Group */
	sub	%o1, 2, %o1				/* IEU0 */
	sub	%o0, 2, %o0				/* IEU1 */
	sub	%o2, 2, %o2				/* IEU0 */
	sth	%g2, [%o0]				/* Store Group + bubble */
5:	andcc	%o1, 4, %g0				/* IEU1 */
	/* Annulled branch: the delay-slot %g6 computation executes only
	   when the branch is taken (src already 8-aligned); on the
	   fall-through path it is recomputed just before "2:".  */
236:	be,a,pn	%xcc, 2f				/* CTI */
	andcc	%o2, -128, %g6				/* IEU1	Group */
	lduw	[%o1 - 4], %g5				/* Load Group */
	sub	%o1, 4, %o1				/* IEU0 */
	sub	%o0, 4, %o0				/* IEU1 */
	sub	%o2, 4, %o2				/* IEU0	Group */
	stw	%g5, [%o0]				/* Store */
	andcc	%o2, -128, %g6				/* IEU1	Group */
2:	be,pn	%xcc, 235f				/* CTI */
	andcc	%o0, 4, %g0				/* IEU1	Group */
	/* %g6 = bytes to move in 128-byte chunks.  If dest is 8-byte
	   aligned, take the aligned loop at 282; the non-annulled delay
	   slot has already issued the first instruction of
	   RMOVE_BIGCHUNK, so 282 is entered 4 bytes in — presumably the
	   two macros share that first instruction (TODO: confirm
	   against the macro definitions earlier in the file).  */
	be,pn	%xcc, 282f + 4				/* CTI Group */
5:	RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
	RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5)
	RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
	RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5)
	subcc	%g6, 128, %g6				/* IEU1	Group */
	sub	%o1, 128, %o1				/* IEU0 */
	bne,pt	%xcc, 5b				/* CTI */
	sub	%o0, 128, %o0				/* IEU0	Group */
	/* %g6 = remaining whole 16-byte units (0..0x70 bytes).  */
235:	andcc	%o2, 0x70, %g6				/* IEU1	Group */
41:	be,pn	%xcc, 280f				/* CTI */
	andcc	%o2, 8, %g0				/* IEU1	Group */
							/* Clk1 8-( */
							/* Clk2 8-( */
							/* Clk3 8-( */
							/* Clk4 8-( */
	/* Computed jump into the RMOVE_LASTCHUNK chain.  Each macro
	   copies 0x10 data bytes in (presumably) 8 instructions = 32
	   code bytes — hence the <<1: entering 2*%g6 code bytes before
	   label 280 copies exactly %g6 bytes.  Delay slots pre-adjust
	   the pointers by %g6.  TODO: re-verify the 2:1 code/data ratio
	   if RMOVE_LASTCHUNK ever changes.  */
279:	rd	%pc, %o5				/* PDU Group */
	sll	%g6, 1, %g5				/* IEU0	Group */
	sub	%o1, %g6, %o1				/* IEU1 */
	sub	%o5, %g5, %o5				/* IEU0	Group */
	jmpl	%o5 + %lo(280f - 279b), %g0		/* CTI Group brk forced*/
	sub	%o0, %g6, %o0				/* IEU0	Group */
	RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5)
	RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5)
	RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5)
	RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5)
	RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5)
	RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5)
	RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5)
	/* Tail: final 8/4/2/1 bytes per the low bits of %o2 (condition
	   codes were set in the preceding delay slots).  Dest is only
	   word-aligned on this path, so the 8-byte unit is stored as
	   two 32-bit words.  */
280:	be,pt	%xcc, 281f				/* CTI */
	andcc	%o2, 4, %g0				/* IEU1 */
	ldx	[%o1 - 8], %g2				/* Load Group */
	sub	%o0, 8, %o0				/* IEU0 */
	stw	%g2, [%o0 + 4]				/* Store Group */
	sub	%o1, 8, %o1				/* IEU1 */
	srlx	%g2, 32, %g2				/* IEU0	Group */
	stw	%g2, [%o0]				/* Store */
281:	be,pt	%xcc, 1f				/* CTI */
	andcc	%o2, 2, %g0				/* IEU1	Group */
	lduw	[%o1 - 4], %g2				/* Load Group */
	sub	%o1, 4, %o1				/* IEU0 */
	stw	%g2, [%o0 - 4]				/* Store Group */
	sub	%o0, 4, %o0				/* IEU0 */
1:	be,pt	%xcc, 1f				/* CTI */
	andcc	%o2, 1, %g0				/* IEU1	Group */
	lduh	[%o1 - 2], %g2				/* Load Group */
	sub	%o1, 2, %o1				/* IEU0 */
	sth	%g2, [%o0 - 2]				/* Store Group */
	sub	%o0, 2, %o0				/* IEU0 */
1:	be,pt	%xcc, 211f				/* CTI */
	nop						/* IEU1 */
	ldub	[%o1 - 1], %g2				/* Load Group */
	stb	%g2, [%o0 - 1]				/* Store Group + bubble */
211:	retl
	mov	%g4, %o0				/* Return original dest.  */

	/* 128-byte backward loop for 8-byte-aligned dest.  */
282:	RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
	RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
	subcc	%g6, 128, %g6				/* IEU1	Group */
	sub	%o1, 128, %o1				/* IEU0 */
	bne,pt	%xcc, 282b				/* CTI */
	sub	%o0, 128, %o0				/* IEU0	Group */
	andcc	%o2, 0x70, %g6				/* IEU1 */
	be,pn	%xcc, 284f				/* CTI */
	andcc	%o2, 8, %g0				/* IEU1	Group */
							/* Clk1 8-( */
							/* Clk2 8-( */
							/* Clk3 8-( */
							/* Clk4 8-( */
	/* Computed jump as at 279, but RMOVE_LASTALIGNCHUNK presumably
	   copies 0x10 data bytes in 4 instructions = 16 code bytes
	   (1:1 ratio), hence the offset is %g6 rather than 2*%g6 —
	   TODO: confirm against the macro definition.  */
283:	rd	%pc, %o5				/* PDU Group */
	sub	%o1, %g6, %o1				/* IEU0	Group */
	sub	%o5, %g6, %o5				/* IEU1 */
	jmpl	%o5 + %lo(284f - 283b), %g0		/* CTI Group brk forced*/
	sub	%o0, %g6, %o0				/* IEU0	Group */
	RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3)
	RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3)
	RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3)
	RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3)
	RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3)
	RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3)
	RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3)
	/* Aligned tail: 8/4/2/1 bytes.  Dest is 8-byte aligned here,
	   so a single stx handles the 8-byte unit.  */
284:	be,pt	%xcc, 285f				/* CTI Group */
	andcc	%o2, 4, %g0				/* IEU1 */
	ldx	[%o1 - 8], %g2				/* Load Group */
	sub	%o0, 8, %o0				/* IEU0 */
	sub	%o1, 8, %o1				/* IEU0	Group */
	stx	%g2, [%o0]				/* Store */
285:	be,pt	%xcc, 1f				/* CTI */
	andcc	%o2, 2, %g0				/* IEU1	Group */
	lduw	[%o1 - 4], %g2				/* Load Group */
	sub	%o0, 4, %o0				/* IEU0 */
	sub	%o1, 4, %o1				/* IEU0	Group */
	stw	%g2, [%o0]				/* Store */
1:	be,pt	%xcc, 1f				/* CTI */
	andcc	%o2, 1, %g0				/* IEU1	Group */
	lduh	[%o1 - 2], %g2				/* Load Group */
	sub	%o0, 2, %o0				/* IEU0 */
	sub	%o1, 2, %o1				/* IEU0	Group */
	sth	%g2, [%o0]				/* Store */
1:	be,pt	%xcc, 1f				/* CTI */
	nop						/* IEU0	Group */
	ldub	[%o1 - 1], %g2				/* Load Group */
	stb	%g2, [%o0 - 1]				/* Store Group + bubble */
1:	retl
	mov	%g4, %o0				/* Return original dest.  */

	/* Mutually misaligned backward copy via VIS.  %g2 = dest & 7
	   (set by the andcc far above): copy %g2 single bytes to bring
	   dest down to an 8-byte boundary.  The delay-slot sub is
	   harmless when %g2 is zero (subtracts 0).  */
232:	brz,pt	%g2, 2f					/* CTI Group */
	sub	%o2, %g2, %o2				/* IEU0	Group */
1:	ldub	[%o1 - 1], %g5				/* Load Group */
	sub	%o1, 1, %o1				/* IEU0 */
	sub	%o0, 1, %o0				/* IEU1 */
	subcc	%g2, 1, %g2				/* IEU1	Group */
	bne,pt	%xcc, 1b				/* CTI */
	stb	%g5, [%o0]				/* Store */
	/* %g5 = bytes in whole 8-byte units, %o2 = leftover (<8).
	   alignaddr rounds src down into %g1 and latches the byte
	   offset for faligndata.  The loop is software-pipelined two
	   deep, alternating the %f4/%f6 doubleword buffers; each
	   faligndata merges two adjacent source doublewords into one
	   aligned 8-byte store.  */
2:	andn	%o2, 7, %g5				/* IEU0	Group */
	and	%o2, 7, %o2				/* IEU1 */
	fmovd	%f0, %f2				/* FPU */
	alignaddr %o1, %g0, %g1				/* GRU Group */
	ldd	[%g1], %f4				/* Load Group */
1:	ldd	[%g1 - 8], %f6				/* Load Group */
	sub	%g1, 8, %g1				/* IEU0	Group */
	subcc	%g5, 8, %g5				/* IEU1 */
	faligndata %f6, %f4, %f0			/* GRU Group */
	std	%f0, [%o0 - 8]				/* Store */
	sub	%o1, 8, %o1				/* IEU0	Group */
	be,pn	%xcc, 233f				/* CTI */
	sub	%o0, 8, %o0				/* IEU1 */
	ldd	[%g1 - 8], %f4				/* Load Group */
	sub	%g1, 8, %g1				/* IEU0 */
	subcc	%g5, 8, %g5				/* IEU1 */
	faligndata %f4, %f6, %f0			/* GRU Group */
	std	%f0, [%o0 - 8]				/* Store */
	sub	%o1, 8, %o1				/* IEU0 */
	bne,pn	%xcc, 1b				/* CTI Group */
	sub	%o0, 8, %o0				/* IEU0 */
	/* Byte tail of the FP path (%o2 < 8 bytes left).  */
233:	brz,pn	%o2, 234f				/* CTI Group */
	nop						/* IEU0 */
237:	ldub	[%o1 - 1], %g5				/* LOAD */
	sub	%o1, 1, %o1				/* IEU0 */
	sub	%o0, 1, %o0				/* IEU1 */
	subcc	%o2, 1, %o2				/* IEU1 */
	bne,pt	%xcc, 237b				/* CTI */
	stb	%g5, [%o0]				/* Store Group */
	/* Rewrite %fprs with FEF only after using the FP unit — same
	   epilogue as the forward path's 214 above.  Presumably this
	   clears the dirty-register bits while keeping FP enabled;
	   confirm against the FPRS register specification.  */
234:	wr	%g0, FPRS_FEF, %fprs
	retl
	mov	%g4, %o0				/* Return original dest.  */
END(memmove)
libc_hidden_def(memmove)
| 919 | |
#ifdef USE_BPR
/* 64-bit (v9) configurations only: provide __align_cpy_1/_2 entry
   points as weak aliases of memcpy, which handles any alignment.
   NOTE(review): these appear to be the compiler's aligned-copy helper
   names — confirm against the SPARC v9 ABI / g++ runtime.  */
weak_alias(memcpy,__align_cpy_1)
weak_alias(memcpy,__align_cpy_2)
#endif