| xf.li | bdd93d5 | 2023-05-12 07:10:14 -0700 | [diff] [blame] | 1 | /* _memcopy.c -- subroutines for memory copy functions. | 
 | 2 |    Copyright (C) 1991-2016 Free Software Foundation, Inc. | 
 | 3 |    This file is part of the GNU C Library. | 
 | 4 |    Contributed by Torbjorn Granlund (tege@sics.se). | 
 | 5 |  | 
 | 6 |    The GNU C Library is free software; you can redistribute it and/or | 
 | 7 |    modify it under the terms of the GNU Lesser General Public | 
 | 8 |    License as published by the Free Software Foundation; either | 
 | 9 |    version 2.1 of the License, or (at your option) any later version. | 
 | 10 |  | 
 | 11 |    The GNU C Library is distributed in the hope that it will be useful, | 
 | 12 |    but WITHOUT ANY WARRANTY; without even the implied warranty of | 
 | 13 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
 | 14 |    Lesser General Public License for more details. | 
 | 15 |  | 
 | 16 |    You should have received a copy of the GNU Lesser General Public | 
 | 17 |    License along with the GNU C Library; if not, see | 
 | 18 |    <http://www.gnu.org/licenses/>.  */ | 
 | 19 |  | 
 | 20 | /* BE VERY CAREFUL IF YOU CHANGE THIS CODE...!  */ | 
 | 21 |  | 
 | 22 | #include <stddef.h> | 
 | 23 | #include <memcopy.h> | 
 | 24 |  | 
 | 25 | /* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to | 
 | 26 |    block beginning at DSTP with LEN `op_t' words (not LEN bytes!). | 
 | 27 |    Both SRCP and DSTP should be aligned for memory operations on `op_t's.  */ | 
 | 28 |  | 
 | 29 | #ifndef WORDCOPY_FWD_ALIGNED | 
 | 30 | # define WORDCOPY_FWD_ALIGNED _wordcopy_fwd_aligned | 
 | 31 | #endif | 
 | 32 |  | 
/* Forward, word-at-a-time copy of LEN `op_t' words from SRCP to DSTP.

   DSTP and SRCP are addresses carried in `long int' and cast to
   `op_t *' at every access; LEN counts words, not bytes.  The function
   is emitted under the name WORDCOPY_FWD_ALIGNED, which the guard just
   above sets to _wordcopy_fwd_aligned unless it was already defined.

   The body is an eight-step unrolled loop.  Each step loads the next
   source word into one of A0/A1 and stores, from the other register,
   the word loaded by the previous step, so one loaded word is always
   pending.  The switch on LEN % 8 performs the first load, biases SRCP
   and DSTP so that the fixed indices 0..7 inside the loop land on the
   right words, adjusts LEN to a multiple of 8, and jumps to the label
   that leaves exactly the required number of steps in the first pass.
   After the loop, do0 stores the last pending word.

   Returns nothing; the only effect is the stores through DSTP.  */
 | 33 | void | 
 | 34 | WORDCOPY_FWD_ALIGNED (long int dstp, long int srcp, size_t len) | 
 | 35 | { | 
    /* a0 and a1 alternate as "just loaded" and "about to be stored".  */
 | 36 |   op_t a0, a1; | 
 | 37 |  | 
    /* Entry dispatch.  In cases 2..7 LEN is rounded up to the next
       multiple of 8 and the pointers are moved back by the number of
       loop steps being skipped; DSTP trails SRCP by one word because
       the store in a step belongs to the previous step's load.  */
 | 38 |   switch (len % 8) | 
 | 39 |     { | 
 | 40 |     case 2: | 
 | 41 |       a0 = ((op_t *) srcp)[0]; | 
 | 42 |       srcp -= 6 * OPSIZ; | 
 | 43 |       dstp -= 7 * OPSIZ; | 
 | 44 |       len += 6; | 
 | 45 |       goto do1; | 
 | 46 |     case 3: | 
 | 47 |       a1 = ((op_t *) srcp)[0]; | 
 | 48 |       srcp -= 5 * OPSIZ; | 
 | 49 |       dstp -= 6 * OPSIZ; | 
 | 50 |       len += 5; | 
 | 51 |       goto do2; | 
 | 52 |     case 4: | 
 | 53 |       a0 = ((op_t *) srcp)[0]; | 
 | 54 |       srcp -= 4 * OPSIZ; | 
 | 55 |       dstp -= 5 * OPSIZ; | 
 | 56 |       len += 4; | 
 | 57 |       goto do3; | 
 | 58 |     case 5: | 
 | 59 |       a1 = ((op_t *) srcp)[0]; | 
 | 60 |       srcp -= 3 * OPSIZ; | 
 | 61 |       dstp -= 4 * OPSIZ; | 
 | 62 |       len += 3; | 
 | 63 |       goto do4; | 
 | 64 |     case 6: | 
 | 65 |       a0 = ((op_t *) srcp)[0]; | 
 | 66 |       srcp -= 2 * OPSIZ; | 
 | 67 |       dstp -= 3 * OPSIZ; | 
 | 68 |       len += 2; | 
 | 69 |       goto do5; | 
 | 70 |     case 7: | 
 | 71 |       a1 = ((op_t *) srcp)[0]; | 
 | 72 |       srcp -= 1 * OPSIZ; | 
 | 73 |       dstp -= 2 * OPSIZ; | 
 | 74 |       len += 1; | 
 | 75 |       goto do6; | 
 | 76 |  | 
 | 77 |     case 0: | 
      /* LEN is already a multiple of 8.  The LEN == 0 exit is only
	 active when OP_T_THRES <= 3 * OPSIZ; without it a zero LEN
	 would enter the loop and `len -= 8' would wrap around.
	 NOTE(review): presumably callers can pass such a small LEN
	 only when the threshold is that low -- confirm in memcopy.h.  */
 | 78 |       if (OP_T_THRES <= 3 * OPSIZ && len == 0) | 
 | 79 | 	return; | 
 | 80 |       a0 = ((op_t *) srcp)[0]; | 
 | 81 |       srcp -= 0 * OPSIZ; | 
 | 82 |       dstp -= 1 * OPSIZ; | 
 | 83 |       goto do7; | 
 | 84 |     case 1: | 
      /* One word is loaded up front and LEN drops to a multiple of 8.
	 Subtracting -1 * OPSIZ advances SRCP by one word.  */
 | 85 |       a1 = ((op_t *) srcp)[0]; | 
 | 86 |       srcp -=-1 * OPSIZ; | 
 | 87 |       dstp -= 0 * OPSIZ; | 
 | 88 |       len -= 1; | 
      /* If that was the only word, skip the loop and just store it.  */
 | 89 |       if (OP_T_THRES <= 3 * OPSIZ && len == 0) | 
 | 90 | 	goto do0; | 
      /* do8 is the first statement after the switch, so this jump
	 goes where control would fall anyway.  */
 | 91 |       goto do8;			/* No-op.  */ | 
 | 92 |     } | 
 | 93 |  | 
    /* Main loop: eight load/store steps per pass, then advance both
       pointers by eight words and count them off LEN.  */
 | 94 |   do | 
 | 95 |     { | 
 | 96 |     do8: | 
 | 97 |       a0 = ((op_t *) srcp)[0]; | 
 | 98 |       ((op_t *) dstp)[0] = a1; | 
 | 99 |     do7: | 
 | 100 |       a1 = ((op_t *) srcp)[1]; | 
 | 101 |       ((op_t *) dstp)[1] = a0; | 
 | 102 |     do6: | 
 | 103 |       a0 = ((op_t *) srcp)[2]; | 
 | 104 |       ((op_t *) dstp)[2] = a1; | 
 | 105 |     do5: | 
 | 106 |       a1 = ((op_t *) srcp)[3]; | 
 | 107 |       ((op_t *) dstp)[3] = a0; | 
 | 108 |     do4: | 
 | 109 |       a0 = ((op_t *) srcp)[4]; | 
 | 110 |       ((op_t *) dstp)[4] = a1; | 
 | 111 |     do3: | 
 | 112 |       a1 = ((op_t *) srcp)[5]; | 
 | 113 |       ((op_t *) dstp)[5] = a0; | 
 | 114 |     do2: | 
 | 115 |       a0 = ((op_t *) srcp)[6]; | 
 | 116 |       ((op_t *) dstp)[6] = a1; | 
 | 117 |     do1: | 
 | 118 |       a1 = ((op_t *) srcp)[7]; | 
 | 119 |       ((op_t *) dstp)[7] = a0; | 
 | 120 |  | 
 | 121 |       srcp += 8 * OPSIZ; | 
 | 122 |       dstp += 8 * OPSIZ; | 
 | 123 |       len -= 8; | 
 | 124 |     } | 
 | 125 |   while (len != 0); | 
 | 126 |  | 
 | 127 |   /* This is the right position for do0.  Please don't move | 
 | 128 |      it into the loop.  */ | 
 | 129 |  do0: | 
    /* Store the word loaded by the final step (or by case 1 when LEN
       was 1); it is always held in a1 at this point.  */
 | 130 |   ((op_t *) dstp)[0] = a1; | 
 | 131 | } | 
 | 132 |  | 
 | 133 | /* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to | 
 | 134 |    block beginning at DSTP with LEN `op_t' words (not LEN bytes!). | 
 | 135 |    DSTP should be aligned for memory operations on `op_t's, but SRCP must | 
 | 136 |    *not* be aligned.  */ | 
 | 137 |  | 
 | 138 | #ifndef WORDCOPY_FWD_DEST_ALIGNED | 
 | 139 | # define WORDCOPY_FWD_DEST_ALIGNED _wordcopy_fwd_dest_aligned | 
 | 140 | #endif | 
 | 141 |  | 
/* Forward copy of LEN `op_t' words to an aligned DSTP from a SRCP that
   is not on a word boundary.

   DSTP and SRCP are addresses carried in `long int'; LEN counts words.
   The function is emitted under the name WORDCOPY_FWD_DEST_ALIGNED,
   which the guard just above sets to _wordcopy_fwd_dest_aligned unless
   it was already defined.

   SRCP is rounded down to a word boundary and only whole aligned words
   are loaded.  Every destination word is produced by MERGE from two
   consecutive source words, the lower-addressed one given first, using
   the bit counts SH_1 and SH_2.  LEN + 1 source words are therefore
   loaded to produce LEN destination words.  MERGE comes from outside
   this file (NOTE(review): presumably <memcopy.h>; its exact shift
   directions are not visible here).

   The loop is unrolled four times and rotates through A0..A3.  The
   switch on LEN % 4 loads the first two words, biases the pointers,
   adjusts LEN to a multiple of 4 and jumps into the loop; do0 emits
   the final destination word after the loop.

   Returns nothing; the only effect is the stores through DSTP.  */
 | 142 | void | 
 | 143 | WORDCOPY_FWD_DEST_ALIGNED (long int dstp, long int srcp, size_t len) | 
 | 144 | { | 
 | 145 |   op_t a0, a1, a2, a3; | 
 | 146 |   int sh_1, sh_2; | 
 | 147 |  | 
 | 148 |   /* Calculate how to shift a word read at the memory operation | 
 | 149 |      aligned srcp to make it aligned for copy.  */ | 
 | 150 |  | 
    /* sh_1 is SRCP's byte offset inside its word, in bits; sh_2 is the
       remaining bits of the word.  For an aligned SRCP these would be
       0 and the full word width, which is why the caller must not pass
       one.  */
 | 151 |   sh_1 = 8 * (srcp % OPSIZ); | 
 | 152 |   sh_2 = 8 * OPSIZ - sh_1; | 
 | 153 |  | 
 | 154 |   /* Make SRCP aligned by rounding it down to the beginning of the `op_t' | 
 | 155 |      it points in the middle of.  */ | 
 | 156 |   srcp &= -OPSIZ; | 
 | 157 |  | 
    /* Entry dispatch: preload two consecutive source words into the
       pair of registers the target label expects, then bias SRCP and
       DSTP for the loop's fixed indices 0..3.  */
 | 158 |   switch (len % 4) | 
 | 159 |     { | 
 | 160 |     case 2: | 
 | 161 |       a1 = ((op_t *) srcp)[0]; | 
 | 162 |       a2 = ((op_t *) srcp)[1]; | 
 | 163 |       srcp -= 1 * OPSIZ; | 
 | 164 |       dstp -= 3 * OPSIZ; | 
 | 165 |       len += 2; | 
 | 166 |       goto do1; | 
 | 167 |     case 3: | 
 | 168 |       a0 = ((op_t *) srcp)[0]; | 
 | 169 |       a1 = ((op_t *) srcp)[1]; | 
 | 170 |       srcp -= 0 * OPSIZ; | 
 | 171 |       dstp -= 2 * OPSIZ; | 
 | 172 |       len += 1; | 
 | 173 |       goto do2; | 
 | 174 |     case 0: | 
      /* The LEN == 0 exit is only active when OP_T_THRES <= 3 * OPSIZ;
	 without it a zero LEN would enter the loop and `len -= 4'
	 would wrap around.  */
 | 175 |       if (OP_T_THRES <= 3 * OPSIZ && len == 0) | 
 | 176 | 	return; | 
 | 177 |       a3 = ((op_t *) srcp)[0]; | 
 | 178 |       a0 = ((op_t *) srcp)[1]; | 
      /* Subtracting -1 * OPSIZ advances SRCP by one word.  */
 | 179 |       srcp -=-1 * OPSIZ; | 
 | 180 |       dstp -= 1 * OPSIZ; | 
 | 181 |       len += 0; | 
 | 182 |       goto do3; | 
 | 183 |     case 1: | 
 | 184 |       a2 = ((op_t *) srcp)[0]; | 
 | 185 |       a3 = ((op_t *) srcp)[1]; | 
      /* Both preloaded words are consumed, so SRCP moves on by two.  */
 | 186 |       srcp -=-2 * OPSIZ; | 
 | 187 |       dstp -= 0 * OPSIZ; | 
 | 188 |       len -= 1; | 
      /* A single-word copy needs only the final merge at do0.  */
 | 189 |       if (OP_T_THRES <= 3 * OPSIZ && len == 0) | 
 | 190 | 	goto do0; | 
      /* do4 is the first statement after the switch, so this jump
	 goes where control would fall anyway.  */
 | 191 |       goto do4;			/* No-op.  */ | 
 | 192 |     } | 
 | 193 |  | 
    /* Main loop: each step loads one new source word and stores the
       merge of the two words loaded before it.  */
 | 194 |   do | 
 | 195 |     { | 
 | 196 |     do4: | 
 | 197 |       a0 = ((op_t *) srcp)[0]; | 
 | 198 |       ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2); | 
 | 199 |     do3: | 
 | 200 |       a1 = ((op_t *) srcp)[1]; | 
 | 201 |       ((op_t *) dstp)[1] = MERGE (a3, sh_1, a0, sh_2); | 
 | 202 |     do2: | 
 | 203 |       a2 = ((op_t *) srcp)[2]; | 
 | 204 |       ((op_t *) dstp)[2] = MERGE (a0, sh_1, a1, sh_2); | 
 | 205 |     do1: | 
 | 206 |       a3 = ((op_t *) srcp)[3]; | 
 | 207 |       ((op_t *) dstp)[3] = MERGE (a1, sh_1, a2, sh_2); | 
 | 208 |  | 
 | 209 |       srcp += 4 * OPSIZ; | 
 | 210 |       dstp += 4 * OPSIZ; | 
 | 211 |       len -= 4; | 
 | 212 |     } | 
 | 213 |   while (len != 0); | 
 | 214 |  | 
 | 215 |   /* This is the right position for do0.  Please don't move | 
 | 216 |      it into the loop.  */ | 
 | 217 |  do0: | 
    /* Emit the last destination word from the two most recently loaded
       source words, which are in a2 and a3 at this point.  */
 | 218 |   ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2); | 
 | 219 | } | 
 | 220 |  | 
 | 221 | /* _wordcopy_bwd_aligned -- Copy block finishing right before | 
 | 222 |    SRCP to block finishing right before DSTP with LEN `op_t' words | 
 | 223 |    (not LEN bytes!).  Both SRCP and DSTP should be aligned for memory | 
 | 224 |    operations on `op_t's.  */ | 
 | 225 |  | 
 | 226 | #ifndef WORDCOPY_BWD_ALIGNED | 
 | 227 | # define WORDCOPY_BWD_ALIGNED _wordcopy_bwd_aligned | 
 | 228 | #endif | 
 | 229 |  | 
/* Backward, word-at-a-time copy of LEN `op_t' words.

   SRCP and DSTP are the addresses just past the last word of the
   source and destination blocks, carried in `long int'; the first word
   moved is the one immediately below each of them and the copy
   proceeds towards lower addresses.  LEN counts words, not bytes.  The
   function is emitted under the name WORDCOPY_BWD_ALIGNED, which the
   guard just above sets to _wordcopy_bwd_aligned unless it was already
   defined.

   The structure mirrors the forward aligned copy: an eight-step
   unrolled loop in which each step loads one word into A0/A1 and
   stores the word loaded by the previous step, entered part-way
   through by the switch on LEN % 8, with do0 storing the final pending
   word.  Inside the loop the indices run from 7 down to 0.

   Returns nothing; the only effect is the stores through DSTP.  */
 | 230 | void | 
 | 231 | WORDCOPY_BWD_ALIGNED (long int dstp, long int srcp, size_t len) | 
 | 232 | { | 
    /* a0 and a1 alternate as "just loaded" and "about to be stored".  */
 | 233 |   op_t a0, a1; | 
 | 234 |  | 
    /* Entry dispatch.  Each case first moves both pointers down so that
       the loop's fixed indices address the right words (DSTP is moved
       one word less than SRCP because stores lag loads by one step),
       then loads the topmost source word and, in cases 2..7, rounds
       LEN up to a multiple of 8.  */
 | 235 |   switch (len % 8) | 
 | 236 |     { | 
 | 237 |     case 2: | 
 | 238 |       srcp -= 2 * OPSIZ; | 
 | 239 |       dstp -= 1 * OPSIZ; | 
 | 240 |       a0 = ((op_t *) srcp)[1]; | 
 | 241 |       len += 6; | 
 | 242 |       goto do1; | 
 | 243 |     case 3: | 
 | 244 |       srcp -= 3 * OPSIZ; | 
 | 245 |       dstp -= 2 * OPSIZ; | 
 | 246 |       a1 = ((op_t *) srcp)[2]; | 
 | 247 |       len += 5; | 
 | 248 |       goto do2; | 
 | 249 |     case 4: | 
 | 250 |       srcp -= 4 * OPSIZ; | 
 | 251 |       dstp -= 3 * OPSIZ; | 
 | 252 |       a0 = ((op_t *) srcp)[3]; | 
 | 253 |       len += 4; | 
 | 254 |       goto do3; | 
 | 255 |     case 5: | 
 | 256 |       srcp -= 5 * OPSIZ; | 
 | 257 |       dstp -= 4 * OPSIZ; | 
 | 258 |       a1 = ((op_t *) srcp)[4]; | 
 | 259 |       len += 3; | 
 | 260 |       goto do4; | 
 | 261 |     case 6: | 
 | 262 |       srcp -= 6 * OPSIZ; | 
 | 263 |       dstp -= 5 * OPSIZ; | 
 | 264 |       a0 = ((op_t *) srcp)[5]; | 
 | 265 |       len += 2; | 
 | 266 |       goto do5; | 
 | 267 |     case 7: | 
 | 268 |       srcp -= 7 * OPSIZ; | 
 | 269 |       dstp -= 6 * OPSIZ; | 
 | 270 |       a1 = ((op_t *) srcp)[6]; | 
 | 271 |       len += 1; | 
 | 272 |       goto do6; | 
 | 273 |  | 
 | 274 |     case 0: | 
      /* The LEN == 0 exit is only active when OP_T_THRES <= 3 * OPSIZ;
	 without it a zero LEN would enter the loop and `len -= 8'
	 would wrap around.  */
 | 275 |       if (OP_T_THRES <= 3 * OPSIZ && len == 0) | 
 | 276 | 	return; | 
 | 277 |       srcp -= 8 * OPSIZ; | 
 | 278 |       dstp -= 7 * OPSIZ; | 
 | 279 |       a0 = ((op_t *) srcp)[7]; | 
 | 280 |       goto do7; | 
 | 281 |     case 1: | 
      /* One word is loaded up front (index 8 after the nine-word step
	 back is the word just below the original SRCP) and LEN drops
	 to a multiple of 8.  */
 | 282 |       srcp -= 9 * OPSIZ; | 
 | 283 |       dstp -= 8 * OPSIZ; | 
 | 284 |       a1 = ((op_t *) srcp)[8]; | 
 | 285 |       len -= 1; | 
      /* If that was the only word, skip the loop and just store it.  */
 | 286 |       if (OP_T_THRES <= 3 * OPSIZ && len == 0) | 
 | 287 | 	goto do0; | 
      /* do8 is the first statement after the switch, so this jump
	 goes where control would fall anyway.  */
 | 288 |       goto do8;			/* No-op.  */ | 
 | 289 |     } | 
 | 290 |  | 
    /* Main loop: eight load/store steps per pass from index 7 down to
       0, then move both pointers down by eight words.  */
 | 291 |   do | 
 | 292 |     { | 
 | 293 |     do8: | 
 | 294 |       a0 = ((op_t *) srcp)[7]; | 
 | 295 |       ((op_t *) dstp)[7] = a1; | 
 | 296 |     do7: | 
 | 297 |       a1 = ((op_t *) srcp)[6]; | 
 | 298 |       ((op_t *) dstp)[6] = a0; | 
 | 299 |     do6: | 
 | 300 |       a0 = ((op_t *) srcp)[5]; | 
 | 301 |       ((op_t *) dstp)[5] = a1; | 
 | 302 |     do5: | 
 | 303 |       a1 = ((op_t *) srcp)[4]; | 
 | 304 |       ((op_t *) dstp)[4] = a0; | 
 | 305 |     do4: | 
 | 306 |       a0 = ((op_t *) srcp)[3]; | 
 | 307 |       ((op_t *) dstp)[3] = a1; | 
 | 308 |     do3: | 
 | 309 |       a1 = ((op_t *) srcp)[2]; | 
 | 310 |       ((op_t *) dstp)[2] = a0; | 
 | 311 |     do2: | 
 | 312 |       a0 = ((op_t *) srcp)[1]; | 
 | 313 |       ((op_t *) dstp)[1] = a1; | 
 | 314 |     do1: | 
 | 315 |       a1 = ((op_t *) srcp)[0]; | 
 | 316 |       ((op_t *) dstp)[0] = a0; | 
 | 317 |  | 
 | 318 |       srcp -= 8 * OPSIZ; | 
 | 319 |       dstp -= 8 * OPSIZ; | 
 | 320 |       len -= 8; | 
 | 321 |     } | 
 | 322 |   while (len != 0); | 
 | 323 |  | 
 | 324 |   /* This is the right position for do0.  Please don't move | 
 | 325 |      it into the loop.  */ | 
 | 326 |  do0: | 
    /* Store the word loaded by the final step (or by case 1 when LEN
       was 1); it is always held in a1 at this point.  Index 7 is the
       slot just below the last one written.  */
 | 327 |   ((op_t *) dstp)[7] = a1; | 
 | 328 | } | 
 | 329 |  | 
 | 330 | /* _wordcopy_bwd_dest_aligned -- Copy block finishing right | 
 | 331 |    before SRCP to block finishing right before DSTP with LEN `op_t' | 
 | 332 |    words (not LEN bytes!).  DSTP should be aligned for memory | 
 | 333 |    operations on `op_t', but SRCP must *not* be aligned.  */ | 
 | 334 |  | 
 | 335 | #ifndef WORDCOPY_BWD_DEST_ALIGNED | 
 | 336 | # define WORDCOPY_BWD_DEST_ALIGNED _wordcopy_bwd_dest_aligned | 
 | 337 | #endif | 
 | 338 |  | 
/* Backward copy of LEN `op_t' words to an aligned destination from a
   source that is not on a word boundary.

   SRCP and DSTP are the addresses just past the end of the source and
   destination blocks, carried in `long int'; the copy proceeds towards
   lower addresses.  LEN counts words.  The function is emitted under
   the name WORDCOPY_BWD_DEST_ALIGNED, which the guard just above sets
   to _wordcopy_bwd_dest_aligned unless it was already defined.

   SRCP is moved to the word boundary just above it and only whole
   aligned words are loaded.  Every destination word is produced by
   MERGE from two consecutive source words, the lower-addressed one
   given first, using the bit counts SH_1 and SH_2; LEN + 1 source
   words are therefore loaded to produce LEN destination words.  MERGE
   comes from outside this file (NOTE(review): presumably <memcopy.h>;
   its exact shift directions are not visible here).

   The loop is unrolled four times and rotates through A0..A3, with
   indices running from 3 down to 0.  The switch on LEN % 4 biases the
   pointers, loads the two topmost source words, adjusts LEN to a
   multiple of 4 and jumps into the loop; do0 emits the final (lowest)
   destination word after the loop.

   Returns nothing; the only effect is the stores through DSTP.  */
 | 339 | void | 
 | 340 | WORDCOPY_BWD_DEST_ALIGNED (long int dstp, long int srcp, size_t len) | 
 | 341 | { | 
 | 342 |   op_t a0, a1, a2, a3; | 
 | 343 |   int sh_1, sh_2; | 
 | 344 |  | 
 | 345 |   /* Calculate how to shift a word read at the memory operation | 
 | 346 |      aligned srcp to make it aligned for copy.  */ | 
 | 347 |  | 
    /* sh_1 is SRCP's byte offset inside its word, in bits; sh_2 is the
       remaining bits of the word.  For an aligned SRCP these would be
       0 and the full word width, which is why the caller must not pass
       one.  */
 | 348 |   sh_1 = 8 * (srcp % OPSIZ); | 
 | 349 |   sh_2 = 8 * OPSIZ - sh_1; | 
 | 350 |  | 
 | 351 |   /* Make srcp aligned by rounding it down to the beginning of the op_t | 
 | 352 |      it points in the middle of.  */ | 
 | 353 |   srcp &= -OPSIZ; | 
    /* ...then step one word up, so SRCP ends on the boundary just above
       the original address and the word containing the original
       address is the one immediately below it.  */
 | 354 |   srcp += OPSIZ; | 
 | 355 |  | 
    /* Entry dispatch: move both pointers down for the loop's fixed
       indices 0..3, then preload the two topmost source words into the
       pair of registers the target label expects.  */
 | 356 |   switch (len % 4) | 
 | 357 |     { | 
 | 358 |     case 2: | 
 | 359 |       srcp -= 3 * OPSIZ; | 
 | 360 |       dstp -= 1 * OPSIZ; | 
 | 361 |       a2 = ((op_t *) srcp)[2]; | 
 | 362 |       a1 = ((op_t *) srcp)[1]; | 
 | 363 |       len += 2; | 
 | 364 |       goto do1; | 
 | 365 |     case 3: | 
 | 366 |       srcp -= 4 * OPSIZ; | 
 | 367 |       dstp -= 2 * OPSIZ; | 
 | 368 |       a3 = ((op_t *) srcp)[3]; | 
 | 369 |       a2 = ((op_t *) srcp)[2]; | 
 | 370 |       len += 1; | 
 | 371 |       goto do2; | 
 | 372 |     case 0: | 
      /* The LEN == 0 exit is only active when OP_T_THRES <= 3 * OPSIZ;
	 without it a zero LEN would enter the loop and `len -= 4'
	 would wrap around.  */
 | 373 |       if (OP_T_THRES <= 3 * OPSIZ && len == 0) | 
 | 374 | 	return; | 
 | 375 |       srcp -= 5 * OPSIZ; | 
 | 376 |       dstp -= 3 * OPSIZ; | 
 | 377 |       a0 = ((op_t *) srcp)[4]; | 
 | 378 |       a3 = ((op_t *) srcp)[3]; | 
 | 379 |       goto do3; | 
 | 380 |     case 1: | 
 | 381 |       srcp -= 6 * OPSIZ; | 
 | 382 |       dstp -= 4 * OPSIZ; | 
 | 383 |       a1 = ((op_t *) srcp)[5]; | 
 | 384 |       a0 = ((op_t *) srcp)[4]; | 
 | 385 |       len -= 1; | 
      /* A single-word copy needs only the final merge at do0.  */
 | 386 |       if (OP_T_THRES <= 3 * OPSIZ && len == 0) | 
 | 387 | 	goto do0; | 
      /* do4 is the first statement after the switch, so this jump
	 goes where control would fall anyway.  */
 | 388 |       goto do4;			/* No-op.  */ | 
 | 389 |     } | 
 | 390 |  | 
    /* Main loop: each step loads one new (lower) source word and stores
       the merge of the two words loaded before it.  */
 | 391 |   do | 
 | 392 |     { | 
 | 393 |     do4: | 
 | 394 |       a3 = ((op_t *) srcp)[3]; | 
 | 395 |       ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2); | 
 | 396 |     do3: | 
 | 397 |       a2 = ((op_t *) srcp)[2]; | 
 | 398 |       ((op_t *) dstp)[2] = MERGE (a3, sh_1, a0, sh_2); | 
 | 399 |     do2: | 
 | 400 |       a1 = ((op_t *) srcp)[1]; | 
 | 401 |       ((op_t *) dstp)[1] = MERGE (a2, sh_1, a3, sh_2); | 
 | 402 |     do1: | 
 | 403 |       a0 = ((op_t *) srcp)[0]; | 
 | 404 |       ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2); | 
 | 405 |  | 
 | 406 |       srcp -= 4 * OPSIZ; | 
 | 407 |       dstp -= 4 * OPSIZ; | 
 | 408 |       len -= 4; | 
 | 409 |     } | 
 | 410 |   while (len != 0); | 
 | 411 |  | 
 | 412 |   /* This is the right position for do0.  Please don't move | 
 | 413 |      it into the loop.  */ | 
 | 414 |  do0: | 
    /* Emit the last (lowest) destination word from the two most
       recently loaded source words, which are in a0 and a1 here.  */
 | 415 |   ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2); | 
 | 416 | } | 