/* memmove implementation for SH4
 *
 * Copyright (C) 2009 STMicroelectronics Ltd.
 *
 * Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *
 * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
 */

#ifndef __SH_FPU_ANY__
#include "../../generic/memmove.c"
#else

#include <string.h>

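/* FPSCR bit 20 appears to be the SH-4 SZ (transfer size) bit: with it set
 * (and PR clear), each FMOV moves a 64-bit DRn register pair rather than a
 * single 32-bit FRn register, which is what the copy loop below relies on. */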
#define FPSCR_SR (1 << 20)
#define STORE_FPSCR(x) __asm__ __volatile__("sts fpscr, %0" : "=r"(x))
#define LOAD_FPSCR(x) __asm__ __volatile__("lds %0, fpscr" : : "r"(x))

static void fpu_optimised_copy_fwd(void *dest, const void *src, size_t len)
{
	char *d = (char *)dest;
	char *s = (char *)src;

	if (len >= 64) {
		unsigned long fpscr;
		int *s1;
		int *d1;

		/* Align the dest to an 8-byte boundary. */
		while ((unsigned)d & 0x7) {
			*d++ = *s++;
			len--;
		}

		s1 = (int *)s;
		d1 = (int *)d;

		/* Check whether src is aligned well enough to use the FPU. */
		if (!((unsigned)s1 & 0x7)) {

			/* Align the dest to a 32-byte cache-line boundary. */
			while ((unsigned)d1 & 0x1c) {
				*d1++ = *s1++;
				len -= 4;
			}

			/* Use the paired single-precision load/store mode
			 * for 64-bit transfers. */
			STORE_FPSCR(fpscr);
			LOAD_FPSCR(FPSCR_SR);

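			/* Each FMOV below moves 8 bytes in paired mode, so one
			 * iteration of this loop copies a full 32-byte cache line. */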
			while (len >= 32) {
				__asm__ __volatile__ ("fmov @%0+,dr0" : "+r" (s1));
				__asm__ __volatile__ ("fmov @%0+,dr2" : "+r" (s1));
				__asm__ __volatile__ ("fmov @%0+,dr4" : "+r" (s1));
				__asm__ __volatile__ ("fmov @%0+,dr6" : "+r" (s1));
				__asm__ __volatile__ ("fmov dr0,@%0" : : "r" (d1) : "memory");
				d1 += 2;
				__asm__ __volatile__ ("fmov dr2,@%0" : : "r" (d1) : "memory");
				d1 += 2;
				__asm__ __volatile__ ("fmov dr4,@%0" : : "r" (d1) : "memory");
				d1 += 2;
				__asm__ __volatile__ ("fmov dr6,@%0" : : "r" (d1) : "memory");
				d1 += 2;
				len -= 32;
			}
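			/* Restore the caller's original FPSCR. */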
			LOAD_FPSCR(fpscr);
		}
		s = (char *)s1;
		d = (char *)d1;
		/* TODO: other subcases could be covered here. */
	}
	/* Copy any remaining bytes (and the unaligned-source case) one at a time. */
	while (len > 0) {
		*d++ = *s++;
		len--;
	}
	return;
}

void *memmove(void *dest, const void *src, size_t len)
{
	unsigned long int d = (unsigned long int)dest;
	unsigned long int s = (unsigned long int)src;
	unsigned long int res;

	if (d >= s)
		res = d - s;
	else
		res = s - d;
	/*
	 * 1) dest and src do not overlap        ==> memcpy (BWD/FWD)
	 * 2) dest and src overlap completely    ==> memcpy (BWD/FWD)
	 * 3) left-to-right overlap (dest < src) ==> copy from the beginning to the end
	 * 4) right-to-left overlap (dest > src) ==> copy from the end to the beginning
	 */
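	/*
	 * For example (hypothetical values): with dest = buf, src = buf + 1 and
	 * len = 100, d < s and res = 1 < len, so this is a left-to-right overlap
	 * and the forward FPU copy is used; with dest and src swapped it is a
	 * right-to-left overlap, for which the backward-copying SH4 memcpy is safe.
	 */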

	if (res == 0)			/* dest == src: fully overlapping */
		memcpy(dest, src, len);
	else if (res >= len)		/* no overlap */
		memcpy(dest, src, len);
	else {
		if (d > s)		/* right-to-left overlap */
			memcpy(dest, src, len);	/* the SH4 memcpy copies backward */
		else			/* left-to-right overlap: cannot use the SH4 memcpy */
			fpu_optimised_copy_fwd(dest, src, len);
	}
	return (dest);
}

libc_hidden_def(memmove)
#endif /* __SH_FPU_ANY__ */