/* memmove implementation for SH4
 *
 * Copyright (C) 2009 STMicroelectronics Ltd.
 *
 * Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *
 * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
 */

#ifndef __SH_FPU_ANY__
#include "../../generic/memmove.c"
#else

#include <string.h>

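/* FPSCR.SZ (bit 20): when set, an FMOV with a DR register moves a
 * 64-bit pair of single-precision registers per instruction. The
 * copy loop below loads FPSCR with just this bit set and restores
 * the caller's value afterwards. */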
#define FPSCR_SR (1 << 20)
#define STORE_FPSCR(x) __asm__ __volatile__("sts fpscr, %0" : "=r"(x))
#define LOAD_FPSCR(x) __asm__ __volatile__("lds %0, fpscr" : : "r"(x))

static void fpu_optimised_copy_fwd(void *dest, const void *src, size_t len)
{
	char *d = (char *)dest;
	const char *s = (const char *)src;

	if (len >= 64) {
		unsigned long fpscr;
		const int *s1;
		int *d1;

		/* Align the dest to an 8-byte boundary. */
		while ((unsigned)d & 0x7) {
			*d++ = *s++;
			len--;
		}

		s1 = (const int *)s;
		d1 = (int *)d;

		/* The 64-bit FMOVs require 8-byte-aligned addresses, so
		 * the FPU path is only usable if src is aligned too. */
		if (!((unsigned)s1 & 0x7)) {

			/* Align the dest to a 32-byte cache-line boundary. */
			while ((unsigned)d1 & 0x1c) {
				*d1++ = *s1++;
				len -= 4;
			}
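			/* d1 started out 8-byte aligned, so the loop above
			 * ran an even number of 4-byte steps and s1 is
			 * still 8-byte aligned for the FMOVs below. */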

			/* Switch FPSCR.SZ on to use paired single-precision
			 * load/store mode for 64-bit transfers. */
			STORE_FPSCR(fpscr);
			LOAD_FPSCR(FPSCR_SR);

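			/* Each iteration moves one 32-byte cache line: four
			 * 64-bit loads into dr0-dr6, then four 64-bit stores. */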
			while (len >= 32) {
				__asm__ __volatile__ ("fmov @%0+,dr0" : "+r"(s1));
				__asm__ __volatile__ ("fmov @%0+,dr2" : "+r"(s1));
				__asm__ __volatile__ ("fmov @%0+,dr4" : "+r"(s1));
				__asm__ __volatile__ ("fmov @%0+,dr6" : "+r"(s1));
				__asm__ __volatile__ ("fmov dr0,@%0" : : "r"(d1) : "memory");
				d1 += 2;
				__asm__ __volatile__ ("fmov dr2,@%0" : : "r"(d1) : "memory");
				d1 += 2;
				__asm__ __volatile__ ("fmov dr4,@%0" : : "r"(d1) : "memory");
				d1 += 2;
				__asm__ __volatile__ ("fmov dr6,@%0" : : "r"(d1) : "memory");
				d1 += 2;
				len -= 32;
			}
			/* Restore the caller's FPSCR. */
			LOAD_FPSCR(fpscr);
		}
		s = (const char *)s1;
		d = (char *)d1;
		/* TODO: other subcases could be covered here. */
	}
	/* Fall back to a per-byte copy for whatever remains. */
	while (len > 0) {
		*d++ = *s++;
		len--;
	}
}

void *memmove(void *dest, const void *src, size_t len)
{
	unsigned long int d = (unsigned long int)dest;
	unsigned long int s = (unsigned long int)src;
	unsigned long int res;

	if (d >= s)
		res = d - s;
	else
		res = s - d;
	/*
	 * 1) dest and src do not overlap          ==> memcpy (BWD/FWD)
	 * 2) dest and src overlap 100% (d == s)   ==> memcpy (BWD/FWD)
	 * 3) left-to-right overlap (d < s)        ==> copy from beginning to end
	 * 4) right-to-left overlap (d > s)        ==> copy from end to beginning
	 */

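	/* Illustrative example (addresses hypothetical): dest = 0x1000,
	 * src = 0x1010, len = 0x40 gives res = 0x10 < len with d < s,
	 * i.e. a left-to-right overlap handled by the forward FPU copy. */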
	if (res == 0) /* 100% overlap */
		memcpy(dest, src, len);
	else if (res >= len) /* no overlap */
		memcpy(dest, src, len);
	else {
		if (d > s) /* right-to-left overlap */
			memcpy(dest, src, len); /* safe: SH4 memcpy copies backwards */
		else /* left-to-right overlap: the backward SH4 memcpy cannot be used */
			fpu_optimised_copy_fwd(dest, src, len);
	}
	return dest;
}

libc_hidden_def(memmove)
#endif /* __SH_FPU_ANY__ */