/* Optimized memset for Xtensa.
2 Copyright (C) 2001, 2007 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
18 Boston, MA 02110-1301, USA. */
19
20#include "../../sysdeps/linux/xtensa/sysdep.h"
21#include <bits/xtensa-config.h>
22
23/* Do not use .literal_position in the ENTRY macro. */
24#undef LITERAL_POSITION
25#define LITERAL_POSITION
26
/* void *memset (void *dst, int c, size_t length)

   The algorithm is as follows:

   Create a word with c in all byte positions.

   If the destination is aligned, set 16B chunks with a loop, and then
   finish up with 8B, 4B, 2B, and 1B stores conditional on the length.

   If the destination is unaligned and fewer than 8 bytes remain, set
   them byte-by-byte.  Otherwise align it by conditionally setting 1B
   and/or 2B and then go to the aligned case.

   This code tries to use fall-through branches for the common
   case of an aligned destination (except for the branches to
   the alignment labels).  */
42
43
/* Byte-by-byte set.

   Shared tail reached from the unaligned-destination paths when fewer
   than 8 bytes remain.  Register state expected at .Lbyteset:
     a3 = fill value (s8i stores only its low 8 bits)
     a4 = number of bytes still to set (zero is handled)
     a5 = address of the next byte to set
   a2 is not modified here, so the value memset placed in it is what
   gets returned by the retw below.  a6 is used as scratch when the
   zero-overhead loop option is not available.  */

	.text
	.align	4
	.literal_position
__memset_aux:

	/* Skip a byte to get 1 mod 4 alignment for LOOPNEZ
	   (0 mod 4 alignment for LBEG).  */
	.byte	0

.Lbyteset:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, 2f		/* run the loop body a4 times; skip if a4 == 0 */
#else
	beqz	a4, 2f		/* nothing left to set */
	add	a6, a5, a4	/* a6 = ending address */
#endif
1:	s8i	a3, a5, 0
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	/* Addresses are unsigned quantities.  A signed BLT would treat an
	   end address at or above 0x80000000 as negative and could leave
	   the loop early when the buffer straddles that boundary.  */
	bltu	a5, a6, 1b
#endif
2:	retw
68
69
/* Destination is unaligned.

   Both entry points expect a3 = replicated fill word, a4 = bytes
   remaining and a5 = current destination.  They either hand short
   requests to .Lbyteset or advance a5 to a word boundary (adjusting
   a4 to match) and continue at .Ldstaligned.  */

	.align	4

.Ldst1mod2:	/* dst address is odd */

	/* Fewer than 8 bytes left: finish them one byte at a time.  */
	bltui	a4, 8, .Lbyteset

	/* Store a single byte so that dst becomes 2-byte aligned.  */
	s8i	a3, a5, 0
	addi	a4, a4, -1
	addi	a5, a5, 1

	/* If bit 1 of dst is clear, dst is now word aligned.  */
	_bbci.l	a5, 1, .Ldstaligned

.Ldst2mod4:	/* dst is 2-byte aligned but not word aligned */

	/* Fewer than 8 bytes left: finish them one byte at a time.  */
	bltui	a4, 8, .Lbyteset

	/* Store a halfword to reach the next word boundary.  */
	s16i	a3, a5, 0
	addi	a4, a4, -2
	addi	a5, a5, 2

	/* dst is word aligned from here on; rejoin the main algorithm.  */
	j	.Ldstaligned
99
100
ENTRY (memset)
	/* void *memset (void *dst, int c, size_t length)

	   On entry:  a2 = dst, a3 = c, a4 = length
	   On return: a2 = dst (never modified below)

	   Working registers:
	     a3 = c replicated into all four bytes of a word
	     a4 = bytes remaining once dst has been aligned
	     a5 = current store address
	     a6 = end address of the 16B loop (non-LOOP configurations)
	     a7 = scratch, then the number of 16B iterations  */

	/* Duplicate character into all bytes of word.  */
	extui	a3, a3, 0, 8	/* keep only the low byte of c */
	slli	a7, a3, 8
	or	a3, a3, a7	/* a3 = c in the low two bytes */
	slli	a7, a3, 16
	or	a3, a3, a7	/* a3 = c in all four bytes */

	mov	a5, a2		/* copy dst so that a2 is return value */

	/* Check if dst is unaligned.  The alignment code either finishes
	   the request itself or jumps back to .Ldstaligned with a4 and a5
	   adjusted.  */
	_bbsi.l	a2, 0, .Ldst1mod2
	_bbsi.l	a2, 1, .Ldst2mod4
.Ldstaligned:

	/* Get number of loop iterations with 16B per iteration.  */
	srli	a7, a4, 4

	/* Destination is word-aligned.  */
#if XCHAL_HAVE_LOOPS
	loopnez	a7, 2f
#else
	beqz	a7, 2f
	slli	a6, a7, 4
	add	a6, a6, a5	/* a6 = end of last 16B chunk */
#endif
	/* Set 16 bytes per iteration.  */
1:	s32i	a3, a5, 0
	s32i	a3, a5, 4
	s32i	a3, a5, 8
	s32i	a3, a5, 12
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	/* Addresses are unsigned quantities.  A signed BLT would treat an
	   end address at or above 0x80000000 as negative and could leave
	   the loop early when the buffer straddles that boundary.  */
	bltu	a5, a6, 1b
#endif

	/* Set any leftover pieces smaller than 16B.  Bits 3..0 of the
	   remaining length select the 8B, 4B, 2B and 1B stores.  */
2:	bbci.l	a4, 3, 3f

	/* Set 8 bytes.  */
	s32i	a3, a5, 0
	s32i	a3, a5, 4
	addi	a5, a5, 8

3:	bbci.l	a4, 2, 4f

	/* Set 4 bytes.  */
	s32i	a3, a5, 0
	addi	a5, a5, 4

4:	bbci.l	a4, 1, 5f

	/* Set 2 bytes.  */
	s16i	a3, a5, 0
	addi	a5, a5, 2

5:	bbci.l	a4, 0, 6f

	/* Set 1 byte.  */
	s8i	a3, a5, 0
6:	retw

libc_hidden_def (memset)