blob: 30de1635c99e52fadefbc8cb2b7f94482095294a [file] [log] [blame]
rjw1f884582022-01-06 17:20:42 +08001/*
2 * Copyright (c) 2008-2012 Travis Geiselbrecht
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files
6 * (the "Software"), to deal in the Software without restriction,
7 * including without limitation the rights to use, copy, modify, merge,
8 * publish, distribute, sublicense, and/or sell copies of the Software,
9 * and to permit persons to whom the Software is furnished to do so,
10 * subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include <sys/types.h>
24#include <stdio.h>
25#include <rand.h>
26#include <err.h>
27#include <stdlib.h>
28#include <string.h>
29#include <app/tests.h>
30#include <kernel/thread.h>
31#include <kernel/mutex.h>
32#include <kernel/semaphore.h>
33#include <kernel/event.h>
34#include <platform.h>
35
36const size_t BUFSIZE = (1024*1024);
37const uint ITER = 1024;
38
39__NO_INLINE static void bench_set_overhead(void)
40{
41 uint32_t *buf = malloc(BUFSIZE);
42
43 uint count = arch_cycle_count();
44 for (uint i = 0; i < ITER; i++) {
45 __asm__ volatile("");
46 }
47 count = arch_cycle_count() - count;
48
49 printf("took %u cycles overhead to loop %u times\n",
50 count, ITER);
51
52 free(buf);
53}
54
55__NO_INLINE static void bench_memset(void)
56{
57 void *buf = malloc(BUFSIZE);
58
59 uint count = arch_cycle_count();
60 for (uint i = 0; i < ITER; i++) {
61 memset(buf, 0, BUFSIZE);
62 }
63 count = arch_cycle_count() - count;
64
65 printf("took %u cycles to memset a buffer of size %u %d times (%u bytes), %f bytes/cycle\n",
66 count, BUFSIZE, ITER, BUFSIZE * ITER, (BUFSIZE * ITER) / (float)count);
67
68 free(buf);
69}
70
71#define bench_cset(type) \
72__NO_INLINE static void bench_cset_##type(void) \
73{ \
74 type *buf = malloc(BUFSIZE); \
75 \
76 uint count = arch_cycle_count(); \
77 for (uint i = 0; i < ITER; i++) { \
78 for (uint j = 0; j < BUFSIZE / sizeof(*buf); j++) { \
79 buf[j] = 0; \
80 } \
81 } \
82 count = arch_cycle_count() - count; \
83 \
84 printf("took %u cycles to manually clear a buffer using wordsize %d of size %u %d times (%u bytes), %f bytes/cycle\n", \
85 count, sizeof(*buf), BUFSIZE, ITER, BUFSIZE * ITER, (BUFSIZE * ITER) / (float)count); \
86 \
87 free(buf); \
88}
89
90bench_cset(uint8_t)
91bench_cset(uint16_t)
92bench_cset(uint32_t)
93bench_cset(uint64_t)
94
95__NO_INLINE static void bench_cset_wide(void)
96{
97 uint32_t *buf = malloc(BUFSIZE);
98
99 uint count = arch_cycle_count();
100 for (uint i = 0; i < ITER; i++) {
101 for (uint j = 0; j < BUFSIZE / sizeof(*buf) / 8; j++) {
102 buf[j*8] = 0;
103 buf[j*8+1] = 0;
104 buf[j*8+2] = 0;
105 buf[j*8+3] = 0;
106 buf[j*8+4] = 0;
107 buf[j*8+5] = 0;
108 buf[j*8+6] = 0;
109 buf[j*8+7] = 0;
110 }
111 }
112 count = arch_cycle_count() - count;
113
114 printf("took %u cycles to manually clear a buffer of size %u %d times 8 words at a time (%u bytes), %f bytes/cycle\n",
115 count, BUFSIZE, ITER, BUFSIZE * ITER, (BUFSIZE * ITER) / (float)count);
116
117 free(buf);
118}
119
120__NO_INLINE static void bench_memcpy(void)
121{
122 uint8_t *buf = calloc(1, BUFSIZE);
123
124 uint count = arch_cycle_count();
125 for (uint i = 0; i < ITER; i++) {
126 memcpy(buf, buf + BUFSIZE / 2, BUFSIZE / 2);
127 }
128 count = arch_cycle_count() - count;
129
130 printf("took %u cycles to memcpy a buffer of size %u %d times (%u source bytes), %f source bytes/cycle\n",
131 count, BUFSIZE / 2, ITER, BUFSIZE / 2 * ITER, (BUFSIZE / 2 * ITER) / (float)count);
132
133 free(buf);
134}
135
136#if ARCH_ARM
137__NO_INLINE static void arm_bench_cset_stm(void)
138{
139 uint32_t *buf = malloc(BUFSIZE);
140
141 uint count = arch_cycle_count();
142 for (uint i = 0; i < ITER; i++) {
143 for (uint j = 0; j < BUFSIZE / sizeof(*buf) / 8; j++) {
144 __asm__ volatile(
145 "stm %0, {r0-r7};"
146 :: "r" (&buf[j*8])
147 );
148 }
149 }
150 count = arch_cycle_count() - count;
151
152 printf("took %u cycles to manually clear a buffer of size %u %d times 8 words at a time using stm (%u bytes), %f bytes/cycle\n",
153 count, BUFSIZE, ITER, BUFSIZE * ITER, (BUFSIZE * ITER) / (float)count);
154
155 free(buf);
156}
157
158#if (__CORTEX_M >= 0x03)
159__NO_INLINE static void arm_bench_multi_issue(void)
160{
161 uint32_t cycles;
162 uint32_t a = 0, b = 0, c = 0, d = 0, e = 0, f = 0, g = 0, h = 0;
163#define ITER 1000000
164 uint count = ITER;
165 cycles = arch_cycle_count();
166 while (count--) {
167 asm volatile ("");
168 asm volatile ("add %0, %0, %0" : "=r" (a) : "r" (a));
169 asm volatile ("add %0, %0, %0" : "=r" (b) : "r" (b));
170 asm volatile ("and %0, %0, %0" : "=r" (c) : "r" (c));
171 asm volatile ("mov %0, %0" : "=r" (d) : "r" (d));
172 asm volatile ("orr %0, %0, %0" : "=r" (e) : "r" (e));
173 asm volatile ("add %0, %0, %0" : "=r" (f) : "r" (f));
174 asm volatile ("and %0, %0, %0" : "=r" (g) : "r" (g));
175 asm volatile ("mov %0, %0" : "=r" (h) : "r" (h));
176 }
177 cycles = arch_cycle_count() - cycles;
178
179 printf("took %u cycles to issue 8 integer ops (%f cycles/iteration)\n", cycles, (float)cycles / ITER);
180#undef ITER
181}
182#endif // __CORTEX_M
183#endif // ARCH_ARM
184
185#if WITH_LIB_LIBM
186#include <math.h>
187
188__NO_INLINE static void bench_sincos(void)
189{
190 printf("touching the floating point unit\n");
191 __UNUSED volatile double _hole = sin(0);
192
193 uint count = arch_cycle_count();
194 __UNUSED double a = sin(2.0);
195 count = arch_cycle_count() - count;
196 printf("took %u cycles for sin()\n", count);
197
198 count = arch_cycle_count();
199 a = cos(2.0);
200 count = arch_cycle_count() - count;
201 printf("took %u cycles for cos()\n", count);
202
203 count = arch_cycle_count();
204 a = sinf(2.0);
205 count = arch_cycle_count() - count;
206 printf("took %u cycles for sinf()\n", count);
207
208 count = arch_cycle_count();
209 a = cosf(2.0);
210 count = arch_cycle_count() - count;
211 printf("took %u cycles for cosf()\n", count);
212
213 count = arch_cycle_count();
214 a = sqrt(1234567.0);
215 count = arch_cycle_count() - count;
216 printf("took %u cycles for sqrt()\n", count);
217
218 count = arch_cycle_count();
219 a = sqrtf(1234567.0f);
220 count = arch_cycle_count() - count;
221 printf("took %u cycles for sqrtf()\n", count);
222}
223
224#endif // WITH_LIB_LIBM
225
226int benchmarks(int argc, const cmd_args *argv)
227{
228 bench_set_overhead();
229 bench_memset();
230 bench_memcpy();
231
232 bench_cset_uint8_t();
233 bench_cset_uint16_t();
234 bench_cset_uint32_t();
235 bench_cset_uint64_t();
236 bench_cset_wide();
237
238#if ARCH_ARM
239 arm_bench_cset_stm();
240
241#if (__CORTEX_M >= 0x03)
242 arm_bench_multi_issue();
243#endif
244#endif
245#if WITH_LIB_LIBM
246 bench_sincos();
247#endif
248
249 return NO_ERROR;
250}
251