blob: 938d8a5d38a45f8b14e97beaeeb9b7bbac0b00d3 [file] [log] [blame]
b.liue9582032025-04-17 19:18:16 +08001/*
2 * Performance Profiling routines
3 *
4 * Copyright (c) 2017 Realtek Semiconductor Corp.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include "8192cd_cfg.h"
12
13#ifdef PERF_DUMP
14
15#include "romeperf.h"
16//#include <asm/rtl865x/rtl_glue.h>
17#define KERNEL_SYSeALLS
18#include <asm/unistd.h>
19#include <asm/processor.h>
20#include <asm/uaccess.h>
21#include <asm/rlxregs.h>
22#ifdef CONFIG_WIRELESS_LAN_MODULE
23#define __IRAM
24#else
25#define __IRAM __attribute__ ((section(".iram-gen")))
26#endif
27#define __OPT_0 __attribute__((optimize("O0")))
28
29#define rtlglue_malloc(size) kmalloc(size, 0x1f0)
30#define rtlglue_free(p) kfree(p)
31#define rtlglue_printf panic_printk
32
33
/*
 * Event selector codes for the CP3 performance counters.
 * In the PERF_DUMP_CP3_OLD configuration CP3_COUNTER0_ASSIGN_EVENT() ORs
 * each code with 0x10 and packs one code per byte of the control word.
 * The identifier spelling (including "FECTH") is kept as-is because other
 * code refers to these names.
 */
enum CP3_COUNTER
{
	CP3CNT_CYCLES                             = 0,
	CP3CNT_NEW_INST_FECTH                     = 1,
	CP3CNT_NEW_INST_FETCH_CACHE_MISS          = 2,
	CP3CNT_NEW_INST_MISS_BUSY_CYCLE           = 3,
	CP3CNT_DATA_STORE_INST                    = 4,
	CP3CNT_DATA_LOAD_INST                     = 5,
	CP3CNT_DATA_LOAD_OR_STORE_INST            = 6,
	CP3CNT_EXACT_RETIRED_INST                 = 7,
	CP3CNT_RETIRED_INST_FOR_PIPE_A            = 8,
	CP3CNT_RETIRED_INST_FOR_PIPE_B            = 9,
	CP3CNT_DATA_LOAD_OR_STORE_CACHE_MISS      = 10,
	CP3CNT_DATA_LOAD_OR_STORE_MISS_BUSY_CYCLE = 11,
	CP3CNT_RESERVED12                         = 12,
	CP3CNT_RESERVED13                         = 13,
	CP3CNT_RESERVED14                         = 14,
	CP3CNT_RESERVED15                         = 15,
};
53
/* Local variables */
//static
/* Scratch buffer filled by the inline asm in CP3_COUNTER0_GET() and then returned by value. */
uint64 tempVariable64;
/*
 * Counter-control word: one event selector per byte, for counters 0..3.
 * Built by CP3_COUNTER0_ASSIGN_EVENT()/setEvent() and loaded into CP3 control
 * register $0 by CP3_COUNTER0_START().
 */
static uint32 tempVariable32;
#ifdef PERF_DUMP_CP3_DUAL_COUNTER_EN
/* Second control word (counters 4..7), loaded into CP3 control register $1. */
static uint32 tempVariable32_2;
/* bCounterUsed[n] is TRUE while counter n is held by an open measurement point. */
unsigned char bCounterUsed[8];
/* Number of entries of bCounterUsed[] that are currently TRUE. */
unsigned char numOfUsedCounter = 0;
#endif
/* Raw counter values captured by CP3_COUNTER0_GET_ALL(): 8 x 32-bit words stored as 4 x 64-bit. */
static uint64 currCnt[4];
/* Index of the most recently entered measurement point; 0xff means "none". */
unsigned int countTemp = 0xff;
65
66
/* Global variables */
/*
 * Profiler state. When built as a wireless LAN module the symbols are kept
 * file-local (static); otherwise they have external linkage.
 *
 *  cnt1, cnt2              - only referenced by the disabled (#if 0)
 *                            rtl8651_romeperfStart/Stop reference code.
 *  romePerfStat[]          - per-measurement-point statistics, indexed by
 *                            ROMEPERF_INDEX_*.
 *  rtl8651_romeperf_inited - set to TRUE by rtl8651_romeperfInit().
 *  rtl8651_romeperf_enable - cleared by rtl8651_romeperfPause(), set by
 *                            rtl8651_romeperfResume() and by Init.
 */
#ifdef CONFIG_WIRELESS_LAN_MODULE
static uint64 cnt1, cnt2;
static rtl8651_romeperf_stat_t romePerfStat[ROMEPERF_INDEX_MAX];
static uint32 rtl8651_romeperf_inited = 0;
static uint32 rtl8651_romeperf_enable = TRUE;
#else
uint64 cnt1, cnt2;
rtl8651_romeperf_stat_t romePerfStat[ROMEPERF_INDEX_MAX];
uint32 rtl8651_romeperf_inited = 0;
uint32 rtl8651_romeperf_enable = TRUE;
#endif
79
/*
 * Printable names of the counter events, indexed by event selector code.
 * rtl8651_romeperfDump() uses this table to label each counter column.
 * Which table is compiled depends on PERF_DUMP_CP3_SELECT.
 */
unsigned char *mapping[] = {
#if (PERF_DUMP_CP3_SELECT == PERF_DUMP_CP3_OLD)
	/* 0x00 - 0x03 */
	"CP3CNT_CYCLES",
	"CP3CNT_NEW_INST_FECTH",
	"CP3CNT_NEW_INST_FETCH_CACHE_MISS",
	"CP3CNT_NEW_INST_MISS_BUSY_CYCLE",
	/* 0x04 - 0x07 */
	"CP3CNT_DATA_STORE_INST",
	"CP3CNT_DATA_LOAD_INST",
	"CP3CNT_DATA_LOAD_OR_STORE_INST",
	"CP3CNT_EXACT_RETIRED_INST",
	/* 0x08 - 0x0b */
	"CP3CNT_RETIRED_INST_FOR_PIPE_A",
	"CP3CNT_RETIRED_INST_FOR_PIPE_B",
	"CP3CNT_DATA_LOAD_OR_STORE_CACHE_MISS",
	"CP3CNT_DATA_LOAD_OR_STORE_MISS_BUSY_CYCLE",
	/* 0x0c - 0x0f */
	"CP3CNT_RESERVED12",
	"CP3CNT_RESERVED13",
	"CP3CNT_RESERVED14",
	"CP3CNT_RESERVED15",
#else
	/* 0x00 - 0x03 */
	"CP3CNT_STOP_COUNT",
	"CP3CNT_INST_FECTH",
	"CP3CNT_ICACHE_MISS",
	"CP3CNT_ICACHE_MISS_CYCLE",
	/* 0x04 - 0x07 */
	"CP3CNT_STORE_INST",
	"CP3CNT_LOAD_INST",
	"CP3CNT_LOAD_OR_STORE_INST",
	"CP3CNT_COMPLETE_INST",
	/* 0x08 - 0x0b */
	"CP3CNT_CYCLES",
	"CP3CNT_ICACHE_SOFT_MISS",
	"CP3CNT_DCACHE_MISS",
	"CP3CNT_DCACHE_MISS_CYCLES",
	/* 0x0c - 0x0f */
	"CP3CNT_L2CACHE_HIT",
	"CP3CNT_L2CACHE_HIT_CYCLES",
	"CP3CNT_L2CACHE_MISS",
	"CP3CNT_L2CACHE_MISS_CYCLES",
	/* 0x10 - 0x11 */
	"CP3CNT_BRANCH_PREDICTION",
	"CP3CNT_BRANCH_PREDICTION_MISS",
#endif
};
121
122
/*
 * Set bit 31 of coprocessor-0 register $12 (read-modify-write).
 * Callers in this file invoke it before every counter access with the remark
 * that CP3 may have been disabled elsewhere, i.e. this (re-)enables access to
 * coprocessor 3. Uses $8/$9 as scratch; both are declared as clobbered.
 */
__IRAM void CP3_COUNTER0_INIT( void )
{
	__asm__ __volatile__
	(
		"mfc0 $8, $12 \n\t"		/* read CP0 $12 */
		"la $9, 0x80000000 \n\t"	/* bit 31 */
		"or $8, $9 \n\t"
		"mtc0 $8, $12 \n\t"		/* write it back */
		:
		:
		:"$8","$9"
	);
}
136
137__IRAM uint32 CP3_COUNTER0_IS_INITED( void )
138{
139 __asm__ __volatile__
140 (
141 "mfc0 $8, $12 \n\t"
142 "la $9, tempVariable32 \n\t"
143 "sw $8, 0($9) \n\t"
144 :
145 :
146 :"$8","$9"
147 );
148 return tempVariable32;
149}
150
151
152__IRAM void CP3_COUNTER0_START( void )
153{
154
155#ifdef PERF_DUMP_CP3_DUAL_COUNTER_EN
156 __asm__ __volatile__
157 (
158 "la $8, tempVariable32 \n\t"
159 "lw $8, 0($8) \n\t"
160 "ctc3 $8, $0 \n\t"
161 "la $8, tempVariable32_2 \n\t"
162 "lw $8, 0($8) \n\t"
163 "ctc3 $8, $1 \n\t"
164 "li $8, 0xf \n\t"
165 "ctc3 $8, $2 \n\t"
166 :
167 :
168 :"$8"
169 );
170#else
171 __asm__ __volatile__
172 (
173 "la $8, tempVariable32 \n\t"
174 "lw $8, 0($8) \n\t"
175 "ctc3 $8, $0 \n\t"
176 :
177 :
178 :"$8"
179 );
180#endif // PERF_DUMP_CP3_DUAL_COUNTER_EN
181}
182
183
/*
 * Build the default event-selection word(s) used by CP3_COUNTER0_START().
 * One event code is placed in each byte: counter 0 in bits 0-7, counter 1 in
 * bits 8-15, counter 2 in bits 16-23, counter 3 in bits 24-31.
 *
 * The event set is chosen at compile time. Within each "#if 1 / #elif 1"
 * ladder only the first branch is ever compiled; the others are kept as
 * ready-made alternatives that are enabled by editing the conditions.
 *
 * NOTE(review): in the PERF_DUMP_CP3_NEW branches the names CP3CNT_INST_FECTH,
 * CP3CNT_ICACHE_MISS, CP3CNT_ICACHE_MISS_CYCLE, CP3CNT_STORE_INST, ... are not
 * defined in this file - presumably they come from romeperf.h; confirm.
 */
__IRAM void CP3_COUNTER0_ASSIGN_EVENT( void )
{
#if (PERF_DUMP_CP3_SELECT == PERF_DUMP_CP3_OLD)
	/* Old layout: every event code is ORed with 0x10 before being packed. */
#if 1 /* Inst */
	tempVariable32 = /* Counter0 */((0x10|CP3CNT_CYCLES)<< 0) |
		/* Counter1 */((0x10|CP3CNT_NEW_INST_FECTH)<< 8) |
		/* Counter2 */((0x10|CP3CNT_NEW_INST_FETCH_CACHE_MISS)<<16) |
		/* Counter3 */((0x10|CP3CNT_NEW_INST_MISS_BUSY_CYCLE)<<24);
#elif 1 /* Data (LOAD+STORE) */
	tempVariable32 = /* Counter0 */((0x10|CP3CNT_CYCLES)<< 0) |
		/* Counter1 */((0x10|CP3CNT_DATA_LOAD_OR_STORE_INST)<< 8) |
		/* Counter2 */((0x10|CP3CNT_DATA_LOAD_OR_STORE_CACHE_MISS)<<16) |
		/* Counter3 */((0x10|CP3CNT_DATA_LOAD_OR_STORE_MISS_BUSY_CYCLE)<<24);
#elif 1 /* Data (STORE) */
	tempVariable32 = /* Counter0 */((0x10|CP3CNT_DATA_LOAD_INST)<< 0) |
		/* Counter1 */((0x10|CP3CNT_DATA_STORE_INST)<< 8) |
		/* Counter2 */((0x10|CP3CNT_DATA_LOAD_OR_STORE_CACHE_MISS)<<16) |
		/* Counter3 */((0x10|CP3CNT_DATA_LOAD_OR_STORE_MISS_BUSY_CYCLE)<<24);
#else
#error
#endif

#elif (PERF_DUMP_CP3_SELECT == PERF_DUMP_CP3_NEW)

#ifdef PERF_DUMP_CP3_DUAL_COUNTER_EN
	/* Dual mode: both control words get the same default event set. */
	#if 1 /* Inst */
	tempVariable32 = /* Counter0 */((CP3CNT_CYCLES)<< 0) |
		/* Counter1 */((CP3CNT_INST_FECTH)<< 8) |
		/* Counter2 */((CP3CNT_ICACHE_MISS)<<16) |
		/* Counter3 */((CP3CNT_ICACHE_MISS_CYCLE)<<24);
	tempVariable32_2 = /* Counter0 */((CP3CNT_CYCLES)<< 0) |
		/* Counter1 */((CP3CNT_INST_FECTH)<< 8) |
		/* Counter2 */((CP3CNT_ICACHE_MISS)<<16) |
		/* Counter3 */((CP3CNT_ICACHE_MISS_CYCLE)<<24);
	/* NOTE(review): this alternative does not assign tempVariable32_2. */
	#elif 1 /* Data (LOAD+STORE) */
	tempVariable32 = /* Counter0 */((CP3CNT_CYCLES)<< 0) |
		/* Counter1 */((CP3CNT_LOAD_OR_STORE_INST)<< 8) |
		/* Counter2 */((CP3CNT_STORE_INST)<<16) |
		/* Counter3 */((CP3CNT_LOAD_INST)<<24);
	#else
	#error
	#endif
#else
	#if 1 /* Inst */
	tempVariable32 = /* Counter0 */((CP3CNT_CYCLES)<< 0) |
		/* Counter1 */((CP3CNT_INST_FECTH)<< 8) |
		/* Counter2 */((CP3CNT_ICACHE_MISS)<<16) |
		/* Counter3 */((CP3CNT_ICACHE_MISS_CYCLE)<<24);
	#elif 1 /* Data (LOAD+STORE) */
	tempVariable32 = /* Counter0 */((CP3CNT_CYCLES)<< 0) |
		/* Counter1 */((CP3CNT_LOAD_OR_STORE_INST)<< 8) |
		/* Counter2 */((CP3CNT_STORE_INST)<<16) |
		/* Counter3 */((CP3CNT_LOAD_INST)<<24);
	#else
	#error
	#endif
#endif // PERF_DUMP_CP3_DUAL_COUNTER_EN

#else
#error "PERF_DUMP_CP3_SELECT flag error"
#endif

}
247
248
/*
 * Zero a single CP3 counter register.
 *
 * cnt_num selects the logical counter. The mapping to CP3 registers is:
 *   0..3 -> $8, $10, $12, $14   (even registers)
 *   4..7 -> $9, $11, $13, $15   (odd registers)
 * Any other value only logs an error via printk and changes nothing.
 * A separate asm statement is needed per case because the register number
 * is part of the instruction text.
 */
__IRAM void CP3_COUNTER0_RESET_ONE(int cnt_num)
{
	switch(cnt_num)
	{
		case 0:
			__asm__ __volatile__
			(
				"mtc3 $0, $8 \n\t"
			);

			break;
		case 1:
			__asm__ __volatile__
			(
				"mtc3 $0, $10 \n\t"
			);

			break;
		case 2:
			__asm__ __volatile__
			(
				"mtc3 $0, $12 \n\t"
			);

			break;
		case 3:
			__asm__ __volatile__
			(
				"mtc3 $0, $14 \n\t"
			);

			break;
		case 4:
			__asm__ __volatile__
			(
				"mtc3 $0, $9 \n\t"
			);

			break;
		case 5:
			__asm__ __volatile__
			(
				"mtc3 $0, $11 \n\t"
			);

			break;
		case 6:
			__asm__ __volatile__
			(
				"mtc3 $0, $13 \n\t"
			);

			break;
		case 7:
			__asm__ __volatile__
			(
				"mtc3 $0, $15 \n\t"
			);

			break;
		default:
			/* Unknown counter number: report it, leave the hardware alone. */
			printk("CP3 RESET ERROR COUNTER = %x \n",cnt_num);
			break;
	}
}
314
/*
 * Zero all eight CP3 counter registers ($8..$15) by moving general register
 * $0 (always zero) into each of them.
 */
__IRAM void CP3_COUNTER0_RESET( void )
{
	__asm__ __volatile__
	(
		"mtc3 $0, $8 \n\t"
		"mtc3 $0, $9 \n\t"
		"mtc3 $0, $10 \n\t"
		"mtc3 $0, $11 \n\t"
		"mtc3 $0, $12 \n\t"
		"mtc3 $0, $13 \n\t"
		"mtc3 $0, $14 \n\t"
		"mtc3 $0, $15 \n\t"
	);
}
329
/*
 * Stop counting by writing zero to the CP3 control register(s) that
 * CP3_COUNTER0_START() programs: $0 always, and additionally $1 in dual
 * counter mode. Control register $2 is not touched here.
 */
__IRAM void CP3_COUNTER0_STOP( void )
{
#ifdef PERF_DUMP_CP3_DUAL_COUNTER_EN
	__asm__ __volatile__
	(
		"ctc3 $0, $0 \n\t"
		"ctc3 $0, $1 \n\t"
	);
#else
	__asm__ __volatile__
	(
		"ctc3 $0, $0 \n\t"
	);
#endif
}
345
346__IRAM uint64 CP3_COUNTER0_GET( void )
347{
348 __asm__ __volatile__
349 (
350 "la $8, tempVariable64 \n\t"
351 "mfc3 $9, $9 \n\t"
352 "sw $9, 0($8) \n\t"
353 "mfc3 $9, $8 \n\t"
354 "sw $9, 4($8) \n\t"
355 :
356 :
357 :"$8","$9"
358 );
359 return tempVariable64;
360}
361
362__IRAM void CP3_COUNTER0_GET_ALL( void )
363{
364__asm__ __volatile__ (
365 "mfc3 $9, $9 \n\t"
366 "sw $9, 0x00(%0) \n\t"
367 "mfc3 $9, $8 \n\t"
368 "sw $9, 0x04(%0) \n\t"
369 "mfc3 $9, $11 \n\t"
370 "sw $9, 0x08(%0) \n\t"
371 "mfc3 $9, $10 \n\t"
372 "sw $9, 0x0C(%0) \n\t"
373 "mfc3 $9, $13 \n\t"
374 "sw $9, 0x10(%0) \n\t"
375 "mfc3 $9, $12 \n\t"
376 "sw $9, 0x14(%0) \n\t"
377 "mfc3 $9, $15 \n\t"
378 "sw $9, 0x18(%0) \n\t"
379 "mfc3 $9, $14 \n\t"
380 "sw $9, 0x1C(%0) \n\t"
381 :
382 :"r"(currCnt)
383 );
384}
385
/*
 * Initialise the profiler.
 *
 * Enables CP3 access, builds the default event selection, marks the profiler
 * as initialised and enabled, clears every entry of romePerfStat[] and then
 * installs the human-readable description of each measurement point. Which
 * description table is installed depends on PERF_DUMP_INIT_SELECT.
 * Entries that get no description keep a zeroed desc field.
 *
 * Returns SUCCESS (there is no failure path).
 */
int32 rtl8651_romeperfInit()
{
	CP3_COUNTER0_INIT();
	CP3_COUNTER0_ASSIGN_EVENT();

	rtl8651_romeperf_inited = TRUE;
	rtl8651_romeperf_enable = TRUE;
	/* Wipes all accumulated statistics as well as the descriptions set below. */
	memset( &romePerfStat, 0, sizeof( romePerfStat ) );

#if (PERF_DUMP_INIT_SELECT == PERF_DUMP_INIT_ORI)
	/* Original table: NAPT, forwarding engine, IPsec and memory test points. */
	romePerfStat[ROMEPERF_INDEX_NAPT_ADD].desc = "NAPT add_all";
	romePerfStat[ROMEPERF_INDEX_NAPT_ADD_1].desc = "NAPT add_checkIntIP";
	romePerfStat[ROMEPERF_INDEX_NAPT_ADD_2].desc = "NAPT add_localServer";
	romePerfStat[ROMEPERF_INDEX_NAPT_ADD_3].desc = "NAPT add_checkExtIp";
	romePerfStat[ROMEPERF_INDEX_NAPT_ADD_4].desc = "NAPT add_dupCheck1";
	romePerfStat[ROMEPERF_INDEX_NAPT_ADD_5].desc = "NAPT add_dupCheck2";
	romePerfStat[ROMEPERF_INDEX_NAPT_ADD_6].desc = "NAPT add_bPortReused";
	romePerfStat[ROMEPERF_INDEX_NAPT_ADD_7].desc = "NAPT add_routeCache";
	romePerfStat[ROMEPERF_INDEX_NAPT_ADD_8].desc = "NAPT add_tooManyConn";
	romePerfStat[ROMEPERF_INDEX_NAPT_ADD_9].desc = "NAPT add_initConn";
	romePerfStat[ROMEPERF_INDEX_NAPT_ADD_10].desc = "NAPT add_decisionFlo";
	romePerfStat[ROMEPERF_INDEX_NAPT_ADD_11].desc = "NAPT add_ambiguous";
	romePerfStat[ROMEPERF_INDEX_NAPT_DEL].desc = "NAPT del";
	romePerfStat[ROMEPERF_INDEX_NAPT_FIND_OUTBOUND].desc = "NATP outbound";
	romePerfStat[ROMEPERF_INDEX_NAPT_FIND_INBOUND].desc = "NAPT inbound";
	romePerfStat[ROMEPERF_INDEX_NAPT_UPDATE].desc = "NAPT update";
	romePerfStat[ROMEPERF_INDEX_UNTIL_RXTHREAD].desc = "IntDispatch-RxThread";
	romePerfStat[ROMEPERF_INDEX_RECVLOOP].desc = "RecvLoop-FwdInput";
	romePerfStat[ROMEPERF_INDEX_FWDENG_INPUT].desc = "FwdEng_Input()";
	romePerfStat[ROMEPERF_INDEX_BEFORE_CRYPTO_ENCAP].desc = "FwdInput-Crypto(En)";
	romePerfStat[ROMEPERF_INDEX_ENCAP].desc = "IPSEC Encap";
	romePerfStat[ROMEPERF_INDEX_ENCAP_CRYPTO_ENGINE].desc = "Encap Crypto";
	romePerfStat[ROMEPERF_INDEX_ENCAP_AUTH_ENGINE].desc = "Encap Authtication";
	romePerfStat[ROMEPERF_INDEX_BEFORE_CRYPTO_DECAP].desc = "FwdInput-Crypto(De)";
	romePerfStat[ROMEPERF_INDEX_DECAP].desc = "IPSEC Decap";
	romePerfStat[ROMEPERF_INDEX_DECAP_CRYPTO_ENGINE].desc = "Decap Crypto";
	romePerfStat[ROMEPERF_INDEX_DECAP_AUTH_ENGINE].desc = "Decap Authtication";
	romePerfStat[ROMEPERF_INDEX_FASTPATH].desc = "Fast Path";
	romePerfStat[ROMEPERF_INDEX_SLOWPATH].desc = "Slow Path";
	romePerfStat[ROMEPERF_INDEX_FWDENG_SEND].desc = "FwdEngSend()";
	romePerfStat[ROMEPERF_INDEX_UNTIL_ACLDB].desc = "FwdInput() Until ACLDB";
	romePerfStat[ROMEPERF_INDEX_GET_MTU_AND_SOURCE_MAC].desc = "L3Route_MTU_srcMAC";
	romePerfStat[ROMEPERF_INDEX_PPTPL2TP_1].desc = "L3Route_PPTPL2TP_1";
	romePerfStat[ROMEPERF_INDEX_PPPOE_ARP_CACHE].desc = "L3Route_PPPoE_ArpCache";
	romePerfStat[ROMEPERF_INDEX_PPTPL2TP_SEND].desc = "L3Route_PptpL2tpSend()";
	romePerfStat[ROMEPERF_INDEX_FRAG].desc = "L3Route_Fragment";
	romePerfStat[ROMEPERF_INDEX_EGRESS_ACL].desc = "FwdSend_EgressACL";
	romePerfStat[ROMEPERF_INDEX_PPTPL2TP_ENCAP].desc = "FwdSend_PPTP/L2TP_Encap";
	romePerfStat[ROMEPERF_INDEX_FROM_PS].desc = "FwdSend_FromPS";
	romePerfStat[ROMEPERF_INDEX_EXTDEV_SEND].desc = "FwdSend_ExtDevSend()";
	romePerfStat[ROMEPERF_INDEX_FRAG_2ND_HALF].desc = "FwdSend_Frag_2ndHalf()";
	romePerfStat[ROMEPERF_INDEX_TXPKTPOST].desc = "rtl8651_txPktPostProcessing()";
	romePerfStat[ROMEPERF_INDEX_MBUFPAD].desc = "mBuf_padding()";
	romePerfStat[ROMEPERF_INDEX_TXALIGN].desc = "_swNic_txAlign";
	romePerfStat[ROMEPERF_INDEX_ISRTXRECYCLE].desc = "_swNic_isrTxRecycle";
	romePerfStat[ROMEPERF_INDEX_16].desc = "FwdEng_temp_16";
	romePerfStat[ROMEPERF_INDEX_17].desc = "FwdEng_temp_17";
	romePerfStat[ROMEPERF_INDEX_18].desc = "FwdEng_temp_18";
	romePerfStat[ROMEPERF_INDEX_19].desc = "FwdEng_temp_19";
	romePerfStat[ROMEPERF_INDEX_20].desc = "FwdEng_temp_20";
	romePerfStat[ROMEPERF_INDEX_21].desc = "FwdEng_temp_21";
	romePerfStat[ROMEPERF_INDEX_22].desc = "FwdEng_temp_22";
	romePerfStat[ROMEPERF_INDEX_23].desc = "FwdEng_temp_23";
	romePerfStat[ROMEPERF_INDEX_24].desc = "FwdEng_temp_24";
	romePerfStat[ROMEPERF_INDEX_25].desc = "FwdEng_temp_25";
	romePerfStat[ROMEPERF_INDEX_FLUSHDCACHE].desc = "rtlglue_flushDCache";
	romePerfStat[ROMEPERF_INDEX_IRAM_1].desc = "IRAM Cached within IRAM";
	romePerfStat[ROMEPERF_INDEX_IRAM_2].desc = "IRAM Uncached within IRAM";
	romePerfStat[ROMEPERF_INDEX_IRAM_3].desc = "test ICACHE (1024*100)";
	romePerfStat[ROMEPERF_INDEX_IRAM_4].desc = "test Uncached (1024*10)";
	romePerfStat[ROMEPERF_INDEX_DRAM_1].desc = "DRAM Cached within DRAM";
	romePerfStat[ROMEPERF_INDEX_DRAM_2].desc = "DRAM Uncached within DRAM";
	romePerfStat[ROMEPERF_INDEX_DRAM_3].desc = "test DCACHE (1024*100)";
	romePerfStat[ROMEPERF_INDEX_DRAM_4].desc = "test Uncached (1024*10)";
	romePerfStat[ROMEPERF_INDEX_BMP].desc = "KMP Algorithm";
	romePerfStat[ROMEPERF_INDEX_MDCMDIO].desc = "MDCMDIO PHY Register ACCESS";
#elif (PERF_DUMP_INIT_SELECT == PERF_DUMP_INIT_WLAN_TRX)
	/* WLAN table: transmit / receive path measurement points. */
	romePerfStat[ROMEPERF_INDEX_TX_PREWORK].desc = "XMIT_PREWORK";
	romePerfStat[ROMEPERF_INDEX_TX_XMIT_OUT].desc = "XMIT_OUT";
	romePerfStat[ROMEPERF_INDEX_TX_XMIT_OUT_2].desc = "XMIT_OUT_2";
	romePerfStat[ROMEPERF_INDEX_RX_ONE_PKT].desc = "RX_ONE_PKT";
	romePerfStat[ROMEPERF_INDEX_RX_ONE_PKT_2].desc = "RX_ONE_PKT_2";
	romePerfStat[ROMEPERF_INDEX_TX_START_XMIT].desc = "XMT_1";
	romePerfStat[ROMEPERF_INDEX_TX_START_XMIT_2].desc = "XMIT_2";
	romePerfStat[ROMEPERF_INDEX_TX_START_XMIT_3].desc = "XMIT_3";
	romePerfStat[ROMEPERF_INDEX_TX_START_XMIT_4].desc = "XMIT_4";
	romePerfStat[ROMEPERF_INDEX_TX_START_XMIT_5].desc = "XMIT_5";
#else
#error "PERF_DUMP_INIT_SELECT flag error"
#endif

	return SUCCESS;
}
479
480int32 rtl8651_romeperfReset()
481{
482 rtl8651_romeperfInit();
483
484 return SUCCESS;
485}
486
#if 0/* Old-style start/stop API, compiled out and kept for reference only. */
/* Lazily initialise the profiler, then latch the start value into cnt1. */
int32 rtl8651_romeperfStart()
{
	if ( rtl8651_romeperf_inited == FALSE ) rtl8651_romeperfInit();

	START_AND_GET_CP3_COUNTER0( cnt1 );

	return SUCCESS;
}

/* Latch the stop value into cnt2 and report the elapsed count through pDiff. */
int32 rtl8651_romeperfStop( uint64 *pDiff )
{
	if ( rtl8651_romeperf_inited == FALSE ) rtl8651_romeperfInit();

	STOP_AND_GET_CP3_COUNTER0( cnt2 );

	*pDiff = cnt2 - cnt1;
	return SUCCESS;
}
#endif
507
508int32 rtl8651_romeperfGet( uint64 *pGet )
509{
510 if ( rtl8651_romeperf_inited == FALSE ) return FAILED;
511
512 /* Louis patch: someone will disable CP3 in somewhere. */
513 CP3_COUNTER0_INIT();
514
515 CP3_COUNTER0_STOP();
516 *pGet = CP3_COUNTER0_GET();
517 CP3_COUNTER0_START();
518
519 return SUCCESS;
520}
521
522int32 rtl8651_romeperfPause( void )
523{
524 if ( rtl8651_romeperf_inited == FALSE ) return FAILED;
525
526 rtl8651_romeperf_enable = FALSE;
527
528 /* Louis patch: someone will disable CP3 in somewhere. */
529 CP3_COUNTER0_INIT();
530
531 CP3_COUNTER0_STOP();
532
533 return SUCCESS;
534}
535
536int32 rtl8651_romeperfResume( void )
537{
538 if ( rtl8651_romeperf_inited == FALSE ) return FAILED;
539
540 rtl8651_romeperf_enable = TRUE;
541
542 /* Louis patch: someone will disable CP3 in somewhere. */
543 CP3_COUNTER0_INIT();
544
545 CP3_COUNTER0_START();
546
547 return SUCCESS;
548}
549
550
551#ifdef PERF_DUMP_CP3_DUAL_COUNTER_EN
552
553__IRAM int32 rtl_romeperfEnterPoint_dual(uint32 index, int cnt_num,char *event)
554{
555 unsigned char i,j,counter;
556
557 /* Louis patch: someone will disable CP3 in somewhere. */
558 CP3_COUNTER0_INIT();
559 CP3_COUNTER0_STOP();
560
561 if ( (rtl8651_romeperf_inited == FALSE) || (rtl8651_romeperf_enable == FALSE) ) {
562 //CP3_COUNTER0_START();
563 return FAILED;
564 }
565
566 if ( index >= (sizeof(romePerfStat)/sizeof(rtl8651_romeperf_stat_t)) ) {
567 printk("CP3 index error! \n");
568 CP3_COUNTER0_START();
569 return FAILED;
570 }
571
572 if(cnt_num + numOfUsedCounter > MAX_CP3_COUNTER) {
573 printk("Index=[%x]CP3 counter not enough, request:%x, availableCnt:%x \n",index,cnt_num,(MAX_CP3_COUNTER-numOfUsedCounter));
574 CP3_COUNTER0_START();
575 return FAILED;
576 }
577
578 if(TRUE == romePerfStat[index].bUsed) {
579 printk("CP3 Error : reue the index=%x \n",index);
580 CP3_COUNTER0_START();
581 return FAILED;
582 }
583
584 // check reCall enter function, but not exit
585 if((countTemp!=0xff)&(countTemp != index))
586 {
587 romePerfStat[index].reEnterIdx = countTemp;
588 }
589
590 countTemp = index;
591
592 romePerfStat[index].numOfCount = cnt_num;
593
594 //printk("ENTER romePerfStat[%x].startCounter =%x \n",index,romePerfStat[index].startCounter);
595
596 for(i=0; i<cnt_num ;i++)
597 {
598 counter = getAvailableCnt();
599 romePerfStat[index].Counter[i] = counter;
600 romePerfStat[index].Event[i] = event[i];
601 bCounterUsed[counter] = TRUE;
602 // printk("ENTER index= %x counter =[%x]requestCntNum :%x\n",index,counter,cnt_num);
603 CP3_COUNTER0_RESET_ONE(counter);
604
605 setEvent(counter,event[i]);
606 numOfUsedCounter++;
607 }
608
609 // printk("ENTER index =%x total numOfUsedCounter =%x\n",index,numOfUsedCounter);
610
611 romePerfStat[index].bUsed = TRUE;
612 romePerfStat[index].hasTempCycle = TRUE;
613 CP3_COUNTER0_START();
614
615 return SUCCESS;
616}
617
618__IRAM int32 rtl_romeperfExitPoint_dual(uint32 index)
619{
620 unsigned char i,j,counter;
621
622 CP3_COUNTER0_INIT();
623 CP3_COUNTER0_STOP();
624
625 if ( (rtl8651_romeperf_inited == FALSE) || (rtl8651_romeperf_enable == FALSE) )
626 {
627 //CP3_COUNTER0_START();
628 return FAILED;
629 }
630
631 if ( index >= (sizeof(romePerfStat)/sizeof(rtl8651_romeperf_stat_t)) )
632 {
633 printk("CP3 index error! \n");
634 CP3_COUNTER0_START();
635 return FAILED;
636 }
637
638 if ( romePerfStat[index].hasTempCycle == FALSE )
639 {
640 printk("CP3 EXIT error! romePerfStat[%x].hasTempCycle == FALSE \n",index);
641 CP3_COUNTER0_START();
642 return FAILED;
643 }
644
645
646 if(countTemp == index)
647 {
648 countTemp = 0xff;
649 }
650
651 CP3_COUNTER0_GET_ALL();
652
653 for(i=0; i< romePerfStat[index].numOfCount; i++)
654 {
655 counter = romePerfStat[index].Counter[i];
656 //printk("LEAVE counter=[%x] cnt_index =%x romePerfStat[index].numOfCount =%x \n",counter,index,romePerfStat[index].numOfCount);
657 if (counter <4) {
658 romePerfStat[index].accCycle[i] += (currCnt[counter] & 0x00ffffff);
659 } else { // conter > 4
660 romePerfStat[index].accCycle[i] += ((currCnt[counter-4]>>32) & 0x00ffffff);
661 }
662
663 // counter available
664 bCounterUsed[counter] = FALSE;
665 numOfUsedCounter--;
666 }
667
668 // printk("LEAVE numOfUsedCounter =%x \n",numOfUsedCounter);
669
670 romePerfStat[index].bUsed = FALSE;
671 romePerfStat[index].hasTempCycle = FALSE;
672 romePerfStat[index].executedNum++;
673
674 //CP3_COUNTER0_RESET_DUAL(cnt_num);
675 CP3_COUNTER0_START();
676
677 return SUCCESS;
678}
679
680int getAvailableCnt(void)
681{
682 int i;
683
684 for(i=0;i<MAX_CP3_COUNTER;i++)
685 {
686 if( bCounterUsed[i] == false)
687 {
688 return i;
689 }
690 }
691
692 printk("Not avalable counter !!! \n");
693}
694
695int setEvent(int counter,char event)
696{
697 if(event >= CP3_CNT_MAX)
698 {
699 printk("Event over max = %x \n",event);
700 return FAILED;
701 }
702
703 if(counter < 4)
704 {
705 tempVariable32 = (tempVariable32 & (~(0xff << (counter*8)))) | ( event << (counter*8));
706 }
707 else
708 {
709 tempVariable32_2 = (tempVariable32_2 & (~(0xff << ((counter-4)*8)))) | ( event << ((counter-4)*8));
710 }
711}
712
713#else
714
715
716__IRAM int32 rtl8651_romeperfEnterPoint( uint32 index )
717{
718 if ( rtl8651_romeperf_inited == FALSE ||
719 rtl8651_romeperf_enable == FALSE ) return FAILED;
720 if ( index >= (sizeof(romePerfStat)/sizeof(rtl8651_romeperf_stat_t)) )
721 return FAILED;
722
723 if((countTemp!=0xff)&(countTemp != index))
724 {
725 romePerfStat[index].reEnterIdx = countTemp;
726 }
727
728 countTemp = index;
729
730 /* Louis patch: someone will disable CP3 in somewhere. */
731 CP3_COUNTER0_INIT();
732
733 CP3_COUNTER0_STOP();
734 CP3_COUNTER0_RESET();
735 CP3_COUNTER0_GET_ALL();
736
737 romePerfStat[index].tempCycle[0] = currCnt[0];
738 romePerfStat[index].tempCycle[1] = currCnt[1];
739 romePerfStat[index].tempCycle[2] = currCnt[2];
740 romePerfStat[index].tempCycle[3] = currCnt[3];
741
742 romePerfStat[index].hasTempCycle = TRUE;
743
744 CP3_COUNTER0_START();
745
746 return SUCCESS;
747}
748
749
750__IRAM int32 rtl8651_romeperfExitPoint( uint32 index )
751{
752 if ( rtl8651_romeperf_inited == FALSE ||
753 rtl8651_romeperf_enable == FALSE ) return FAILED;
754 if ( index >= (sizeof(romePerfStat)/sizeof(rtl8651_romeperf_stat_t)) )
755 return FAILED;
756 if ( romePerfStat[index].hasTempCycle == FALSE )
757 return FAILED;
758
759 if((countTemp == index)||(countTemp == 0))
760 {
761 countTemp = 0;
762 }
763
764 /* Louis patch: someone will disable CP3 in somewhere. */
765 CP3_COUNTER0_INIT();
766
767 CP3_COUNTER0_STOP();
768 CP3_COUNTER0_GET_ALL();
769
770 romePerfStat[index].accCycle[0] += (currCnt[0] - romePerfStat[index].tempCycle[0]);
771 romePerfStat[index].accCycle[1] += (currCnt[1] - romePerfStat[index].tempCycle[1]);
772 romePerfStat[index].accCycle[2] += (currCnt[2] - romePerfStat[index].tempCycle[2]);
773 romePerfStat[index].accCycle[3] += (currCnt[3] - romePerfStat[index].tempCycle[3]);
774
775
776 romePerfStat[index].hasTempCycle = FALSE;
777 romePerfStat[index].executedNum++;
778
779 CP3_COUNTER0_RESET();
780
781 return SUCCESS;
782}
783
784#endif //#ifdef PERF_DUMP_CP3_DUAL_COUNTER_EN
785
786int32 rtl8651_romeperfDump( int start, int end )
787{
788#if 0
789 int i;
790
791 rtlglue_printf( "index %30s %12s %8s %10s\n", "description", "accCycle", "totalNum", "Average" );
792 for( i = start; i <= end; i++ )
793 {
794 if ( romePerfStat[i].executedNum == 0 )
795 {
796 rtlglue_printf( "[%3d] %30s %12s %8s %10s\n", i, romePerfStat[i].desc, "--", "--", "--" );
797 }
798 else
799 {
800 int j;
801 rtlglue_printf( "[%3d] %30s ",
802 i, romePerfStat[i].desc );
803 for( j =0; j < sizeof(romePerfStat[i].accCycle)/sizeof(romePerfStat[i].accCycle[0]);
804 j++ )
805 {
806 uint32 *pAccCycle = (uint32*)&romePerfStat[i].accCycle[j];
807 uint32 avrgCycle = /* Hi-word */ (pAccCycle[0]*(0xffffffff/romePerfStat[i].executedNum)) +
808 /* Low-word */(pAccCycle[1]/romePerfStat[i].executedNum);
809
810 rtlglue_printf( "%12llu %8u %10u\n",
811 romePerfStat[i].accCycle[j],
812 romePerfStat[i].executedNum,
813 avrgCycle
814 );
815 rtlglue_printf( " %3s %30s ", "", "" );
816 }
817 rtlglue_printf( "\r" );
818 }
819 }
820
821 return SUCCESS;
822#else
823 int i,bReturn;
824
825 rtl8651_romeperf_stat_t* statSnapShot = rtlglue_malloc(sizeof(rtl8651_romeperf_stat_t) * (end - start + 1) );
826 if( statSnapShot == NULL )
827 {
828 rtlglue_printf("statSnapShot mem alloc failed\n");
829 return FAILED;
830 }
831
832 rtlglue_printf( "index %30s %30s %12s %8s %10s %8s\n", "description","Event","accCycle", "totalNum", "Average","reENTERIdx" );
833
834 for( i = start; i <= end; i++ )
835 {
836 int j;
837 for( j =0; j < sizeof(romePerfStat[i].accCycle)/sizeof(romePerfStat[i].accCycle[0]); j++ )
838 {
839 statSnapShot[i].accCycle[j] = romePerfStat[i].accCycle[j];
840 statSnapShot[i].tempCycle[j] = romePerfStat[i].tempCycle[j];
841 }
842 statSnapShot[i].executedNum = romePerfStat[i].executedNum;
843 statSnapShot[i].hasTempCycle = romePerfStat[i].hasTempCycle;
844 }
845
846 for( i = start; i < end; i++ )
847 {
848 if ( statSnapShot[i].executedNum == 0 )
849 {
850 rtlglue_printf( "[%3d] %30s %30s %12s %8s %10s %8s\n", i, romePerfStat[i].desc, "--", "--", "--", "--", "--","--" );
851 }
852 else
853 {
854 int j;
855 rtlglue_printf( "[%3d] %30s ", i, romePerfStat[i].desc );
856#ifdef PERF_DUMP_CP3_DUAL_COUNTER_EN
857 for( j =0; j < romePerfStat[i].numOfCount; j++ )
858#else
859 for( j =0; j < sizeof(statSnapShot[i].accCycle)/sizeof(statSnapShot[i].accCycle[0]); j++ )
860#endif //#ifdef PERF_DUMP_CP3_DUAL_COUNTER_EN
861 {
862 uint32 *pAccCycle = (uint32*)&statSnapShot[i].accCycle[j];
863 uint32 avrgCycle = /* Hi-word */ (pAccCycle[0]*(0xffffffff/statSnapShot[i].executedNum)) +
864 /* Low-word */(pAccCycle[1]/statSnapShot[i].executedNum);
865
866 rtlglue_printf( "%30s %12llu %8u %10u %8x\n",
867#ifdef PERF_DUMP_CP3_DUAL_COUNTER_EN
868 mapping[romePerfStat[i].Event[j]],
869#else
870 mapping[getEventIndex(j)],
871#endif //#ifdef PERF_DUMP_CP3_DUAL_COUNTER_EN
872 statSnapShot[i].accCycle[j],
873 statSnapShot[i].executedNum,
874 avrgCycle,
875 romePerfStat[i].reEnterIdx);
876
877 rtlglue_printf( " %3s %30s ", "", "" );
878 }
879 rtlglue_printf( "\r" );
880 }
881 }
882
883 rtlglue_free(statSnapShot);
884
885 return SUCCESS;
886#endif
887}
888
889////////////////////////////////////////////////////////////////////////////////
890
891int getEventIndex(int i)
892{
893 int index;
894#if (PERF_DUMP_CP3_SELECT == PERF_DUMP_CP3_OLD)
895 index = (tempVariable32 >> (i*8))&0xf;
896#else
897#ifdef PERF_DUMP_CP3_DUAL_COUNTER_EN
898 if(i < 4)
899 {
900 index = (tempVariable32 >> (i*8))&0xff;
901 }
902 else
903 {
904 index = (tempVariable32_2 >> ((i-4)*8))&0xff;
905 }
906#else
907 index = (tempVariable32 >> (i*8))&0xff;
908#endif // PERF_DUMP_CP3_DUAL_COUNTER_EN
909#endif //(PERF_DUMP_CP3_SELECT == PERF_DUMP_CP3_OLD)
910 return index;
911}
912
913
914#endif // PERF_DUMP
915