/* * Performance Profiling routines * * $Id: romeperf.c,v 1.1 2009/11/06 12:26:48 victoryman Exp $ * * Copyright (c) 2009 Realtek Semiconductor Corp. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ #include "8192cd_cfg.h" #ifdef PERF_DUMP #include "romeperf.h" //#include #define KERNEL_SYSeALLS #include #include #include #include #ifdef CONFIG_WIRELESS_LAN_MODULE #define __IRAM #else #define __IRAM __attribute__ ((section(".iram-gen"))) #endif #define __OPT_0 __attribute__((optimize("O0"))) #define rtlglue_malloc(size) kmalloc(size, 0x1f0) #define rtlglue_free(p) kfree(p) #define rtlglue_printf panic_printk enum CP3_COUNTER { CP3CNT_CYCLES = 0, CP3CNT_NEW_INST_FECTH, CP3CNT_NEW_INST_FETCH_CACHE_MISS, CP3CNT_NEW_INST_MISS_BUSY_CYCLE, CP3CNT_DATA_STORE_INST, CP3CNT_DATA_LOAD_INST, CP3CNT_DATA_LOAD_OR_STORE_INST, CP3CNT_EXACT_RETIRED_INST, CP3CNT_RETIRED_INST_FOR_PIPE_A, CP3CNT_RETIRED_INST_FOR_PIPE_B, CP3CNT_DATA_LOAD_OR_STORE_CACHE_MISS, CP3CNT_DATA_LOAD_OR_STORE_MISS_BUSY_CYCLE, CP3CNT_RESERVED12, CP3CNT_RESERVED13, CP3CNT_RESERVED14, CP3CNT_RESERVED15, }; /* Local variables */ //static uint64 tempVariable64; static uint32 tempVariable32; #ifdef PERF_DUMP_CP3_DUAL_COUNTER_EN static uint32 tempVariable32_2; unsigned char bCounterUsed[8]; unsigned char numOfUsedCounter = 0; #endif static uint64 currCnt[4]; unsigned int countTemp = 0xff; /* Global variables */ #ifdef CONFIG_WIRELESS_LAN_MODULE static uint64 cnt1, cnt2; static rtl8651_romeperf_stat_t romePerfStat[ROMEPERF_INDEX_MAX]; static uint32 rtl8651_romeperf_inited = 0; static uint32 rtl8651_romeperf_enable = TRUE; #else uint64 cnt1, cnt2; rtl8651_romeperf_stat_t romePerfStat[ROMEPERF_INDEX_MAX]; uint32 rtl8651_romeperf_inited = 0; uint32 rtl8651_romeperf_enable = TRUE; #endif __IRAM void CP3_COUNTER0_INIT( void ) { __asm__ __volatile__ ( "mfc0 $8, $12 \n\t" "la $9, 0x80000000 \n\t" "or $8, $9 \n\t" "mtc0 $8, $12 \n\t" : : :"$8","$9" ); } __IRAM uint32 CP3_COUNTER0_IS_INITED( void ) { __asm__ __volatile__ ( "mfc0 $8, $12 \n\t" "la $9, tempVariable32 \n\t" "sw $8, 0($9) \n\t" : : :"$8","$9" ); return tempVariable32; } __IRAM void CP3_COUNTER0_START( void ) { #ifdef PERF_DUMP_CP3_DUAL_COUNTER_EN __asm__ __volatile__ ( "la $8, tempVariable32 \n\t" "lw $8, 0($8) \n\t" "ctc3 $8, $0 \n\t" "la $8, tempVariable32_2 \n\t" "lw $8, 0($8) \n\t" "ctc3 $8, $1 \n\t" "li $8, 0xf \n\t" "ctc3 $8, $2 \n\t" : : :"$8" ); #else __asm__ __volatile__ ( "la $8, tempVariable32 \n\t" "lw $8, 0($8) \n\t" "ctc3 $8, $0 \n\t" : : :"$8" ); #endif // PERF_DUMP_CP3_DUAL_COUNTER_EN } __IRAM void CP3_COUNTER0_ASSIGN_EVENT( void ) { #if (PERF_DUMP_CP3_SELECT == PERF_DUMP_CP3_OLD) #if 1 /* Inst */ tempVariable32 = /* Counter0 */((0x10|CP3CNT_CYCLES)<< 0) | /* Counter1 */((0x10|CP3CNT_NEW_INST_FECTH)<< 8) | /* Counter2 */((0x10|CP3CNT_NEW_INST_FETCH_CACHE_MISS)<<16) | /* Counter3 */((0x10|CP3CNT_NEW_INST_MISS_BUSY_CYCLE)<<24); #elif 1 /* Data (LOAD+STORE) */ tempVariable32 = /* Counter0 */((0x10|CP3CNT_CYCLES)<< 0) | /* Counter1 */((0x10|CP3CNT_DATA_LOAD_OR_STORE_INST)<< 8) | /* Counter2 */((0x10|CP3CNT_DATA_LOAD_OR_STORE_CACHE_MISS)<<16) | /* Counter3 */((0x10|CP3CNT_DATA_LOAD_OR_STORE_MISS_BUSY_CYCLE)<<24); #elif 1 /* Data (STORE) */ tempVariable32 = /* Counter0 */((0x10|CP3CNT_DATA_LOAD_INST)<< 0) | /* Counter1 */((0x10|CP3CNT_DATA_STORE_INST)<< 8) | /* Counter2 */((0x10|CP3CNT_DATA_LOAD_OR_STORE_CACHE_MISS)<<16) | /* Counter3 */((0x10|CP3CNT_DATA_LOAD_OR_STORE_MISS_BUSY_CYCLE)<<24); #else #error #endif #elif (PERF_DUMP_CP3_SELECT == PERF_DUMP_CP3_NEW) #ifdef PERF_DUMP_CP3_DUAL_COUNTER_EN #if 1 /* Inst */ tempVariable32 = /* Counter0 */((CP3CNT_CYCLES)<< 0) | /* Counter1 */((CP3CNT_INST_FECTH)<< 8) | /* Counter2 */((CP3CNT_ICACHE_MISS)<<16) | /* Counter3 */((CP3CNT_ICACHE_MISS_CYCLE)<<24); tempVariable32_2 = /* Counter0 */((CP3CNT_CYCLES)<< 0) | /* Counter1 */((CP3CNT_INST_FECTH)<< 8) | /* Counter2 */((CP3CNT_ICACHE_MISS)<<16) | /* Counter3 */((CP3CNT_ICACHE_MISS_CYCLE)<<24); #elif 1 /* Data (LOAD+STORE) */ tempVariable32 = /* Counter0 */((CP3CNT_CYCLES)<< 0) | /* Counter1 */((CP3CNT_LOAD_OR_STORE_INST)<< 8) | /* Counter2 */((CP3CNT_STORE_INST)<<16) | /* Counter3 */((CP3CNT_LOAD_INST)<<24); #else #error #endif #else #if 1 /* Inst */ tempVariable32 = /* Counter0 */((CP3CNT_CYCLES)<< 0) | /* Counter1 */((CP3CNT_INST_FECTH)<< 8) | /* Counter2 */((CP3CNT_ICACHE_MISS)<<16) | /* Counter3 */((CP3CNT_ICACHE_MISS_CYCLE)<<24); #elif 1 /* Data (LOAD+STORE) */ tempVariable32 = /* Counter0 */((CP3CNT_CYCLES)<< 0) | /* Counter1 */((CP3CNT_LOAD_OR_STORE_INST)<< 8) | /* Counter2 */((CP3CNT_STORE_INST)<<16) | /* Counter3 */((CP3CNT_LOAD_INST)<<24); #else #error #endif #endif // PERF_DUMP_CP3_DUAL_COUNTER_EN #else #error "PERF_DUMP_CP3_SELECT flag error" #endif } __IRAM void CP3_COUNTER0_RESET_ONE(int cnt_num) { switch(cnt_num) { case 0: __asm__ __volatile__ ( "mtc3 $0, $8 \n\t" ); break; case 1: __asm__ __volatile__ ( "mtc3 $0, $10 \n\t" ); break; case 2: __asm__ __volatile__ ( "mtc3 $0, $12 \n\t" ); break; case 3: __asm__ __volatile__ ( "mtc3 $0, $14 \n\t" ); break; case 4: __asm__ __volatile__ ( "mtc3 $0, $9 \n\t" ); break; case 5: __asm__ __volatile__ ( "mtc3 $0, $11 \n\t" ); break; case 6: __asm__ __volatile__ ( "mtc3 $0, $13 \n\t" ); break; case 7: __asm__ __volatile__ ( "mtc3 $0, $15 \n\t" ); break; default: printk("CP3 RESET ERROR COUNTER = %x \n",cnt_num); break; } } __IRAM void CP3_COUNTER0_RESET( void ) { __asm__ __volatile__ ( "mtc3 $0, $8 \n\t" "mtc3 $0, $9 \n\t" "mtc3 $0, $10 \n\t" "mtc3 $0, $11 \n\t" "mtc3 $0, $12 \n\t" "mtc3 $0, $13 \n\t" "mtc3 $0, $14 \n\t" "mtc3 $0, $15 \n\t" ); } __IRAM void CP3_COUNTER0_STOP( void ) { #ifdef PERF_DUMP_CP3_DUAL_COUNTER_EN __asm__ __volatile__ ( "ctc3 $0, $0 \n\t" "ctc3 $0, $1 \n\t" ); #else __asm__ __volatile__ ( "ctc3 $0, $0 \n\t" ); #endif } __IRAM uint64 CP3_COUNTER0_GET( void ) { __asm__ __volatile__ ( "la $8, tempVariable64 \n\t" "mfc3 $9, $9 \n\t" "sw $9, 0($8) \n\t" "mfc3 $9, $8 \n\t" "sw $9, 4($8) \n\t" : : :"$8","$9" ); return tempVariable64; } __IRAM void CP3_COUNTER0_GET_ALL( void ) { __asm__ __volatile__ ( "mfc3 $9, $9 \n\t" "sw $9, 0x00(%0) \n\t" "mfc3 $9, $8 \n\t" "sw $9, 0x04(%0) \n\t" "mfc3 $9, $11 \n\t" "sw $9, 0x08(%0) \n\t" "mfc3 $9, $10 \n\t" "sw $9, 0x0C(%0) \n\t" "mfc3 $9, $13 \n\t" "sw $9, 0x10(%0) \n\t" "mfc3 $9, $12 \n\t" "sw $9, 0x14(%0) \n\t" "mfc3 $9, $15 \n\t" "sw $9, 0x18(%0) \n\t" "mfc3 $9, $14 \n\t" "sw $9, 0x1C(%0) \n\t" : :"r"(currCnt) ); } int32 rtl8651_romeperfInit() { CP3_COUNTER0_INIT(); CP3_COUNTER0_ASSIGN_EVENT(); rtl8651_romeperf_inited = TRUE; rtl8651_romeperf_enable = TRUE; memset( &romePerfStat, 0, sizeof( romePerfStat ) ); #if (PERF_DUMP_INIT_SELECT == PERF_DUMP_INIT_ORI) romePerfStat[ROMEPERF_INDEX_NAPT_ADD].desc = "NAPT add_all"; romePerfStat[ROMEPERF_INDEX_NAPT_ADD_1].desc = "NAPT add_checkIntIP"; romePerfStat[ROMEPERF_INDEX_NAPT_ADD_2].desc = "NAPT add_localServer"; romePerfStat[ROMEPERF_INDEX_NAPT_ADD_3].desc = "NAPT add_checkExtIp"; romePerfStat[ROMEPERF_INDEX_NAPT_ADD_4].desc = "NAPT add_dupCheck1"; romePerfStat[ROMEPERF_INDEX_NAPT_ADD_5].desc = "NAPT add_dupCheck2"; romePerfStat[ROMEPERF_INDEX_NAPT_ADD_6].desc = "NAPT add_bPortReused"; romePerfStat[ROMEPERF_INDEX_NAPT_ADD_7].desc = "NAPT add_routeCache"; romePerfStat[ROMEPERF_INDEX_NAPT_ADD_8].desc = "NAPT add_tooManyConn"; romePerfStat[ROMEPERF_INDEX_NAPT_ADD_9].desc = "NAPT add_initConn"; romePerfStat[ROMEPERF_INDEX_NAPT_ADD_10].desc = "NAPT add_decisionFlo"; romePerfStat[ROMEPERF_INDEX_NAPT_ADD_11].desc = "NAPT add_ambiguous"; romePerfStat[ROMEPERF_INDEX_NAPT_DEL].desc = "NAPT del"; romePerfStat[ROMEPERF_INDEX_NAPT_FIND_OUTBOUND].desc = "NATP outbound"; romePerfStat[ROMEPERF_INDEX_NAPT_FIND_INBOUND].desc = "NAPT inbound"; romePerfStat[ROMEPERF_INDEX_NAPT_UPDATE].desc = "NAPT update"; romePerfStat[ROMEPERF_INDEX_UNTIL_RXTHREAD].desc = "IntDispatch-RxThread"; romePerfStat[ROMEPERF_INDEX_RECVLOOP].desc = "RecvLoop-FwdInput"; romePerfStat[ROMEPERF_INDEX_FWDENG_INPUT].desc = "FwdEng_Input()"; romePerfStat[ROMEPERF_INDEX_BEFORE_CRYPTO_ENCAP].desc = "FwdInput-Crypto(En)"; romePerfStat[ROMEPERF_INDEX_ENCAP].desc = "IPSEC Encap"; romePerfStat[ROMEPERF_INDEX_ENCAP_CRYPTO_ENGINE].desc = "Encap Crypto"; romePerfStat[ROMEPERF_INDEX_ENCAP_AUTH_ENGINE].desc = "Encap Authtication"; romePerfStat[ROMEPERF_INDEX_BEFORE_CRYPTO_DECAP].desc = "FwdInput-Crypto(De)"; romePerfStat[ROMEPERF_INDEX_DECAP].desc = "IPSEC Decap"; romePerfStat[ROMEPERF_INDEX_DECAP_CRYPTO_ENGINE].desc = "Decap Crypto"; romePerfStat[ROMEPERF_INDEX_DECAP_AUTH_ENGINE].desc = "Decap Authtication"; romePerfStat[ROMEPERF_INDEX_FASTPATH].desc = "Fast Path"; romePerfStat[ROMEPERF_INDEX_SLOWPATH].desc = "Slow Path"; romePerfStat[ROMEPERF_INDEX_FWDENG_SEND].desc = "FwdEngSend()"; romePerfStat[ROMEPERF_INDEX_UNTIL_ACLDB].desc = "FwdInput() Until ACLDB"; romePerfStat[ROMEPERF_INDEX_GET_MTU_AND_SOURCE_MAC].desc = "L3Route_MTU_srcMAC"; romePerfStat[ROMEPERF_INDEX_PPTPL2TP_1].desc = "L3Route_PPTPL2TP_1"; romePerfStat[ROMEPERF_INDEX_PPPOE_ARP_CACHE].desc = "L3Route_PPPoE_ArpCache"; romePerfStat[ROMEPERF_INDEX_PPTPL2TP_SEND].desc = "L3Route_PptpL2tpSend()"; romePerfStat[ROMEPERF_INDEX_FRAG].desc = "L3Route_Fragment"; romePerfStat[ROMEPERF_INDEX_EGRESS_ACL].desc = "FwdSend_EgressACL"; romePerfStat[ROMEPERF_INDEX_PPTPL2TP_ENCAP].desc = "FwdSend_PPTP/L2TP_Encap"; romePerfStat[ROMEPERF_INDEX_FROM_PS].desc = "FwdSend_FromPS"; romePerfStat[ROMEPERF_INDEX_EXTDEV_SEND].desc = "FwdSend_ExtDevSend()"; romePerfStat[ROMEPERF_INDEX_FRAG_2ND_HALF].desc = "FwdSend_Frag_2ndHalf()"; romePerfStat[ROMEPERF_INDEX_TXPKTPOST].desc = "rtl8651_txPktPostProcessing()"; romePerfStat[ROMEPERF_INDEX_MBUFPAD].desc = "mBuf_padding()"; romePerfStat[ROMEPERF_INDEX_TXALIGN].desc = "_swNic_txAlign"; romePerfStat[ROMEPERF_INDEX_ISRTXRECYCLE].desc = "_swNic_isrTxRecycle"; romePerfStat[ROMEPERF_INDEX_16].desc = "FwdEng_temp_16"; romePerfStat[ROMEPERF_INDEX_17].desc = "FwdEng_temp_17"; romePerfStat[ROMEPERF_INDEX_18].desc = "FwdEng_temp_18"; romePerfStat[ROMEPERF_INDEX_19].desc = "FwdEng_temp_19"; romePerfStat[ROMEPERF_INDEX_20].desc = "FwdEng_temp_20"; romePerfStat[ROMEPERF_INDEX_21].desc = "FwdEng_temp_21"; romePerfStat[ROMEPERF_INDEX_22].desc = "FwdEng_temp_22"; romePerfStat[ROMEPERF_INDEX_23].desc = "FwdEng_temp_23"; romePerfStat[ROMEPERF_INDEX_24].desc = "FwdEng_temp_24"; romePerfStat[ROMEPERF_INDEX_25].desc = "FwdEng_temp_25"; romePerfStat[ROMEPERF_INDEX_FLUSHDCACHE].desc = "rtlglue_flushDCache"; romePerfStat[ROMEPERF_INDEX_IRAM_1].desc = "IRAM Cached within IRAM"; romePerfStat[ROMEPERF_INDEX_IRAM_2].desc = "IRAM Uncached within IRAM"; romePerfStat[ROMEPERF_INDEX_IRAM_3].desc = "test ICACHE (1024*100)"; romePerfStat[ROMEPERF_INDEX_IRAM_4].desc = "test Uncached (1024*10)"; romePerfStat[ROMEPERF_INDEX_DRAM_1].desc = "DRAM Cached within DRAM"; romePerfStat[ROMEPERF_INDEX_DRAM_2].desc = "DRAM Uncached within DRAM"; romePerfStat[ROMEPERF_INDEX_DRAM_3].desc = "test DCACHE (1024*100)"; romePerfStat[ROMEPERF_INDEX_DRAM_4].desc = "test Uncached (1024*10)"; romePerfStat[ROMEPERF_INDEX_BMP].desc = "KMP Algorithm"; romePerfStat[ROMEPERF_INDEX_MDCMDIO].desc = "MDCMDIO PHY Register ACCESS"; #elif (PERF_DUMP_INIT_SELECT == PERF_DUMP_INIT_WLAN_TRX) romePerfStat[ROMEPERF_INDEX_TX_PREWORK].desc = "XMIT_PREWORK"; romePerfStat[ROMEPERF_INDEX_TX_XMIT_OUT].desc = "XMIT_OUT"; romePerfStat[ROMEPERF_INDEX_TX_XMIT_OUT_2].desc = "XMIT_OUT_2"; romePerfStat[ROMEPERF_INDEX_RX_ONE_PKT].desc = "RX_ONE_PKT"; romePerfStat[ROMEPERF_INDEX_RX_ONE_PKT_2].desc = "RX_ONE_PKT_2"; romePerfStat[ROMEPERF_INDEX_TX_START_XMIT].desc = "XMT_1"; romePerfStat[ROMEPERF_INDEX_TX_START_XMIT_2].desc = "XMIT_2"; romePerfStat[ROMEPERF_INDEX_TX_START_XMIT_3].desc = "XMIT_3"; romePerfStat[ROMEPERF_INDEX_TX_START_XMIT_4].desc = "XMIT_4"; romePerfStat[ROMEPERF_INDEX_TX_START_XMIT_5].desc = "XMIT_5"; #else #error "PERF_DUMP_INIT_SELECT flag error" #endif return SUCCESS; } int32 rtl8651_romeperfReset() { rtl8651_romeperfInit(); return SUCCESS; } #if 0/* old fashion function, for reference only. */ int32 rtl8651_romeperfStart() { if ( rtl8651_romeperf_inited == FALSE ) rtl8651_romeperfInit(); START_AND_GET_CP3_COUNTER0( cnt1 ); return SUCCESS; } int32 rtl8651_romeperfStop( uint64 *pDiff ) { if ( rtl8651_romeperf_inited == FALSE ) rtl8651_romeperfInit(); STOP_AND_GET_CP3_COUNTER0( cnt2 ); *pDiff = cnt2 - cnt1; return SUCCESS; } #endif int32 rtl8651_romeperfGet( uint64 *pGet ) { if ( rtl8651_romeperf_inited == FALSE ) return FAILED; /* Louis patch: someone will disable CP3 in somewhere. */ CP3_COUNTER0_INIT(); CP3_COUNTER0_STOP(); *pGet = CP3_COUNTER0_GET(); CP3_COUNTER0_START(); return SUCCESS; } int32 rtl8651_romeperfPause( void ) { if ( rtl8651_romeperf_inited == FALSE ) return FAILED; rtl8651_romeperf_enable = FALSE; /* Louis patch: someone will disable CP3 in somewhere. */ CP3_COUNTER0_INIT(); CP3_COUNTER0_STOP(); return SUCCESS; } int32 rtl8651_romeperfResume( void ) { if ( rtl8651_romeperf_inited == FALSE ) return FAILED; rtl8651_romeperf_enable = TRUE; /* Louis patch: someone will disable CP3 in somewhere. */ CP3_COUNTER0_INIT(); CP3_COUNTER0_START(); return SUCCESS; } #ifdef PERF_DUMP_CP3_DUAL_COUNTER_EN __IRAM int32 rtl_romeperfEnterPoint_dual(uint32 index, int cnt_num,char *event) { unsigned char i,j,counter; /* Louis patch: someone will disable CP3 in somewhere. */ CP3_COUNTER0_INIT(); CP3_COUNTER0_STOP(); if ( (rtl8651_romeperf_inited == FALSE) || (rtl8651_romeperf_enable == FALSE) ) { //CP3_COUNTER0_START(); return FAILED; } if ( index >= (sizeof(romePerfStat)/sizeof(rtl8651_romeperf_stat_t)) ) { printk("CP3 index error! \n"); CP3_COUNTER0_START(); return FAILED; } if(cnt_num + numOfUsedCounter > MAX_CP3_COUNTER) { printk("Index=[%x]CP3 counter not enough, request:%x, availableCnt:%x \n",index,cnt_num,(MAX_CP3_COUNTER-numOfUsedCounter)); CP3_COUNTER0_START(); return FAILED; } if(TRUE == romePerfStat[index].bUsed) { printk("CP3 Error : reue the index=%x \n",index); CP3_COUNTER0_START(); return FAILED; } // check reCall enter function, but not exit if((countTemp!=0xff)&(countTemp != index)) { romePerfStat[index].reEnterIdx = countTemp; } countTemp = index; romePerfStat[index].numOfCount = cnt_num; //printk("ENTER romePerfStat[%x].startCounter =%x \n",index,romePerfStat[index].startCounter); for(i=0; i= (sizeof(romePerfStat)/sizeof(rtl8651_romeperf_stat_t)) ) { printk("CP3 index error! \n"); CP3_COUNTER0_START(); return FAILED; } if ( romePerfStat[index].hasTempCycle == FALSE ) { printk("CP3 EXIT error! romePerfStat[%x].hasTempCycle == FALSE \n",index); CP3_COUNTER0_START(); return FAILED; } if(countTemp == index) { countTemp = 0xff; } CP3_COUNTER0_GET_ALL(); for(i=0; i< romePerfStat[index].numOfCount; i++) { counter = romePerfStat[index].Counter[i]; //printk("LEAVE counter=[%x] cnt_index =%x romePerfStat[index].numOfCount =%x \n",counter,index,romePerfStat[index].numOfCount); if (counter <4) { romePerfStat[index].accCycle[i] += (currCnt[counter] & 0x00ffffff); } else { // conter > 4 romePerfStat[index].accCycle[i] += ((currCnt[counter-4]>>32) & 0x00ffffff); } // counter available bCounterUsed[counter] = FALSE; numOfUsedCounter--; } // printk("LEAVE numOfUsedCounter =%x \n",numOfUsedCounter); romePerfStat[index].bUsed = FALSE; romePerfStat[index].hasTempCycle = FALSE; romePerfStat[index].executedNum++; //CP3_COUNTER0_RESET_DUAL(cnt_num); CP3_COUNTER0_START(); return SUCCESS; } int getAvailableCnt(void) { int i; for(i=0;i= CP3_CNT_MAX) { printk("Event over max = %x \n",event); return FAILED; } if(counter < 4) { tempVariable32 = (tempVariable32 & (~(0xff << (counter*8)))) | ( event << (counter*8)); } else { tempVariable32_2 = (tempVariable32_2 & (~(0xff << ((counter-4)*8)))) | ( event << ((counter-4)*8)); } } #else __IRAM int32 rtl8651_romeperfEnterPoint( uint32 index ) { if ( rtl8651_romeperf_inited == FALSE || rtl8651_romeperf_enable == FALSE ) return FAILED; if ( index >= (sizeof(romePerfStat)/sizeof(rtl8651_romeperf_stat_t)) ) return FAILED; if((countTemp!=0xff)&(countTemp != index)) { romePerfStat[index].reEnterIdx = countTemp; } countTemp = index; /* Louis patch: someone will disable CP3 in somewhere. */ CP3_COUNTER0_INIT(); CP3_COUNTER0_STOP(); CP3_COUNTER0_RESET(); CP3_COUNTER0_GET_ALL(); romePerfStat[index].tempCycle[0] = currCnt[0]; romePerfStat[index].tempCycle[1] = currCnt[1]; romePerfStat[index].tempCycle[2] = currCnt[2]; romePerfStat[index].tempCycle[3] = currCnt[3]; romePerfStat[index].hasTempCycle = TRUE; CP3_COUNTER0_START(); return SUCCESS; } __IRAM int32 rtl8651_romeperfExitPoint( uint32 index ) { if ( rtl8651_romeperf_inited == FALSE || rtl8651_romeperf_enable == FALSE ) return FAILED; if ( index >= (sizeof(romePerfStat)/sizeof(rtl8651_romeperf_stat_t)) ) return FAILED; if ( romePerfStat[index].hasTempCycle == FALSE ) return FAILED; if((countTemp == index)||(countTemp == 0)) { countTemp = 0; } /* Louis patch: someone will disable CP3 in somewhere. */ CP3_COUNTER0_INIT(); CP3_COUNTER0_STOP(); CP3_COUNTER0_GET_ALL(); romePerfStat[index].accCycle[0] += (currCnt[0] - romePerfStat[index].tempCycle[0]); romePerfStat[index].accCycle[1] += (currCnt[1] - romePerfStat[index].tempCycle[1]); romePerfStat[index].accCycle[2] += (currCnt[2] - romePerfStat[index].tempCycle[2]); romePerfStat[index].accCycle[3] += (currCnt[3] - romePerfStat[index].tempCycle[3]); romePerfStat[index].hasTempCycle = FALSE; romePerfStat[index].executedNum++; CP3_COUNTER0_RESET(); return SUCCESS; } #endif //#ifdef PERF_DUMP_CP3_DUAL_COUNTER_EN int32 rtl8651_romeperfDump( int start, int end ) { #if 0 int i; rtlglue_printf( "index %30s %12s %8s %10s\n", "description", "accCycle", "totalNum", "Average" ); for( i = start; i <= end; i++ ) { if ( romePerfStat[i].executedNum == 0 ) { rtlglue_printf( "[%3d] %30s %12s %8s %10s\n", i, romePerfStat[i].desc, "--", "--", "--" ); } else { int j; rtlglue_printf( "[%3d] %30s ", i, romePerfStat[i].desc ); for( j =0; j < sizeof(romePerfStat[i].accCycle)/sizeof(romePerfStat[i].accCycle[0]); j++ ) { uint32 *pAccCycle = (uint32*)&romePerfStat[i].accCycle[j]; uint32 avrgCycle = /* Hi-word */ (pAccCycle[0]*(0xffffffff/romePerfStat[i].executedNum)) + /* Low-word */(pAccCycle[1]/romePerfStat[i].executedNum); rtlglue_printf( "%12llu %8u %10u\n", romePerfStat[i].accCycle[j], romePerfStat[i].executedNum, avrgCycle ); rtlglue_printf( " %3s %30s ", "", "" ); } rtlglue_printf( "\r" ); } } return SUCCESS; #else int i,bReturn; rtl8651_romeperf_stat_t* statSnapShot = rtlglue_malloc(sizeof(rtl8651_romeperf_stat_t) * (end - start + 1) ); if( statSnapShot == NULL ) { rtlglue_printf("statSnapShot mem alloc failed\n"); return FAILED; } rtlglue_printf( "index %30s %30s %12s %8s %10s %8s\n", "description","Event","accCycle", "totalNum", "Average","reENTERIdx" ); for( i = start; i <= end; i++ ) { int j; for( j =0; j < sizeof(romePerfStat[i].accCycle)/sizeof(romePerfStat[i].accCycle[0]); j++ ) { statSnapShot[i].accCycle[j] = romePerfStat[i].accCycle[j]; statSnapShot[i].tempCycle[j] = romePerfStat[i].tempCycle[j]; } statSnapShot[i].executedNum = romePerfStat[i].executedNum; statSnapShot[i].hasTempCycle = romePerfStat[i].hasTempCycle; } for( i = start; i < end; i++ ) { if ( statSnapShot[i].executedNum == 0 ) { rtlglue_printf( "[%3d] %30s %30s %12s %8s %10s %8s\n", i, romePerfStat[i].desc, "--", "--", "--", "--", "--","--" ); } else { int j; rtlglue_printf( "[%3d] %30s ", i, romePerfStat[i].desc ); #ifdef PERF_DUMP_CP3_DUAL_COUNTER_EN for( j =0; j < romePerfStat[i].numOfCount; j++ ) #else for( j =0; j < sizeof(statSnapShot[i].accCycle)/sizeof(statSnapShot[i].accCycle[0]); j++ ) #endif //#ifdef PERF_DUMP_CP3_DUAL_COUNTER_EN { uint32 *pAccCycle = (uint32*)&statSnapShot[i].accCycle[j]; uint32 avrgCycle = /* Hi-word */ (pAccCycle[0]*(0xffffffff/statSnapShot[i].executedNum)) + /* Low-word */(pAccCycle[1]/statSnapShot[i].executedNum); rtlglue_printf( "%30s %12llu %8u %10u %8x\n", #ifdef PERF_DUMP_CP3_DUAL_COUNTER_EN mapping[romePerfStat[i].Event[j]], #else mapping[getEventIndex(j)], #endif //#ifdef PERF_DUMP_CP3_DUAL_COUNTER_EN statSnapShot[i].accCycle[j], statSnapShot[i].executedNum, avrgCycle, romePerfStat[i].reEnterIdx); rtlglue_printf( " %3s %30s ", "", "" ); } rtlglue_printf( "\r" ); } } rtlglue_free(statSnapShot); return SUCCESS; #endif } //////////////////////////////////////////////////////////////////////////////// int getEventIndex(int i) { int index; #if (PERF_DUMP_CP3_SELECT == PERF_DUMP_CP3_OLD) index = (tempVariable32 >> (i*8))&0xf; #else #ifdef PERF_DUMP_CP3_DUAL_COUNTER_EN if(i < 4) { index = (tempVariable32 >> (i*8))&0xff; } else { index = (tempVariable32_2 >> ((i-4)*8))&0xff; } #else index = (tempVariable32 >> (i*8))&0xff; #endif // PERF_DUMP_CP3_DUAL_COUNTER_EN #endif //(PERF_DUMP_CP3_SELECT == PERF_DUMP_CP3_OLD) return index; } #endif // PERF_DUMP