/*---------------------------------------------------------------------------* Copyright (C) Nintendo. All rights reserved. These coded instructions, statements, and computer programs contain proprietary information of Nintendo of America Inc. and/or Nintendo Company Ltd., and are protected by Federal copyright law. They may not be disclosed to third parties or copied or duplicated in any form, in whole or in part, without the prior written consent of Nintendo. *---------------------------------------------------------------------------*/ // gx2Perf.h // // Declares GPU performance APIs for gx2 library. #ifndef _CAFE_GX2_PERF_H_ #define _CAFE_GX2_PERF_H_ #ifdef __cplusplus extern "C" { #endif /// @addtogroup GX2DeprecatedGroup /// @{ /// At this offset is the starting 8-samples of the pipeline stats. #define GX2_PIPELINE_DATA_START_OFFSET GX2_COUNTER_PIPELINE /// At this offset is the end final 8-samples of the pipeline stats. #define GX2_PIPELINE_DATA_END_OFFSET GX2_COUNTER_PIPELINE + GX2_NUM_COUNTER_PIPELINE #define GX2_RESULT_SIZE GX2_PIPELINE_DATA_END_OFFSET + GX2_NUM_COUNTER_PIPELINE /// \brief Data container for all low-level performance counter settings. /// /// \deprecated Please use the Perf APIs described in /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs" /// /// \note This structure must be allocated in GPU memory. /// typedef struct __GX2CounterInfo { /// Counter results are written to this buffer /// From 0 to GX2_COUNTER_LAST-1 is "normal" 64-bit counter data. /// From GX2_PIPELINE_DATA_START_OFFSET (aka GX2_COUNTER_LAST) to GX2_PIPELINE_DATA_START_OFFSET+GX2_NUM_COUNTER_PIPELINE-1 are the pipeline stat start values. /// From GX2_PIPELINE_DATA_END_OFFSET to GX2_PIPELINE_DATA_END_OFFSET+GX2_NUM_COUNTER_PIPELINE-1 are the pipeline stat end values. u64 results[GX2_RESULT_SIZE]; /// The CPU updated data below needs to be 64 bytes away from the GPU written data above. This is so /// any CPU updates and cache line flushes don't overwrite data updated by the GPU. A cache line is 64 bytes. u8 padding[64]; /// Flag if each counter is enabled or not GX2Boolean enabled[GX2_COUNTER_LAST+1]; /// What statistic is being counted by each hardware counter? _GX2StatId stats[GX2_COUNTER_LAST+1]; /// In unified shader architecture, each SIMD can change to /// each type of shader (vertex, pixel, geometry, etc.) /// This controls what shader type to count for SQ counters. _GX2SqType sqType[GX2_NUM_COUNTER_SQ]; } _GX2CounterInfo; /// \brief Data structure to store high-level performance counter data. /// /// \deprecated Please use the Perf APIs described in /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs" /// /// \note This structure must be allocated in GPU memory. Using the high-level counters will change the low-level /// counter registers and possibly corrupt anything you are trying to count simultaneously with the low-level counters, /// with the exception of GX2_COUNTER_PIPELINE. /// typedef struct _GX2PerfInfo { /// Container for counterInfo (low-level counter data) _GX2CounterInfo counterInfo; /// Number of CP (command processor) counters used u8 usedCpCount; /// Number of GRBM (gfx register bus mgr) counters used u8 usedGrbmCount; /// Number of PA-SU (prim assembler/setup) counters used u8 usedPaSuCount; /// Number of VGT (vtx grouper/tessellator) counters used u8 usedVgtCount; /// Number of SQ (sequencer) counters used u8 usedSqCount; /// Number of SPI (shader parameter interpolator) counters used u8 usedSpiCount; /// Number of SX (shader exporter) counters used u8 usedSxCount; /// Number of TA (texture addresser) counters used u8 usedTaCount; /// Number of TCP (texture cache per-pipe/L1) counters used u8 usedTcpCount; /// Number of TCC (texture cache per-channel/L2) counters used u8 usedTccCount; /// Number of DB (depth buffer) counters used u8 usedDbCount; /// Number of CB (color buffer) counters used u8 usedCbCount; /// Indicate which low-level counter index is used by the given high-level statistic u8 idxGrbmCount; u8 idxGrbmGuiActive; u8 idxGrbmShBusy; u8 idxPaSuClipBusy; u8 idxPaSuClprCullPrim; u8 idxPaSuSuZeroAreaCullPrim; u8 idxPaSuSuBackFaceCullPrim; u8 idxPaSuSuFrontFaceCullPrim; u8 idxPaSuSuPolyModeFaceCull; u8 idxPaSuPaInputPrim; u8 idxPaSuSuStalledSc; u8 idxVgtVgtPaClippSend; u8 idxVgtVgtPaClippIsEvent; u8 idxVgtReusedVsIndices; u8 idxVgtPaClippSend; u8 idxVgtPaClippIsEvent; u8 idxSqEsVsItemsPerType; u8 idxSqPsItemsPerType; u8 idxSqEsVsGsPsTaTexInstrsPerType; u8 idxSqEsVsTaTexInstrsPerType; u8 idxSqGsTaTexInstrsPerType; u8 idxSqPsTaTexInstrsPerType; u8 idxSqEsVsAluClauseInstrsPerType; u8 idxSqGsAluClauseInstrsPerType; u8 idxSqPsAluClauseInstrsPerType; u8 idxSqEsVsGsPsAluClauseInstrGroupsPerType; u8 idxSqEsVsAluClauseInstrGroupsPerType; u8 idxSqGsAluClauseInstrGroupsPerType; u8 idxSqPsAluClauseInstrGroupsPerType; u8 idxSqEsVsThreadLevelPerType; u8 idxSqGsThreadLevelPerType; u8 idxSqPsThreadLevelPerType; u8 idxSqEsVsGsPsThreadLevelPerType; u8 idxSqEsVsThreadsPerType; u8 idxSqGsThreadsPerType; u8 idxSqPsThreadsPerType; u8 idxSpiPctL0PiBusy; u8 idxSpiPctL1PiBusy; u8 idxSxDb0Pixels; u8 idxSxDb1Pixels; u8 idxSxDb0StallCycles; u8 idxSxDb1StallCycles; u8 idxTaAlignerCycles; u8 idxTcpTcpTaStallCycles; u8 idxTcpTcpTagconflictStallCycles; u8 idxTcpFmtV8Pixels; u8 idxTcpFmtV16Pixels; u8 idxTcpFmtV32Pixels; u8 idxTcpFmtV642Pixels; u8 idxTcpFmtV641Pixels; u8 idxTcpFmtV1284CyclePixels; u8 idxTcpFmtV1282CyclePixels; u8 idxTcpFmtV1281CyclePixels; u8 idxTcpTotalPixels; u8 idxTccReqsTcTfMiss; u8 idxDbOpPipeBusy; u8 idxDbDbScTileNoOps; u8 idxDbDbScTilePixelRate; u8 idxDbDbScTileFastOps; u8 idxDbDbScTileHierKill; u8 idxDbDbScQuadTiles; u8 idxDbPreZSamplesPassingZ; u8 idxDbPreZSamplesFailingS; u8 idxDbPreZSamplesFailingZ; u8 idxDbPostZSamplesPassingZ; u8 idxDbPostZSamplesFailingS; u8 idxDbPostZSamplesFailingZ; u8 idxDbScDbTileTiles; u8 idxDbDbCbLqiadStalls; u8 idxCbDrawnPixel; u8 idxCbCcMcWriteRequest; } GX2PerfInfo; /// \brief Low-Level: Reset all low-level performance counter settings /// /// \deprecated Please use the Perf APIs described in /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs" /// /// \param info Info structure to reset /// /// \donotcall \threadsafe \devonly \enddonotcall /// GX2_INLINE void _GX2ResetCounterInfo(_GX2CounterInfo* info) { u32 rsize = sizeof(u64)*(GX2_RESULT_SIZE); ASSERT(NULL != info); memset(info->results, 0xff, rsize); // write GX2_INVALID_COUNTER_VALUE_U64 memset(info->enabled, 0, sizeof(_GX2CounterInfo)-rsize); } /// \brief Low-Level: Enables a specific low-level counter /// /// \deprecated Please use the Perf APIs described in /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs" /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs" /// /// Call this multiple times to enable multiple low-level counters /// /// \note Using the high-level counters will change the low-level counter registers /// and possibly corrupt anything you are trying to count simultaneously with the low-level counters, /// with the exception of GX2_COUNTER_PIPELINE. /// /// \param info Structure to contain all low-level counter info /// \param id Which low-level hardware counter to enable /// \param parm What low-level parameter it should count /// /// \donotcall \threadsafe \devonly \enddonotcall /// void GX2API _GX2InitCounterInfo(_GX2CounterInfo *info, _GX2CounterId id, _GX2StatId parm); /// \brief Low-Level: Send completed low-level counter configuration to GPU. /// /// \deprecated Please use the Perf APIs described in /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs" /// /// \param info Structure containing all low-level counter info /// /// \note Make sure to call _GX2SampleCounters() to collect any previously counted values you care about before /// calling this function. Currently this function will reset counted results for the CB and SQ. /// /// \donotcall \gx2_typical \enddonotcall /// /// \writesgpu /// \alwayswritesgpu /// void GX2API _GX2SetCounterInfo(const _GX2CounterInfo *info); /// \brief Low-Level: Resets to zero all active low-level counters /// /// \deprecated Please use the Perf APIs described in /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs" /// /// \donotcall \gx2_typical \enddonotcall /// /// \writesgpu /// \alwayswritesgpu /// void GX2API _GX2ResetCounters(void); /// \brief Low-Level: Start (or restart) all active low-level counters counting /// /// \deprecated Please use the Perf APIs described in /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs" /// /// \donotcall \gx2_typical \enddonotcall /// /// \writesgpu /// \alwayswritesgpu /// void GX2API _GX2StartCounters(const _GX2CounterInfo *cinfo); /// \brief Low-Level: Stop (pause) all active low-level counters from counting /// /// \deprecated Please use the Perf APIs described in /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs" /// /// \donotcall \gx2_typical \enddonotcall /// /// \writesgpu /// \alwayswritesgpu /// void GX2API _GX2StopCounters(const _GX2CounterInfo *cinfo); /// \brief Low-Level: Tell GPU to write all active low-level counter values to memory /// /// \deprecated Please use the Perf APIs described in /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs" /// /// \param info Structure to contain all low-level counter info /// /// \donotcall \gx2_typical \enddonotcall /// /// \writesgpu /// \alwayswritesgpu /// void GX2API _GX2SampleCounters(_GX2CounterInfo *info); /// \brief Low-Level: Check if low-level counter data has been written by GPU /// /// \deprecated Please use the Perf APIs described in /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs" /// /// \param info Structure to contain all low-level counter info /// /// \note This method only works when the counters have been reset using /// \ref _GX2ResetCounterInfo. However, that also resets the counter selection. /// It is perhaps better to use a timestamp-based synchronization method. /// /// \donotcall \notthreadsafe \devonly \enddonotcall /// GX2_INLINE GX2Boolean _GX2GetCountersReady(const _GX2CounterInfo *info) { u32 i; GX2Invalidate(GX2_INVALIDATE_CPU, (void *)info, sizeof(_GX2CounterInfo)); for(i=0; ienabled[i]) if(GX2_INVALID_COUNTER_VALUE_U64 == info->results[i]) return GX2_FALSE; return GX2_TRUE; } /// \brief Reset all high-level GPU performance metrics. /// /// \deprecated Please use the Perf APIs described in /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs" /// /// \param perfinfo User-allocated structure for performance metric data. /// /// \donotcall \threadsafe \devonly \enddonotcall /// GX2_INLINE void GX2ResetPerfMetrics(GX2PerfInfo* perfinfo) { _GX2ResetCounterInfo(&perfinfo->counterInfo); memset(&perfinfo->usedCpCount, 0, sizeof(u8)*12); memset(&perfinfo->idxGrbmCount, 0xff, sizeof(GX2PerfInfo) - sizeof(u8)*12 - sizeof(_GX2CounterInfo)); } /// \brief Enable a specific high-level GPU performance metric. /// /// \deprecated Please use the Perf APIs described in /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs" /// /// \param perfinfo User-allocated structure for performance metric data. /// \param metric Desired high-level GPU performance metric to count /// /// \return GX2_TRUE if metric can be counted, GX2_FALSE if otherwise. /// /// You may enable multiple metrics at once. However, /// there are hardware restrictions on how many metrics can be sampled per pass. /// If this function returns false, it means that the desired combination /// of metrics cannot be gathered in a single pass; you must use multiple /// passes in order to sample that combination. /// /// \note Using the high-level counters will change the low-level counter registers /// and possibly corrupt anything you are trying to count simultaneously with the low-level counters, /// with the exception of GX2_COUNTER_PIPELINE. /// /// \donotcall \threadsafe \devonly \enddonotcall /// GX2Boolean GX2API GX2InitPerfMetric(GX2PerfInfo* perfinfo, GX2PerfMetric metric); /// \brief Starts all enabled high-level GPU performance counters. /// /// \deprecated Please use the Perf APIs described in /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs" /// /// \param perfinfo User-allocated structure for performance metric data. /// /// \donotcall \gx2_typical \enddonotcall /// /// \writesgpu /// \alwayswritesgpu /// GX2_INLINE void GX2BeginPerf(const GX2PerfInfo* perfinfo) { _GX2SetCounterInfo(&perfinfo->counterInfo); _GX2ResetCounters(); _GX2StartCounters(&perfinfo->counterInfo); } /// \brief Pause all enabled high-level GPU performance counters. /// /// \deprecated Please use the Perf APIs described in /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs" /// /// \donotcall \gx2_typical \enddonotcall /// /// \writesgpu /// \alwayswritesgpu /// GX2_INLINE void GX2PerfPause(const GX2PerfInfo* perfinfo) { _GX2StopCounters(&perfinfo->counterInfo); } /// \brief Resume all enabled high-level GPU performance counters. /// /// \deprecated Please use the Perf APIs described in /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs" /// /// \donotcall \gx2_typical \enddonotcall /// /// \writesgpu /// \alwayswritesgpu /// GX2_INLINE void GX2PerfPlay(const GX2PerfInfo* perfinfo) { _GX2StartCounters(&perfinfo->counterInfo); } /// \brief Stop and sample all enabled high-level GPU performance counters. /// /// \deprecated Please use the Perf APIs described in /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs" /// /// \param perfinfo User-allocated structure for performance metric data. /// /// \donotcall \gx2_typical \enddonotcall /// /// \writesgpu /// \alwayswritesgpu /// GX2_INLINE void GX2EndPerf(GX2PerfInfo* perfinfo) { _GX2StopCounters(&perfinfo->counterInfo); _GX2SampleCounters(&perfinfo->counterInfo); } /// \brief Check if performance counter data has been written by GPU yet. /// /// \deprecated Please use the Perf APIs described in /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs" /// /// \param perfinfo User-allocated structure for performance metric data. /// /// \note This method only works when the counters have been reset using /// \ref GX2ResetPerfMetrics. However, that also resets the counter selection. /// It is perhaps better to use a timestamp-based synchronization method. /// /// \donotcall \threadsafe \devonly \enddonotcall /// GX2_INLINE GX2Boolean GX2GetPerfMetricReady(const GX2PerfInfo *perfinfo) { return _GX2GetCountersReady(&perfinfo->counterInfo); } /// \brief Get the value of a high-level u64-type perf metric. /// /// \deprecated Please use the Perf APIs described in /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs" /// /// \param perfinfo User-allocated structure for performance metric data. /// \param metric Metric to be read. /// \param result u64 counter value. /// /// \donotcall \threadsafe \devonly \enddonotcall /// void GX2API GX2GetPerfMetricU64(const GX2PerfInfo *perfinfo, GX2PerfMetric metric, u64* result); /// \brief Get the value of a high-level f32-type perf metric. /// /// \deprecated Please use the Perf APIs described in /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs" /// /// \param perfinfo User-allocated structure for performance metric data. /// \param metric Metric to be read. /// \param result 32-bit floating point metric data, given as percentage. /// /// \donotcall \threadsafe \devonly \enddonotcall /// void GX2API GX2GetPerfMetricF32(const GX2PerfInfo *perfinfo, GX2PerfMetric metric, f32* result); /// @} #ifdef __cplusplus } #endif #endif /// __DEMO_PERF_H__