1 /*---------------------------------------------------------------------------*
2
3 Copyright (C) 2010-2011 Nintendo. All rights reserved.
4
5 These coded instructions, statements, and computer programs contain
6 proprietary information of Nintendo of America Inc. and/or Nintendo
7 Company Ltd., and are protected by Federal copyright law. They may
8 not be disclosed to third parties or copied or duplicated in any form,
9 in whole or in part, without the prior written consent of Nintendo.
10
11 *---------------------------------------------------------------------------*/
12
13 // gx2Perf.h
14 //
15 // Declares GPU performance APIs for gx2 library.
16
17
18 #ifndef _CAFE_GX2_PERF_H_
19 #define _CAFE_GX2_PERF_H_
20
21 #ifdef __cplusplus
22 extern "C" {
23 #endif
24
25 /// @addtogroup GX2DeprecatedGroup
26 /// @{
27
/// At this offset is the starting 8-samples of the pipeline stats.
#define GX2_PIPELINE_DATA_START_OFFSET (GX2_COUNTER_PIPELINE)

/// At this offset is the final (end) 8-samples of the pipeline stats.
#define GX2_PIPELINE_DATA_END_OFFSET   (GX2_COUNTER_PIPELINE + GX2_NUM_COUNTER_PIPELINE)

/// Total number of u64 entries in the results buffer: everything up to
/// GX2_PIPELINE_DATA_END_OFFSET plus the GX2_NUM_COUNTER_PIPELINE end values.
// The replacement lists are parenthesized so the macros expand safely inside
// larger expressions such as sizeof(u64) * GX2_RESULT_SIZE.
#define GX2_RESULT_SIZE                (GX2_PIPELINE_DATA_END_OFFSET + GX2_NUM_COUNTER_PIPELINE)
35
36 /// \brief Data container for all low-level performance counter settings.
37 ///
38 /// \deprecated Please use the Perf APIs described in
39 /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs"
40 ///
41 /// \note This structure must be allocated in GPU memory.
42 ///
43 typedef struct __GX2CounterInfo
44 {
45 /// Counter results are written to this buffer
46 /// From 0 to GX2_COUNTER_LAST-1 is "normal" 64-bit counter data.
47 /// From GX2_PIPELINE_DATA_START_OFFSET (aka GX2_COUNTER_LAST) to GX2_PIPELINE_DATA_START_OFFSET+GX2_NUM_COUNTER_PIPELINE-1 are the pipeline stat start values.
48 /// From GX2_PIPELINE_DATA_END_OFFSET to GX2_PIPELINE_DATA_END_OFFSET+GX2_NUM_COUNTER_PIPELINE-1 are the pipeline stat end values.
49 u64 results[GX2_RESULT_SIZE];
50
51 /// The CPU updated data below needs to be 64 bytes away from the GPU written data above. This is so
52 /// any CPU updates and cache line flushes don't overwrite data updated by the GPU. A cache line is 64 bytes.
53 u8 padding[64];
54
55 /// Flag if each counter is enabled or not
56 GX2Boolean enabled[GX2_COUNTER_LAST+1];
57 /// What statistic is being counted by each hardware counter?
58 _GX2StatId stats[GX2_COUNTER_LAST+1];
59 /// In unified shader architecture, each SIMD can change to
60 /// each type of shader (vertex, pixel, geometry, etc.)
61 /// This controls what shader type to count for SQ counters.
62 _GX2SqType sqType[GX2_NUM_COUNTER_SQ];
63 } _GX2CounterInfo;
64
65 /// \brief Data stucture to store high-level performance counter data.
66 ///
67 /// \deprecated Please use the Perf APIs described in
68 /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs"
69 ///
70 /// \note This structure must be allocated in GPU memory. Using the high-level counters will change the low-level
71 /// counter registers and possibly corrupt anything you are trying to count simultaneously with the low-level counters,
72 /// with the exception of GX2_COUNTER_PIPELINE.
73 ///
74 typedef struct _GX2PerfInfo
75 {
76 /// Container for counterInfo (low-level counter data)
77 _GX2CounterInfo counterInfo;
78
79 /// Number of CP (command processor) counters used
80 u8 usedCpCount;
81 /// Number of GRBM (gfx register bus mgr) counters used
82 u8 usedGrbmCount;
83 /// Number of PA-SU (prim assembler/setup) counters used
84 u8 usedPaSuCount;
85 /// Number of VGT (vtx grouper/tessellator) counters used
86 u8 usedVgtCount;
87 /// Number of SQ (sequencer) counters used
88 u8 usedSqCount;
89 /// Number of SPI (shader parameter interpolator) counters used
90 u8 usedSpiCount;
91 /// Number of SX (shader exporter) counters used
92 u8 usedSxCount;
93 /// Number of TA (texture addresser) counters used
94 u8 usedTaCount;
95 /// Number of TCP (texture cache per-pipe/L1) counters used
96 u8 usedTcpCount;
97 /// Number of TCC (texture cache per-channel/L2) counters used
98 u8 usedTccCount;
99 /// Number of DB (depth buffer) counters used
100 u8 usedDbCount;
101 /// Number of CB (color buffer) counters used
102 u8 usedCbCount;
103
104 /// Indicate which low-level counter index is used by the given high-level statistic
105 u8 idxGrbmCount;
106 u8 idxGrbmGuiActive;
107 u8 idxGrbmShBusy;
108
109 u8 idxPaSuClipBusy;
110 u8 idxPaSuClprCullPrim;
111 u8 idxPaSuSuZeroAreaCullPrim;
112 u8 idxPaSuSuBackFaceCullPrim;
113 u8 idxPaSuSuFrontFaceCullPrim;
114 u8 idxPaSuSuPolyModeFaceCull;
115 u8 idxPaSuPaInputPrim;
116 u8 idxPaSuSuStalledSc;
117
118 u8 idxVgtVgtPaClippSend;
119 u8 idxVgtVgtPaClippIsEvent;
120 u8 idxVgtReusedVsIndices;
121 u8 idxVgtPaClippSend;
122 u8 idxVgtPaClippIsEvent;
123
124 u8 idxSqEsVsItemsPerType;
125 u8 idxSqPsItemsPerType;
126 u8 idxSqEsVsGsPsTaTexInstrsPerType;
127 u8 idxSqEsVsTaTexInstrsPerType;
128 u8 idxSqGsTaTexInstrsPerType;
129 u8 idxSqPsTaTexInstrsPerType;
130 u8 idxSqEsVsAluClauseInstrsPerType;
131 u8 idxSqGsAluClauseInstrsPerType;
132 u8 idxSqPsAluClauseInstrsPerType;
133 u8 idxSqEsVsGsPsAluClauseInstrGroupsPerType;
134 u8 idxSqEsVsAluClauseInstrGroupsPerType;
135 u8 idxSqGsAluClauseInstrGroupsPerType;
136 u8 idxSqPsAluClauseInstrGroupsPerType;
137 u8 idxSqEsVsThreadLevelPerType;
138 u8 idxSqGsThreadLevelPerType;
139 u8 idxSqPsThreadLevelPerType;
140 u8 idxSqEsVsGsPsThreadLevelPerType;
141 u8 idxSqEsVsThreadsPerType;
142 u8 idxSqGsThreadsPerType;
143 u8 idxSqPsThreadsPerType;
144
145 u8 idxSpiPctL0PiBusy;
146 u8 idxSpiPctL1PiBusy;
147
148 u8 idxSxDb0Pixels;
149 u8 idxSxDb1Pixels;
150 u8 idxSxDb0StallCycles;
151 u8 idxSxDb1StallCycles;
152
153 u8 idxTaAlignerCycles;
154
155 u8 idxTcpTcpTaStallCycles;
156 u8 idxTcpTcpTagconflictStallCycles;
157 u8 idxTcpFmtV8Pixels;
158 u8 idxTcpFmtV16Pixels;
159 u8 idxTcpFmtV32Pixels;
160 u8 idxTcpFmtV642Pixels;
161 u8 idxTcpFmtV641Pixels;
162 u8 idxTcpFmtV1284CyclePixels;
163 u8 idxTcpFmtV1282CyclePixels;
164 u8 idxTcpFmtV1281CyclePixels;
165 u8 idxTcpTotalPixels;
166
167 u8 idxTccReqsTcTfMiss;
168
169 u8 idxDbOpPipeBusy;
170 u8 idxDbDbScTileNoOps;
171 u8 idxDbDbScTilePixelRate;
172 u8 idxDbDbScTileFastOps;
173 u8 idxDbDbScTileHierKill;
174 u8 idxDbDbScQuadTiles;
175 u8 idxDbPreZSamplesPassingZ;
176 u8 idxDbPreZSamplesFailingS;
177 u8 idxDbPreZSamplesFailingZ;
178 u8 idxDbPostZSamplesPassingZ;
179 u8 idxDbPostZSamplesFailingS;
180 u8 idxDbPostZSamplesFailingZ;
181 u8 idxDbScDbTileTiles;
182 u8 idxDbDbCbLqiadStalls;
183
184 u8 idxCbDrawnPixel;
185 u8 idxCbCcMcWriteRequest;
186 } GX2PerfInfo;
187
188 /// \brief Low-Level: Reset all low-level performance counter settings
189 ///
190 /// \deprecated Please use the Perf APIs described in
191 /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs"
192 ///
193 /// \param info Info structure to reset
194 ///
195 /// \donotcall \threadsafe \devonly \enddonotcall
196 ///
_GX2ResetCounterInfo(_GX2CounterInfo * info)197 GX2_INLINE void _GX2ResetCounterInfo(_GX2CounterInfo* info)
198 {
199 u32 rsize = sizeof(u64)*(GX2_RESULT_SIZE);
200 ASSERT(NULL != info);
201 memset(info->results, 0xff, rsize); // write GX2_INVALID_COUNTER_VALUE_U64
202 memset(info->enabled, 0, sizeof(_GX2CounterInfo)-rsize);
203 }
204
205 /// \brief Low-Level: Enables a specific low-level counter
206 ///
207 /// \deprecated Please use the Perf APIs described in
208 /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs"
209 /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs"
210 ///
211 /// Call this multiple times to enable multiple low-level counters
212 ///
213 /// \note Using the high-level counters will change the low-level counter registers
214 /// and possibly corrupt anything you are trying to count simultaneously with the low-level counters,
215 /// with the exception of GX2_COUNTER_PIPELINE.
216 ///
217 /// \param info Structure to contain all low-level counter info
218 /// \param id Which low-level hardware counter to enable
219 /// \param parm What low-level parameter it should count
220 ///
221 /// \donotcall \threadsafe \devonly \enddonotcall
222 ///
223 void GX2API _GX2InitCounterInfo(_GX2CounterInfo *info, _GX2CounterId id,
224 _GX2StatId parm);
225
226 /// \brief Low-Level: Send completed low-level counter configuration to GPU.
227 ///
228 /// \deprecated Please use the Perf APIs described in
229 /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs"
230 ///
231 /// \param info Structure containing all low-level counter info
232 ///
233 /// \note Make sure to call _GX2SampleCounters() to collect any previously counted values you care about before
234 /// calling this function. Currently this function will reset counted results for the CB and SQ.
235 ///
236 /// \donotcall \gx2_typical \enddonotcall
237 ///
238 /// \writesgpu
239 /// \alwayswritesgpu
240 ///
241 void GX2API _GX2SetCounterInfo(const _GX2CounterInfo *info);
242
243 /// \brief Low-Level: Resets to zero all active low-level counters
244 ///
245 /// \deprecated Please use the Perf APIs described in
246 /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs"
247 ///
248 /// \donotcall \gx2_typical \enddonotcall
249 ///
250 /// \writesgpu
251 /// \alwayswritesgpu
252 ///
253 void GX2API _GX2ResetCounters(void);
254
255 /// \brief Low-Level: Start (or restart) all active low-level counters counting
256 ///
257 /// \deprecated Please use the Perf APIs described in
258 /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs"
259 ///
260 /// \donotcall \gx2_typical \enddonotcall
261 ///
262 /// \writesgpu
263 /// \alwayswritesgpu
264 ///
265 void GX2API _GX2StartCounters(const _GX2CounterInfo *cinfo);
266
267 /// \brief Low-Level: Stop (pause) all active low-level counters from counting
268 ///
269 /// \deprecated Please use the Perf APIs described in
270 /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs"
271 ///
272 /// \donotcall \gx2_typical \enddonotcall
273 ///
274 /// \writesgpu
275 /// \alwayswritesgpu
276 ///
277 void GX2API _GX2StopCounters(const _GX2CounterInfo *cinfo);
278
279 /// \brief Low-Level: Tell GPU to write all active low-level counter values to memory
280 ///
281 /// \deprecated Please use the Perf APIs described in
282 /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs"
283 ///
284 /// \param info Structure to contain all low-level counter info
285 ///
286 /// \donotcall \gx2_typical \enddonotcall
287 ///
288 /// \writesgpu
289 /// \alwayswritesgpu
290 ///
291 void GX2API _GX2SampleCounters(_GX2CounterInfo *info);
292
293 /// \brief Low-Level: Check if low-level counter data has been written by GPU
294 ///
295 /// \deprecated Please use the Perf APIs described in
296 /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs"
297 ///
298 /// \param info Structure to contain all low-level counter info
299 ///
300 /// \note This method only works when the counters have been reset using
301 /// \ref _GX2ResetCounterInfo. However, that also resets the counter selection.
302 /// It is perhaps better to use a timestamp-based synchronization method.
303 ///
304 /// \donotcall \notthreadsafe \devonly \enddonotcall
305 ///
_GX2GetCountersReady(const _GX2CounterInfo * info)306 GX2_INLINE GX2Boolean _GX2GetCountersReady(const _GX2CounterInfo *info)
307 {
308 u32 i;
309 GX2Invalidate(GX2_INVALIDATE_CPU, (void *)info, sizeof(_GX2CounterInfo));
310 for(i=0; i<GX2_COUNTER_LAST+1; i++)
311 if(GX2_TRUE == info->enabled[i])
312 if(GX2_INVALID_COUNTER_VALUE_U64 == info->results[i])
313 return GX2_FALSE;
314 return GX2_TRUE;
315 }
316
317 /// \brief Reset all high-level GPU performance metrics.
318 ///
319 /// \deprecated Please use the Perf APIs described in
320 /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs"
321 ///
322 /// \param perfinfo User-allocated structure for performance metric data.
323 ///
324 /// \donotcall \threadsafe \devonly \enddonotcall
325 ///
GX2ResetPerfMetrics(GX2PerfInfo * perfinfo)326 GX2_INLINE void GX2ResetPerfMetrics(GX2PerfInfo* perfinfo)
327 {
328 _GX2ResetCounterInfo(&perfinfo->counterInfo);
329 memset(&perfinfo->usedCpCount, 0, sizeof(u8)*12);
330 memset(&perfinfo->idxGrbmCount, 0xff, sizeof(GX2PerfInfo) - sizeof(u8)*12 - sizeof(_GX2CounterInfo));
331 }
332
333 /// \brief Enable a specific high-level GPU performance metric.
334 ///
335 /// \deprecated Please use the Perf APIs described in
336 /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs"
337 ///
338 /// \param perfinfo User-allocated structure for performance metric data.
339 /// \param metric Desired high-level GPU performance metric to count
340 ///
341 /// \return GX2_TRUE if metric can be counted, GX2_FALSE if otherwise.
342 ///
343 /// You may enable multiple metrics at once. However,
344 /// there are hardware restrictions on how many metrics can be sampled per pass.
345 /// If this function returns false, it means that the desired combination
346 /// of metrics cannot be gathered in a single pass; you must use multiple
347 /// passes in order to sample that combination.
348 ///
349 /// \note Using the high-level counters will change the low-level counter registers
350 /// and possibly corrupt anything you are trying to count simultaneously with the low-level counters,
351 /// with the exception of GX2_COUNTER_PIPELINE.
352 ///
353 /// \donotcall \threadsafe \devonly \enddonotcall
354 ///
355 GX2Boolean GX2API GX2InitPerfMetric(GX2PerfInfo* perfinfo,
356 GX2PerfMetric metric);
357
358 /// \brief Starts all enabled high-level GPU performance counters.
359 ///
360 /// \deprecated Please use the Perf APIs described in
361 /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs"
362 ///
363 /// \param perfinfo User-allocated structure for performance metric data.
364 ///
365 /// \donotcall \gx2_typical \enddonotcall
366 ///
367 /// \writesgpu
368 /// \alwayswritesgpu
369 ///
GX2BeginPerf(const GX2PerfInfo * perfinfo)370 GX2_INLINE void GX2BeginPerf(const GX2PerfInfo* perfinfo)
371 {
372 _GX2SetCounterInfo(&perfinfo->counterInfo);
373 _GX2ResetCounters();
374 _GX2StartCounters(&perfinfo->counterInfo);
375 }
376
377 /// \brief Pause all enabled high-level GPU performance counters.
378 ///
379 /// \deprecated Please use the Perf APIs described in
380 /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs"
381 ///
382 /// \donotcall \gx2_typical \enddonotcall
383 ///
384 /// \writesgpu
385 /// \alwayswritesgpu
386 ///
GX2PerfPause(const GX2PerfInfo * perfinfo)387 GX2_INLINE void GX2PerfPause(const GX2PerfInfo* perfinfo)
388 {
389 _GX2StopCounters(&perfinfo->counterInfo);
390 }
391
392 /// \brief Resume all enabled high-level GPU performance counters.
393 ///
394 /// \deprecated Please use the Perf APIs described in
395 /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs"
396 ///
397 /// \donotcall \gx2_typical \enddonotcall
398 ///
399 /// \writesgpu
400 /// \alwayswritesgpu
401 ///
GX2PerfPlay(const GX2PerfInfo * perfinfo)402 GX2_INLINE void GX2PerfPlay(const GX2PerfInfo* perfinfo)
403 {
404 _GX2StartCounters(&perfinfo->counterInfo);
405 }
406
407 /// \brief Stop and sample all enabled high-level GPU performance counters.
408 ///
409 /// \deprecated Please use the Perf APIs described in
410 /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs"
411 ///
412 /// \param perfinfo User-allocated structure for performance metric data.
413 ///
414 /// \donotcall \gx2_typical \enddonotcall
415 ///
416 /// \writesgpu
417 /// \alwayswritesgpu
418 ///
GX2EndPerf(GX2PerfInfo * perfinfo)419 GX2_INLINE void GX2EndPerf(GX2PerfInfo* perfinfo)
420 {
421 _GX2StopCounters(&perfinfo->counterInfo);
422 _GX2SampleCounters(&perfinfo->counterInfo);
423 }
424
425 /// \brief Check if performance counter data has been written by GPU yet.
426 ///
427 /// \deprecated Please use the Perf APIs described in
428 /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs"
429 ///
430 /// \param perfinfo User-allocated structure for performance metric data.
431 ///
432 /// \note This method only works when the counters have been reset using
433 /// \ref GX2ResetPerfMetrics. However, that also resets the counter selection.
434 /// It is perhaps better to use a timestamp-based synchronization method.
435 ///
436 /// \donotcall \threadsafe \devonly \enddonotcall
437 ///
GX2GetPerfMetricReady(const GX2PerfInfo * perfinfo)438 GX2_INLINE GX2Boolean GX2GetPerfMetricReady(const GX2PerfInfo *perfinfo)
439 {
440 return _GX2GetCountersReady(&perfinfo->counterInfo);
441 }
442
443 /// \brief Get the value of a high-level u64-type perf metric.
444 ///
445 /// \deprecated Please use the Perf APIs described in
446 /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs"
447 ///
448 /// \param perfinfo User-allocated structure for performance metric data.
449 /// \param metric Metric to be read.
450 /// \param result u64 counter value.
451 ///
452 /// \donotcall \threadsafe \devonly \enddonotcall
453 ///
454 void GX2API GX2GetPerfMetricU64(const GX2PerfInfo *perfinfo,
455 GX2PerfMetric metric, u64* result);
456
457 /// \brief Get the value of a high-level f32-type perf metric.
458 ///
459 /// \deprecated Please use the Perf APIs described in
460 /// \ref GX2PerfCounterPage "GX2 Perf Counter APIs"
461 ///
462 /// \param perfinfo User-allocated structure for performance metric data.
463 /// \param metric Metric to be read.
464 /// \param result 32-bit floating point metric data, given as percentage.
465 ///
466 /// \donotcall \threadsafe \devonly \enddonotcall
467 ///
468 void GX2API GX2GetPerfMetricF32(const GX2PerfInfo *perfinfo,
469 GX2PerfMetric metric, f32* result);
470
471 /// @}
472
473 #ifdef __cplusplus
474 }
475 #endif
476
#endif // _CAFE_GX2_PERF_H_
478