1 /*---------------------------------------------------------------------------*
2
3 Copyright (C) Nintendo. All rights reserved.
4
5 These coded instructions, statements, and computer programs contain
6 proprietary information of Nintendo of America Inc. and/or Nintendo
7 Company Ltd., and are protected by Federal copyright law. They may
8 not be disclosed to third parties or copied or duplicated in any form,
9 in whole or in part, without the prior written consent of Nintendo.
10
11 *---------------------------------------------------------------------------*/
12
13 #include <stdio.h>
14 #include <string.h>
15 #include <math.h>
16
17 #if defined(WIN32) || defined(WIN64)
18 #include <pc/gx2.h>
19 #include <pc/demo.h>
20 #endif
21 #include <cafe/os.h>
22 #include <cafe/mem.h>
23 #include <cafe/gx2.h>
24 #include <cafe/demo.h>
25
26 #include <cafe/gfd.h>
27 #include <cafe/gx2ut.h>
28
29 //Include generated shaders
30 #include "shaders/headers/gx2utClearSurfaceRect.h"
31
32 /*
33 * 0 - Simple clear shader
34 */
35 static const GX2VertexShader* const VS_SHADERS[] = { &gx2utClearSurfaceRect_VS};
36
37 static const GX2PixelShader* const PS_SHADERS[] = { &gx2utClearSurfaceRect_PS};
38
39 static const u32 NUM_SHADERS = 1;
40
// Per-variant shader state used by the clear routines in this file.
typedef struct _ClearSurfaceShader {
    // Vertex and pixel shader for this variant. Both are taken from the
    // generated shader header (via VS_SHADERS / PS_SHADERS). The fetch shader
    // is generated at run-time, so it is handled separately and is not stored
    // in this struct.
    const GX2VertexShader *pVertexShader;
    const GX2PixelShader *pPixelShader;

    // Uniform offsets looked up once at init time:
    //   u_positionLocation   - "u_positions" in the vertex shader
    //   u_clearColorLocation - "u_clearColor" in the pixel shader
    u32 u_positionLocation;
    u32 u_clearColorLocation;
} ClearSurfaceShader;
54
// For now, share one fetch shader buffer for all shaders, since it should be identical.
// FETCH_SHADER_SIZE is the size in bytes of that buffer; GX2UTClearSurfaceRectInit()
// asserts that GX2CalcFetchShaderSize(0) fits in it.
#define FETCH_SHADER_SIZE 32 //hard code this value for now
ALIGNVAR(GX2_SHADER_ALIGNMENT) static u8 g_GX2UTFetchShader[FETCH_SHADER_SIZE];


// One entry per shader variant; filled in by GX2UTClearSurfaceRectInit().
static ClearSurfaceShader g_clearSurfaceShader[NUM_SHADERS];
// Fetch shader object backed by g_GX2UTFetchShader. It is initialized with a
// count of 0 and a NULL attribute list (presumably "no vertex attribute
// streams" -- confirm against the GX2InitFetchShader documentation).
static GX2FetchShader fetchShader;
// Two-component float position; element type of the unit-quad corner table.
typedef struct _VtxFmtF32x2 {
    f32 position[2]; // [0] = x, [1] = y
} VtxFmtF32x2;
65
// Corners of the unit square in the order (0,0), (1,0), (0,1), (1,1). The
// clear routines scale and offset these to build the destination rectangle.
// This data works for both RECT and TRISTRIP
static const VtxFmtF32x2 CLEAR_SURFACE_RECT_POSITION_DATA[] =
{
    {0.0f, 0.0f},
    {1.0f, 0.0f},
    {0.0f, 1.0f},
    {1.0f, 1.0f}
};

// Number of corners in the table above; used as the vertex count for GX2Draw.
static const u32 VERTEX_COUNT = sizeof(CLEAR_SURFACE_RECT_POSITION_DATA)
                              / sizeof(CLEAR_SURFACE_RECT_POSITION_DATA[0]);
77
78 // Initializes how surfaces will be copied
GX2UTClearSurfaceRectInit(void)79 void GX2UTClearSurfaceRectInit(void)
80 {
81 static GX2Boolean initDone = GX2_FALSE;
82
83 if (initDone == GX2_TRUE)
84 {
85 //OSReport("Skipping init in GX2UTClearSurfaceRectInit\n");
86 return;
87 }
88
89 // Setup shaders
90 u32 i;
91
92 GX2NotifyMemAlloc(g_GX2UTFetchShader,
93 FETCH_SHADER_SIZE,
94 GX2_SHADER_ALIGNMENT);
95
96 for (i = 0; i < NUM_SHADERS; ++i)
97 {
98 g_clearSurfaceShader[i].pVertexShader = VS_SHADERS[i];
99 g_clearSurfaceShader[i].pPixelShader = PS_SHADERS[i];
100
101 GX2Invalidate(GX2_INVALIDATE_CPU_SHADER,
102 g_clearSurfaceShader[i].pVertexShader->shaderPtr,
103 g_clearSurfaceShader[i].pVertexShader->shaderSize);
104
105 GX2NotifyMemAlloc(g_clearSurfaceShader[i].pVertexShader->shaderPtr,
106 g_clearSurfaceShader[i].pVertexShader->shaderSize,
107 GX2_SHADER_ALIGNMENT);
108
109 GX2Invalidate(GX2_INVALIDATE_CPU_SHADER,
110 g_clearSurfaceShader[i].pPixelShader->shaderPtr,
111 g_clearSurfaceShader[i].pPixelShader->shaderSize);
112
113 GX2NotifyMemAlloc(g_clearSurfaceShader[i].pPixelShader->shaderPtr,
114 g_clearSurfaceShader[i].pPixelShader->shaderSize,
115 GX2_SHADER_ALIGNMENT);
116
117 // Lookup the uniform locations in the vertex shader and pixel shader.
118 // The shader author chose the names "u_positions", "u_depth", and "u_clearColor"
119 g_clearSurfaceShader[i].u_positionLocation =
120 (u32)GX2GetVertexUniformVarOffset(g_clearSurfaceShader[i].pVertexShader, "u_positions");
121 g_clearSurfaceShader[i].u_clearColorLocation =
122 (u32)GX2GetPixelUniformVarOffset(g_clearSurfaceShader[i].pPixelShader, "u_clearColor");
123 ASSERT((g_clearSurfaceShader[i].u_positionLocation != GX2_UNIFORM_VAR_INVALID_OFFSET)
124 && (g_clearSurfaceShader[i].u_clearColorLocation != GX2_UNIFORM_VAR_INVALID_OFFSET)
125 && "Couldn't find the correct vertex and pixel shader uniforms.");
126
127 }
128
129 ASSERT(GX2CalcFetchShaderSize(0) <= sizeof(g_GX2UTFetchShader) && "g_GX2UTFetchShader too small!\n");
130 GX2InitFetchShader(&fetchShader, g_GX2UTFetchShader, 0, NULL);
131
132 initDone = GX2_TRUE;
133 }
134
135 /// returns the bits of a floating point value as an unsigned integer
FloatToBits(f32 f)136 static u32 FloatToBits(f32 f)
137 {
138 union { f32 f;
139 u32 u; } converter;
140 converter.f = f;
141 return converter.u;
142 }
143
144 /// returns the bits of a floating point value as an unsigned integer
BitsToFloat(u32 u)145 static f32 BitsToFloat(u32 u)
146 {
147 union { f32 f;
148 u32 u; } converter;
149 converter.u = u;
150 return converter.f;
151 }
152
153 /*******************************************************************************
154 * ConvertFP32ToSmallFP
155 *
156 * @brief
157 * Converts 32 bit floating point value to one with the supplied
158 * representation. Code assume the dest format follows representation
159 * similar to the fp32 IEEE format.
160 *
161 * @return
162 * Converted value as an unsigned integer.
163 *******************************************************************************/
ConvertFP32ToSmallFP(f32 fp32,BOOL signBit,u32 expBits,u32 mantBits)164 u32 ConvertFP32ToSmallFP(
165 f32 fp32, ///< Original fp32 value
166 BOOL signBit, ///< Sign bit in destination format
167 u32 expBits, ///< Exponent bits in destination format
168 u32 mantBits) ///< Mantissa bits in destination format
169 {
170 u32 uiFp32, fp32Sign, fp32Exp, fp32Mant, fp32MantBits, fp32Bias;
171 u32 maxExp, bias;
172 u32 outFp;
173
174 // Extract relevant values from input value
175 uiFp32 = FloatToBits(fp32);
176 fp32Sign = (uiFp32 & 0x80000000) >> 31;
177 fp32Exp = (uiFp32 & 0x7F800000) >> 23;
178 fp32Mant = uiFp32 & 0x007FFFFF;
179 fp32MantBits = 23;
180 fp32Bias = 127;
181
182 // Compute exponent bias for destination format. This is also the max positive (and negative)
183 // unbiased exponents in the format.
184 bias = (1 << (expBits - 1)) - 1;
185
186 // Compute max exponent reserved for NaN and Infs
187 maxExp = (1 << expBits) - 1;
188
189 if (fp32Exp == 0xff)
190 {
191 // Handle NaNs and Infs first. According to the DX10 spec these get converted to NaNs
192 // and Infs in the lower precision format when available, otherwise they go to 0. We
193 // assume the destination format has representations for NaNs and Infs except for when
194 // there is no sign bit to represent signed NaN and Inf.
195 if ((signBit == TRUE) || (fp32Sign == 0))
196 {
197 outFp = (fp32Sign << (expBits + mantBits)) | (maxExp << mantBits) |
198 (fp32Mant >> (fp32MantBits - mantBits));
199 }
200 else
201 {
202 outFp = 0;
203 }
204 }
205 else if ((signBit == FALSE) && (fp32Sign == 1))
206 {
207 // Negative numbers go to zero if they can't be represented
208 outFp = 0;
209 }
210 else if (fp32Exp > (fp32Bias + bias))
211 {
212 // Too large to be represented in the destination format are made into signed MAX_FLOAT.
213 outFp = (fp32Sign << (expBits + mantBits)) | ((maxExp - 1) << mantBits) |
214 ((1 << mantBits) - 1);
215 }
216 else if (fp32Exp < (fp32Bias - (bias - 1)))
217 {
218 // Too small to be represented as a normalized number or it's zero
219 u32 shift;
220
221 // Shift amount is the difference between the fp32 exponent and the the minimum
222 // exponent in the dest format.
223 shift = fp32Bias - (bias - 1) - fp32Exp;
224
225 // Large enough shifts will generate 0
226 if (shift > (fp32MantBits + 1))
227 {
228 fp32Mant = 0;
229 }
230 else
231 {
232 // Add in hidden bit and right shift to align to new format
233 fp32Mant = (fp32Mant | 0x00800000) >> (fp32MantBits - mantBits);
234 fp32Mant = fp32Mant >> shift;
235 }
236
237 outFp = (fp32Sign << (expBits + mantBits)) | fp32Mant;
238 }
239 else
240 {
241 // Can be represented as a normalized number in the new format
242 outFp = (fp32Sign << (expBits + mantBits)) |
243 ((fp32Exp + bias - fp32Bias) << mantBits) |
244 (fp32Mant >> (fp32MantBits - mantBits));
245 }
246
247 // Sanity check
248 ASSERT((outFp & ~((1 << (signBit + expBits + mantBits)) - 1)) == 0x0);
249
250 return outFp;
251 }
252
253
ConvertFP32ToUnorm(f32 fp32,u32 numBits)254 u32 ConvertFP32ToUnorm(
255 f32 fp32, ///< fp32 value to convert
256 u32 numBits) ///< number of bits in destination unorm
257 {
258 u32 uiFp32, out, maxVal;
259 u32 fp32Sign, fp32Exp, fp32Mant;
260
261 // Extract relevant floating point parts
262 uiFp32 = FloatToBits(fp32);
263 fp32Sign = (uiFp32 & 0x80000000) >> 31;
264 fp32Exp = (uiFp32 & 0x7F800000) >> 23;
265 fp32Mant = uiFp32 & 0x007FFFFF;
266
267 // Maximum representable unorm
268 maxVal = (1 << numBits) - 1;
269
270 // Handle NaNs and Infs values separately
271 if (fp32Exp == 0xff)
272 {
273 // Nans and -Inf go to 0
274 if ((fp32Mant != 0x0) || (fp32Sign == 1))
275 {
276 out = 0;
277 }
278 else
279 {
280 // +Inf goes to max representable value
281 out = maxVal;
282 }
283 }
284 else if (fp32 > 1.0f)
285 {
286 out = maxVal;
287 }
288 else if (fp32 < 0.0f)
289 {
290 out = 0;
291 }
292 else
293 {
294 out = static_cast<u32>((fp32 * maxVal) + 0.5f);
295 }
296
297 ASSERT(out <= maxVal);
298
299 return out;
300 }
301
302 /*******************************************************************************
303 * PackClearColor
304 *
305 * @brief
306 * Pack the clear color for the given format into a 32 bit quantity.
307 *
308 * @return
309 * Packed 32-bit clear value.
310 *******************************************************************************/
PackClearColor(f32 r,f32 g,f32 b,f32 a,GX2SurfaceFormat format)311 u32 PackClearColor(f32 r, f32 g, f32 b, f32 a, ///< Clear color
312 GX2SurfaceFormat format) ///< Color format
313 {
314 u32 clearColor = 0;
315
316 if ((format == GX2_SURFACE_FORMAT_TCS_R8_G8_B8_A8_UNORM))
317 {
318 u32 red, green, blue, alpha;
319
320 red = ConvertFP32ToUnorm(r, 8);
321 green = ConvertFP32ToUnorm(g, 8);
322 blue = ConvertFP32ToUnorm(b, 8);
323 alpha = ConvertFP32ToUnorm(a, 8);
324
325 clearColor = (alpha << 24) | (blue << 16) | (green << 8) | red;
326 }
327 else if ((format == GX2_SURFACE_FORMAT_TCS_R10_G10_B10_A2_UNORM) ||
328 (format == GX2_SURFACE_FORMAT_TCS_A2_B10_G10_R10_UNORM))
329 {
330 u32 red, green, blue, alpha;
331
332 red = ConvertFP32ToUnorm(r, 10);
333 green = ConvertFP32ToUnorm(g, 10);
334 blue = ConvertFP32ToUnorm(b, 10);
335 alpha = ConvertFP32ToUnorm(a, 2);
336
337 if (format == GX2_SURFACE_FORMAT_TCS_R10_G10_B10_A2_UNORM)
338 {
339 clearColor = (alpha << 30) | (blue << 20) | (green << 10) | red;
340 }
341 else if (format == GX2_SURFACE_FORMAT_TCS_A2_B10_G10_R10_UNORM)
342 {
343 clearColor = (blue << 22) | (green << 12) | (red << 2) | alpha;
344 }
345 }
346 else if (format == GX2_SURFACE_FORMAT_TC_R11_G11_B10_FLOAT)
347 {
348 u32 redFP11, greenFP11, blueFP10;
349
350 redFP11 = ConvertFP32ToSmallFP(r, FALSE, 5, 6);
351 greenFP11 = ConvertFP32ToSmallFP(g, FALSE, 5, 6);
352 blueFP10 = ConvertFP32ToSmallFP(b, FALSE, 5, 5);
353
354 clearColor = (blueFP10 << 22) | (greenFP11 << 11) | redFP11;
355 }
356 else if (format == GX2_SURFACE_FORMAT_TC_R16_G16_FLOAT)
357 {
358 u32 redFP16, greenFP16;
359
360 redFP16 = ConvertFP32ToSmallFP(r, GX2_TRUE, 5, 10);
361 greenFP16 = ConvertFP32ToSmallFP(g, GX2_TRUE, 5, 10);
362
363 clearColor = (greenFP16 << 16) | (redFP16);
364 }
365
366 else
367 {
368 // Not implemented yet.
369 ASSERT(FALSE);
370 }
371
372 return clearColor;
373 }
374
375 // Identify CB formats that can benefit by being cleared with the DB
ClearUsingDB(GX2ColorBuffer * colorBuffer,GX2UTRect * dstRect)376 GX2Boolean ClearUsingDB(GX2ColorBuffer *colorBuffer, GX2UTRect *dstRect)
377 {
378 GX2Surface *dstSurface = &colorBuffer->surface;
379 u32 dstMip = colorBuffer->viewMip;
380
381 // Can't apply this optimization unless we are clearing the entire surface
382 if ((dstRect->left != 0) ||
383 (dstRect->top != 0) ||
384 (dstRect->right != dstSurface->width >> dstMip) ||
385 (dstRect->bottom != dstSurface->height >> dstMip))
386 {
387 return GX2_FALSE;
388 }
389
390 // Can't apply this optimization if the surface size is not a multiple of 8x8
391 // This is because the color and depth micro-tile formats are different
392 if ((dstRect->right & 0x7) ||
393 (dstRect->bottom & 0x7))
394 {
395 return GX2_FALSE;
396 }
397
398 switch (dstSurface->format)
399 {
400 case GX2_SURFACE_FORMAT_TCS_R8_G8_B8_A8_UNORM:
401 case GX2_SURFACE_FORMAT_TCS_R10_G10_B10_A2_UNORM:
402 case GX2_SURFACE_FORMAT_TCS_A2_B10_G10_R10_UNORM:
403 case GX2_SURFACE_FORMAT_TC_R11_G11_B10_FLOAT:
404 case GX2_SURFACE_FORMAT_TC_R16_G16_FLOAT:
405 case GX2_SURFACE_FORMAT_TCD_R32_FLOAT:
406 break;
407 default:
408 return GX2_FALSE; // NO
409 }
410
411 switch (dstSurface->tileMode)
412 {
413 case GX2_TILE_MODE_1D_TILED_THIN1:
414 case GX2_TILE_MODE_2D_TILED_THIN1:
415 case GX2_TILE_MODE_2D_TILED_THIN2:
416 case GX2_TILE_MODE_2D_TILED_THIN4:
417 case GX2_TILE_MODE_2B_TILED_THIN1:
418 case GX2_TILE_MODE_2B_TILED_THIN2:
419 case GX2_TILE_MODE_2B_TILED_THIN4:
420 break;
421
422 default:
423 return GX2_FALSE;
424 }
425
426 if (dstSurface->aa != GX2_AA_MODE_1X)
427 {
428 return GX2_FALSE;
429 }
430
431 return GX2_TRUE;
432 }
433
IsValidDBFloat(u32 val)434 GX2Boolean IsValidDBFloat(u32 val)
435 {
436 GX2Boolean valid = GX2_TRUE;
437
438 // The following floating point values are not preserved by the DB when the depth value comes
439 // from vertex Z:
440 // - DeNorms are flushed to 0
441 // - NaNs (and Infs) are converted to 0
442 if (((val != 0x0) && ((val & 0x7F800000) == 0x0)) || // Denorms
443 ((val & 0x7F800000) == 0x7F800000)) // NaNs & Infs
444 {
445 valid = GX2_FALSE;
446 }
447
448 return valid;
449 }
450
// Clears a rectangle of a color buffer and/or a depth-stencil buffer by
// drawing one rectangle per view slice with the clear shader.
//
//   colorBuffer  - color target to clear, or NULL
//   depthBuffer  - depth/stencil target to clear, or NULL; at least one of
//                  the two buffers must be non-NULL (asserted)
//   r, g, b, a   - clear color, passed to the pixel shader as "u_clearColor"
//   depthValue   - clear depth; asserted to be within [0, 1]
//   stencilValue - stencil reference written when GX2_CLEAR_STENCIL is set
//   clearFlags   - GX2_CLEAR_* bits: DEPTH / STENCIL select what is written
//                  to the depth buffer, D_REG / S_REG additionally update the
//                  buffer's clear registers
//   hiStencil    - optional; passed to GX2SetHiStencilInfo when non-NULL
//   dstRect      - region to clear; must be non-NULL and non-empty (asserted)
//
// Color-only clears that satisfy ClearUsingDB() and whose packed color is a
// valid DB float are redirected through the depth unit by viewing the color
// surface as an R32_FLOAT depth buffer.
//
// This function changes ColorControl, DepthStencilControl, viewport, scissor,
// shaders and render targets and does not restore them.
void GX2UTClearRectOp(GX2ColorBuffer *colorBuffer, GX2DepthBuffer *depthBuffer,
                      f32 r, f32 g, f32 b, f32 a,
                      f32 depthValue, u8 stencilValue,
                      GX2ClearMode clearFlags, GX2HiStencilInfo *hiStencil,
                      GX2UTRect *dstRect)
{
    u32 cbFirstSlice = 0;
    u32 dbFirstSlice = 0;
    u32 numSlices = 0;
    u32 dstWidth, dstHeight, uDepthValue;
    u32 dstMip, dstSlice;
    GX2CompareFunction stencilFunc = GX2_COMPARE_NEVER;
    GX2Boolean bColorAsDepth = GX2_FALSE;
    GX2Boolean depthTestEnable = GX2_FALSE;
    GX2Boolean stencilTestEnable = GX2_DISABLE;
    // Local working copies; the caller's buffers are never modified.
    GX2ColorBuffer cb;
    GX2DepthBuffer db;

    GX2UTDebugTagIndent(__func__);

    // At least one target and a non-empty destination rectangle are required
    ASSERT((colorBuffer != NULL || depthBuffer != NULL) && (dstRect != NULL));
    ASSERT((dstRect->bottom > dstRect->top) && (dstRect->right > dstRect->left) && "Invalid destination region!");
    ASSERT(depthValue >= 0.0f && depthValue <= 1.0f);

    // Verify the parameters: when both targets are given they must agree in
    // viewed size, AA mode and slice count
    if (colorBuffer && depthBuffer)
    {
        ASSERT((colorBuffer->surface.width >> colorBuffer->viewMip) == (depthBuffer->surface.width >> depthBuffer->viewMip));
        ASSERT((colorBuffer->surface.height >> colorBuffer->viewMip) == (depthBuffer->surface.height >> depthBuffer->viewMip));
        ASSERT(colorBuffer->surface.aa == depthBuffer->surface.aa);
        ASSERT(colorBuffer->viewNumSlices == depthBuffer->viewNumSlices);
    }


    // Optionally update the depth buffer's clear registers
    if (clearFlags & GX2_CLEAR_D_REG) {
        ASSERT(depthBuffer != NULL);
        GX2SetClearDepth(depthBuffer, depthValue);
    }
    if (clearFlags & GX2_CLEAR_S_REG) {
        ASSERT(depthBuffer != NULL);
        GX2SetClearStencil(depthBuffer, stencilValue);
    }

    // Initialize the resources needed to clear surfaces.
    // This function only does work the first time it's called.
    GX2UTClearSurfaceRectInit();

    // The depth value travels to the shader as raw bits so that the
    // color-as-depth path below can substitute a packed color for it.
    uDepthValue = FloatToBits(depthValue);

    // Some color-only clears can be accelerated by using the depth unit instead
    if (colorBuffer != NULL && depthBuffer == NULL &&
        ClearUsingDB(colorBuffer, dstRect))
    {
        u32 dv = 0;

        if (colorBuffer->surface.format == GX2_SURFACE_FORMAT_TCD_R32_FLOAT)
        {
            // only use red component of clearcolor
            dv = FloatToBits(r);
        }
        else
        {
            // Pack the clear color into the 32-bit texel layout of the format
            dv = PackClearColor(r, g, b, a, colorBuffer->surface.format);
        }

        if (IsValidDBFloat(dv))
        {
            // DB can only handle valid floats
            uDepthValue = dv;
            dstSlice = colorBuffer->viewFirstSlice;
            dstMip = colorBuffer->viewMip;

            // Populate db and associated surface accordingly: same memory as
            // the color surface, viewed as an R32_FLOAT depth buffer without HiZ
            db.surface = colorBuffer->surface;
            db.surface.format = GX2_SURFACE_FORMAT_TCD_R32_FLOAT;
            db.surface.use = GX2_SURFACE_USE_DEPTH_BUFFER;
            db.viewMip = dstMip;
            db.viewFirstSlice = dstSlice;
            db.viewNumSlices = colorBuffer->viewNumSlices;
            db.hiZPtr = NULL;
            db.hiZSize = 0;

            // Instruct logic below that we are clearing depth only
            clearFlags = GX2_CLEAR_DEPTH;
            colorBuffer = NULL;
            depthBuffer = &db;

            GX2SetSpecialState(GX2_SPECIAL_STATE_CLEAR_COLOR_AS_DEPTH, GX2_ENABLE);
            bColorAsDepth = GX2_TRUE;
        }
    }

    // Only set if Color Writes are on
    if (colorBuffer != NULL)
    {
        // If AA, must have auxPtr + auxSize
        ASSERT(colorBuffer->surface.aa == GX2_AA_MODE_1X ||
            (colorBuffer->auxPtr != NULL && colorBuffer->auxSize != 0));

        //Currently we do not support clearing BC formats. Use GX2ClearColor() for this.
        ASSERT(!GX2SurfaceIsCompressed(colorBuffer->surface.format));
        ASSERT((colorBuffer->viewFirstSlice + colorBuffer->viewNumSlices <= colorBuffer->surface.depth));

        // Dimensions of the viewed mip level, at least 1x1
        dstMip = colorBuffer->viewMip;
        dstWidth = GX2Max(1, colorBuffer->surface.width >> dstMip);
        dstHeight = GX2Max(1, colorBuffer->surface.height >> dstMip);

        // Create shallow copy of dest surface to be used as render target
        cb = *colorBuffer;
        cb.surface.use = GX2_SURFACE_USE_COLOR_BUFFER_TEXTURE;
        cb.viewNumSlices = 1;
        // Will reinit regs later (once per slice, in the draw loop)

        if (cb.surface.format == GX2_SURFACE_FORMAT_TC_R16_G16_FLOAT)
        {
            // Render the R16G16 float surface as R8G8B8A8 unorm and feed the
            // shader the bytes of the packed half-float pair as its color
            cb.surface.format = GX2_SURFACE_FORMAT_TCS_R8_G8_B8_A8_UNORM;

            // Adjust clear color
            u32 packedColor;

            // Pack 2 channel FP16 clear color into a 32 bit quantity
            packedColor = PackClearColor(r, g, b, a,
                GX2_SURFACE_FORMAT_TC_R16_G16_FLOAT);

            // Extract RGBA8 values from packed color and convert to clear values
            a = ((packedColor >> 24) & 0xff) * (1.0f / 255.0f);
            b = ((packedColor >> 16) & 0xff) * (1.0f / 255.0f);
            g = ((packedColor >> 8) & 0xff) * (1.0f / 255.0f);
            r = (packedColor & 0xff) * (1.0f / 255.0f);
        }

        // Enable Color Writes
        GX2SetColorControl(GX2_LOGIC_OP_COPY,
            0, //disable blending
            GX2_DISABLE,
            GX2_ENABLE);

        numSlices = colorBuffer->viewNumSlices;
        cbFirstSlice = cb.viewFirstSlice;
    }
    else
    {
        // Use Depth Buffer dimensions
        dstMip = depthBuffer->viewMip;
        dstWidth = GX2Max(1, depthBuffer->surface.width >> dstMip);
        dstHeight = GX2Max(1, depthBuffer->surface.height >> dstMip);

        // Disable Color Writes
        GX2SetColorControl(GX2_LOGIC_OP_COPY,
            0, //disable blending
            GX2_DISABLE,
            GX2_DISABLE);

        GX2SetAAMode(depthBuffer->surface.aa);
    }

    // Render to destination surface dimensions
    GX2SetViewport(0, 0, (f32)dstWidth, (f32)dstHeight, 0.0f, 1.0f);
    GX2SetScissor(0, 0, dstWidth, dstHeight);

    // Only set if Depth or Stencil Writes are on
    if (depthBuffer != NULL)
    {
        ASSERT((depthBuffer->viewFirstSlice + depthBuffer->viewNumSlices <= depthBuffer->surface.depth));

        // Working copy that views one slice at a time
        db = *depthBuffer;
        numSlices = depthBuffer->viewNumSlices;
        db.viewNumSlices = 1;
        // Will reinit regs later (once per slice, in the draw loop)

        if (clearFlags & GX2_CLEAR_DEPTH)
            depthTestEnable = GX2_TRUE;
        else
            depthTestEnable = GX2_FALSE;

        if (clearFlags & GX2_CLEAR_STENCIL)
        {
            // Every stencil operation below is REPLACE, so the reference
            // value is what ends up in the buffer
            GX2SetStencilMask(0xff, //preMaskFront
                0xff, //writeMaskFront
                stencilValue, //refFront
                0xff, //preMaskBack
                0xff, //writeMaskBack
                stencilValue);//refBack
            stencilFunc = GX2_COMPARE_ALWAYS;
            stencilTestEnable = GX2_ENABLE;
        }
        else
        {
            stencilFunc = GX2_COMPARE_NEVER;
            stencilTestEnable = GX2_DISABLE;
        }

        // fast clears require HiZ and all edges on micro-tile boundaries
        // (the same condition is re-evaluated at the end to disable it again)
        if ((depthBuffer->hiZPtr) &&
            !((dstRect->bottom | dstRect->top | dstRect->left | dstRect->right) & 0x7))

        {
            GX2SetSpecialState(GX2_SPECIAL_STATE_CLEAR_HIZ, GX2_ENABLE);
        }

        dbFirstSlice = db.viewFirstSlice;
    }

    // Depth Writes
    GX2SetDepthStencilControl(depthTestEnable, //depthTestEnable
        GX2_ENABLE, //depthWriteEnable
        GX2_COMPARE_ALWAYS, //depthFunc
        stencilTestEnable, //stencilTestEnable
        stencilTestEnable, //backStencilEnable

        stencilFunc, //frontStencilFunc
        GX2_STENCIL_REPLACE,//frontStencilZPass
        GX2_STENCIL_REPLACE,//frontStencilZFail
        GX2_STENCIL_REPLACE,//frontStencilFail

        stencilFunc, //backStencilFunc
        GX2_STENCIL_REPLACE,//backStencilZPass
        GX2_STENCIL_REPLACE,//backStencilZFail
        GX2_STENCIL_REPLACE //backStencilFail
        );

    if (hiStencil != NULL)
        GX2SetHiStencilInfo(hiStencil);

    // Only one clear shader
    u32 shaderIdx = 0;

    // Set shaders
    GX2SetFetchShader(&fetchShader);
    GX2SetVertexShader(g_clearSurfaceShader[shaderIdx].pVertexShader);
    GX2SetPixelShader(g_clearSurfaceShader[shaderIdx].pPixelShader);

    // Set the uniforms to be used by the vertex shader and pixel shader.
    // Base (left, top) and scale (width, height) of the destination
    // rectangle, in the same units as dstRect.
    f32 position_base_scale[] =
    {
        (f32)dstRect->left,
        (f32)dstRect->top,
        (f32)dstRect->right - (f32)dstRect->left,
        (f32)dstRect->bottom - (f32)dstRect->top,
    };

    // One 4-component uniform per corner: x, y, depth (as raw bits), 1
    for (int i = 0; i < 4; i++)
    {
        f32 pos[] = {
            position_base_scale[0] + position_base_scale[2] * CLEAR_SURFACE_RECT_POSITION_DATA[i].position[0],
            position_base_scale[1] + position_base_scale[3] * CLEAR_SURFACE_RECT_POSITION_DATA[i].position[1],
            BitsToFloat(uDepthValue),
            1.0f
        };

        GX2SetVertexUniformReg(g_clearSurfaceShader[shaderIdx].u_positionLocation + i*4, 1*4, pos);
    }

    f32 clearColor[] = {r, g, b, a};

    GX2SetPixelUniformReg(g_clearSurfaceShader[shaderIdx].u_clearColorLocation, 1*4, clearColor);

    // Draw the rectangle once per slice of the view
    for (int slice = 0; slice < numSlices; slice++)
    {
        // Reset the color buffer to the next slice
        if (colorBuffer)
        {
            cb.viewFirstSlice = cbFirstSlice + slice;
            GX2InitColorBufferRegs(&cb);
            GX2SetColorBuffer(&cb, GX2_RENDER_TARGET_0);
        }

        // Reset the depth buffer to the next slice
        if (depthBuffer)
        {
            db.viewFirstSlice = dbFirstSlice + slice;
            GX2InitDepthBufferRegs(&db);
            GX2SetDepthBuffer(&db);
        }

        // Draw the rectangle
        GX2Draw(GX2_PRIMITIVE_RECTS, VERTEX_COUNT);
    }

    if (bColorAsDepth)
    {
        GX2SetSpecialState(GX2_SPECIAL_STATE_CLEAR_COLOR_AS_DEPTH, GX2_DISABLE);
    }

    // Invalidate the memory that was written: the mip chain when a mip level
    // other than 0 was viewed, the base image otherwise. On the
    // color-as-depth path colorBuffer is NULL here and depthBuffer refers to
    // the color surface, so only the depth-buffer invalidate runs.
    if (colorBuffer != NULL)
    {
        if ( colorBuffer->viewMip )
            GX2Invalidate(GX2_INVALIDATE_COLOR_BUFFER, colorBuffer->surface.mipPtr, colorBuffer->surface.mipSize);
        else
            GX2Invalidate(GX2_INVALIDATE_COLOR_BUFFER, colorBuffer->surface.imagePtr, colorBuffer->surface.imageSize);
    }

    if (depthBuffer != NULL)
    {
        if ( depthBuffer->viewMip )
            GX2Invalidate(GX2_INVALIDATE_DEPTH_BUFFER, depthBuffer->surface.mipPtr, depthBuffer->surface.mipSize);
        else
            GX2Invalidate(GX2_INVALIDATE_DEPTH_BUFFER, depthBuffer->surface.imagePtr, depthBuffer->surface.imageSize);

        // Disable HiZ optimization (must mirror the enable condition above)
        if ((depthBuffer->hiZPtr) &&
            !((dstRect->bottom | dstRect->top | dstRect->left | dstRect->right) & 0x7))

        {
            GX2SetSpecialState(GX2_SPECIAL_STATE_CLEAR_HIZ, GX2_DISABLE);
        }
    }
    GX2UTDebugTagUndent();
}
763
// Clears a region of the HiStencil buffer
// Setting the rectangular regions to a top-left of (0,0) and a bottom-right of (width, height)
// will blt the entire surface without any flipping (the right and bottom are exclusive).
// Must be invoked to invalidate HiStencil when changing pretest state during a frame.
//
//   dstRect     - region to invalidate; must be non-NULL and non-empty (asserted)
//   depthBuffer - depth buffer with a HiZ buffer attached; must be non-NULL
//                 with a non-NULL hiZPtr (asserted)
//
// Color, depth and stencil writes are all disabled for the draw. The function
// disables state shadowing and changes ColorControl, DepthStencilControl,
// the stencil mask, viewport, scissor, depth buffer, HiStencil info and
// shaders without restoring them.
void GX2UTInvalidateHiStencilRect(GX2UTRect *dstRect, GX2DepthBuffer *depthBuffer)
{
    u32 dstWidth, dstHeight;

    GX2UTDebugTagIndent(__func__);

    //Disable state shadowing. If your app is using state shadowing,
    //you will need to restore the context after calling this function.
    GX2SetContextState(NULL);

    // Initialize the resources needed to clear surfaces.
    // This function only does work the first time it's called.
    GX2UTClearSurfaceRectInit();

    // must have a depthBuffer w/ hiZPtr
    // blt with width or height <= 0 does nothing
    ASSERT((depthBuffer != NULL) && (depthBuffer->hiZPtr != NULL) && (dstRect != NULL));
    ASSERT((dstRect->bottom > dstRect->top) && (dstRect->right > dstRect->left) && "Invalid destination region!");

    // Disable Color Writes
    GX2SetColorControl(GX2_LOGIC_OP_COPY,
        0, //disable blending
        GX2_DISABLE,
        GX2_DISABLE);

    // Disable Depth and Stencil Writes
    GX2SetDepthStencilControl(GX2_DISABLE, // depthEnable
        GX2_DISABLE, // depthWriteEnable
        GX2_COMPARE_ALWAYS, // depthFunc
        GX2_DISABLE, // stencilTestEnable
        GX2_DISABLE, // backStencilEnable
        GX2_COMPARE_ALWAYS, // frontStencilFunc
        GX2_STENCIL_KEEP, // frontStencilZPass
        GX2_STENCIL_KEEP, // frontStencilZFail
        GX2_STENCIL_KEEP, // frontStencilFail
        GX2_COMPARE_ALWAYS, // backStencilFunc
        GX2_STENCIL_KEEP, // backStencilZPass
        GX2_STENCIL_KEEP, // backStencilZFail
        GX2_STENCIL_KEEP); // backStencilFail
    GX2SetStencilMask(0x00, //preMaskFront
        0x00, //writeMaskFront
        0x00, //refFront
        0x00, //preMaskBack
        0x00, //writeMaskBack
        0x00);//refBack

    // NOTE(review): the base-level size is used here without applying
    // depthBuffer->viewMip, unlike GX2UTClearRectOp -- confirm this is intended.
    dstWidth = depthBuffer->surface.width;
    dstHeight = depthBuffer->surface.height;

    // Render to destination surface dimensions
    // NOTE(review): the viewport/scissor origin is (dstRect->left,
    // dstRect->bottom) while the vertex positions below are computed relative
    // to the full surface -- confirm the intended origin.
    GX2SetViewport(dstRect->left, dstRect->bottom, (f32)dstWidth, (f32)dstHeight, 0.0f, 1.0f);
    GX2SetScissor(dstRect->left, dstRect->bottom, dstWidth, dstHeight);

    GX2SetDepthBuffer(depthBuffer);

    // Create HiStencilInfo that will clear the HiStencil pretest results:
    // both states are set up identically (ALWAYS, reference 0, mask 0xFF)
    // and disabled
    GX2HiStencilInfo hiStencilInfo;
    hiStencilInfo.state[0].function = GX2_COMPARE_ALWAYS;
    hiStencilInfo.state[0].reference = 0;
    hiStencilInfo.state[0].mask = 0xFF;
    hiStencilInfo.state[0].enable = GX2_FALSE;
    hiStencilInfo.state[1].function = GX2_COMPARE_ALWAYS;
    hiStencilInfo.state[1].reference = 0;
    hiStencilInfo.state[1].mask = 0xFF;
    hiStencilInfo.state[1].enable = GX2_FALSE;
    GX2InitHiStencilInfoRegs(&hiStencilInfo);
    GX2SetHiStencilInfo(&hiStencilInfo);

    // Only one clear shader
    u32 shaderIdx = 0;

    // Set shaders
    // NOTE: No fetch shader is needed for our shaders, see GLSL for details.
    // NOTE(review): GX2UTClearRectOp does bind fetchShader for the same
    // shaders -- confirm which of the two is right.
    GX2SetVertexShader(g_clearSurfaceShader[shaderIdx].pVertexShader);
    GX2SetPixelShader(g_clearSurfaceShader[shaderIdx].pPixelShader);

    // Set the uniforms to be used by the vertex shader and pixel shader.
    // Base and scale map dstRect into the [-1, 1] range with y pointing up:
    // left/top become the base, width/height the (y-negated) scale.
    f32 position_base_scale[] =
    {
        -1.0f + 2.0f * (f32)dstRect->left / (f32)dstWidth,
        1.0f - 2.0f * (f32)dstRect->top / (f32)dstHeight,
        2.0f * (f32)(dstRect->right - dstRect->left) / (f32)dstWidth,
        -2.0f * (f32)(dstRect->bottom - dstRect->top) / (f32)dstHeight,
    };

    // One 4-component uniform per corner: x, y, 0, 1
    for (int i = 0; i < 4; i++)
    {
        f32 pos[] = {
            position_base_scale[0] + position_base_scale[2] * CLEAR_SURFACE_RECT_POSITION_DATA[i].position[0],
            position_base_scale[1] + position_base_scale[3] * CLEAR_SURFACE_RECT_POSITION_DATA[i].position[1],
            0.0,
            1.0
        };

        GX2SetVertexUniformReg(g_clearSurfaceShader[shaderIdx].u_positionLocation + i*4, 1*4, pos);
    }

    // We don't need to set clearColor or depthValue because we aren't writing those buffers

    // Draw the quad as a triangle strip
    GX2Draw(GX2_PRIMITIVE_TRIANGLE_STRIP, VERTEX_COUNT);

    GX2UTDebugTagUndent();
}
872
873 //Setup all of the constant renderstate needed for the clear
GX2UTSetClearState(GX2Boolean enable)874 void GX2UTSetClearState(GX2Boolean enable)
875 {
876 if (enable)
877 {
878 // If your application's steady state can be set to GX2UT common state
879 // using a small number of discrete GX2 calls, then customize here
880 // instead of using GX2UTSetCommonState()
881 GX2UTSetCommonState();
882
883 // Enable any special GX2 state
884 GX2SetSpecialState(GX2_SPECIAL_STATE_CLEAR, GX2_ENABLE);
885 // That call just clobbered RasterizerClipControl
886 }
887 else
888 {
889 // Disable any special GX2 state
890 GX2SetSpecialState(GX2_SPECIAL_STATE_CLEAR, GX2_DISABLE);
891
892 // The purpose of the following is to return the context to GX2 default
893 // state. Integration with your application's state management may
894 // differ. The clear operation itself clobbered DepthStencilControl
895 // and ColorControl
896 GX2SetDepthStencilControl(GX2_TRUE, //depthTestEnable
897 GX2_TRUE, //depthWriteEnable
898 GX2_COMPARE_LESS, //depthFunc
899 GX2_FALSE, //stencilTestEnable
900 GX2_FALSE, //backStencilEnable
901 GX2_COMPARE_ALWAYS, //frontStencilFunc
902 GX2_STENCIL_REPLACE, //frontStencilZPass
903 GX2_STENCIL_REPLACE, //frontStencilZFail
904 GX2_STENCIL_REPLACE, //frontStencilFail
905 GX2_COMPARE_ALWAYS, //backStencilFunc
906 GX2_STENCIL_REPLACE, //backStencilZPass
907 GX2_STENCIL_REPLACE, //backStencilZFail
908 GX2_STENCIL_REPLACE);//backStencilFail
909
910 GX2SetColorControl(GX2_LOGIC_OP_COPY, GX2_DISABLE, GX2_DISABLE, GX2_ENABLE);
911 }
912 }
913
GX2UTSetupColorAuxBufferOp(GX2ColorBuffer * colorBuffer)914 void GX2UTSetupColorAuxBufferOp(GX2ColorBuffer *colorBuffer)
915 {
916 if (colorBuffer->auxPtr)
917 {
918 u32 ctileOffset = colorBuffer->_regs[4]; // cmask_offset
919 u32 ctileSize = colorBuffer->auxSize - ctileOffset;
920 u8* ctilePtr = (u8*)colorBuffer->auxPtr + ctileOffset;
921 ASSERT((ctileSize & 0x1FF) == 0 && "Invalid MSAA Color Buffer auxSize!");
922
923 GX2ColorBuffer tmpBuf;
924 u32 width = 16;
925 u32 height = ctileSize / 4 / width;
926
927 GX2InitColorBuffer(&tmpBuf, width, height, GX2_SURFACE_FORMAT_TCS_R8_G8_B8_A8_UNORM, GX2_AA_MODE_1X);
928 GX2InitColorBufferPtr(&tmpBuf, ctilePtr);
929 tmpBuf.surface.tileMode = GX2_TILE_MODE_1D_TILED_THIN1;
930 GX2CalcSurfaceSizeAndAlignment(&tmpBuf.surface);
931 ASSERT(tmpBuf.surface.imageSize == ctileSize && "CMask Tile Size must match calculated image size!");
932 GX2InitColorBufferRegs(&tmpBuf);
933
934 GX2UTClearOp(&tmpBuf, NULL,
935 GX2_AUX_BUFFER_CLEAR_VALUE/255.0f,
936 GX2_AUX_BUFFER_CLEAR_VALUE/255.0f,
937 GX2_AUX_BUFFER_CLEAR_VALUE/255.0f,
938 GX2_AUX_BUFFER_CLEAR_VALUE/255.0f,
939 0.0f, 0u, GX2_CLEAR_NONE, NULL);
940 }
941 }
942
943