1 /*---------------------------------------------------------------------------*
2
3 Copyright 2010-2014 Nintendo. All rights reserved.
4
5 These coded instructions, statements, and computer programs contain
6 proprietary information of Nintendo of America Inc. and/or Nintendo
7 Company Ltd., and are protected by Federal copyright law. They may
8 not be disclosed to third parties or copied or duplicated in any form,
9 in whole or in part, without the prior written consent of Nintendo.
10
11 *---------------------------------------------------------------------------*/
12
13 #include <stdio.h>
14 #include <string.h>
15 #include <math.h>
16
17 #if defined(WIN32) || defined(WIN64)
18 #include <pc/gx2.h>
19 #include <pc/demo.h>
20 #endif
21 #include <cafe/os.h>
22 #include <cafe/mem.h>
23 #include <cafe/gx2.h>
24 #include <cafe/demo.h>
25
26 #include <cafe/gfd.h>
27 #include <cafe/gx2ut.h>
28
29 //Include generated shaders
30 #include "shaders/headers/gx2utClearSurfaceRect.h"
31
32 /*
33 * 0 - Simple clear shader
34 */
35 static const GX2VertexShader* const VS_SHADERS[] = { &gx2utClearSurfaceRect_VS};
36
37 static const GX2PixelShader* const PS_SHADERS[] = { &gx2utClearSurfaceRect_PS};
38
39 static const u32 NUM_SHADERS = 1;
40
// Per-variant state for one clear shader pair.
typedef struct _ClearSurfaceShader {
    // The vertex and pixel shader objects for this variant. Both come from the
    // generated shader header (via VS_SHADERS / PS_SHADERS). The fetch shader
    // is generated at run-time and is therefore kept separately, in the
    // file-scope 'fetchShader' object, rather than in this structure.
    const GX2VertexShader *pVertexShader;
    const GX2PixelShader *pPixelShader;

    // Uniform offsets resolved by name at init time:
    //   u_positionLocation   - offset of "u_positions" in the vertex shader
    //   u_clearColorLocation - offset of "u_clearColor" in the pixel shader
    u32 u_positionLocation;
    u32 u_clearColorLocation;
} ClearSurfaceShader;
54
// For now, share one fetch shader buffer for all shaders, since it should be identical.
// The size is hard-coded; GX2UTClearSurfaceRectInit() asserts that
// GX2CalcFetchShaderSize(0) fits into this buffer.
#define FETCH_SHADER_SIZE 32 //hard code this value for now
ALIGNVAR(GX2_SHADER_ALIGNMENT) static u8 g_GX2UTFetchShader[FETCH_SHADER_SIZE];


// One entry per clear shader variant; filled in by GX2UTClearSurfaceRectInit().
static ClearSurfaceShader g_clearSurfaceShader[NUM_SHADERS];
// Fetch shader object initialized over g_GX2UTFetchShader with zero attribute
// streams (see the GX2InitFetchShader call in GX2UTClearSurfaceRectInit()).
static GX2FetchShader fetchShader;
// A 2D position stored as two f32 components (x, y).
typedef struct _VtxFmtF32x2 {
    f32 position[2];
} VtxFmtF32x2;

// Corners of the unit square. The clear functions scale and offset these by
// the destination rectangle and upload the result as vertex shader uniforms.
// This data works for both RECT and TRISTRIP
static const VtxFmtF32x2 CLEAR_SURFACE_RECT_POSITION_DATA[] =
{
    {0.0f, 0.0f},
    {1.0f, 0.0f},
    {0.0f, 1.0f},
    {1.0f, 1.0f}
};

// Number of corner entries in CLEAR_SURFACE_RECT_POSITION_DATA; used as the
// vertex count of the clear draw calls.
static const u32 VERTEX_COUNT = sizeof(CLEAR_SURFACE_RECT_POSITION_DATA)
                              / sizeof(CLEAR_SURFACE_RECT_POSITION_DATA[0]);
77
78 // Initializes how surfaces will be copied
GX2UTClearSurfaceRectInit(void)79 void GX2UTClearSurfaceRectInit(void)
80 {
81 static GX2Boolean initDone = GX2_FALSE;
82
83 if (initDone == GX2_TRUE)
84 {
85 //OSReport("Skipping init in GX2UTClearSurfaceRectInit\n");
86 return;
87 }
88
89 // Setup shaders
90 u32 i;
91
92 GX2NotifyMemAlloc(g_GX2UTFetchShader,
93 FETCH_SHADER_SIZE,
94 GX2_SHADER_ALIGNMENT);
95
96 for (i = 0; i < NUM_SHADERS; ++i)
97 {
98 g_clearSurfaceShader[i].pVertexShader = VS_SHADERS[i];
99 g_clearSurfaceShader[i].pPixelShader = PS_SHADERS[i];
100
101 GX2Invalidate(GX2_INVALIDATE_CPU_SHADER,
102 g_clearSurfaceShader[i].pVertexShader->shaderPtr,
103 g_clearSurfaceShader[i].pVertexShader->shaderSize);
104
105 GX2NotifyMemAlloc(g_clearSurfaceShader[i].pVertexShader->shaderPtr,
106 g_clearSurfaceShader[i].pVertexShader->shaderSize,
107 GX2_SHADER_ALIGNMENT);
108
109 GX2Invalidate(GX2_INVALIDATE_CPU_SHADER,
110 g_clearSurfaceShader[i].pPixelShader->shaderPtr,
111 g_clearSurfaceShader[i].pPixelShader->shaderSize);
112
113 GX2NotifyMemAlloc(g_clearSurfaceShader[i].pPixelShader->shaderPtr,
114 g_clearSurfaceShader[i].pPixelShader->shaderSize,
115 GX2_SHADER_ALIGNMENT);
116
117 // Lookup the uniform locations in the vertex shader and pixel shader.
118 // The shader author chose the names "u_positions", "u_depth", and "u_clearColor"
119 g_clearSurfaceShader[i].u_positionLocation =
120 (u32)GX2GetVertexUniformVarOffset(g_clearSurfaceShader[i].pVertexShader, "u_positions");
121 g_clearSurfaceShader[i].u_clearColorLocation =
122 (u32)GX2GetPixelUniformVarOffset(g_clearSurfaceShader[i].pPixelShader, "u_clearColor");
123 ASSERT((g_clearSurfaceShader[i].u_positionLocation != GX2_UNIFORM_VAR_INVALID_OFFSET)
124 && (g_clearSurfaceShader[i].u_clearColorLocation != GX2_UNIFORM_VAR_INVALID_OFFSET)
125 && "Couldn't find the correct vertex and pixel shader uniforms.");
126
127 }
128
129 ASSERT(GX2CalcFetchShaderSize(0) <= sizeof(g_GX2UTFetchShader) && "g_GX2UTFetchShader too small!\n");
130 GX2InitFetchShader(&fetchShader, g_GX2UTFetchShader, 0, NULL);
131
132 initDone = GX2_TRUE;
133 }
134
135 /// returns the bits of a floating point value as an unsigned integer
FloatToBits(f32 f)136 static u32 FloatToBits(f32 f)
137 {
138 union { f32 f;
139 u32 u; } converter;
140 converter.f = f;
141 return converter.u;
142 }
143
144 /// returns the bits of a floating point value as an unsigned integer
BitsToFloat(u32 u)145 static f32 BitsToFloat(u32 u)
146 {
147 union { f32 f;
148 u32 u; } converter;
149 converter.u = u;
150 return converter.f;
151 }
152
153 /*******************************************************************************
154 * ConvertFP32ToSmallFP
155 *
156 * @brief
157 * Converts 32 bit floating point value to one with the supplied
158 * representation. Code assume the dest format follows representation
159 * similar to the fp32 IEEE format.
160 *
161 * @return
162 * Converted value as an unsigned integer.
163 *******************************************************************************/
ConvertFP32ToSmallFP(f32 fp32,BOOL signBit,u32 expBits,u32 mantBits)164 u32 ConvertFP32ToSmallFP(
165 f32 fp32, ///< Original fp32 value
166 BOOL signBit, ///< Sign bit in destination format
167 u32 expBits, ///< Exponent bits in destination format
168 u32 mantBits) ///< Mantissa bits in destination format
169 {
170 u32 uiFp32, fp32Sign, fp32Exp, fp32Mant, fp32MantBits, fp32Bias;
171 u32 maxExp, bias;
172 u32 outFp;
173
174 // Extract relevant values from input value
175 uiFp32 = FloatToBits(fp32);
176 fp32Sign = (uiFp32 & 0x80000000) >> 31;
177 fp32Exp = (uiFp32 & 0x7F800000) >> 23;
178 fp32Mant = uiFp32 & 0x007FFFFF;
179 fp32MantBits = 23;
180 fp32Bias = 127;
181
182 // Compute exponent bias for destination format. This is also the max positive (and negative)
183 // unbiased exponents in the format.
184 bias = (1 << (expBits - 1)) - 1;
185
186 // Compute max exponent reserved for NaN and Infs
187 maxExp = (1 << expBits) - 1;
188
189 if (fp32Exp == 0xff)
190 {
191 // Handle NaNs and Infs first. According to the DX10 spec these get converted to NaNs
192 // and Infs in the lower precision format when available, otherwise they go to 0. We
193 // assume the destination format has representations for NaNs and Infs except for when
194 // there is no sign bit to represent signed NaN and Inf.
195 if ((signBit == TRUE) || (fp32Sign == 0))
196 {
197 outFp = (fp32Sign << (expBits + mantBits)) | (maxExp << mantBits) |
198 (fp32Mant >> (fp32MantBits - mantBits));
199 }
200 else
201 {
202 outFp = 0;
203 }
204 }
205 else if ((signBit == FALSE) && (fp32Sign == 1))
206 {
207 // Negative numbers go to zero if they can't be represented
208 outFp = 0;
209 }
210 else if (fp32Exp > (fp32Bias + bias))
211 {
212 // Too large to be represented in the destination format are made into signed MAX_FLOAT.
213 outFp = (fp32Sign << (expBits + mantBits)) | ((maxExp - 1) << mantBits) |
214 ((1 << mantBits) - 1);
215 }
216 else if (fp32Exp < (fp32Bias - (bias - 1)))
217 {
218 // Too small to be represented as a normalized number or it's zero
219 u32 shift;
220
221 // Shift amount is the difference between the fp32 exponent and the the minimum
222 // exponent in the dest format.
223 shift = fp32Bias - (bias - 1) - fp32Exp;
224
225 // Large enough shifts will generate 0
226 if (shift > (fp32MantBits + 1))
227 {
228 fp32Mant = 0;
229 }
230 else
231 {
232 // Add in hidden bit and right shift to align to new format
233 fp32Mant = (fp32Mant | 0x00800000) >> (fp32MantBits - mantBits);
234 fp32Mant = fp32Mant >> shift;
235 }
236
237 outFp = (fp32Sign << (expBits + mantBits)) | fp32Mant;
238 }
239 else
240 {
241 // Can be represented as a normalized number in the new format
242 outFp = (fp32Sign << (expBits + mantBits)) |
243 ((fp32Exp + bias - fp32Bias) << mantBits) |
244 (fp32Mant >> (fp32MantBits - mantBits));
245 }
246
247 // Sanity check
248 ASSERT((outFp & ~((1 << (signBit + expBits + mantBits)) - 1)) == 0x0);
249
250 return outFp;
251 }
252
253
ConvertFP32ToUnorm(f32 fp32,u32 numBits)254 u32 ConvertFP32ToUnorm(
255 f32 fp32, ///< fp32 value to convert
256 u32 numBits) ///< number of bits in destination unorm
257 {
258 u32 uiFp32, out, maxVal;
259 u32 fp32Sign, fp32Exp, fp32Mant;
260
261 // Extract relevant floating point parts
262 uiFp32 = FloatToBits(fp32);
263 fp32Sign = (uiFp32 & 0x80000000) >> 31;
264 fp32Exp = (uiFp32 & 0x7F800000) >> 23;
265 fp32Mant = uiFp32 & 0x007FFFFF;
266
267 // Maximum representable unorm
268 maxVal = (1 << numBits) - 1;
269
270 // Handle NaNs and Infs values separately
271 if (fp32Exp == 0xff)
272 {
273 // Nans and -Inf go to 0
274 if ((fp32Mant != 0x0) || (fp32Sign == 1))
275 {
276 out = 0;
277 }
278 else
279 {
280 // +Inf goes to max representable value
281 out = maxVal;
282 }
283 }
284 else if (fp32 > 1.0f)
285 {
286 out = maxVal;
287 }
288 else if (fp32 < 0.0f)
289 {
290 out = 0;
291 }
292 else
293 {
294 out = static_cast<u32>((fp32 * maxVal) + 0.5f);
295 }
296
297 ASSERT(out <= maxVal);
298
299 return out;
300 }
301
302 /*******************************************************************************
303 * PackClearColor
304 *
305 * @brief
306 * Pack the clear color for the given format into a 32 bit quantity.
307 *
308 * @return
309 * Packed 32-bit clear value.
310 *******************************************************************************/
PackClearColor(f32 r,f32 g,f32 b,f32 a,GX2SurfaceFormat format)311 u32 PackClearColor(f32 r, f32 g, f32 b, f32 a, ///< Clear color
312 GX2SurfaceFormat format) ///< Color format
313 {
314 u32 clearColor = 0;
315
316 if ((format == GX2_SURFACE_FORMAT_TCS_R8_G8_B8_A8_UNORM))
317 {
318 u32 red, green, blue, alpha;
319
320 red = ConvertFP32ToUnorm(r, 8);
321 green = ConvertFP32ToUnorm(g, 8);
322 blue = ConvertFP32ToUnorm(b, 8);
323 alpha = ConvertFP32ToUnorm(a, 8);
324
325 clearColor = (alpha << 24) | (blue << 16) | (green << 8) | red;
326 }
327 else if ((format == GX2_SURFACE_FORMAT_TCS_R10_G10_B10_A2_UNORM) ||
328 (format == GX2_SURFACE_FORMAT_TCS_A2_B10_G10_R10_UNORM))
329 {
330 u32 red, green, blue, alpha;
331
332 red = ConvertFP32ToUnorm(r, 10);
333 green = ConvertFP32ToUnorm(g, 10);
334 blue = ConvertFP32ToUnorm(b, 10);
335 alpha = ConvertFP32ToUnorm(a, 2);
336
337 if (format == GX2_SURFACE_FORMAT_TCS_R10_G10_B10_A2_UNORM)
338 {
339 clearColor = (alpha << 30) | (blue << 20) | (green << 10) | red;
340 }
341 else if (format == GX2_SURFACE_FORMAT_TCS_A2_B10_G10_R10_UNORM)
342 {
343 clearColor = (blue << 22) | (green << 12) | (red << 2) | alpha;
344 }
345 }
346 else if (format == GX2_SURFACE_FORMAT_TC_R11_G11_B10_FLOAT)
347 {
348 u32 redFP11, greenFP11, blueFP10;
349
350 redFP11 = ConvertFP32ToSmallFP(r, FALSE, 5, 6);
351 greenFP11 = ConvertFP32ToSmallFP(g, FALSE, 5, 6);
352 blueFP10 = ConvertFP32ToSmallFP(b, FALSE, 5, 5);
353
354 clearColor = (blueFP10 << 22) | (greenFP11 << 11) | redFP11;
355 }
356 else if (format == GX2_SURFACE_FORMAT_TC_R16_G16_FLOAT)
357 {
358 u32 redFP16, greenFP16;
359
360 redFP16 = ConvertFP32ToSmallFP(r, GX2_TRUE, 5, 10);
361 greenFP16 = ConvertFP32ToSmallFP(g, GX2_TRUE, 5, 10);
362
363 clearColor = (greenFP16 << 16) | (redFP16);
364 }
365
366 else
367 {
368 // Not implemented yet.
369 ASSERT(FALSE);
370 }
371
372 return clearColor;
373 }
374
375 // Identify CB formats that can benefit by being cleared with the DB
ClearUsingDB(GX2ColorBuffer * colorBuffer,GX2UTRect * dstRect)376 GX2Boolean ClearUsingDB(GX2ColorBuffer *colorBuffer, GX2UTRect *dstRect)
377 {
378 GX2Surface *dstSurface = &colorBuffer->surface;
379 u32 dstMip = colorBuffer->viewMip;
380
381 // Can't apply this optimization unless we are clearing the entire surface
382 if ((dstRect->left != 0) ||
383 (dstRect->top != 0) ||
384 (dstRect->right != dstSurface->width >> dstMip) ||
385 (dstRect->bottom != dstSurface->height >> dstMip))
386 {
387 return GX2_FALSE;
388 }
389 else
390 {
391 switch (dstSurface->format)
392 {
393 case GX2_SURFACE_FORMAT_TCS_R8_G8_B8_A8_UNORM:
394 case GX2_SURFACE_FORMAT_TCS_R10_G10_B10_A2_UNORM:
395 case GX2_SURFACE_FORMAT_TCS_A2_B10_G10_R10_UNORM:
396 case GX2_SURFACE_FORMAT_TC_R11_G11_B10_FLOAT:
397 case GX2_SURFACE_FORMAT_TC_R16_G16_FLOAT:
398 case GX2_SURFACE_FORMAT_TCD_R32_FLOAT:
399 break;
400 default:
401 return GX2_FALSE; // NO
402 }
403 }
404
405 switch (dstSurface->tileMode)
406 {
407 case GX2_TILE_MODE_1D_TILED_THIN1:
408 case GX2_TILE_MODE_2D_TILED_THIN1:
409 case GX2_TILE_MODE_2D_TILED_THIN2:
410 case GX2_TILE_MODE_2D_TILED_THIN4:
411 case GX2_TILE_MODE_2B_TILED_THIN1:
412 case GX2_TILE_MODE_2B_TILED_THIN2:
413 case GX2_TILE_MODE_2B_TILED_THIN4:
414 break;
415
416 default:
417 return GX2_FALSE;
418 }
419
420 if (dstSurface->aa != GX2_AA_MODE_1X)
421 {
422 return GX2_FALSE;
423 }
424
425 return GX2_TRUE;
426 }
427
IsValidDBFloat(u32 val)428 GX2Boolean IsValidDBFloat(u32 val)
429 {
430 GX2Boolean valid = GX2_TRUE;
431
432 // The following floating point values are not preserved by the DB when the depth value comes
433 // from vertex Z:
434 // - DeNorms are flushed to 0
435 // - NaNs (and Infs) are converted to 0
436 if (((val != 0x0) && ((val & 0x7F800000) == 0x0)) || // Denorms
437 ((val & 0x7F800000) == 0x7F800000)) // NaNs & Infs
438 {
439 valid = GX2_FALSE;
440 }
441
442 return valid;
443 }
444
GX2UTClearRectOp(GX2ColorBuffer * colorBuffer,GX2DepthBuffer * depthBuffer,f32 r,f32 g,f32 b,f32 a,f32 depthValue,u8 stencilValue,GX2ClearMode clearFlags,GX2HiStencilInfo * hiStencil,GX2UTRect * dstRect)445 void GX2UTClearRectOp(GX2ColorBuffer *colorBuffer, GX2DepthBuffer *depthBuffer,
446 f32 r, f32 g, f32 b, f32 a,
447 f32 depthValue, u8 stencilValue,
448 GX2ClearMode clearFlags, GX2HiStencilInfo *hiStencil,
449 GX2UTRect *dstRect)
450 {
451 u32 cbFirstSlice = 0;
452 u32 dbFirstSlice = 0;
453 u32 numSlices = 0;
454 u32 dstWidth, dstHeight, uDepthValue;
455 u32 dstMip, dstSlice;
456 GX2CompareFunction stencilFunc = GX2_COMPARE_NEVER;
457 GX2Boolean bColorAsDepth = GX2_FALSE;
458 GX2Boolean depthTestEnable = GX2_FALSE;
459 GX2Boolean stencilTestEnable = GX2_DISABLE;
460 GX2ColorBuffer cb;
461 GX2DepthBuffer db;
462
463 GX2UTDebugTagIndent(__func__);
464
465 // blt with width or height <= 0 does nothing
466 ASSERT((colorBuffer != NULL || depthBuffer != NULL) && (dstRect != NULL));
467 ASSERT((dstRect->bottom > dstRect->top) && (dstRect->right > dstRect->left) && "Invalid destination region!");
468 ASSERT(depthValue >= 0.0f && depthValue <= 1.0f);
469
470 // Verify the parameters
471 if (colorBuffer && depthBuffer)
472 {
473 ASSERT((colorBuffer->surface.width >> colorBuffer->viewMip) == (depthBuffer->surface.width >> depthBuffer->viewMip));
474 ASSERT((colorBuffer->surface.height >> colorBuffer->viewMip) == (depthBuffer->surface.height >> depthBuffer->viewMip));
475 ASSERT(colorBuffer->surface.aa == depthBuffer->surface.aa);
476 ASSERT(colorBuffer->viewNumSlices == depthBuffer->viewNumSlices);
477 }
478
479
480 if (clearFlags & GX2_CLEAR_D_REG) {
481 ASSERT(depthBuffer != NULL);
482 GX2SetClearDepth(depthBuffer, depthValue);
483 }
484 if (clearFlags & GX2_CLEAR_S_REG) {
485 ASSERT(depthBuffer != NULL);
486 GX2SetClearStencil(depthBuffer, stencilValue);
487 }
488
489 // Initialize the resources needed to clear surfaces.
490 // This function only does work the first time it's called.
491 GX2UTClearSurfaceRectInit();
492
493 uDepthValue = FloatToBits(depthValue);
494
495 // Some color-only clears can be accelerated by using the depth unit instead
496 if (colorBuffer != NULL && depthBuffer == NULL &&
497 ClearUsingDB(colorBuffer, dstRect))
498 {
499 u32 dv = 0;
500
501 if (colorBuffer->surface.format == GX2_SURFACE_FORMAT_TCD_R32_FLOAT)
502 {
503 // only use red component of clearcolor
504 dv = FloatToBits(r);
505 }
506 else
507 {
508 // PackColorToFp32
509 dv = PackClearColor(r, g, b, a, colorBuffer->surface.format);
510 }
511
512 if (IsValidDBFloat(dv))
513 {
514 // DB can only handle valid floats
515 uDepthValue = dv;
516 dstSlice = colorBuffer->viewFirstSlice;
517 dstMip = colorBuffer->viewMip;
518
519 // Populate db and associated surface accordingly
520 db.surface = colorBuffer->surface;
521 db.surface.format = GX2_SURFACE_FORMAT_TCD_R32_FLOAT;
522 db.surface.use = GX2_SURFACE_USE_DEPTH_BUFFER;
523 db.viewMip = dstMip;
524 db.viewFirstSlice = dstSlice;
525 db.viewNumSlices = colorBuffer->viewNumSlices;
526 db.hiZPtr = NULL;
527 db.hiZSize = 0;
528
529 // Instruct logic below that we are clearing depth only
530 clearFlags = GX2_CLEAR_DEPTH;
531 colorBuffer = NULL;
532 depthBuffer = &db;
533
534 GX2SetSpecialState(GX2_SPECIAL_STATE_CLEAR_COLOR_AS_DEPTH, GX2_ENABLE);
535 bColorAsDepth = GX2_TRUE;
536 }
537 }
538
539 // Only set if Color Writes are on
540 if (colorBuffer != NULL)
541 {
542 // If AA, must have auxPtr + auxSize
543 ASSERT(colorBuffer->surface.aa == GX2_AA_MODE_1X ||
544 (colorBuffer->auxPtr != NULL && colorBuffer->auxSize != 0));
545
546 //Currently we do not support clearing BC formats. Use GX2ClearColor() for this.
547 ASSERT(!GX2SurfaceIsCompressed(colorBuffer->surface.format));
548 ASSERT((colorBuffer->viewFirstSlice + colorBuffer->viewNumSlices <= colorBuffer->surface.depth));
549
550 dstMip = colorBuffer->viewMip;
551 dstWidth = GX2Max(1, colorBuffer->surface.width >> dstMip);
552 dstHeight = GX2Max(1, colorBuffer->surface.height >> dstMip);
553
554 // Create shallow copy of dest surface to be used as render target
555 cb = *colorBuffer;
556 cb.surface.use = GX2_SURFACE_USE_COLOR_BUFFER_TEXTURE;
557 cb.viewNumSlices = 1;
558 // Will reinit regs later
559
560 if (cb.surface.format == GX2_SURFACE_FORMAT_TC_R16_G16_FLOAT)
561 {
562 // Change format to B8G8R8A8_UNORM and PackFP16ToRGBA8
563 cb.surface.format = GX2_SURFACE_FORMAT_TCS_R8_G8_B8_A8_UNORM;
564
565 // Adjust clear color
566 u32 packedColor;
567
568 // Pack 2 channel FP16 clear color into a 32 bit quantity
569 packedColor = PackClearColor(r, g, b, a,
570 GX2_SURFACE_FORMAT_TC_R16_G16_FLOAT);
571
572 // Extract RGBA8 values from packed color and convert to clear values
573 a = ((packedColor >> 24) & 0xff) * (1.0f / 255.0f);
574 b = ((packedColor >> 16) & 0xff) * (1.0f / 255.0f);
575 g = ((packedColor >> 8) & 0xff) * (1.0f / 255.0f);
576 r = (packedColor & 0xff) * (1.0f / 255.0f);
577 }
578
579 // Enable Color Writes
580 GX2SetColorControl(GX2_LOGIC_OP_COPY,
581 0, //disable blending
582 GX2_DISABLE,
583 GX2_ENABLE);
584
585 numSlices = colorBuffer->viewNumSlices;
586 cbFirstSlice = cb.viewFirstSlice;
587 }
588 else
589 {
590 // Use Depth Buffer dimensions
591 dstMip = depthBuffer->viewMip;
592 dstWidth = GX2Max(1, depthBuffer->surface.width >> dstMip);
593 dstHeight = GX2Max(1, depthBuffer->surface.height >> dstMip);
594
595 // Disable Color Writes
596 GX2SetColorControl(GX2_LOGIC_OP_COPY,
597 0, //disable blending
598 GX2_DISABLE,
599 GX2_DISABLE);
600
601 GX2SetAAMode(depthBuffer->surface.aa);
602 }
603
604 // Render to destination surface dimensions
605 GX2SetViewport(0, 0, (f32)dstWidth, (f32)dstHeight, 0.0f, 1.0f);
606 GX2SetScissor(0, 0, dstWidth, dstHeight);
607
608 // Only set if Depth or Stencil Writes are on
609 if (depthBuffer != NULL)
610 {
611 ASSERT((depthBuffer->viewFirstSlice + depthBuffer->viewNumSlices <= depthBuffer->surface.depth));
612
613 db = *depthBuffer;
614 numSlices = depthBuffer->viewNumSlices;
615 db.viewNumSlices = 1;
616 // Will reinit regs later
617
618 if (clearFlags & GX2_CLEAR_DEPTH)
619 depthTestEnable = GX2_TRUE;
620 else
621 depthTestEnable = GX2_FALSE;
622
623 if (clearFlags & GX2_CLEAR_STENCIL)
624 {
625 GX2SetStencilMask(0xff, //preMaskFront
626 0xff, //writeMaskFront
627 stencilValue, //refFront
628 0xff, //preMaskBack
629 0xff, //writeMaskBack
630 stencilValue);//refBack
631 stencilFunc = GX2_COMPARE_ALWAYS;
632 stencilTestEnable = GX2_ENABLE;
633 }
634 else
635 {
636 stencilFunc = GX2_COMPARE_NEVER;
637 stencilTestEnable = GX2_DISABLE;
638 }
639
640 // fast clears require HiZ and all edges on micro-tile boundaries
641 if ((depthBuffer->hiZPtr) &&
642 !((dstRect->bottom | dstRect->top | dstRect->left | dstRect->right) & 0x7))
643
644 {
645 GX2SetSpecialState(GX2_SPECIAL_STATE_CLEAR_HIZ, GX2_ENABLE);
646 }
647
648 dbFirstSlice = db.viewFirstSlice;
649 }
650
651 // Depth Writes
652 GX2SetDepthStencilControl(depthTestEnable, //depthTestEnable
653 GX2_ENABLE, //depthWriteEnable
654 GX2_COMPARE_ALWAYS, //depthFunc
655 stencilTestEnable, //stencilTestEnable
656 stencilTestEnable, //backStencilEnable
657
658 stencilFunc, //frontStencilFunc
659 GX2_STENCIL_REPLACE,//frontStencilZPass
660 GX2_STENCIL_REPLACE,//frontStencilZFail
661 GX2_STENCIL_REPLACE,//frontStencilFail
662
663 stencilFunc, //backStencilFunc
664 GX2_STENCIL_REPLACE,//backStencilZPass
665 GX2_STENCIL_REPLACE,//backStencilZFail
666 GX2_STENCIL_REPLACE //backStencilFail
667 );
668
669 if (hiStencil != NULL)
670 GX2SetHiStencilInfo(hiStencil);
671
672 // Only one clear shader
673 u32 shaderIdx = 0;
674
675 // Set shaders
676 GX2SetFetchShader(&fetchShader);
677 GX2SetVertexShader(g_clearSurfaceShader[shaderIdx].pVertexShader);
678 GX2SetPixelShader(g_clearSurfaceShader[shaderIdx].pPixelShader);
679
680 // Set the uniforms to be used by the vertex shader and pixel shader
681 f32 position_base_scale[] =
682 {
683 (f32)dstRect->left,
684 (f32)dstRect->top,
685 (f32)dstRect->right - (f32)dstRect->left,
686 (f32)dstRect->bottom - (f32)dstRect->top,
687 };
688
689 for (int i = 0; i < 4; i++)
690 {
691 f32 pos[] = {
692 position_base_scale[0] + position_base_scale[2] * CLEAR_SURFACE_RECT_POSITION_DATA[i].position[0],
693 position_base_scale[1] + position_base_scale[3] * CLEAR_SURFACE_RECT_POSITION_DATA[i].position[1],
694 BitsToFloat(uDepthValue),
695 1.0f
696 };
697
698 GX2SetVertexUniformReg(g_clearSurfaceShader[shaderIdx].u_positionLocation + i*4, 1*4, pos);
699 }
700
701 f32 clearColor[] = {r, g, b, a};
702
703 GX2SetPixelUniformReg(g_clearSurfaceShader[shaderIdx].u_clearColorLocation, 1*4, clearColor);
704
705 for (int slice = 0; slice < numSlices; slice++)
706 {
707 // Reset the color buffer to the next slice
708 if (colorBuffer)
709 {
710 cb.viewFirstSlice = cbFirstSlice + slice;
711 GX2InitColorBufferRegs(&cb);
712 GX2SetColorBuffer(&cb, GX2_RENDER_TARGET_0);
713 }
714
715 // Reset the depth buffer to the next slice
716 if (depthBuffer)
717 {
718 db.viewFirstSlice = dbFirstSlice + slice;
719 GX2InitDepthBufferRegs(&db);
720 GX2SetDepthBuffer(&db);
721 }
722
723 //Call the render function pointer
724 GX2Draw(GX2_PRIMITIVE_RECTS, VERTEX_COUNT);
725 }
726
727 if (bColorAsDepth)
728 {
729 GX2SetSpecialState(GX2_SPECIAL_STATE_CLEAR_COLOR_AS_DEPTH, GX2_DISABLE);
730 }
731
732 if (colorBuffer != NULL)
733 {
734 if ( colorBuffer->viewMip )
735 GX2Invalidate(GX2_INVALIDATE_COLOR_BUFFER, colorBuffer->surface.mipPtr, colorBuffer->surface.mipSize);
736 else
737 GX2Invalidate(GX2_INVALIDATE_COLOR_BUFFER, colorBuffer->surface.imagePtr, colorBuffer->surface.imageSize);
738 }
739
740 if (depthBuffer != NULL)
741 {
742 if ( depthBuffer->viewMip )
743 GX2Invalidate(GX2_INVALIDATE_DEPTH_BUFFER, depthBuffer->surface.mipPtr, depthBuffer->surface.mipSize);
744 else
745 GX2Invalidate(GX2_INVALIDATE_DEPTH_BUFFER, depthBuffer->surface.imagePtr, depthBuffer->surface.imageSize);
746
747 // Disable HiZ optimization
748 if ((depthBuffer->hiZPtr) &&
749 !((dstRect->bottom | dstRect->top | dstRect->left | dstRect->right) & 0x7))
750
751 {
752 GX2SetSpecialState(GX2_SPECIAL_STATE_CLEAR_HIZ, GX2_DISABLE);
753 }
754 }
755 GX2UTDebugTagUndent();
756 }
757
758 // Clears a region of the HiStencil buffer
759 // Setting the rectangular regions to a top-left of (0,0) and a bottom-right of (width, height)
760 // will blt the entire surface without any flipping (the right and bottom are exclusive).
761 // Must be invoked to invalidate HiStencil when changing pretest state during a frame.
GX2UTInvalidateHiStencilRect(GX2UTRect * dstRect,GX2DepthBuffer * depthBuffer)762 void GX2UTInvalidateHiStencilRect(GX2UTRect *dstRect, GX2DepthBuffer *depthBuffer)
763 {
764 u32 dstWidth, dstHeight;
765
766 GX2UTDebugTagIndent(__func__);
767
768 //Disable state shadowing. If your app is using state shadowing,
769 //you will need to restore the context after calling this function.
770 GX2SetContextState(NULL);
771
772 // Initialize the resources needed to clear surfaces.
773 // This function only does work the first time it's called.
774 GX2UTClearSurfaceRectInit();
775
776 // must have a deptBuffer w/ hiZPtr
777 // blt with width or height <= 0 does nothing
778 ASSERT((depthBuffer != NULL) && (depthBuffer->hiZPtr != NULL) && (dstRect != NULL));
779 ASSERT((dstRect->bottom > dstRect->top) && (dstRect->right > dstRect->left) && "Invalid destination region!");
780
781 // Disable Color Writes
782 GX2SetColorControl(GX2_LOGIC_OP_COPY,
783 0, //disable blending
784 GX2_DISABLE,
785 GX2_DISABLE);
786
787 // Disable Depth and Stencil Writes
788 GX2SetDepthStencilControl(GX2_DISABLE, // depthEnable
789 GX2_DISABLE, // depthWriteEnable
790 GX2_COMPARE_ALWAYS, // depthFunc
791 GX2_DISABLE, // stencilTestEnable
792 GX2_DISABLE, // backStencilEnable
793 GX2_COMPARE_ALWAYS, // frontStencilFunc
794 GX2_STENCIL_KEEP, // frontStencilZPass
795 GX2_STENCIL_KEEP, // frontStencilZFail
796 GX2_STENCIL_KEEP, // frontStencilFail
797 GX2_COMPARE_ALWAYS, // backStencilFunc
798 GX2_STENCIL_KEEP, // backStencilZPass
799 GX2_STENCIL_KEEP, // backStencilZFail
800 GX2_STENCIL_KEEP); // backStencilFail
801 GX2SetStencilMask(0x00, //preMaskFront
802 0x00, //writeMaskFront
803 0x00, //refFront
804 0x00, //preMaskBack
805 0x00, //writeMaskBack
806 0x00);//refBack
807
808 dstWidth = depthBuffer->surface.width;
809 dstHeight = depthBuffer->surface.height;
810
811 // Render to destination surface dimensions
812 GX2SetViewport(dstRect->left, dstRect->bottom, (f32)dstWidth, (f32)dstHeight, 0.0f, 1.0f);
813 GX2SetScissor(dstRect->left, dstRect->bottom, dstWidth, dstHeight);
814
815 GX2SetDepthBuffer(depthBuffer);
816
817 // Create HiStencilInfo that will clear the HiStencil pretest results
818 GX2HiStencilInfo hiStencilInfo;
819 hiStencilInfo.state[0].function = GX2_COMPARE_ALWAYS;
820 hiStencilInfo.state[0].reference = 0;
821 hiStencilInfo.state[0].mask = 0xFF;
822 hiStencilInfo.state[0].enable = GX2_FALSE;
823 hiStencilInfo.state[1].function = GX2_COMPARE_ALWAYS;
824 hiStencilInfo.state[1].reference = 0;
825 hiStencilInfo.state[1].mask = 0xFF;
826 hiStencilInfo.state[1].enable = GX2_FALSE;
827 GX2InitHiStencilInfoRegs(&hiStencilInfo);
828 GX2SetHiStencilInfo(&hiStencilInfo);
829
830 // Only one clear shader
831 u32 shaderIdx = 0;
832
833 // Set shaders
834 // NOTE: No fetch shader is needed for our shaders, see GLSL for details.
835 GX2SetVertexShader(g_clearSurfaceShader[shaderIdx].pVertexShader);
836 GX2SetPixelShader(g_clearSurfaceShader[shaderIdx].pPixelShader);
837
838 // Set the uniforms to be used by the vertex shader and pixel shader
839 f32 position_base_scale[] =
840 {
841 -1.0f + 2.0f * (f32)dstRect->left / (f32)dstWidth,
842 1.0f - 2.0f * (f32)dstRect->top / (f32)dstHeight,
843 2.0f * (f32)(dstRect->right - dstRect->left) / (f32)dstWidth,
844 -2.0f * (f32)(dstRect->bottom - dstRect->top) / (f32)dstHeight,
845 };
846
847 for (int i = 0; i < 4; i++)
848 {
849 f32 pos[] = {
850 position_base_scale[0] + position_base_scale[2] * CLEAR_SURFACE_RECT_POSITION_DATA[i].position[0],
851 position_base_scale[1] + position_base_scale[3] * CLEAR_SURFACE_RECT_POSITION_DATA[i].position[1],
852 0.0,
853 1.0
854 };
855
856 GX2SetVertexUniformReg(g_clearSurfaceShader[shaderIdx].u_positionLocation + i*4, 1*4, pos);
857 }
858
859 // We don't need to set clearColor or depthValue because we aren't writing those buffers
860
861 //Call the render function pointer
862 GX2Draw(GX2_PRIMITIVE_TRIANGLE_STRIP, VERTEX_COUNT);
863
864 GX2UTDebugTagUndent();
865 }
866
867 //Setup all of the constant renderstate needed for the clear
GX2UTSetClearState(GX2Boolean enable)868 void GX2UTSetClearState(GX2Boolean enable)
869 {
870 if (enable)
871 {
872 // If your application's steady state can be set to GX2UT common state
873 // using a small number of discrete GX2 calls, then customize here
874 // instead of using GX2UTSetCommonState()
875 GX2UTSetCommonState();
876
877 // Enable any special GX2 state
878 GX2SetSpecialState(GX2_SPECIAL_STATE_CLEAR, GX2_ENABLE);
879 // That call just clobbered RasterizerClipControl
880 }
881 else
882 {
883 // Disable any special GX2 state
884 GX2SetSpecialState(GX2_SPECIAL_STATE_CLEAR, GX2_DISABLE);
885
886 // The purpose of the following is to return the context to GX2 default
887 // state. Integration with your application's state management may
888 // differ. The clear operation itself clobbered DepthStencilControl
889 // and ColorControl
890 GX2SetDepthStencilControl(GX2_TRUE, //depthTestEnable
891 GX2_TRUE, //depthWriteEnable
892 GX2_COMPARE_LESS, //depthFunc
893 GX2_FALSE, //stencilTestEnable
894 GX2_FALSE, //backStencilEnable
895 GX2_COMPARE_ALWAYS, //frontStencilFunc
896 GX2_STENCIL_REPLACE, //frontStencilZPass
897 GX2_STENCIL_REPLACE, //frontStencilZFail
898 GX2_STENCIL_REPLACE, //frontStencilFail
899 GX2_COMPARE_ALWAYS, //backStencilFunc
900 GX2_STENCIL_REPLACE, //backStencilZPass
901 GX2_STENCIL_REPLACE, //backStencilZFail
902 GX2_STENCIL_REPLACE);//backStencilFail
903
904 GX2SetColorControl(GX2_LOGIC_OP_COPY, GX2_DISABLE, GX2_DISABLE, GX2_ENABLE);
905 }
906 }
907
GX2UTSetupColorAuxBufferOp(GX2ColorBuffer * colorBuffer)908 void GX2UTSetupColorAuxBufferOp(GX2ColorBuffer *colorBuffer)
909 {
910 if (colorBuffer->auxPtr)
911 {
912 u32 ctileOffset = colorBuffer->_regs[4]; // cmask_offset
913 u32 ctileSize = colorBuffer->auxSize - ctileOffset;
914 u8* ctilePtr = (u8*)colorBuffer->auxPtr + ctileOffset;
915 ASSERT((ctileSize & 0x1FF) == 0 && "Invalid MSAA Color Buffer auxSize!");
916
917 GX2ColorBuffer tmpBuf;
918 u32 width = 16;
919 u32 height = ctileSize / 4 / width;
920
921 GX2InitColorBuffer(&tmpBuf, width, height, GX2_SURFACE_FORMAT_TCS_R8_G8_B8_A8_UNORM, GX2_AA_MODE_1X);
922 GX2InitColorBufferPtr(&tmpBuf, ctilePtr);
923 tmpBuf.surface.tileMode = GX2_TILE_MODE_1D_TILED_THIN1;
924 GX2CalcSurfaceSizeAndAlignment(&tmpBuf.surface);
925 ASSERT(tmpBuf.surface.imageSize == ctileSize && "CMask Tile Size must match calculated image size!");
926 GX2InitColorBufferRegs(&tmpBuf);
927
928 GX2UTClearOp(&tmpBuf, NULL,
929 GX2_AUX_BUFFER_CLEAR_VALUE/255.0f,
930 GX2_AUX_BUFFER_CLEAR_VALUE/255.0f,
931 GX2_AUX_BUFFER_CLEAR_VALUE/255.0f,
932 GX2_AUX_BUFFER_CLEAR_VALUE/255.0f,
933 0.0f, 0u, GX2_CLEAR_NONE, NULL);
934 }
935 }
936
937