1 /*---------------------------------------------------------------------------*
2 
3   Copyright 2010-2014 Nintendo.  All rights reserved.
4 
5   These coded instructions, statements, and computer programs contain
6   proprietary information of Nintendo of America Inc. and/or Nintendo
7   Company Ltd., and are protected by Federal copyright law.  They may
8   not be disclosed to third parties or copied or duplicated in any form,
9   in whole or in part, without the prior written consent of Nintendo.
10 
11  *---------------------------------------------------------------------------*/
12 
13 #include <stdio.h>
14 #include <string.h>
15 #include <math.h>
16 
17 #if defined(WIN32) || defined(WIN64)
18 #include <pc/gx2.h>
19 #include <pc/demo.h>
20 #endif
21 #include <cafe/os.h>
22 #include <cafe/mem.h>
23 #include <cafe/gx2.h>
24 #include <cafe/demo.h>
25 
26 #include <cafe/gfd.h>
27 #include <cafe/gx2ut.h>
28 
29 //Include generated shaders
30 #include "shaders/headers/gx2utClearSurfaceRect.h"
31 
32 /*
33  * 0 - Simple clear shader
34  */
35 static const GX2VertexShader* const VS_SHADERS[] = { &gx2utClearSurfaceRect_VS};
36 
37 static const GX2PixelShader* const PS_SHADERS[] = { &gx2utClearSurfaceRect_PS};
38 
39 static const u32 NUM_SHADERS = 1;
40 
41 typedef struct _ClearSurfaceShader {
42     // These variables hold the three types of shaders needed for a call to
43     // GX2SetShaders. The vertex and pixel shaders are loaded from the
44     // header, but since the fetch shader is generated at run-time
45     // it must be handled slightly differently.
46     const GX2VertexShader *pVertexShader;
47     const GX2PixelShader *pPixelShader;
48 
49     // The register locations where the offset uniforms are stored for
50     // the pixel and vertex shaders.
51     u32 u_positionLocation;
52     u32 u_clearColorLocation;
53 } ClearSurfaceShader;
54 
55 //For now, share one fetch shader buffer for all shaders, since it should be identical
56 #define FETCH_SHADER_SIZE 32  //hard code this value for now
57 ALIGNVAR(GX2_SHADER_ALIGNMENT) static u8 g_GX2UTFetchShader[FETCH_SHADER_SIZE];
58 
59 
60 static ClearSurfaceShader g_clearSurfaceShader[NUM_SHADERS];
61 static GX2FetchShader fetchShader;
62 typedef struct _VtxFmtF32x2 {
63     f32 position[2];
64 } VtxFmtF32x2;
65 
66 // This data works for both RECT and TRISTRIP
67 static const VtxFmtF32x2 CLEAR_SURFACE_RECT_POSITION_DATA[] =
68 {
69     {0.0f,  0.0f},
70     {1.0f,  0.0f},
71     {0.0f,  1.0f},
72     {1.0f,  1.0f}
73 };
74 
75 static const u32 VERTEX_COUNT = sizeof(CLEAR_SURFACE_RECT_POSITION_DATA)
76                          / sizeof(CLEAR_SURFACE_RECT_POSITION_DATA[0]);
77 
78 // Initializes how surfaces will be copied
GX2UTClearSurfaceRectInit(void)79 void GX2UTClearSurfaceRectInit(void)
80 {
81     static GX2Boolean initDone = GX2_FALSE;
82 
83     if (initDone == GX2_TRUE)
84     {
85         //OSReport("Skipping init in GX2UTClearSurfaceRectInit\n");
86         return;
87     }
88 
89     // Setup shaders
90     u32 i;
91 
92     GX2NotifyMemAlloc(g_GX2UTFetchShader,
93                       FETCH_SHADER_SIZE,
94                       GX2_SHADER_ALIGNMENT);
95 
96     for (i = 0; i < NUM_SHADERS; ++i)
97     {
98         g_clearSurfaceShader[i].pVertexShader = VS_SHADERS[i];
99         g_clearSurfaceShader[i].pPixelShader = PS_SHADERS[i];
100 
101         GX2Invalidate(GX2_INVALIDATE_CPU_SHADER,
102             g_clearSurfaceShader[i].pVertexShader->shaderPtr,
103             g_clearSurfaceShader[i].pVertexShader->shaderSize);
104 
105         GX2NotifyMemAlloc(g_clearSurfaceShader[i].pVertexShader->shaderPtr,
106             g_clearSurfaceShader[i].pVertexShader->shaderSize,
107             GX2_SHADER_ALIGNMENT);
108 
109         GX2Invalidate(GX2_INVALIDATE_CPU_SHADER,
110             g_clearSurfaceShader[i].pPixelShader->shaderPtr,
111             g_clearSurfaceShader[i].pPixelShader->shaderSize);
112 
113         GX2NotifyMemAlloc(g_clearSurfaceShader[i].pPixelShader->shaderPtr,
114             g_clearSurfaceShader[i].pPixelShader->shaderSize,
115             GX2_SHADER_ALIGNMENT);
116 
117         // Lookup the uniform locations in the vertex shader and pixel shader.
118         // The shader author chose the names "u_positions", "u_depth", and "u_clearColor"
119         g_clearSurfaceShader[i].u_positionLocation =
120             (u32)GX2GetVertexUniformVarOffset(g_clearSurfaceShader[i].pVertexShader, "u_positions");
121         g_clearSurfaceShader[i].u_clearColorLocation =
122             (u32)GX2GetPixelUniformVarOffset(g_clearSurfaceShader[i].pPixelShader, "u_clearColor");
123         ASSERT((g_clearSurfaceShader[i].u_positionLocation != GX2_UNIFORM_VAR_INVALID_OFFSET)
124             && (g_clearSurfaceShader[i].u_clearColorLocation != GX2_UNIFORM_VAR_INVALID_OFFSET)
125             && "Couldn't find the correct vertex and pixel shader uniforms.");
126 
127     }
128 
129     ASSERT(GX2CalcFetchShaderSize(0) <= sizeof(g_GX2UTFetchShader) && "g_GX2UTFetchShader too small!\n");
130     GX2InitFetchShader(&fetchShader, g_GX2UTFetchShader, 0, NULL);
131 
132     initDone = GX2_TRUE;
133 }
134 
135 /// returns the bits of a floating point value as an unsigned integer
FloatToBits(f32 f)136 static u32 FloatToBits(f32 f)
137     {
138         union { f32 f;
139                 u32 u; } converter;
140         converter.f = f;
141         return converter.u;
142     }
143 
144 /// returns the bits of a floating point value as an unsigned integer
BitsToFloat(u32 u)145 static f32 BitsToFloat(u32 u)
146     {
147         union { f32 f;
148                 u32 u; } converter;
149         converter.u = u;
150         return converter.f;
151     }
152 
153 /*******************************************************************************
154 *   ConvertFP32ToSmallFP
155 *
156 *   @brief
157 *      Converts 32 bit floating point value to one with the supplied
158 *      representation.  Code assume the dest format follows representation
159 *      similar to the fp32 IEEE format.
160 *
161 *   @return
162 *      Converted value as an unsigned integer.
163 *******************************************************************************/
ConvertFP32ToSmallFP(f32 fp32,BOOL signBit,u32 expBits,u32 mantBits)164 u32 ConvertFP32ToSmallFP(
165      f32 fp32,      ///< Original fp32 value
166      BOOL signBit,  ///< Sign bit in destination format
167      u32 expBits,   ///< Exponent bits in destination format
168      u32 mantBits)  ///< Mantissa bits in destination format
169 {
170     u32 uiFp32, fp32Sign, fp32Exp, fp32Mant, fp32MantBits, fp32Bias;
171     u32 maxExp, bias;
172     u32 outFp;
173 
174     // Extract relevant values from input value
175     uiFp32 = FloatToBits(fp32);
176     fp32Sign = (uiFp32 & 0x80000000) >> 31;
177     fp32Exp = (uiFp32 & 0x7F800000) >> 23;
178     fp32Mant = uiFp32 & 0x007FFFFF;
179     fp32MantBits = 23;
180     fp32Bias = 127;
181 
182     // Compute exponent bias for destination format.  This is also the max positive (and negative)
183     // unbiased exponents in the format.
184     bias = (1 << (expBits - 1)) - 1;
185 
186     // Compute max exponent reserved for NaN and Infs
187     maxExp = (1 << expBits) - 1;
188 
189     if (fp32Exp == 0xff)
190     {
191         // Handle NaNs and Infs first.  According to the DX10 spec these get converted to NaNs
192         // and Infs in the lower precision format when available, otherwise they go to 0.  We
193         // assume the destination format has representations for NaNs and Infs except for when
194         // there is no sign bit to represent signed NaN and Inf.
195         if ((signBit == TRUE) || (fp32Sign == 0))
196         {
197             outFp = (fp32Sign << (expBits + mantBits)) | (maxExp << mantBits) |
198                 (fp32Mant >> (fp32MantBits - mantBits));
199         }
200         else
201         {
202             outFp = 0;
203         }
204     }
205     else if ((signBit == FALSE) && (fp32Sign == 1))
206     {
207         // Negative numbers go to zero if they can't be represented
208         outFp = 0;
209     }
210     else if (fp32Exp > (fp32Bias + bias))
211     {
212         // Too large to be represented in the destination format are made into signed MAX_FLOAT.
213         outFp = (fp32Sign << (expBits + mantBits)) | ((maxExp - 1) << mantBits) |
214             ((1 << mantBits) - 1);
215     }
216     else if (fp32Exp < (fp32Bias - (bias - 1)))
217     {
218         // Too small to be represented as a normalized number or it's zero
219         u32 shift;
220 
221         // Shift amount is the difference between the fp32 exponent and the the minimum
222         // exponent in the dest format.
223         shift = fp32Bias - (bias - 1) - fp32Exp;
224 
225         // Large enough shifts will generate 0
226         if (shift > (fp32MantBits + 1))
227         {
228             fp32Mant = 0;
229         }
230         else
231         {
232             // Add in hidden bit and right shift to align to new format
233             fp32Mant = (fp32Mant | 0x00800000) >> (fp32MantBits - mantBits);
234             fp32Mant = fp32Mant >> shift;
235         }
236 
237         outFp = (fp32Sign << (expBits + mantBits)) | fp32Mant;
238     }
239     else
240     {
241         // Can be represented as a normalized number in the new format
242         outFp = (fp32Sign << (expBits + mantBits)) |
243             ((fp32Exp + bias - fp32Bias) << mantBits) |
244             (fp32Mant >> (fp32MantBits - mantBits));
245     }
246 
247     // Sanity check
248     ASSERT((outFp & ~((1 << (signBit + expBits + mantBits)) - 1)) == 0x0);
249 
250     return outFp;
251 }
252 
253 
ConvertFP32ToUnorm(f32 fp32,u32 numBits)254 u32 ConvertFP32ToUnorm(
255     f32 fp32,      ///< fp32 value to convert
256     u32 numBits)   ///< number of bits in destination unorm
257 {
258     u32 uiFp32, out, maxVal;
259     u32 fp32Sign, fp32Exp, fp32Mant;
260 
261     // Extract relevant floating point parts
262     uiFp32 = FloatToBits(fp32);
263     fp32Sign = (uiFp32 & 0x80000000) >> 31;
264     fp32Exp  = (uiFp32 & 0x7F800000) >> 23;
265     fp32Mant = uiFp32 & 0x007FFFFF;
266 
267     // Maximum representable unorm
268     maxVal = (1 << numBits) - 1;
269 
270     // Handle NaNs and Infs values separately
271     if (fp32Exp == 0xff)
272     {
273         // Nans and -Inf go to 0
274         if ((fp32Mant != 0x0) || (fp32Sign == 1))
275         {
276             out = 0;
277         }
278         else
279         {
280             // +Inf goes to max representable value
281             out = maxVal;
282         }
283     }
284     else if (fp32 > 1.0f)
285     {
286         out = maxVal;
287     }
288     else if (fp32 < 0.0f)
289     {
290         out = 0;
291     }
292     else
293     {
294         out = static_cast<u32>((fp32 * maxVal) + 0.5f);
295     }
296 
297     ASSERT(out <= maxVal);
298 
299     return out;
300 }
301 
302 /*******************************************************************************
303 *   PackClearColor
304 *
305 *   @brief
306 *       Pack the clear color for the given format into a 32 bit quantity.
307 *
308 *   @return
309 *       Packed 32-bit clear value.
310 *******************************************************************************/
PackClearColor(f32 r,f32 g,f32 b,f32 a,GX2SurfaceFormat format)311 u32 PackClearColor(f32 r, f32 g, f32 b, f32 a, ///< Clear color
312                    GX2SurfaceFormat format)    ///< Color format
313 {
314     u32 clearColor = 0;
315 
316     if ((format == GX2_SURFACE_FORMAT_TCS_R8_G8_B8_A8_UNORM))
317     {
318         u32 red, green, blue, alpha;
319 
320         red   = ConvertFP32ToUnorm(r, 8);
321         green = ConvertFP32ToUnorm(g, 8);
322         blue  = ConvertFP32ToUnorm(b, 8);
323         alpha = ConvertFP32ToUnorm(a, 8);
324 
325         clearColor = (alpha << 24) | (blue << 16) | (green << 8) | red;
326     }
327     else if ((format == GX2_SURFACE_FORMAT_TCS_R10_G10_B10_A2_UNORM) ||
328              (format == GX2_SURFACE_FORMAT_TCS_A2_B10_G10_R10_UNORM))
329     {
330         u32 red, green, blue, alpha;
331 
332         red =   ConvertFP32ToUnorm(r, 10);
333         green = ConvertFP32ToUnorm(g, 10);
334         blue =  ConvertFP32ToUnorm(b, 10);
335         alpha = ConvertFP32ToUnorm(a, 2);
336 
337         if (format == GX2_SURFACE_FORMAT_TCS_R10_G10_B10_A2_UNORM)
338         {
339             clearColor = (alpha << 30) | (blue << 20) | (green << 10) | red;
340         }
341         else if (format == GX2_SURFACE_FORMAT_TCS_A2_B10_G10_R10_UNORM)
342         {
343             clearColor = (blue << 22) | (green << 12) | (red << 2) | alpha;
344         }
345     }
346     else if (format == GX2_SURFACE_FORMAT_TC_R11_G11_B10_FLOAT)
347     {
348         u32 redFP11, greenFP11, blueFP10;
349 
350         redFP11 =   ConvertFP32ToSmallFP(r, FALSE, 5, 6);
351         greenFP11 = ConvertFP32ToSmallFP(g, FALSE, 5, 6);
352         blueFP10 =  ConvertFP32ToSmallFP(b, FALSE, 5, 5);
353 
354         clearColor = (blueFP10 << 22) | (greenFP11 << 11) | redFP11;
355     }
356     else if (format == GX2_SURFACE_FORMAT_TC_R16_G16_FLOAT)
357     {
358         u32 redFP16, greenFP16;
359 
360         redFP16 =   ConvertFP32ToSmallFP(r, GX2_TRUE, 5, 10);
361         greenFP16 = ConvertFP32ToSmallFP(g, GX2_TRUE, 5, 10);
362 
363         clearColor = (greenFP16 << 16) | (redFP16);
364     }
365 
366     else
367     {
368         // Not implemented yet.
369         ASSERT(FALSE);
370     }
371 
372     return clearColor;
373 }
374 
375 // Identify CB formats that can benefit by being cleared with the DB
ClearUsingDB(GX2ColorBuffer * colorBuffer,GX2UTRect * dstRect)376 GX2Boolean ClearUsingDB(GX2ColorBuffer *colorBuffer, GX2UTRect *dstRect)
377 {
378     GX2Surface *dstSurface = &colorBuffer->surface;
379     u32 dstMip = colorBuffer->viewMip;
380 
381     // Can't apply this optimization unless we are clearing the entire surface
382     if ((dstRect->left != 0) ||
383         (dstRect->top != 0) ||
384         (dstRect->right != dstSurface->width >> dstMip) ||
385         (dstRect->bottom != dstSurface->height >> dstMip))
386     {
387         return GX2_FALSE;
388     }
389     else
390     {
391         switch (dstSurface->format)
392         {
393             case GX2_SURFACE_FORMAT_TCS_R8_G8_B8_A8_UNORM:
394             case GX2_SURFACE_FORMAT_TCS_R10_G10_B10_A2_UNORM:
395             case GX2_SURFACE_FORMAT_TCS_A2_B10_G10_R10_UNORM:
396             case GX2_SURFACE_FORMAT_TC_R11_G11_B10_FLOAT:
397             case GX2_SURFACE_FORMAT_TC_R16_G16_FLOAT:
398             case GX2_SURFACE_FORMAT_TCD_R32_FLOAT:
399                 break;
400             default:
401                 return GX2_FALSE; // NO
402         }
403     }
404 
405     switch (dstSurface->tileMode)
406     {
407         case GX2_TILE_MODE_1D_TILED_THIN1:
408         case GX2_TILE_MODE_2D_TILED_THIN1:
409         case GX2_TILE_MODE_2D_TILED_THIN2:
410         case GX2_TILE_MODE_2D_TILED_THIN4:
411         case GX2_TILE_MODE_2B_TILED_THIN1:
412         case GX2_TILE_MODE_2B_TILED_THIN2:
413         case GX2_TILE_MODE_2B_TILED_THIN4:
414             break;
415 
416         default:
417             return GX2_FALSE;
418     }
419 
420     if (dstSurface->aa != GX2_AA_MODE_1X)
421     {
422         return GX2_FALSE;
423     }
424 
425     return GX2_TRUE;
426 }
427 
IsValidDBFloat(u32 val)428 GX2Boolean IsValidDBFloat(u32 val)
429 {
430     GX2Boolean valid = GX2_TRUE;
431 
432     // The following floating point values are not preserved by the DB when the depth value comes
433     // from vertex Z:
434     // - DeNorms are flushed to 0
435     // - NaNs (and Infs) are converted to 0
436     if (((val != 0x0) && ((val & 0x7F800000) == 0x0)) || // Denorms
437         ((val & 0x7F800000) == 0x7F800000))              // NaNs & Infs
438     {
439         valid = GX2_FALSE;
440     }
441 
442     return valid;
443 }
444 
GX2UTClearRectOp(GX2ColorBuffer * colorBuffer,GX2DepthBuffer * depthBuffer,f32 r,f32 g,f32 b,f32 a,f32 depthValue,u8 stencilValue,GX2ClearMode clearFlags,GX2HiStencilInfo * hiStencil,GX2UTRect * dstRect)445 void GX2UTClearRectOp(GX2ColorBuffer *colorBuffer, GX2DepthBuffer *depthBuffer,
446                       f32 r, f32 g, f32 b, f32 a,
447                       f32 depthValue, u8 stencilValue,
448                       GX2ClearMode clearFlags, GX2HiStencilInfo *hiStencil,
449                       GX2UTRect *dstRect)
450 {
451     u32 cbFirstSlice = 0;
452     u32 dbFirstSlice = 0;
453     u32 numSlices = 0;
454     u32 dstWidth, dstHeight, uDepthValue;
455     u32 dstMip, dstSlice;
456     GX2CompareFunction stencilFunc = GX2_COMPARE_NEVER;
457     GX2Boolean bColorAsDepth = GX2_FALSE;
458     GX2Boolean depthTestEnable = GX2_FALSE;
459     GX2Boolean stencilTestEnable = GX2_DISABLE;
460     GX2ColorBuffer cb;
461     GX2DepthBuffer db;
462 
463     GX2UTDebugTagIndent(__func__);
464 
465     // blt with width or height <= 0 does nothing
466     ASSERT((colorBuffer != NULL || depthBuffer != NULL) && (dstRect != NULL));
467     ASSERT((dstRect->bottom > dstRect->top) && (dstRect->right > dstRect->left) && "Invalid destination region!");
468     ASSERT(depthValue >= 0.0f && depthValue <= 1.0f);
469 
470     // Verify the parameters
471     if (colorBuffer && depthBuffer)
472     {
473         ASSERT((colorBuffer->surface.width >> colorBuffer->viewMip) == (depthBuffer->surface.width >> depthBuffer->viewMip));
474         ASSERT((colorBuffer->surface.height >> colorBuffer->viewMip) == (depthBuffer->surface.height >> depthBuffer->viewMip));
475         ASSERT(colorBuffer->surface.aa == depthBuffer->surface.aa);
476         ASSERT(colorBuffer->viewNumSlices == depthBuffer->viewNumSlices);
477     }
478 
479 
480     if (clearFlags & GX2_CLEAR_D_REG) {
481         ASSERT(depthBuffer != NULL);
482         GX2SetClearDepth(depthBuffer, depthValue);
483     }
484     if (clearFlags & GX2_CLEAR_S_REG) {
485         ASSERT(depthBuffer != NULL);
486         GX2SetClearStencil(depthBuffer, stencilValue);
487     }
488 
489     // Initialize the resources needed to clear surfaces.
490     // This function only does work the first time it's called.
491     GX2UTClearSurfaceRectInit();
492 
493     uDepthValue = FloatToBits(depthValue);
494 
495     // Some color-only clears can be accelerated by using the depth unit instead
496     if (colorBuffer != NULL && depthBuffer == NULL &&
497         ClearUsingDB(colorBuffer, dstRect))
498     {
499         u32 dv = 0;
500 
501         if (colorBuffer->surface.format == GX2_SURFACE_FORMAT_TCD_R32_FLOAT)
502         {
503             // only use red component of clearcolor
504             dv = FloatToBits(r);
505         }
506         else
507         {
508             // PackColorToFp32
509             dv = PackClearColor(r, g, b, a, colorBuffer->surface.format);
510         }
511 
512         if (IsValidDBFloat(dv))
513         {
514             // DB can only handle valid floats
515             uDepthValue = dv;
516             dstSlice = colorBuffer->viewFirstSlice;
517             dstMip = colorBuffer->viewMip;
518 
519             // Populate db and associated surface accordingly
520             db.surface = colorBuffer->surface;
521             db.surface.format = GX2_SURFACE_FORMAT_TCD_R32_FLOAT;
522             db.surface.use = GX2_SURFACE_USE_DEPTH_BUFFER;
523             db.viewMip = dstMip;
524             db.viewFirstSlice = dstSlice;
525             db.viewNumSlices = colorBuffer->viewNumSlices;
526             db.hiZPtr = NULL;
527             db.hiZSize = 0;
528 
529             // Instruct logic below that we are clearing depth only
530             clearFlags = GX2_CLEAR_DEPTH;
531             colorBuffer = NULL;
532             depthBuffer = &db;
533 
534             GX2SetSpecialState(GX2_SPECIAL_STATE_CLEAR_COLOR_AS_DEPTH, GX2_ENABLE);
535             bColorAsDepth = GX2_TRUE;
536         }
537     }
538 
539     // Only set if Color Writes are on
540     if (colorBuffer != NULL)
541     {
542         // If AA, must have auxPtr + auxSize
543         ASSERT(colorBuffer->surface.aa == GX2_AA_MODE_1X ||
544                (colorBuffer->auxPtr != NULL && colorBuffer->auxSize != 0));
545 
546         //Currently we do not support clearing BC formats.  Use GX2ClearColor() for this.
547         ASSERT(!GX2SurfaceIsCompressed(colorBuffer->surface.format));
548         ASSERT((colorBuffer->viewFirstSlice + colorBuffer->viewNumSlices <= colorBuffer->surface.depth));
549 
550         dstMip = colorBuffer->viewMip;
551         dstWidth  = GX2Max(1, colorBuffer->surface.width  >> dstMip);
552         dstHeight = GX2Max(1, colorBuffer->surface.height >> dstMip);
553 
554         // Create shallow copy of dest surface to be used as render target
555         cb = *colorBuffer;
556         cb.surface.use = GX2_SURFACE_USE_COLOR_BUFFER_TEXTURE;
557         cb.viewNumSlices = 1;
558         // Will reinit regs later
559 
560         if (cb.surface.format == GX2_SURFACE_FORMAT_TC_R16_G16_FLOAT)
561         {
562             // Change format to B8G8R8A8_UNORM and PackFP16ToRGBA8
563             cb.surface.format = GX2_SURFACE_FORMAT_TCS_R8_G8_B8_A8_UNORM;
564 
565             // Adjust clear color
566             u32 packedColor;
567 
568             // Pack 2 channel FP16 clear color into a 32 bit quantity
569             packedColor = PackClearColor(r, g, b, a,
570                                          GX2_SURFACE_FORMAT_TC_R16_G16_FLOAT);
571 
572             // Extract RGBA8 values from packed color and convert to clear values
573             a = ((packedColor >> 24) & 0xff) * (1.0f / 255.0f);
574             b = ((packedColor >> 16) & 0xff) * (1.0f / 255.0f);
575             g = ((packedColor >> 8) & 0xff) * (1.0f / 255.0f);
576             r = (packedColor & 0xff) * (1.0f / 255.0f);
577         }
578 
579         // Enable Color Writes
580         GX2SetColorControl(GX2_LOGIC_OP_COPY,
581                            0, //disable blending
582                            GX2_DISABLE,
583                            GX2_ENABLE);
584 
585         numSlices = colorBuffer->viewNumSlices;
586         cbFirstSlice = cb.viewFirstSlice;
587     }
588     else
589     {
590         // Use Depth Buffer dimensions
591         dstMip = depthBuffer->viewMip;
592         dstWidth  = GX2Max(1, depthBuffer->surface.width >> dstMip);
593         dstHeight = GX2Max(1, depthBuffer->surface.height >> dstMip);
594 
595         // Disable Color Writes
596         GX2SetColorControl(GX2_LOGIC_OP_COPY,
597                            0, //disable blending
598                            GX2_DISABLE,
599                            GX2_DISABLE);
600 
601         GX2SetAAMode(depthBuffer->surface.aa);
602     }
603 
604     // Render to destination surface dimensions
605     GX2SetViewport(0, 0, (f32)dstWidth, (f32)dstHeight, 0.0f, 1.0f);
606     GX2SetScissor(0, 0, dstWidth, dstHeight);
607 
608     // Only set if Depth or Stencil Writes are on
609     if (depthBuffer != NULL)
610     {
611         ASSERT((depthBuffer->viewFirstSlice + depthBuffer->viewNumSlices <= depthBuffer->surface.depth));
612 
613         db = *depthBuffer;
614         numSlices = depthBuffer->viewNumSlices;
615         db.viewNumSlices = 1;
616         // Will reinit regs later
617 
618         if (clearFlags & GX2_CLEAR_DEPTH)
619             depthTestEnable = GX2_TRUE;
620         else
621             depthTestEnable = GX2_FALSE;
622 
623         if (clearFlags & GX2_CLEAR_STENCIL)
624         {
625             GX2SetStencilMask(0xff,         //preMaskFront
626                               0xff,         //writeMaskFront
627                               stencilValue, //refFront
628                               0xff,         //preMaskBack
629                               0xff,         //writeMaskBack
630                               stencilValue);//refBack
631             stencilFunc = GX2_COMPARE_ALWAYS;
632             stencilTestEnable = GX2_ENABLE;
633         }
634         else
635         {
636             stencilFunc = GX2_COMPARE_NEVER;
637             stencilTestEnable = GX2_DISABLE;
638         }
639 
640         // fast clears require HiZ and all edges on micro-tile boundaries
641         if ((depthBuffer->hiZPtr) &&
642             !((dstRect->bottom | dstRect->top | dstRect->left | dstRect->right) & 0x7))
643 
644         {
645             GX2SetSpecialState(GX2_SPECIAL_STATE_CLEAR_HIZ, GX2_ENABLE);
646         }
647 
648         dbFirstSlice = db.viewFirstSlice;
649     }
650 
651     // Depth Writes
652     GX2SetDepthStencilControl(depthTestEnable,    //depthTestEnable
653                               GX2_ENABLE,         //depthWriteEnable
654                               GX2_COMPARE_ALWAYS, //depthFunc
655                               stencilTestEnable,  //stencilTestEnable
656                               stencilTestEnable,  //backStencilEnable
657 
658                               stencilFunc,        //frontStencilFunc
659                               GX2_STENCIL_REPLACE,//frontStencilZPass
660                               GX2_STENCIL_REPLACE,//frontStencilZFail
661                               GX2_STENCIL_REPLACE,//frontStencilFail
662 
663                               stencilFunc,        //backStencilFunc
664                               GX2_STENCIL_REPLACE,//backStencilZPass
665                               GX2_STENCIL_REPLACE,//backStencilZFail
666                               GX2_STENCIL_REPLACE //backStencilFail
667                             );
668 
669     if (hiStencil != NULL)
670         GX2SetHiStencilInfo(hiStencil);
671 
672     // Only one clear shader
673     u32 shaderIdx = 0;
674 
675     // Set shaders
676     GX2SetFetchShader(&fetchShader);
677     GX2SetVertexShader(g_clearSurfaceShader[shaderIdx].pVertexShader);
678     GX2SetPixelShader(g_clearSurfaceShader[shaderIdx].pPixelShader);
679 
680     // Set the uniforms to be used by the vertex shader and pixel shader
681     f32 position_base_scale[] =
682     {
683         (f32)dstRect->left,
684         (f32)dstRect->top,
685         (f32)dstRect->right - (f32)dstRect->left,
686         (f32)dstRect->bottom - (f32)dstRect->top,
687     };
688 
689     for (int i = 0; i < 4; i++)
690     {
691         f32 pos[] = {
692             position_base_scale[0] + position_base_scale[2] * CLEAR_SURFACE_RECT_POSITION_DATA[i].position[0],
693             position_base_scale[1] + position_base_scale[3] * CLEAR_SURFACE_RECT_POSITION_DATA[i].position[1],
694             BitsToFloat(uDepthValue),
695             1.0f
696         };
697 
698         GX2SetVertexUniformReg(g_clearSurfaceShader[shaderIdx].u_positionLocation + i*4, 1*4, pos);
699     }
700 
701     f32 clearColor[] = {r, g, b, a};
702 
703     GX2SetPixelUniformReg(g_clearSurfaceShader[shaderIdx].u_clearColorLocation, 1*4, clearColor);
704 
705     for (int slice = 0; slice < numSlices; slice++)
706     {
707         // Reset the color buffer to the next slice
708         if (colorBuffer)
709         {
710             cb.viewFirstSlice = cbFirstSlice + slice;
711             GX2InitColorBufferRegs(&cb);
712             GX2SetColorBuffer(&cb, GX2_RENDER_TARGET_0);
713         }
714 
715         // Reset the depth buffer to the next slice
716         if (depthBuffer)
717         {
718             db.viewFirstSlice = dbFirstSlice + slice;
719             GX2InitDepthBufferRegs(&db);
720             GX2SetDepthBuffer(&db);
721         }
722 
723         //Call the render function pointer
724         GX2Draw(GX2_PRIMITIVE_RECTS, VERTEX_COUNT);
725     }
726 
727     if (bColorAsDepth)
728     {
729         GX2SetSpecialState(GX2_SPECIAL_STATE_CLEAR_COLOR_AS_DEPTH, GX2_DISABLE);
730     }
731 
732     if (colorBuffer != NULL)
733     {
734         if ( colorBuffer->viewMip )
735             GX2Invalidate(GX2_INVALIDATE_COLOR_BUFFER, colorBuffer->surface.mipPtr, colorBuffer->surface.mipSize);
736         else
737             GX2Invalidate(GX2_INVALIDATE_COLOR_BUFFER, colorBuffer->surface.imagePtr, colorBuffer->surface.imageSize);
738     }
739 
740     if (depthBuffer != NULL)
741     {
742         if ( depthBuffer->viewMip )
743             GX2Invalidate(GX2_INVALIDATE_DEPTH_BUFFER, depthBuffer->surface.mipPtr, depthBuffer->surface.mipSize);
744         else
745             GX2Invalidate(GX2_INVALIDATE_DEPTH_BUFFER, depthBuffer->surface.imagePtr, depthBuffer->surface.imageSize);
746 
747         // Disable HiZ optimization
748         if ((depthBuffer->hiZPtr) &&
749             !((dstRect->bottom | dstRect->top | dstRect->left | dstRect->right) & 0x7))
750 
751         {
752             GX2SetSpecialState(GX2_SPECIAL_STATE_CLEAR_HIZ, GX2_DISABLE);
753         }
754     }
755     GX2UTDebugTagUndent();
756 }
757 
758 // Clears a region of the HiStencil buffer
759 // Setting the rectangular regions to a top-left of (0,0) and a bottom-right of (width, height)
760 // will blt the entire surface without any flipping (the right and bottom are exclusive).
761 // Must be invoked to invalidate HiStencil when changing pretest state during a frame.
GX2UTInvalidateHiStencilRect(GX2UTRect * dstRect,GX2DepthBuffer * depthBuffer)762 void GX2UTInvalidateHiStencilRect(GX2UTRect *dstRect, GX2DepthBuffer *depthBuffer)
763 {
764     u32 dstWidth, dstHeight;
765 
766     GX2UTDebugTagIndent(__func__);
767 
768     //Disable state shadowing.  If your app is using state shadowing,
769     //you will need to restore the context after calling this function.
770     GX2SetContextState(NULL);
771 
772     // Initialize the resources needed to clear surfaces.
773     // This function only does work the first time it's called.
774     GX2UTClearSurfaceRectInit();
775 
776     // must have a deptBuffer w/ hiZPtr
777     // blt with width or height <= 0 does nothing
778     ASSERT((depthBuffer != NULL) && (depthBuffer->hiZPtr != NULL) && (dstRect != NULL));
779     ASSERT((dstRect->bottom > dstRect->top) && (dstRect->right > dstRect->left) && "Invalid destination region!");
780 
781     // Disable Color Writes
782     GX2SetColorControl(GX2_LOGIC_OP_COPY,
783                        0, //disable blending
784                        GX2_DISABLE,
785                        GX2_DISABLE);
786 
787     // Disable Depth and Stencil Writes
788     GX2SetDepthStencilControl(GX2_DISABLE,          // depthEnable
789                               GX2_DISABLE,          // depthWriteEnable
790                               GX2_COMPARE_ALWAYS,   // depthFunc
791                               GX2_DISABLE,          // stencilTestEnable
792                               GX2_DISABLE,          // backStencilEnable
793                               GX2_COMPARE_ALWAYS,   // frontStencilFunc
794                               GX2_STENCIL_KEEP,     // frontStencilZPass
795                               GX2_STENCIL_KEEP,     // frontStencilZFail
796                               GX2_STENCIL_KEEP,     // frontStencilFail
797                               GX2_COMPARE_ALWAYS,   // backStencilFunc
798                               GX2_STENCIL_KEEP,     // backStencilZPass
799                               GX2_STENCIL_KEEP,     // backStencilZFail
800                               GX2_STENCIL_KEEP);    // backStencilFail
801     GX2SetStencilMask(0x00, //preMaskFront
802                       0x00, //writeMaskFront
803                       0x00, //refFront
804                       0x00, //preMaskBack
805                       0x00, //writeMaskBack
806                       0x00);//refBack
807 
808     dstWidth  = depthBuffer->surface.width;
809     dstHeight = depthBuffer->surface.height;
810 
811     // Render to destination surface dimensions
812     GX2SetViewport(dstRect->left, dstRect->bottom, (f32)dstWidth, (f32)dstHeight, 0.0f, 1.0f);
813     GX2SetScissor(dstRect->left, dstRect->bottom, dstWidth, dstHeight);
814 
815     GX2SetDepthBuffer(depthBuffer);
816 
817     // Create HiStencilInfo that will clear the HiStencil pretest results
818     GX2HiStencilInfo hiStencilInfo;
819     hiStencilInfo.state[0].function = GX2_COMPARE_ALWAYS;
820     hiStencilInfo.state[0].reference = 0;
821     hiStencilInfo.state[0].mask = 0xFF;
822     hiStencilInfo.state[0].enable = GX2_FALSE;
823     hiStencilInfo.state[1].function = GX2_COMPARE_ALWAYS;
824     hiStencilInfo.state[1].reference = 0;
825     hiStencilInfo.state[1].mask = 0xFF;
826     hiStencilInfo.state[1].enable = GX2_FALSE;
827     GX2InitHiStencilInfoRegs(&hiStencilInfo);
828     GX2SetHiStencilInfo(&hiStencilInfo);
829 
830     // Only one clear shader
831     u32 shaderIdx = 0;
832 
833     // Set shaders
834     // NOTE: No fetch shader is needed for our shaders, see GLSL for details.
835     GX2SetVertexShader(g_clearSurfaceShader[shaderIdx].pVertexShader);
836     GX2SetPixelShader(g_clearSurfaceShader[shaderIdx].pPixelShader);
837 
838     // Set the uniforms to be used by the vertex shader and pixel shader
839     f32 position_base_scale[] =
840     {
841         -1.0f + 2.0f * (f32)dstRect->left / (f32)dstWidth,
842          1.0f - 2.0f * (f32)dstRect->top / (f32)dstHeight,
843          2.0f * (f32)(dstRect->right - dstRect->left) / (f32)dstWidth,
844         -2.0f * (f32)(dstRect->bottom - dstRect->top) / (f32)dstHeight,
845     };
846 
847     for (int i = 0; i < 4; i++)
848     {
849         f32 pos[] = {
850             position_base_scale[0] + position_base_scale[2] * CLEAR_SURFACE_RECT_POSITION_DATA[i].position[0],
851             position_base_scale[1] + position_base_scale[3] * CLEAR_SURFACE_RECT_POSITION_DATA[i].position[1],
852             0.0,
853             1.0
854         };
855 
856         GX2SetVertexUniformReg(g_clearSurfaceShader[shaderIdx].u_positionLocation + i*4, 1*4, pos);
857     }
858 
859     // We don't need to set clearColor or depthValue because we aren't writing those buffers
860 
861     //Call the render function pointer
862     GX2Draw(GX2_PRIMITIVE_TRIANGLE_STRIP, VERTEX_COUNT);
863 
864     GX2UTDebugTagUndent();
865 }
866 
867 //Setup all of the constant renderstate needed for the clear
GX2UTSetClearState(GX2Boolean enable)868 void GX2UTSetClearState(GX2Boolean enable)
869 {
870     if (enable)
871     {
872         // If your application's steady state can be set to GX2UT common state
873         // using a small number of discrete GX2 calls, then customize here
874         // instead of using GX2UTSetCommonState()
875         GX2UTSetCommonState();
876 
877         // Enable any special GX2 state
878         GX2SetSpecialState(GX2_SPECIAL_STATE_CLEAR, GX2_ENABLE);
879         // That call just clobbered  RasterizerClipControl
880     }
881     else
882     {
883         // Disable any special GX2 state
884         GX2SetSpecialState(GX2_SPECIAL_STATE_CLEAR, GX2_DISABLE);
885 
886         // The purpose of the following is to return the context to GX2 default
887         // state. Integration with your application's state management may
888         // differ. The clear operation itself clobbered DepthStencilControl
889         // and ColorControl
890         GX2SetDepthStencilControl(GX2_TRUE,            //depthTestEnable
891                                   GX2_TRUE,            //depthWriteEnable
892                                   GX2_COMPARE_LESS,    //depthFunc
893                                   GX2_FALSE,           //stencilTestEnable
894                                   GX2_FALSE,           //backStencilEnable
895                                   GX2_COMPARE_ALWAYS,  //frontStencilFunc
896                                   GX2_STENCIL_REPLACE, //frontStencilZPass
897                                   GX2_STENCIL_REPLACE, //frontStencilZFail
898                                   GX2_STENCIL_REPLACE, //frontStencilFail
899                                   GX2_COMPARE_ALWAYS,  //backStencilFunc
900                                   GX2_STENCIL_REPLACE, //backStencilZPass
901                                   GX2_STENCIL_REPLACE, //backStencilZFail
902                                   GX2_STENCIL_REPLACE);//backStencilFail
903 
904         GX2SetColorControl(GX2_LOGIC_OP_COPY, GX2_DISABLE, GX2_DISABLE, GX2_ENABLE);
905     }
906 }
907 
// Clears the CMask region of a color buffer's aux memory.
//
// The region starting at the buffer's cmask offset is wrapped in a
// temporary 16-texel-wide, 1D-tiled R8G8B8A8 color buffer, which is then
// cleared through GX2UTClearOp with every channel set to
// GX2_AUX_BUFFER_CLEAR_VALUE. Buffers without aux memory are left untouched.
//
// colorBuffer: color buffer whose aux memory should be cleared.
void GX2UTSetupColorAuxBufferOp(GX2ColorBuffer *colorBuffer)
{
    // Nothing to clear when the buffer has no aux memory.
    if (!colorBuffer->auxPtr)
    {
        return;
    }

    const u32 cmaskOffset = colorBuffer->_regs[4]; // cmask_offset
    const u32 cmaskBytes  = colorBuffer->auxSize - cmaskOffset;
    u8 *cmaskBase         = (u8*)colorBuffer->auxPtr + cmaskOffset;
    ASSERT((cmaskBytes & 0x1FF) == 0 && "Invalid MSAA Color Buffer auxSize!");

    // Choose dimensions so that a 4-byte-per-texel surface of this width
    // spans the cmask region.
    const u32 tileWidth  = 16;
    const u32 tileHeight = cmaskBytes / 4 / tileWidth;

    // Describe the cmask memory as an ordinary render target.
    GX2ColorBuffer cmaskTarget;
    GX2InitColorBuffer(&cmaskTarget, tileWidth, tileHeight,
                       GX2_SURFACE_FORMAT_TCS_R8_G8_B8_A8_UNORM, GX2_AA_MODE_1X);
    GX2InitColorBufferPtr(&cmaskTarget, cmaskBase);

    // Switch to 1D tiling, then recompute the size so it can be checked
    // against the cmask region before the registers are built.
    cmaskTarget.surface.tileMode = GX2_TILE_MODE_1D_TILED_THIN1;
    GX2CalcSurfaceSizeAndAlignment(&cmaskTarget.surface);
    ASSERT(cmaskTarget.surface.imageSize == cmaskBytes && "CMask Tile Size must match calculated image size!");
    GX2InitColorBufferRegs(&cmaskTarget);

    // Same normalized value for all four channels; no depth/stencil clear
    // and no destination rectangle.
    const f32 auxClear = GX2_AUX_BUFFER_CLEAR_VALUE/255.0f;
    GX2UTClearOp(&cmaskTarget, NULL,
                 auxClear, auxClear, auxClear, auxClear,
                 0.0f, 0u, GX2_CLEAR_NONE, NULL);
}
936 
937