1 /*---------------------------------------------------------------------------*
2 
3   Copyright (C) Nintendo.  All rights reserved.
4 
5   These coded instructions, statements, and computer programs contain
6   proprietary information of Nintendo of America Inc. and/or Nintendo
7   Company Ltd., and are protected by Federal copyright law.  They may
8   not be disclosed to third parties or copied or duplicated in any form,
9   in whole or in part, without the prior written consent of Nintendo.
10 
11  *---------------------------------------------------------------------------*/
12 
13 #include <stdio.h>
14 #include <string.h>
15 #include <math.h>
16 
17 #if defined(WIN32) || defined(WIN64)
18 #include <pc/gx2.h>
19 #include <pc/demo.h>
20 #endif
21 #include <cafe/os.h>
22 #include <cafe/mem.h>
23 #include <cafe/gx2.h>
24 #include <cafe/demo.h>
25 
26 #include <cafe/gfd.h>
27 #include <cafe/gx2ut.h>
28 
29 //Include generated shaders
30 #include "shaders/headers/gx2utCopySurfaceRect3D.h"
31 #include "shaders/headers/gx2utCopySurfaceRect.h"
32 #include "shaders/headers/gx2utCopySurfaceRect2xMS.h"
33 #include "shaders/headers/gx2utCopySurfaceRect4xMS.h"
34 #include "shaders/headers/gx2utCopySurfaceRect8xMS.h"
35 
// Convenience wrappers around SURFACE_IN_MEMRANGE() that test a surface's
// mip level against the cached MEM1 / MEM2 address ranges
// (g_MEM1StartAddr..g_MEM1EndAddr and g_MEM2StartAddr..g_MEM2EndAddr).
#define SURFACE_IN_MEM1(surf, mipLevel) SURFACE_IN_MEMRANGE((surf), (mipLevel), g_MEM1StartAddr, g_MEM1EndAddr)
#define SURFACE_IN_MEM2(surf, mipLevel) SURFACE_IN_MEMRANGE((surf), (mipLevel), g_MEM2StartAddr, g_MEM2EndAddr)

// Shorthands that are only usable inside a function that has locals or
// parameters named srcSurface/srcMip and dstSurface/dstMip in scope.
#define SRC_IN_MEM1() SURFACE_IN_MEM1(srcSurface, srcMip)
#define SRC_IN_MEM2() SURFACE_IN_MEM2(srcSurface, srcMip)
#define DST_IN_MEM1() SURFACE_IN_MEM1((const GX2Surface*)dstSurface, dstMip)
#define DST_IN_MEM2() SURFACE_IN_MEM2((const GX2Surface*)dstSurface, dstMip)
43 
44 /* Returns true if the specified mipmap level is within the memory range
45  * specified by start and end.
46  */
SURFACE_IN_MEMRANGE(const GX2Surface * surf,u32 mipLevel,u32 start,u32 end)47 inline bool SURFACE_IN_MEMRANGE(const GX2Surface *surf,
48                                 u32 mipLevel, u32 start, u32 end)
49 {
50     return ((!mipLevel && (u32)(surf)->imagePtr >= (start) &&
51                           (u32)(surf)->imagePtr < end) ||
52             (mipLevel && (u32)(surf)->mipPtr >= (start) &&
53                          (u32)(surf)->mipPtr < end));
54 }
55 
// Indices into VS_SHADERS / PS_SHADERS / g_copySurfaceShader.
#define VS_SHADER_1X_INDEX 0
#define VS_SHADER_2X_INDEX 1
#define VS_SHADER_4X_INDEX 2
#define VS_SHADER_8X_INDEX 3
#define VS_SHADER_3D_INDEX 4
/*
 * 0 - Simple copy shader.
 * 1 - Copy shader that reads a 2xMSAA surface and converts it to
 *     a single-sampled output.
 * 2 - Copy shader that reads a 4xMSAA surface and converts it to
 *     a single-sampled output.
 * 3 - Copy shader that reads an 8xMSAA surface and converts it to
 *     a single-sampled output.
 * 4 - Copy shader that correctly reads from 3D textures.
 *
 * The two tables below must list their entries in this same order.
 */
static const GX2VertexShader* const VS_SHADERS[] = { &gx2utCopySurfaceRect_VS,
                                                     &gx2utCopySurfaceRect2xMS_VS,
                                                     &gx2utCopySurfaceRect4xMS_VS,
                                                     &gx2utCopySurfaceRect8xMS_VS,
                                                     &gx2utCopySurfaceRect3D_VS};

static const GX2PixelShader* const PS_SHADERS[] = { &gx2utCopySurfaceRect_PS,
                                                    &gx2utCopySurfaceRect2xMS_PS,
                                                    &gx2utCopySurfaceRect4xMS_PS,
                                                    &gx2utCopySurfaceRect8xMS_PS,
                                                    &gx2utCopySurfaceRect3D_PS};

// Number of vertex/pixel shader pairs (derived from the pixel shader table).
static const u32 NUM_SHADERS = sizeof(PS_SHADERS)/sizeof(PS_SHADERS[0]);
84 
// Per-variant state for one copy shader pair (see the VS_SHADER_*_INDEX
// values for the available variants).
typedef struct _CopySurfaceShader {
    // The vertex and pixel shaders used for this variant. Both are loaded
    // from the generated headers. The fetch shader is generated at
    // run-time and is shared by all variants, so it is kept separately
    // (see the file-scope fetchShader) rather than in this structure.
    const GX2VertexShader *pVertexShader;
    const GX2PixelShader *pPixelShader;

    // Offsets of the "u_positions" and "u_texCoords" uniforms in the
    // vertex shader, as returned by GX2GetVertexUniformVarOffset().
    u32 u_positionLocation;
    u32 u_texcoordLocation;

    // Location of the "s_texture" sampler in the pixel shader, as returned
    // by GX2GetPixelSamplerVarLocation(). Used for both the texture and
    // the sampler binding.
    u32 textureLocation;
} CopySurfaceShader;
102 
// One entry per shader variant; filled in by GX2UTCopySurfaceRectInit().
static CopySurfaceShader g_copySurfaceShader[NUM_SHADERS];
// Fetch shader shared by every variant; initialized with zero attributes
// in GX2UTCopySurfaceRectInit().
static GX2FetchShader fetchShader;

#define FETCH_SHADER_SIZE 32  //hard code this value for now
// Backing storage for the fetch shader program. GX2UTCopySurfaceRectInit()
// asserts that GX2CalcFetchShaderSize(0) fits in this buffer.
ALIGNVAR(GX2_SHADER_ALIGNMENT) static u8 g_GX2UTFetchShader[FETCH_SHADER_SIZE];

// Bounds-checking for special optimizations.
// Filled in by GX2UTCopySurfaceRectInit(); each End value is the start
// address plus the second value reported by OSGetMemBound().
static u32 g_MEM1StartAddr = 0;
static u32 g_MEM1EndAddr = 0;
static u32 g_MEM2StartAddr = 0;
static u32 g_MEM2EndAddr = 0;
114 
// ----- GX2 Texture

// Sampler used to read the source texture. Its XY filter is switched
// between point and bilinear on every copy.
static GX2Sampler g_copySurfaceSampler;

// A pair of floats. Despite the member name, the tables below use it as a
// unit-square corner offset that scales both positions and texcoords.
typedef struct _VtxFmtF32x2 {
    f32 texcoord[2];
} VtxFmtF32x2;

// Unit-square corners in triangle-strip order:
// (0,0), (1,0), (0,1), (1,1).
static const VtxFmtF32x2 COPY_SURFACE_TRISTRIP_POSITION_DATA[] =
{
    {0.0f,  0.0f},
    {1.0f,  0.0f},
    {0.0f,  1.0f},
    {1.0f,  1.0f}
};

// Unit-square corners in perimeter order, used with GX2_PRIMITIVE_RECTS:
// (0,0), (1,0), (1,1), (0,1).
static const VtxFmtF32x2 COPY_SURFACE_RECT_POSITION_DATA[] =
{
    {0.0f,  0.0f},
    {1.0f,  0.0f},
    {1.0f,  1.0f},
    {0.0f,  1.0f}
};

// Same for both rect and triangle strips
static const u32 VERTEX_COUNT = sizeof(COPY_SURFACE_TRISTRIP_POSITION_DATA)
                         / sizeof(COPY_SURFACE_TRISTRIP_POSITION_DATA[0]);
142 
143 // Initializes how surfaces will be copied
GX2UTCopySurfaceRectInit()144 void GX2UTCopySurfaceRectInit()
145 {
146     // Get the Bounds of MEM1 and MEM2 for memory-specific
147     // optimizations.
148     OSGetMemBound(OSMem_MEM1, &g_MEM1StartAddr, &g_MEM1EndAddr);
149     g_MEM1EndAddr += g_MEM1StartAddr;
150     OSGetMemBound(OSMem_MEM2, &g_MEM2StartAddr, &g_MEM2EndAddr);
151     g_MEM2EndAddr += g_MEM2StartAddr;
152 
153     // Setup shaders
154     u32 i;
155 
156     for (i = 0; i < NUM_SHADERS; ++i)
157     {
158         g_copySurfaceShader[i].pVertexShader = VS_SHADERS[i];
159         g_copySurfaceShader[i].pPixelShader = PS_SHADERS[i];
160 
161         GX2Invalidate(GX2_INVALIDATE_CPU_SHADER,
162             g_copySurfaceShader[i].pVertexShader->shaderPtr,
163             g_copySurfaceShader[i].pVertexShader->shaderSize);
164 
165         GX2NotifyMemAlloc(g_copySurfaceShader[i].pVertexShader->shaderPtr,
166             g_copySurfaceShader[i].pVertexShader->shaderSize,
167             GX2_SHADER_ALIGNMENT);
168 
169         GX2Invalidate(GX2_INVALIDATE_CPU_SHADER,
170             g_copySurfaceShader[i].pPixelShader->shaderPtr,
171             g_copySurfaceShader[i].pPixelShader->shaderSize);
172 
173         GX2NotifyMemAlloc(g_copySurfaceShader[i].pPixelShader->shaderPtr,
174             g_copySurfaceShader[i].pPixelShader->shaderSize,
175             GX2_SHADER_ALIGNMENT);
176 
177         // Lookup the uniform locations in the vertex shader.
178         // The shader author chose the names "u_positions" and "u_texCoords"
179         g_copySurfaceShader[i].u_positionLocation =
180             (u32)GX2GetVertexUniformVarOffset(g_copySurfaceShader[i].pVertexShader, "u_positions");
181         g_copySurfaceShader[i].u_texcoordLocation =
182             (u32)GX2GetVertexUniformVarOffset(g_copySurfaceShader[i].pVertexShader, "u_texCoords");
183         ASSERT((g_copySurfaceShader[i].u_positionLocation != GX2_UNIFORM_VAR_INVALID_OFFSET)
184             && (g_copySurfaceShader[i].u_texcoordLocation != GX2_UNIFORM_VAR_INVALID_OFFSET)
185             && "Couldn't find the correct vertex shader uniforms.");
186 
187         // Lookup the texture locations in the vertex shaders and pixel shader.
188         // The shader author chose the names "s_texture"
189         g_copySurfaceShader[i].textureLocation =
190             (u32)GX2GetPixelSamplerVarLocation(g_copySurfaceShader[i].pPixelShader, "s_texture");
191     }
192 
193     // Setup sampler
194     GX2InitSampler(&g_copySurfaceSampler,
195                    GX2_TEX_CLAMP_CLAMP,
196                    GX2_TEX_XY_FILTER_BILINEAR);
197     GX2InitSamplerLOD(&g_copySurfaceSampler, 0.0f, 0.0f, 0.0f);
198     GX2InitSamplerZMFilter(&g_copySurfaceSampler, GX2_TEX_Z_FILTER_USE_XY, GX2_TEX_MIP_FILTER_POINT);
199 
200     ASSERT(GX2CalcFetchShaderSize(0) <= sizeof(g_GX2UTFetchShader) && "g_GX2UTFetchShader too small!\n");
201     GX2InitFetchShader(&fetchShader, g_GX2UTFetchShader, 0, NULL);
202 }
203 
204 // Copies a region from one surface to a region of another surface
205 // Setting the rectangular regions to a top-left of (0,0) and a bottom-right of (width, height)
206 // will blt the entire surface without any flipping (the right and bottom are exclusive).
207 // The rect dimensions should be relative to the mipmap level dimensions, not the base level dimensions.
GX2UTCopySurfaceRectOp(const GX2Surface * srcSurface,u32 srcMip,u32 srcSlice,GX2UTRect * srcRect,GX2Surface * dstSurface,u32 dstMip,u32 dstSlice,GX2UTRect * dstRect,void * dstAuxPtr,u32 dstAuxSize)208 void GX2UTCopySurfaceRectOp(const GX2Surface *srcSurface,
209                             u32 srcMip, u32 srcSlice, GX2UTRect *srcRect,
210                             GX2Surface *dstSurface,
211                             u32 dstMip, u32 dstSlice, GX2UTRect *dstRect,
212                             void* dstAuxPtr, u32 dstAuxSize)
213 {
214     GX2Boolean stretchBlt = GX2_TRUE;
215     GX2UTRect sourceRect, destinationRect;
216     GX2Boolean srcIsCompressed;
217     GX2Boolean dstIsCompressed;
218     static GX2Boolean initDone = GX2_FALSE;
219 
220     GX2UTDebugTagIndent(__func__);
221 
222     if (initDone == GX2_FALSE)
223     {
224         // Initialize the resources needed to copy surfaces.
225         GX2UTCopySurfaceRectInit();
226 
227         initDone = GX2_TRUE;
228     }
229 
230     // blt with width or height <= 0 does nothing
231     ASSERT((srcSurface != NULL) && (srcRect != NULL) && (dstSurface != NULL) && (dstRect != NULL));
232     ASSERT((srcRect->bottom > srcRect->top) && (srcRect->right > srcRect->left) && "Invalid source region!");
233     ASSERT((dstRect->bottom > dstRect->top) && (dstRect->right > dstRect->left) && "Invalid destination region!");
234 
235     // For compressed textures, we only support direct copies
236     // that are aligned on 4x4 block boundaries
237     srcIsCompressed = GX2SurfaceIsCompressed(srcSurface->format);
238     dstIsCompressed = GX2SurfaceIsCompressed(dstSurface->format);
239 
240     if ( srcIsCompressed || dstIsCompressed )
241     {
242         ASSERT(srcSurface->format == dstSurface->format);
243 
244         // Must be 4x4 block aligned
245         ASSERT(srcRect->top % 4 == 0);
246         ASSERT(srcRect->left % 4 == 0);
247 
248         // Must be 4x4 block aligned or cover the entire texture
249         ASSERT(srcRect->right % 4 == 0 || srcRect->right == GX2Max(1, srcSurface->width >> srcMip));
250         ASSERT(srcRect->bottom % 4 == 0 || srcRect->bottom == GX2Max(1, srcSurface->height >> srcMip));
251 
252         // Must be 1:1 copy
253         ASSERT((dstRect->right - dstRect->left) == (srcRect->right - srcRect->left));
254         ASSERT((dstRect->top - dstRect->bottom) == (srcRect->top - srcRect->bottom));
255     }
256 
257     ASSERT(srcMip < srcSurface->numMips);
258     ASSERT(dstMip < dstSurface->numMips);
259 
260     if(srcSurface->dim == GX2_SURFACE_DIM_3D)
261     {
262         ASSERT(srcSlice < GX2Max(1, srcSurface->depth >> srcMip));
263     }
264     else
265     {
266         ASSERT(srcSlice < srcSurface->depth);
267     }
268 
269     if(dstSurface->dim == GX2_SURFACE_DIM_3D)
270     {
271         ASSERT(dstSlice < GX2Max(1, dstSurface->depth >> dstMip));
272     }
273     else
274     {
275         ASSERT(dstSlice < dstSurface->depth);
276     }
277 
278     // MSAA Shader used to copy AA Surface
279     u32 shaderIdx = (srcSurface->dim != GX2_SURFACE_DIM_3D ? srcSurface->aa : VS_SHADER_3D_INDEX);
280 
281     u32 srcMipWidth  = GX2Max(1, srcSurface->width  >> srcMip);
282     u32 srcMipHeight = GX2Max(1, srcSurface->height >> srcMip);
283     u32 dstMipWidth  = GX2Max(1, dstSurface->width  >> dstMip);
284     u32 dstMipHeight = GX2Max(1, dstSurface->height >> dstMip);
285 
286     // Create shallow copy of dest surface to be used as render target
287     static GX2ColorBuffer colorBuffer;
288     colorBuffer.surface = *dstSurface;
289     colorBuffer.surface.width = (dstMip ? GX2UTRoundNearestPow2(dstMipWidth) : dstMipWidth);
290     colorBuffer.surface.height = (dstMip ? GX2UTRoundNearestPow2(dstMipHeight) : dstMipHeight);
291     if (colorBuffer.surface.dim == GX2_SURFACE_DIM_3D)
292     {
293         u32 depth = (dstMip ? GX2UTRoundNearestPow2(GX2Max(1, colorBuffer.surface.depth >> dstMip)) : colorBuffer.surface.depth);
294         colorBuffer.surface.depth = depth;
295     }
296     colorBuffer.surface.use = GX2_SURFACE_USE_COLOR_BUFFER_TEXTURE;
297 
298     if ( GX2UTIsTileModeThick(dstSurface) )
299     {
300         u32 sliceRemainder = dstSlice % 4;
301         u32 sliceStart = dstSlice - sliceRemainder;
302 
303         // Thick tiles are special because every 4 slices are in a single
304         // micro-tile.
305         GX2SetSurfaceSwizzle(&colorBuffer.surface, GX2UTGetSurfaceMipSliceSwizzle(&colorBuffer.surface, dstMip, sliceStart));
306         colorBuffer.surface.tileMode = GX2UTGetSurfaceMipSliceTileMode((const GX2Surface*)dstSurface, dstMip);
307         GX2InitColorBufferPtr(&colorBuffer, GX2UTGetSurfaceMipSlicePtr((const GX2Surface*)dstSurface, dstMip, sliceStart));
308 
309         // Use the HW slice logic for accessing different slices in the
310         // micro-tile.
311         colorBuffer.viewFirstSlice = sliceRemainder;
312     }
313     else
314     {
315         // Non-thick micro-tiles
316         GX2SetSurfaceSwizzle(&colorBuffer.surface, GX2UTGetSurfaceMipSliceSwizzle(&colorBuffer.surface, dstMip, dstSlice));
317         colorBuffer.surface.tileMode = GX2UTGetSurfaceMipSliceTileMode((const GX2Surface*)dstSurface, dstMip);
318         GX2InitColorBufferPtr(&colorBuffer, GX2UTGetSurfaceMipSlicePtr((const GX2Surface*)dstSurface, dstMip, dstSlice));
319 
320         // For all other formats just use the above code to select
321         // slices.
322         colorBuffer.viewFirstSlice = 0;
323     }
324     colorBuffer.viewMip = 0;
325     colorBuffer.viewNumSlices = 1;
326     colorBuffer.auxPtr = dstAuxPtr;
327     colorBuffer.auxSize = dstAuxSize;
328 
329 
330     // Create shallow copy of source surface to be used as texture
331     static GX2Texture texture;
332     texture.surface = *srcSurface;
333     srcMipWidth = (srcMip ? GX2UTRoundNearestPow2(srcMipWidth): srcMipWidth);
334     //srcMipHeight = (srcMip ? GX2UTRoundNearestPow2(srcMipHeight): srcMipHeight);
335     texture.surface.width = srcMipWidth;
336     texture.surface.height = srcMipHeight;
337     texture.surface.use = GX2_SURFACE_USE_TEXTURE;
338     if (texture.surface.dim == GX2_SURFACE_DIM_3D)
339     {
340         u32 depth = (dstMip ? GX2UTRoundNearestPow2(GX2Max(1, texture.surface.depth >> dstMip)) : texture.surface.depth);
341         texture.surface.depth = depth;
342     }
343     if(texture.surface.aa != GX2_AA_MODE_1X)
344     {
345         texture.surface.dim = GX2_SURFACE_DIM_2D_MSAA;
346     }
347 
348     if ( GX2UTIsTileModeThick(srcSurface) )
349     {
350         u32 sliceRemainder = srcSlice % 4;
351         u32 sliceStart = srcSlice - sliceRemainder;
352 
353         // Reset the swizzle, tile mode and pointers to directly
354         // show the surface
355         GX2SetSurfaceSwizzle(&texture.surface, GX2UTGetSurfaceMipSliceSwizzle(&texture.surface, srcMip, sliceStart));
356         texture.surface.tileMode = GX2UTGetSurfaceMipSliceTileMode((const GX2Surface*)srcSurface, srcMip);
357         GX2InitTexturePtrs(&texture, GX2UTGetSurfaceMipSlicePtr((const GX2Surface*)srcSurface, srcMip, sliceStart), NULL);
358 
359         // For all other formats just use the above code to select
360         // slices.
361         texture.viewFirstSlice = sliceRemainder;
362     }
363     else
364     {
365         // Reset the swizzle, tile mode and pointers to directly
366         // show the surface
367         GX2SetSurfaceSwizzle(&texture.surface, GX2UTGetSurfaceMipSliceSwizzle(&texture.surface, srcMip, srcSlice));
368         texture.surface.tileMode = GX2UTGetSurfaceMipSliceTileMode((const GX2Surface*)srcSurface, srcMip);
369         GX2InitTexturePtrs(&texture, GX2UTGetSurfaceMipSlicePtr((const GX2Surface*)srcSurface, srcMip, srcSlice), NULL);
370 
371         texture.viewFirstSlice = 0; // Slice logic was done above
372     }
373     texture.viewFirstMip = 0;
374     texture.viewNumMips = 1; // Only allow copying from srcMip
375     texture.viewNumSlices = 1;
376     texture.compSel = GX2_COMP_SEL_XYZW;
377 
378     /* For compressed textures, since we want a 1:1 copy
379      * and we can't write directly to compressed textures, we will
380      * fake the format. Its expected that the source and destination
381      * formats are the same
382      */
383     if ( srcIsCompressed )
384     {
385         texture.surface.width = (texture.surface.width + 3) / 4;
386         texture.surface.height = (texture.surface.height + 3) / 4;
387         srcMipWidth = (srcMipWidth + 3) / 4;
388         srcMipHeight = (srcMipHeight + 3) / 4;
389 
390         /* Adjust the rectangles accordingly */
391         sourceRect.left = srcRect->left / 4;
392         sourceRect.right = (srcRect->right + 3) / 4;
393         sourceRect.top = srcRect->top / 4;
394         sourceRect.bottom = (srcRect->bottom + 3) / 4;
395 
396         srcRect = &sourceRect;
397     }
398 
399     if ( dstIsCompressed )
400     {
401         colorBuffer.surface.width = (colorBuffer.surface.width + 3) /4;
402         colorBuffer.surface.height = (colorBuffer.surface.height + 3) /4;
403         dstMipWidth = (dstMipWidth + 3) / 4;
404         dstMipHeight = (dstMipHeight + 3) / 4;
405 
406         destinationRect.left = dstRect->left / 4;
407         destinationRect.right = (dstRect->right + 3) / 4;
408         destinationRect.top = dstRect->top / 4;
409         destinationRect.bottom = (dstRect->bottom + 3) / 4;
410 
411         dstRect = &destinationRect;
412     }
413 
414     switch (srcSurface->format)
415     {
416         case GX2_SURFACE_FORMAT_T_BC1_UNORM:
417         case GX2_SURFACE_FORMAT_T_BC1_SRGB:
418         case GX2_SURFACE_FORMAT_T_BC4_UNORM:
419         case GX2_SURFACE_FORMAT_T_BC4_SNORM:
420             // Use GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_UINT
421             // with quater dimensions
422             texture.surface.format = GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_UINT;
423             colorBuffer.surface.format = GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_UINT;
424             break;
425 
426         case GX2_SURFACE_FORMAT_T_BC2_UNORM:
427         case GX2_SURFACE_FORMAT_T_BC2_SRGB:
428         case GX2_SURFACE_FORMAT_T_BC3_UNORM:
429         case GX2_SURFACE_FORMAT_T_BC3_SRGB:
430         case GX2_SURFACE_FORMAT_T_BC5_UNORM:
431         case GX2_SURFACE_FORMAT_T_BC5_SNORM:
432             // Use GX2_SURFACE_FORMAT_TC_R32_G32_B32_A32_UINT
433             // with quater dimensions
434             texture.surface.format = GX2_SURFACE_FORMAT_TC_R32_G32_B32_A32_UINT;
435             colorBuffer.surface.format = GX2_SURFACE_FORMAT_TC_R32_G32_B32_A32_UINT;
436             break;
437     }
438 
439     // Set shaders
440     GX2SetFetchShader(&fetchShader);
441     GX2SetVertexShader(g_copySurfaceShader[shaderIdx].pVertexShader);
442     GX2SetPixelShader(g_copySurfaceShader[shaderIdx].pPixelShader);
443 
444     // Set the uniforms to be used by the vertex shader
445     f32 position_base_scale[] =
446     {
447         -1.0f + 2.0f * (f32)dstRect->left / (f32)dstMipWidth,
448          1.0f - 2.0f * (f32)dstRect->top / (f32)dstMipHeight,
449          2.0f * (f32)(dstRect->right - dstRect->left) / (f32)dstMipWidth,
450         -2.0f * (f32)(dstRect->bottom - dstRect->top) / (f32)dstMipHeight,
451     };
452     f32 texCoord_base_scale[] =
453     {
454         (f32)srcRect->left / (f32)srcMipWidth,
455         (f32)srcRect->top / (f32)srcMipHeight,
456         (f32)(srcRect->right - srcRect->left) / (f32)srcMipWidth,
457         (f32)(srcRect->bottom - srcRect->top) / (f32)srcMipHeight,
458     };
459 
460     GX2Boolean bUseRects;
461     const VtxFmtF32x2 *pos_offset_data;
462     if ((dstRect->right - dstRect->left) == (srcRect->right - srcRect->left) &&
463         (dstRect->bottom - dstRect->top) == (srcRect->bottom - srcRect->top))
464     {
465         bUseRects = GX2_TRUE;
466         stretchBlt = GX2_FALSE;
467         pos_offset_data = COPY_SURFACE_RECT_POSITION_DATA;
468     }
469     else
470     {
471         bUseRects = GX2_FALSE;
472         stretchBlt = GX2_TRUE;
473         pos_offset_data = COPY_SURFACE_TRISTRIP_POSITION_DATA;
474     }
475 
476 
477     // For certain formats direct copy works better on triangle
478     // strip primitives than rectangle primitives.
479     if (srcSurface->format == dstSurface->format)
480     {
481         switch (srcSurface->format)
482         {
483             // MEM2->MEM2
484             case GX2_SURFACE_FORMAT_TC_R8_UNORM:
485             case GX2_SURFACE_FORMAT_TC_R8_SNORM:
486                 if (SRC_IN_MEM2() && DST_IN_MEM2())
487                 {
488                     bUseRects = GX2_FALSE;
489                 }
490                 break;
491 
492             // MEM1->MEM1
493             case GX2_SURFACE_FORMAT_TC_R32_G32_UINT:
494             case GX2_SURFACE_FORMAT_TC_R32_G32_SINT:
495             case GX2_SURFACE_FORMAT_TC_R32_G32_FLOAT:
496             case GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_UNORM:
497             case GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_UINT:
498             case GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_SNORM:
499             case GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_SINT:
500             case GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_FLOAT:
501                 if (SRC_IN_MEM1() && DST_IN_MEM1())
502                 {
503                     bUseRects = GX2_FALSE;
504                 }
505                 break;
506 
507             // MEM1->MEM1 & MEM1->MEM2
508             case GX2_SURFACE_FORMAT_TC_R32_G32_B32_A32_UINT:
509             case GX2_SURFACE_FORMAT_TC_R32_G32_B32_A32_SINT:
510             case GX2_SURFACE_FORMAT_TC_R32_G32_B32_A32_FLOAT:
511                 if (SRC_IN_MEM1())
512                 {
513                     bUseRects = GX2_FALSE;
514                 }
515                 break;
516 
517             default:
518                 break;
519         }
520 
521         if (!bUseRects)
522         {
523             // Use Triangle strips
524             pos_offset_data = COPY_SURFACE_TRISTRIP_POSITION_DATA;
525         }
526     }
527 
528     for (int i = 0; i < 4; i++)
529     {
530         f32 slice = (texture.surface.dim == GX2_SURFACE_DIM_3D) ? (srcSlice % 4) : srcSlice;
531 
532         f32 pos[] = {
533             position_base_scale[0] + position_base_scale[2] * pos_offset_data[i].texcoord[0],
534             position_base_scale[1] + position_base_scale[3] * pos_offset_data[i].texcoord[1],
535             0.0,
536             1.0
537         };
538 
539         f32 tex[] = {
540             texCoord_base_scale[0] + texCoord_base_scale[2] * pos_offset_data[i].texcoord[0],
541             texCoord_base_scale[1] + texCoord_base_scale[3] * pos_offset_data[i].texcoord[1],
542             (slice / GX2Max(1, texture.surface.depth >> srcMip)),
543             1.0
544         };
545 
546         GX2SetVertexUniformReg(g_copySurfaceShader[shaderIdx].u_positionLocation + i*4, 1*4, pos);
547         GX2SetVertexUniformReg(g_copySurfaceShader[shaderIdx].u_texcoordLocation + i*4, 1*4, tex);
548     }
549 
550     //Determine if there would be a benefit to changing the surface formats.
551     //Only change the surface format if not doing format conversion or stretching.
552     if((srcSurface->format == dstSurface->format) &&
553        (srcSurface->aa     == dstSurface->aa)     &&
554        (stretchBlt         != GX2_TRUE))
555     {
556         //We are not doing format conversion or stretching, so we can do format replacement.
557         //Don't modify the original srcSurface or dstSurface, just the copies.
558         GX2Surface *pSrcSurf = &(texture.surface);
559         GX2Surface *pDstSurf = &(colorBuffer.surface);
560 
561         u32 bitsPerPixel = GX2GetSurfaceFormatBits(pSrcSurf->format);
562         ASSERT(bitsPerPixel > 0);
563 
564         if(32 == bitsPerPixel)
565         {
566             //This can improve performance for some formats without impacting quality.
567             pSrcSurf->format = GX2_SURFACE_FORMAT_TCS_R8_G8_B8_A8_UNORM;
568             pDstSurf->format = GX2_SURFACE_FORMAT_TCS_R8_G8_B8_A8_UNORM;
569         }
570         else switch(pSrcSurf->format) // Change SNORM8/16 bit formats to UNORM/UINT because they
571         {                             // do not suffer from rounding errors.
572             case GX2_SURFACE_FORMAT_TC_R8_SNORM:
573                 pSrcSurf->format = pDstSurf->format = GX2_SURFACE_FORMAT_TC_R8_UNORM;
574                 break;
575 
576             case GX2_SURFACE_FORMAT_TC_R8_G8_SNORM:
577                 pSrcSurf->format = pDstSurf->format = GX2_SURFACE_FORMAT_TC_R8_G8_UNORM;
578                 break;
579 
580             case GX2_SURFACE_FORMAT_TC_R16_SNORM:
581                 pSrcSurf->format = pDstSurf->format = GX2_SURFACE_FORMAT_TC_R16_UINT;
582                 break;
583 
584             case GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_SNORM:
585                 pSrcSurf->format = pDstSurf->format = GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_UINT;
586                 break;
587 
588             // Need to swizzle components to get correct results.
589             case GX2_SURFACE_FORMAT_TC_A1_B5_G5_R5_UNORM:
590                 GX2InitTextureCompSel(&texture, GX2_COMP_SEL_WZYX);
591                 break;
592 
593             // Testing showed the float to be faster and still accurate
594             case GX2_SURFACE_FORMAT_TC_R32_G32_UINT:
595             case GX2_SURFACE_FORMAT_TC_R32_G32_SINT:
596                 pSrcSurf->format = pDstSurf->format = GX2_SURFACE_FORMAT_TC_R32_G32_FLOAT;
597                 break;
598 
599             //Any format that is not T and C
600             case GX2_SURFACE_FORMAT_T_R4_G4_UNORM: //8 bit
601 
602             //32 bit formats are handled above
603 
604             case GX2_SURFACE_FORMAT_D_D32_FLOAT_S8_UINT_X24: //64 bit
605             //case GX2_SURFACE_FORMAT_T_R32_FLOAT_X8_X24: same as above
606             case GX2_SURFACE_FORMAT_T_X32_G8_UINT_X24:
607                 switch(bitsPerPixel)
608                 {
609                     case 8:
610                         pSrcSurf->format = GX2_SURFACE_FORMAT_TC_R8_UNORM;
611                         pDstSurf->format = GX2_SURFACE_FORMAT_TC_R8_UNORM;
612                         break;
613                     case 64:
614                         pSrcSurf->format = GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_UINT;
615                         pDstSurf->format = GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_UINT;
616                         break;
617                     default:
618                         ASSERT(0 && "Unhandled bpp");
619                 }
620                 break;
621         }
622     }
623 
624     // In-case we changed any formats
625     GX2CalcSurfaceSizeAndAlignment(&texture.surface);
626     GX2InitTextureRegs(&texture);
627     GX2CalcSurfaceSizeAndAlignment(&colorBuffer.surface);
628     GX2InitColorBufferRegs(&colorBuffer);
629     GX2SetColorBuffer(&colorBuffer, GX2_RENDER_TARGET_0);
630 
631     // This convention is used for backwards compatibility with GX2.
632     // Since we don't use viewMip, it is safe to invalidate only imagePtr
633     GX2Invalidate((GX2InvalidateType)(GX2_INVALIDATE_COLOR_BUFFER|GX2_INVALIDATE_TEXTURE), texture.surface.imagePtr, texture.surface.imageSize);
634 
635     // Setup sampler
636     if (stretchBlt)
637     {
638         GX2InitSamplerXYFilter(&g_copySurfaceSampler,
639                                GX2_TEX_XY_FILTER_BILINEAR,
640                                GX2_TEX_XY_FILTER_BILINEAR,
641                                GX2_TEX_ANISO_1_TO_1);
642     }
643     else
644     {
645         GX2InitSamplerXYFilter(&g_copySurfaceSampler,
646                                GX2_TEX_XY_FILTER_POINT,
647                                GX2_TEX_XY_FILTER_POINT,
648                                GX2_TEX_ANISO_1_TO_1);
649     }
650     // Set texture and sampler to be used by pixel shader
651     GX2SetPixelTexture(&texture, g_copySurfaceShader[shaderIdx].textureLocation);
652     GX2SetPixelSampler(&g_copySurfaceSampler, g_copySurfaceShader[shaderIdx].textureLocation);
653 
654     // Render to destination surface dimensions
655     GX2SetViewport(0, 0, (f32)dstMipWidth, (f32)dstMipHeight, 0.0f, 1.0f);
656     GX2SetScissor(0, 0, dstMipWidth, dstMipHeight);
657 
658     //Call the render function pointer
659     if (bUseRects)
660     {
661         GX2Draw(GX2_PRIMITIVE_RECTS, VERTEX_COUNT);
662     }
663     else
664     {
665         GX2Draw(GX2_PRIMITIVE_TRIANGLE_STRIP, VERTEX_COUNT);
666     }
667 
668     // Invalidate the color buffer output
669     GX2Invalidate(GX2_INVALIDATE_COLOR_BUFFER, colorBuffer.surface.imagePtr, colorBuffer.surface.imageSize);
670 
671     GX2UTDebugTagUndent();
672 }
673 
674 
675 //Setup all of the constant renderstate needed for the copy.
GX2UTSetCopyState(GX2Boolean enable)676 void GX2UTSetCopyState(GX2Boolean enable)
677 {
678     if (enable)
679     {
680         // If your application's steady state can be set to GX2UT common state
681         // using a small number of discrete GX2 calls, then customize here
682         // instead of using GX2UTSetCommonState().
683         GX2UTSetCommonState();
684 
685         // Set additional required GX2 state
686         // Disable depth test and writes
687         GX2SetDepthOnlyControl(GX2_FALSE, GX2_FALSE, GX2_COMPARE_ALWAYS);
688 
689         // GX2 default for ColorControl
690         GX2SetColorControl(GX2_LOGIC_OP_COPY, GX2_DISABLE, GX2_DISABLE, GX2_ENABLE);
691 
692         // Set Complex GX2 State for our operation
693         GX2SetSpecialState(GX2_SPECIAL_STATE_COPY, GX2_ENABLE);
694     }
695     else
696     {
697         // Disable any complex GX2 state
698         GX2SetSpecialState(GX2_SPECIAL_STATE_COPY, GX2_DISABLE);
699 
700         // The purpose of the following is to return the context to GX2 default
701         // state.  If your application uses a different "steady state", then
702         // customize
703         GX2SetDepthStencilControl(
704               GX2_TRUE,            //depthTestEnable
705               GX2_TRUE,            //depthWriteEnable
706               GX2_COMPARE_LESS,    //depthFunc
707               GX2_FALSE,           //stencilTestEnable
708               GX2_FALSE,           //backStencilEnable
709               GX2_COMPARE_ALWAYS,  //frontStencilFunc
710               GX2_STENCIL_REPLACE, //frontStencilZPass
711               GX2_STENCIL_REPLACE, //frontStencilZFail
712               GX2_STENCIL_REPLACE, //frontStencilFail
713               GX2_COMPARE_ALWAYS,  //backStencilFunc
714               GX2_STENCIL_REPLACE, //backStencilZPass
715               GX2_STENCIL_REPLACE, //backStencilZFail
716               GX2_STENCIL_REPLACE);//backStencilFail
717     }
718 }
719