1 /*---------------------------------------------------------------------------*
2
3 Copyright (C) Nintendo. All rights reserved.
4
5 These coded instructions, statements, and computer programs contain
6 proprietary information of Nintendo of America Inc. and/or Nintendo
7 Company Ltd., and are protected by Federal copyright law. They may
8 not be disclosed to third parties or copied or duplicated in any form,
9 in whole or in part, without the prior written consent of Nintendo.
10
11 *---------------------------------------------------------------------------*/
12
13 #include <stdio.h>
14 #include <string.h>
15 #include <math.h>
16
17 #if defined(WIN32) || defined(WIN64)
18 #include <pc/gx2.h>
19 #include <pc/demo.h>
20 #endif
21 #include <cafe/os.h>
22 #include <cafe/mem.h>
23 #include <cafe/gx2.h>
24 #include <cafe/demo.h>
25
26 #include <cafe/gfd.h>
27 #include <cafe/gx2ut.h>
28
29 //Include generated shaders
30 #include "shaders/headers/gx2utCopySurfaceRect3D.h"
31 #include "shaders/headers/gx2utCopySurfaceRect.h"
32 #include "shaders/headers/gx2utCopySurfaceRect2xMS.h"
33 #include "shaders/headers/gx2utCopySurfaceRect4xMS.h"
34 #include "shaders/headers/gx2utCopySurfaceRect8xMS.h"
35
36 #define SURFACE_IN_MEM1(surf, mipLevel) SURFACE_IN_MEMRANGE((surf), (mipLevel), g_MEM1StartAddr, g_MEM1EndAddr)
37 #define SURFACE_IN_MEM2(surf, mipLevel) SURFACE_IN_MEMRANGE((surf), (mipLevel), g_MEM2StartAddr, g_MEM2EndAddr)
38
39 #define SRC_IN_MEM1() SURFACE_IN_MEM1(srcSurface, srcMip)
40 #define SRC_IN_MEM2() SURFACE_IN_MEM2(srcSurface, srcMip)
41 #define DST_IN_MEM1() SURFACE_IN_MEM1((const GX2Surface*)dstSurface, dstMip)
42 #define DST_IN_MEM2() SURFACE_IN_MEM2((const GX2Surface*)dstSurface, dstMip)
43
44 /* Returns true if the specified mipmap level is within the memory range
45 * specified by start and end.
46 */
SURFACE_IN_MEMRANGE(const GX2Surface * surf,u32 mipLevel,u32 start,u32 end)47 inline bool SURFACE_IN_MEMRANGE(const GX2Surface *surf,
48 u32 mipLevel, u32 start, u32 end)
49 {
50 return ((!mipLevel && (u32)(surf)->imagePtr >= (start) &&
51 (u32)(surf)->imagePtr < end) ||
52 (mipLevel && (u32)(surf)->mipPtr >= (start) &&
53 (u32)(surf)->mipPtr < end));
54 }
55
56 #define VS_SHADER_1X_INDEX 0
57 #define VS_SHADER_2X_INDEX 1
58 #define VS_SHADER_4X_INDEX 2
59 #define VS_SHADER_8X_INDEX 3
60 #define VS_SHADER_3D_INDEX 4
61 /*
62 * 0 - Simple copy shader
63 * 1 - Copy shader that reads a 2xMSAA surface and converts it to
64 a single sampled output.
65 * 2 - Copy shader that reads a 4xMSAA surface and converts it to
66 a single sampled output.
67 * 3 - Copy shader that reads a 8xMSAA surface and converts it to
68 a single sampled output.
69 * 4 - Copy shader correctly reads from 3D textures.
70 */
71 static const GX2VertexShader* const VS_SHADERS[] = { &gx2utCopySurfaceRect_VS,
72 &gx2utCopySurfaceRect2xMS_VS,
73 &gx2utCopySurfaceRect4xMS_VS,
74 &gx2utCopySurfaceRect8xMS_VS,
75 &gx2utCopySurfaceRect3D_VS};
76
77 static const GX2PixelShader* const PS_SHADERS[] = { &gx2utCopySurfaceRect_PS,
78 &gx2utCopySurfaceRect2xMS_PS,
79 &gx2utCopySurfaceRect4xMS_PS,
80 &gx2utCopySurfaceRect8xMS_PS,
81 &gx2utCopySurfaceRect3D_PS};
82
83 static const u32 NUM_SHADERS = sizeof(PS_SHADERS)/sizeof(PS_SHADERS[0]);
84
85 typedef struct _CopySurfaceShader {
86 // These variables hold the three types of shaders needed for a call to
87 // GX2SetShaders. The vertex and pixel shaders are loaded from the
88 // header, but since the fetch shader is generated at run-time
89 // it must be handled slightly differently.
90 const GX2VertexShader *pVertexShader;
91 const GX2PixelShader *pPixelShader;
92
93 // The register locations where the offset uniforms are stored for
94 // the pixel and vertex shaders.
95 u32 u_positionLocation;
96 u32 u_texcoordLocation;
97
98 // The register locations where the texture uniforms are stored for
99 // the pixel and vertex shaders.
100 u32 textureLocation;
101 } CopySurfaceShader;
102
103 static CopySurfaceShader g_copySurfaceShader[NUM_SHADERS];
104 static GX2FetchShader fetchShader;
105
106 #define FETCH_SHADER_SIZE 32 //hard code this value for now
107 ALIGNVAR(GX2_SHADER_ALIGNMENT) static u8 g_GX2UTFetchShader[FETCH_SHADER_SIZE];
108
109 // Bounds-checking for special optimizations
110 static u32 g_MEM1StartAddr = 0;
111 static u32 g_MEM1EndAddr = 0;
112 static u32 g_MEM2StartAddr = 0;
113 static u32 g_MEM2EndAddr = 0;
114
115 // ----- GX2 Texture
116
117 static GX2Sampler g_copySurfaceSampler;
118
119 typedef struct _VtxFmtF32x2 {
120 f32 texcoord[2];
121 } VtxFmtF32x2;
122
123 static const VtxFmtF32x2 COPY_SURFACE_TRISTRIP_POSITION_DATA[] =
124 {
125 {0.0f, 0.0f},
126 {1.0f, 0.0f},
127 {0.0f, 1.0f},
128 {1.0f, 1.0f}
129 };
130
131 static const VtxFmtF32x2 COPY_SURFACE_RECT_POSITION_DATA[] =
132 {
133 {0.0f, 0.0f},
134 {1.0f, 0.0f},
135 {1.0f, 1.0f},
136 {0.0f, 1.0f}
137 };
138
139 // Same for both rect and triangle strips
140 static const u32 VERTEX_COUNT = sizeof(COPY_SURFACE_TRISTRIP_POSITION_DATA)
141 / sizeof(COPY_SURFACE_TRISTRIP_POSITION_DATA[0]);
142
143 // Initializes how surfaces will be copied
GX2UTCopySurfaceRectInit()144 void GX2UTCopySurfaceRectInit()
145 {
146 // Get the Bounds of MEM1 and MEM2 for memory-specific
147 // optimizations.
148 OSGetMemBound(OSMem_MEM1, &g_MEM1StartAddr, &g_MEM1EndAddr);
149 g_MEM1EndAddr += g_MEM1StartAddr;
150 OSGetMemBound(OSMem_MEM2, &g_MEM2StartAddr, &g_MEM2EndAddr);
151 g_MEM2EndAddr += g_MEM2StartAddr;
152
153 // Setup shaders
154 u32 i;
155
156 for (i = 0; i < NUM_SHADERS; ++i)
157 {
158 g_copySurfaceShader[i].pVertexShader = VS_SHADERS[i];
159 g_copySurfaceShader[i].pPixelShader = PS_SHADERS[i];
160
161 GX2Invalidate(GX2_INVALIDATE_CPU_SHADER,
162 g_copySurfaceShader[i].pVertexShader->shaderPtr,
163 g_copySurfaceShader[i].pVertexShader->shaderSize);
164
165 GX2NotifyMemAlloc(g_copySurfaceShader[i].pVertexShader->shaderPtr,
166 g_copySurfaceShader[i].pVertexShader->shaderSize,
167 GX2_SHADER_ALIGNMENT);
168
169 GX2Invalidate(GX2_INVALIDATE_CPU_SHADER,
170 g_copySurfaceShader[i].pPixelShader->shaderPtr,
171 g_copySurfaceShader[i].pPixelShader->shaderSize);
172
173 GX2NotifyMemAlloc(g_copySurfaceShader[i].pPixelShader->shaderPtr,
174 g_copySurfaceShader[i].pPixelShader->shaderSize,
175 GX2_SHADER_ALIGNMENT);
176
177 // Lookup the uniform locations in the vertex shader.
178 // The shader author chose the names "u_positions" and "u_texCoords"
179 g_copySurfaceShader[i].u_positionLocation =
180 (u32)GX2GetVertexUniformVarOffset(g_copySurfaceShader[i].pVertexShader, "u_positions");
181 g_copySurfaceShader[i].u_texcoordLocation =
182 (u32)GX2GetVertexUniformVarOffset(g_copySurfaceShader[i].pVertexShader, "u_texCoords");
183 ASSERT((g_copySurfaceShader[i].u_positionLocation != GX2_UNIFORM_VAR_INVALID_OFFSET)
184 && (g_copySurfaceShader[i].u_texcoordLocation != GX2_UNIFORM_VAR_INVALID_OFFSET)
185 && "Couldn't find the correct vertex shader uniforms.");
186
187 // Lookup the texture locations in the vertex shaders and pixel shader.
188 // The shader author chose the names "s_texture"
189 g_copySurfaceShader[i].textureLocation =
190 (u32)GX2GetPixelSamplerVarLocation(g_copySurfaceShader[i].pPixelShader, "s_texture");
191 }
192
193 // Setup sampler
194 GX2InitSampler(&g_copySurfaceSampler,
195 GX2_TEX_CLAMP_CLAMP,
196 GX2_TEX_XY_FILTER_BILINEAR);
197 GX2InitSamplerLOD(&g_copySurfaceSampler, 0.0f, 0.0f, 0.0f);
198 GX2InitSamplerZMFilter(&g_copySurfaceSampler, GX2_TEX_Z_FILTER_USE_XY, GX2_TEX_MIP_FILTER_POINT);
199
200 ASSERT(GX2CalcFetchShaderSize(0) <= sizeof(g_GX2UTFetchShader) && "g_GX2UTFetchShader too small!\n");
201 GX2InitFetchShader(&fetchShader, g_GX2UTFetchShader, 0, NULL);
202 }
203
204 // Copies a region from one surface to a region of another surface
205 // Setting the rectangular regions to a top-left of (0,0) and a bottom-right of (width, height)
206 // will blt the entire surface without any flipping (the right and bottom are exclusive).
207 // The rect dimensions should be relative to the mipmap level dimensions, not the base level dimensions.
GX2UTCopySurfaceRectOp(const GX2Surface * srcSurface,u32 srcMip,u32 srcSlice,GX2UTRect * srcRect,GX2Surface * dstSurface,u32 dstMip,u32 dstSlice,GX2UTRect * dstRect,void * dstAuxPtr,u32 dstAuxSize)208 void GX2UTCopySurfaceRectOp(const GX2Surface *srcSurface,
209 u32 srcMip, u32 srcSlice, GX2UTRect *srcRect,
210 GX2Surface *dstSurface,
211 u32 dstMip, u32 dstSlice, GX2UTRect *dstRect,
212 void* dstAuxPtr, u32 dstAuxSize)
213 {
214 GX2Boolean stretchBlt = GX2_TRUE;
215 GX2UTRect sourceRect, destinationRect;
216 GX2Boolean srcIsCompressed;
217 GX2Boolean dstIsCompressed;
218 static GX2Boolean initDone = GX2_FALSE;
219
220 GX2UTDebugTagIndent(__func__);
221
222 if (initDone == GX2_FALSE)
223 {
224 // Initialize the resources needed to copy surfaces.
225 GX2UTCopySurfaceRectInit();
226
227 initDone = GX2_TRUE;
228 }
229
230 // blt with width or height <= 0 does nothing
231 ASSERT((srcSurface != NULL) && (srcRect != NULL) && (dstSurface != NULL) && (dstRect != NULL));
232 ASSERT((srcRect->bottom > srcRect->top) && (srcRect->right > srcRect->left) && "Invalid source region!");
233 ASSERT((dstRect->bottom > dstRect->top) && (dstRect->right > dstRect->left) && "Invalid destination region!");
234
235 // For compressed textures, we only support direct copies
236 // that are aligned on 4x4 block boundaries
237 srcIsCompressed = GX2SurfaceIsCompressed(srcSurface->format);
238 dstIsCompressed = GX2SurfaceIsCompressed(dstSurface->format);
239
240 if ( srcIsCompressed || dstIsCompressed )
241 {
242 ASSERT(srcSurface->format == dstSurface->format);
243
244 // Must be 4x4 block aligned
245 ASSERT(srcRect->top % 4 == 0);
246 ASSERT(srcRect->left % 4 == 0);
247
248 // Must be 4x4 block aligned or cover the entire texture
249 ASSERT(srcRect->right % 4 == 0 || srcRect->right == GX2Max(1, srcSurface->width >> srcMip));
250 ASSERT(srcRect->bottom % 4 == 0 || srcRect->bottom == GX2Max(1, srcSurface->height >> srcMip));
251
252 // Must be 1:1 copy
253 ASSERT((dstRect->right - dstRect->left) == (srcRect->right - srcRect->left));
254 ASSERT((dstRect->top - dstRect->bottom) == (srcRect->top - srcRect->bottom));
255 }
256
257 ASSERT(srcMip < srcSurface->numMips);
258 ASSERT(dstMip < dstSurface->numMips);
259
260 if(srcSurface->dim == GX2_SURFACE_DIM_3D)
261 {
262 ASSERT(srcSlice < GX2Max(1, srcSurface->depth >> srcMip));
263 }
264 else
265 {
266 ASSERT(srcSlice < srcSurface->depth);
267 }
268
269 if(dstSurface->dim == GX2_SURFACE_DIM_3D)
270 {
271 ASSERT(dstSlice < GX2Max(1, dstSurface->depth >> dstMip));
272 }
273 else
274 {
275 ASSERT(dstSlice < dstSurface->depth);
276 }
277
278 // MSAA Shader used to copy AA Surface
279 u32 shaderIdx = (srcSurface->dim != GX2_SURFACE_DIM_3D ? srcSurface->aa : VS_SHADER_3D_INDEX);
280
281 u32 srcMipWidth = GX2Max(1, srcSurface->width >> srcMip);
282 u32 srcMipHeight = GX2Max(1, srcSurface->height >> srcMip);
283 u32 dstMipWidth = GX2Max(1, dstSurface->width >> dstMip);
284 u32 dstMipHeight = GX2Max(1, dstSurface->height >> dstMip);
285
286 // Create shallow copy of dest surface to be used as render target
287 static GX2ColorBuffer colorBuffer;
288 colorBuffer.surface = *dstSurface;
289 colorBuffer.surface.width = (dstMip ? GX2UTRoundNearestPow2(dstMipWidth) : dstMipWidth);
290 colorBuffer.surface.height = (dstMip ? GX2UTRoundNearestPow2(dstMipHeight) : dstMipHeight);
291 if (colorBuffer.surface.dim == GX2_SURFACE_DIM_3D)
292 {
293 u32 depth = (dstMip ? GX2UTRoundNearestPow2(GX2Max(1, colorBuffer.surface.depth >> dstMip)) : colorBuffer.surface.depth);
294 colorBuffer.surface.depth = depth;
295 }
296 colorBuffer.surface.use = GX2_SURFACE_USE_COLOR_BUFFER_TEXTURE;
297
298 if ( GX2UTIsTileModeThick(dstSurface) )
299 {
300 u32 sliceRemainder = dstSlice % 4;
301 u32 sliceStart = dstSlice - sliceRemainder;
302
303 // Thick tiles are special because every 4 slices are in a single
304 // micro-tile.
305 GX2SetSurfaceSwizzle(&colorBuffer.surface, GX2UTGetSurfaceMipSliceSwizzle(&colorBuffer.surface, dstMip, sliceStart));
306 colorBuffer.surface.tileMode = GX2UTGetSurfaceMipSliceTileMode((const GX2Surface*)dstSurface, dstMip);
307 GX2InitColorBufferPtr(&colorBuffer, GX2UTGetSurfaceMipSlicePtr((const GX2Surface*)dstSurface, dstMip, sliceStart));
308
309 // Use the HW slice logic for accessing different slices in the
310 // micro-tile.
311 colorBuffer.viewFirstSlice = sliceRemainder;
312 }
313 else
314 {
315 // Non-thick micro-tiles
316 GX2SetSurfaceSwizzle(&colorBuffer.surface, GX2UTGetSurfaceMipSliceSwizzle(&colorBuffer.surface, dstMip, dstSlice));
317 colorBuffer.surface.tileMode = GX2UTGetSurfaceMipSliceTileMode((const GX2Surface*)dstSurface, dstMip);
318 GX2InitColorBufferPtr(&colorBuffer, GX2UTGetSurfaceMipSlicePtr((const GX2Surface*)dstSurface, dstMip, dstSlice));
319
320 // For all other formats just use the above code to select
321 // slices.
322 colorBuffer.viewFirstSlice = 0;
323 }
324 colorBuffer.viewMip = 0;
325 colorBuffer.viewNumSlices = 1;
326 colorBuffer.auxPtr = dstAuxPtr;
327 colorBuffer.auxSize = dstAuxSize;
328
329
330 // Create shallow copy of source surface to be used as texture
331 static GX2Texture texture;
332 texture.surface = *srcSurface;
333 srcMipWidth = (srcMip ? GX2UTRoundNearestPow2(srcMipWidth): srcMipWidth);
334 //srcMipHeight = (srcMip ? GX2UTRoundNearestPow2(srcMipHeight): srcMipHeight);
335 texture.surface.width = srcMipWidth;
336 texture.surface.height = srcMipHeight;
337 texture.surface.use = GX2_SURFACE_USE_TEXTURE;
338 if (texture.surface.dim == GX2_SURFACE_DIM_3D)
339 {
340 u32 depth = (dstMip ? GX2UTRoundNearestPow2(GX2Max(1, texture.surface.depth >> dstMip)) : texture.surface.depth);
341 texture.surface.depth = depth;
342 }
343 if(texture.surface.aa != GX2_AA_MODE_1X)
344 {
345 texture.surface.dim = GX2_SURFACE_DIM_2D_MSAA;
346 }
347
348 if ( GX2UTIsTileModeThick(srcSurface) )
349 {
350 u32 sliceRemainder = srcSlice % 4;
351 u32 sliceStart = srcSlice - sliceRemainder;
352
353 // Reset the swizzle, tile mode and pointers to directly
354 // show the surface
355 GX2SetSurfaceSwizzle(&texture.surface, GX2UTGetSurfaceMipSliceSwizzle(&texture.surface, srcMip, sliceStart));
356 texture.surface.tileMode = GX2UTGetSurfaceMipSliceTileMode((const GX2Surface*)srcSurface, srcMip);
357 GX2InitTexturePtrs(&texture, GX2UTGetSurfaceMipSlicePtr((const GX2Surface*)srcSurface, srcMip, sliceStart), NULL);
358
359 // For all other formats just use the above code to select
360 // slices.
361 texture.viewFirstSlice = sliceRemainder;
362 }
363 else
364 {
365 // Reset the swizzle, tile mode and pointers to directly
366 // show the surface
367 GX2SetSurfaceSwizzle(&texture.surface, GX2UTGetSurfaceMipSliceSwizzle(&texture.surface, srcMip, srcSlice));
368 texture.surface.tileMode = GX2UTGetSurfaceMipSliceTileMode((const GX2Surface*)srcSurface, srcMip);
369 GX2InitTexturePtrs(&texture, GX2UTGetSurfaceMipSlicePtr((const GX2Surface*)srcSurface, srcMip, srcSlice), NULL);
370
371 texture.viewFirstSlice = 0; // Slice logic was done above
372 }
373 texture.viewFirstMip = 0;
374 texture.viewNumMips = 1; // Only allow copying from srcMip
375 texture.viewNumSlices = 1;
376 texture.compSel = GX2_COMP_SEL_XYZW;
377
378 /* For compressed textures, since we want a 1:1 copy
379 * and we can't write directly to compressed textures, we will
380 * fake the format. Its expected that the source and destination
381 * formats are the same
382 */
383 if ( srcIsCompressed )
384 {
385 texture.surface.width = (texture.surface.width + 3) / 4;
386 texture.surface.height = (texture.surface.height + 3) / 4;
387 srcMipWidth = (srcMipWidth + 3) / 4;
388 srcMipHeight = (srcMipHeight + 3) / 4;
389
390 /* Adjust the rectangles accordingly */
391 sourceRect.left = srcRect->left / 4;
392 sourceRect.right = (srcRect->right + 3) / 4;
393 sourceRect.top = srcRect->top / 4;
394 sourceRect.bottom = (srcRect->bottom + 3) / 4;
395
396 srcRect = &sourceRect;
397 }
398
399 if ( dstIsCompressed )
400 {
401 colorBuffer.surface.width = (colorBuffer.surface.width + 3) /4;
402 colorBuffer.surface.height = (colorBuffer.surface.height + 3) /4;
403 dstMipWidth = (dstMipWidth + 3) / 4;
404 dstMipHeight = (dstMipHeight + 3) / 4;
405
406 destinationRect.left = dstRect->left / 4;
407 destinationRect.right = (dstRect->right + 3) / 4;
408 destinationRect.top = dstRect->top / 4;
409 destinationRect.bottom = (dstRect->bottom + 3) / 4;
410
411 dstRect = &destinationRect;
412 }
413
414 switch (srcSurface->format)
415 {
416 case GX2_SURFACE_FORMAT_T_BC1_UNORM:
417 case GX2_SURFACE_FORMAT_T_BC1_SRGB:
418 case GX2_SURFACE_FORMAT_T_BC4_UNORM:
419 case GX2_SURFACE_FORMAT_T_BC4_SNORM:
420 // Use GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_UINT
421 // with quater dimensions
422 texture.surface.format = GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_UINT;
423 colorBuffer.surface.format = GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_UINT;
424 break;
425
426 case GX2_SURFACE_FORMAT_T_BC2_UNORM:
427 case GX2_SURFACE_FORMAT_T_BC2_SRGB:
428 case GX2_SURFACE_FORMAT_T_BC3_UNORM:
429 case GX2_SURFACE_FORMAT_T_BC3_SRGB:
430 case GX2_SURFACE_FORMAT_T_BC5_UNORM:
431 case GX2_SURFACE_FORMAT_T_BC5_SNORM:
432 // Use GX2_SURFACE_FORMAT_TC_R32_G32_B32_A32_UINT
433 // with quater dimensions
434 texture.surface.format = GX2_SURFACE_FORMAT_TC_R32_G32_B32_A32_UINT;
435 colorBuffer.surface.format = GX2_SURFACE_FORMAT_TC_R32_G32_B32_A32_UINT;
436 break;
437 }
438
439 // Set shaders
440 GX2SetFetchShader(&fetchShader);
441 GX2SetVertexShader(g_copySurfaceShader[shaderIdx].pVertexShader);
442 GX2SetPixelShader(g_copySurfaceShader[shaderIdx].pPixelShader);
443
444 // Set the uniforms to be used by the vertex shader
445 f32 position_base_scale[] =
446 {
447 -1.0f + 2.0f * (f32)dstRect->left / (f32)dstMipWidth,
448 1.0f - 2.0f * (f32)dstRect->top / (f32)dstMipHeight,
449 2.0f * (f32)(dstRect->right - dstRect->left) / (f32)dstMipWidth,
450 -2.0f * (f32)(dstRect->bottom - dstRect->top) / (f32)dstMipHeight,
451 };
452 f32 texCoord_base_scale[] =
453 {
454 (f32)srcRect->left / (f32)srcMipWidth,
455 (f32)srcRect->top / (f32)srcMipHeight,
456 (f32)(srcRect->right - srcRect->left) / (f32)srcMipWidth,
457 (f32)(srcRect->bottom - srcRect->top) / (f32)srcMipHeight,
458 };
459
460 GX2Boolean bUseRects;
461 const VtxFmtF32x2 *pos_offset_data;
462 if ((dstRect->right - dstRect->left) == (srcRect->right - srcRect->left) &&
463 (dstRect->bottom - dstRect->top) == (srcRect->bottom - srcRect->top))
464 {
465 bUseRects = GX2_TRUE;
466 stretchBlt = GX2_FALSE;
467 pos_offset_data = COPY_SURFACE_RECT_POSITION_DATA;
468 }
469 else
470 {
471 bUseRects = GX2_FALSE;
472 stretchBlt = GX2_TRUE;
473 pos_offset_data = COPY_SURFACE_TRISTRIP_POSITION_DATA;
474 }
475
476
477 // For certain formats direct copy works better on triangle
478 // strip primitives than rectangle primitives.
479 if (srcSurface->format == dstSurface->format)
480 {
481 switch (srcSurface->format)
482 {
483 // MEM2->MEM2
484 case GX2_SURFACE_FORMAT_TC_R8_UNORM:
485 case GX2_SURFACE_FORMAT_TC_R8_SNORM:
486 if (SRC_IN_MEM2() && DST_IN_MEM2())
487 {
488 bUseRects = GX2_FALSE;
489 }
490 break;
491
492 // MEM1->MEM1
493 case GX2_SURFACE_FORMAT_TC_R32_G32_UINT:
494 case GX2_SURFACE_FORMAT_TC_R32_G32_SINT:
495 case GX2_SURFACE_FORMAT_TC_R32_G32_FLOAT:
496 case GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_UNORM:
497 case GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_UINT:
498 case GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_SNORM:
499 case GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_SINT:
500 case GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_FLOAT:
501 if (SRC_IN_MEM1() && DST_IN_MEM1())
502 {
503 bUseRects = GX2_FALSE;
504 }
505 break;
506
507 // MEM1->MEM1 & MEM1->MEM2
508 case GX2_SURFACE_FORMAT_TC_R32_G32_B32_A32_UINT:
509 case GX2_SURFACE_FORMAT_TC_R32_G32_B32_A32_SINT:
510 case GX2_SURFACE_FORMAT_TC_R32_G32_B32_A32_FLOAT:
511 if (SRC_IN_MEM1())
512 {
513 bUseRects = GX2_FALSE;
514 }
515 break;
516
517 default:
518 break;
519 }
520
521 if (!bUseRects)
522 {
523 // Use Triangle strips
524 pos_offset_data = COPY_SURFACE_TRISTRIP_POSITION_DATA;
525 }
526 }
527
528 for (int i = 0; i < 4; i++)
529 {
530 f32 slice = (texture.surface.dim == GX2_SURFACE_DIM_3D) ? (srcSlice % 4) : srcSlice;
531
532 f32 pos[] = {
533 position_base_scale[0] + position_base_scale[2] * pos_offset_data[i].texcoord[0],
534 position_base_scale[1] + position_base_scale[3] * pos_offset_data[i].texcoord[1],
535 0.0,
536 1.0
537 };
538
539 f32 tex[] = {
540 texCoord_base_scale[0] + texCoord_base_scale[2] * pos_offset_data[i].texcoord[0],
541 texCoord_base_scale[1] + texCoord_base_scale[3] * pos_offset_data[i].texcoord[1],
542 (slice / GX2Max(1, texture.surface.depth >> srcMip)),
543 1.0
544 };
545
546 GX2SetVertexUniformReg(g_copySurfaceShader[shaderIdx].u_positionLocation + i*4, 1*4, pos);
547 GX2SetVertexUniformReg(g_copySurfaceShader[shaderIdx].u_texcoordLocation + i*4, 1*4, tex);
548 }
549
550 //Determine if there would be a benefit to changing the surface formats.
551 //Only change the surface format if not doing format conversion or stretching.
552 if((srcSurface->format == dstSurface->format) &&
553 (srcSurface->aa == dstSurface->aa) &&
554 (stretchBlt != GX2_TRUE))
555 {
556 //We are not doing format conversion or stretching, so we can do format replacement.
557 //Don't modify the original srcSurface or dstSurface, just the copies.
558 GX2Surface *pSrcSurf = &(texture.surface);
559 GX2Surface *pDstSurf = &(colorBuffer.surface);
560
561 u32 bitsPerPixel = GX2GetSurfaceFormatBits(pSrcSurf->format);
562 ASSERT(bitsPerPixel > 0);
563
564 if(32 == bitsPerPixel)
565 {
566 //This can improve performance for some formats without impacting quality.
567 pSrcSurf->format = GX2_SURFACE_FORMAT_TCS_R8_G8_B8_A8_UNORM;
568 pDstSurf->format = GX2_SURFACE_FORMAT_TCS_R8_G8_B8_A8_UNORM;
569 }
570 else switch(pSrcSurf->format) // Change SNORM8/16 bit formats to UNORM/UINT because they
571 { // do not suffer from rounding errors.
572 case GX2_SURFACE_FORMAT_TC_R8_SNORM:
573 pSrcSurf->format = pDstSurf->format = GX2_SURFACE_FORMAT_TC_R8_UNORM;
574 break;
575
576 case GX2_SURFACE_FORMAT_TC_R8_G8_SNORM:
577 pSrcSurf->format = pDstSurf->format = GX2_SURFACE_FORMAT_TC_R8_G8_UNORM;
578 break;
579
580 case GX2_SURFACE_FORMAT_TC_R16_SNORM:
581 pSrcSurf->format = pDstSurf->format = GX2_SURFACE_FORMAT_TC_R16_UINT;
582 break;
583
584 case GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_SNORM:
585 pSrcSurf->format = pDstSurf->format = GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_UINT;
586 break;
587
588 // Need to swizzle components to get correct results.
589 case GX2_SURFACE_FORMAT_TC_A1_B5_G5_R5_UNORM:
590 GX2InitTextureCompSel(&texture, GX2_COMP_SEL_WZYX);
591 break;
592
593 // Testing showed the float to be faster and still accurate
594 case GX2_SURFACE_FORMAT_TC_R32_G32_UINT:
595 case GX2_SURFACE_FORMAT_TC_R32_G32_SINT:
596 pSrcSurf->format = pDstSurf->format = GX2_SURFACE_FORMAT_TC_R32_G32_FLOAT;
597 break;
598
599 //Any format that is not T and C
600 case GX2_SURFACE_FORMAT_T_R4_G4_UNORM: //8 bit
601
602 //32 bit formats are handled above
603
604 case GX2_SURFACE_FORMAT_D_D32_FLOAT_S8_UINT_X24: //64 bit
605 //case GX2_SURFACE_FORMAT_T_R32_FLOAT_X8_X24: same as above
606 case GX2_SURFACE_FORMAT_T_X32_G8_UINT_X24:
607 switch(bitsPerPixel)
608 {
609 case 8:
610 pSrcSurf->format = GX2_SURFACE_FORMAT_TC_R8_UNORM;
611 pDstSurf->format = GX2_SURFACE_FORMAT_TC_R8_UNORM;
612 break;
613 case 64:
614 pSrcSurf->format = GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_UINT;
615 pDstSurf->format = GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_UINT;
616 break;
617 default:
618 ASSERT(0 && "Unhandled bpp");
619 }
620 break;
621 }
622 }
623
624 // In-case we changed any formats
625 GX2CalcSurfaceSizeAndAlignment(&texture.surface);
626 GX2InitTextureRegs(&texture);
627 GX2CalcSurfaceSizeAndAlignment(&colorBuffer.surface);
628 GX2InitColorBufferRegs(&colorBuffer);
629 GX2SetColorBuffer(&colorBuffer, GX2_RENDER_TARGET_0);
630
631 // This convention is used for backwards compatibility with GX2.
632 // Since we don't use viewMip, it is safe to invalidate only imagePtr
633 GX2Invalidate((GX2InvalidateType)(GX2_INVALIDATE_COLOR_BUFFER|GX2_INVALIDATE_TEXTURE), texture.surface.imagePtr, texture.surface.imageSize);
634
635 // Setup sampler
636 if (stretchBlt)
637 {
638 GX2InitSamplerXYFilter(&g_copySurfaceSampler,
639 GX2_TEX_XY_FILTER_BILINEAR,
640 GX2_TEX_XY_FILTER_BILINEAR,
641 GX2_TEX_ANISO_1_TO_1);
642 }
643 else
644 {
645 GX2InitSamplerXYFilter(&g_copySurfaceSampler,
646 GX2_TEX_XY_FILTER_POINT,
647 GX2_TEX_XY_FILTER_POINT,
648 GX2_TEX_ANISO_1_TO_1);
649 }
650 // Set texture and sampler to be used by pixel shader
651 GX2SetPixelTexture(&texture, g_copySurfaceShader[shaderIdx].textureLocation);
652 GX2SetPixelSampler(&g_copySurfaceSampler, g_copySurfaceShader[shaderIdx].textureLocation);
653
654 // Render to destination surface dimensions
655 GX2SetViewport(0, 0, (f32)dstMipWidth, (f32)dstMipHeight, 0.0f, 1.0f);
656 GX2SetScissor(0, 0, dstMipWidth, dstMipHeight);
657
658 //Call the render function pointer
659 if (bUseRects)
660 {
661 GX2Draw(GX2_PRIMITIVE_RECTS, VERTEX_COUNT);
662 }
663 else
664 {
665 GX2Draw(GX2_PRIMITIVE_TRIANGLE_STRIP, VERTEX_COUNT);
666 }
667
668 // Invalidate the color buffer output
669 GX2Invalidate(GX2_INVALIDATE_COLOR_BUFFER, colorBuffer.surface.imagePtr, colorBuffer.surface.imageSize);
670
671 GX2UTDebugTagUndent();
672 }
673
674
675 //Setup all of the constant renderstate needed for the copy.
GX2UTSetCopyState(GX2Boolean enable)676 void GX2UTSetCopyState(GX2Boolean enable)
677 {
678 if (enable)
679 {
680 // If your application's steady state can be set to GX2UT common state
681 // using a small number of discrete GX2 calls, then customize here
682 // instead of using GX2UTSetCommonState().
683 GX2UTSetCommonState();
684
685 // Set additional required GX2 state
686 // Disable depth test and writes
687 GX2SetDepthOnlyControl(GX2_FALSE, GX2_FALSE, GX2_COMPARE_ALWAYS);
688
689 // GX2 default for ColorControl
690 GX2SetColorControl(GX2_LOGIC_OP_COPY, GX2_DISABLE, GX2_DISABLE, GX2_ENABLE);
691
692 // Set Complex GX2 State for our operation
693 GX2SetSpecialState(GX2_SPECIAL_STATE_COPY, GX2_ENABLE);
694 }
695 else
696 {
697 // Disable any complex GX2 state
698 GX2SetSpecialState(GX2_SPECIAL_STATE_COPY, GX2_DISABLE);
699
700 // The purpose of the following is to return the context to GX2 default
701 // state. If your application uses a different "steady state", then
702 // customize
703 GX2SetDepthStencilControl(
704 GX2_TRUE, //depthTestEnable
705 GX2_TRUE, //depthWriteEnable
706 GX2_COMPARE_LESS, //depthFunc
707 GX2_FALSE, //stencilTestEnable
708 GX2_FALSE, //backStencilEnable
709 GX2_COMPARE_ALWAYS, //frontStencilFunc
710 GX2_STENCIL_REPLACE, //frontStencilZPass
711 GX2_STENCIL_REPLACE, //frontStencilZFail
712 GX2_STENCIL_REPLACE, //frontStencilFail
713 GX2_COMPARE_ALWAYS, //backStencilFunc
714 GX2_STENCIL_REPLACE, //backStencilZPass
715 GX2_STENCIL_REPLACE, //backStencilZFail
716 GX2_STENCIL_REPLACE);//backStencilFail
717 }
718 }
719