/*---------------------------------------------------------------------------* Copyright 2010-2014 Nintendo. All rights reserved. These coded instructions, statements, and computer programs contain proprietary information of Nintendo of America Inc. and/or Nintendo Company Ltd., and are protected by Federal copyright law. They may not be disclosed to third parties or copied or duplicated in any form, in whole or in part, without the prior written consent of Nintendo. *---------------------------------------------------------------------------*/ #include #include #include #if defined(WIN32) || defined(WIN64) #include #include #endif #include #include #include #include #include #include //Include generated shaders #include "shaders/headers/gx2utCopySurfaceRect3D.h" #include "shaders/headers/gx2utCopySurfaceRect.h" #include "shaders/headers/gx2utCopySurfaceRect2xMS.h" #include "shaders/headers/gx2utCopySurfaceRect4xMS.h" #include "shaders/headers/gx2utCopySurfaceRect8xMS.h" #define SURFACE_IN_MEM1(surf, mipLevel) SURFACE_IN_MEMRANGE((surf), (mipLevel), g_MEM1StartAddr, g_MEM1EndAddr) #define SURFACE_IN_MEM2(surf, mipLevel) SURFACE_IN_MEMRANGE((surf), (mipLevel), g_MEM2StartAddr, g_MEM2EndAddr) #define SRC_IN_MEM1() SURFACE_IN_MEM1(srcSurface, srcMip) #define SRC_IN_MEM2() SURFACE_IN_MEM2(srcSurface, srcMip) #define DST_IN_MEM1() SURFACE_IN_MEM1((const GX2Surface*)dstSurface, dstMip) #define DST_IN_MEM2() SURFACE_IN_MEM2((const GX2Surface*)dstSurface, dstMip) /* Returns true if the specified mipmap level is within the memory range * specified by start and end. */ inline bool SURFACE_IN_MEMRANGE(const GX2Surface *surf, u32 mipLevel, u32 start, u32 end) { return ((!mipLevel && (u32)(surf)->imagePtr >= (start) && (u32)(surf)->imagePtr < end) || (mipLevel && (u32)(surf)->mipPtr >= (start) && (u32)(surf)->mipPtr < end)); } #define VS_SHADER_1X_INDEX 0 #define VS_SHADER_2X_INDEX 1 #define VS_SHADER_4X_INDEX 2 #define VS_SHADER_8X_INDEX 3 #define VS_SHADER_3D_INDEX 4 /* * 0 - Simple copy shader * 1 - Copy shader that reads a 2xMSAA surface and converts it to a single sampled output. * 2 - Copy shader that reads a 4xMSAA surface and converts it to a single sampled output. * 3 - Copy shader that reads a 8xMSAA surface and converts it to a single sampled output. * 4 - Copy shader correctly reads from 3D textures. */ static const GX2VertexShader* const VS_SHADERS[] = { &gx2utCopySurfaceRect_VS, &gx2utCopySurfaceRect2xMS_VS, &gx2utCopySurfaceRect4xMS_VS, &gx2utCopySurfaceRect8xMS_VS, &gx2utCopySurfaceRect3D_VS}; static const GX2PixelShader* const PS_SHADERS[] = { &gx2utCopySurfaceRect_PS, &gx2utCopySurfaceRect2xMS_PS, &gx2utCopySurfaceRect4xMS_PS, &gx2utCopySurfaceRect8xMS_PS, &gx2utCopySurfaceRect3D_PS}; static const u32 NUM_SHADERS = sizeof(PS_SHADERS)/sizeof(PS_SHADERS[0]); typedef struct _CopySurfaceShader { // These variables hold the three types of shaders needed for a call to // GX2SetShaders. The vertex and pixel shaders are loaded from the // header, but since the fetch shader is generated at run-time // it must be handled slightly differently. const GX2VertexShader *pVertexShader; const GX2PixelShader *pPixelShader; // The register locations where the offset uniforms are stored for // the pixel and vertex shaders. u32 u_positionLocation; u32 u_texcoordLocation; // The register locations where the texture uniforms are stored for // the pixel and vertex shaders. u32 textureLocation; } CopySurfaceShader; static CopySurfaceShader g_copySurfaceShader[NUM_SHADERS]; static GX2FetchShader fetchShader; #define FETCH_SHADER_SIZE 32 //hard code this value for now ALIGNVAR(GX2_SHADER_ALIGNMENT) static u8 g_GX2UTFetchShader[FETCH_SHADER_SIZE]; // Bounds-checking for special optimizations static u32 g_MEM1StartAddr = 0; static u32 g_MEM1EndAddr = 0; static u32 g_MEM2StartAddr = 0; static u32 g_MEM2EndAddr = 0; // ----- GX2 Texture static GX2Sampler g_copySurfaceSampler; typedef struct _VtxFmtF32x2 { f32 texcoord[2]; } VtxFmtF32x2; static const VtxFmtF32x2 COPY_SURFACE_TRISTRIP_POSITION_DATA[] = { {0.0f, 0.0f}, {1.0f, 0.0f}, {0.0f, 1.0f}, {1.0f, 1.0f} }; static const VtxFmtF32x2 COPY_SURFACE_RECT_POSITION_DATA[] = { {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f} }; // Same for both rect and triangle strips static const u32 VERTEX_COUNT = sizeof(COPY_SURFACE_TRISTRIP_POSITION_DATA) / sizeof(COPY_SURFACE_TRISTRIP_POSITION_DATA[0]); // Initializes how surfaces will be copied void GX2UTCopySurfaceRectInit() { // Get the Bounds of MEM1 and MEM2 for memory-specific // optimizations. OSGetMemBound(OSMem_MEM1, &g_MEM1StartAddr, &g_MEM1EndAddr); g_MEM1EndAddr += g_MEM1StartAddr; OSGetMemBound(OSMem_MEM2, &g_MEM2StartAddr, &g_MEM2EndAddr); g_MEM2EndAddr += g_MEM2StartAddr; // Setup shaders u32 i; for (i = 0; i < NUM_SHADERS; ++i) { g_copySurfaceShader[i].pVertexShader = VS_SHADERS[i]; g_copySurfaceShader[i].pPixelShader = PS_SHADERS[i]; GX2Invalidate(GX2_INVALIDATE_CPU_SHADER, g_copySurfaceShader[i].pVertexShader->shaderPtr, g_copySurfaceShader[i].pVertexShader->shaderSize); GX2NotifyMemAlloc(g_copySurfaceShader[i].pVertexShader->shaderPtr, g_copySurfaceShader[i].pVertexShader->shaderSize, GX2_SHADER_ALIGNMENT); GX2Invalidate(GX2_INVALIDATE_CPU_SHADER, g_copySurfaceShader[i].pPixelShader->shaderPtr, g_copySurfaceShader[i].pPixelShader->shaderSize); GX2NotifyMemAlloc(g_copySurfaceShader[i].pPixelShader->shaderPtr, g_copySurfaceShader[i].pPixelShader->shaderSize, GX2_SHADER_ALIGNMENT); // Lookup the uniform locations in the vertex shader. // The shader author chose the names "u_positions" and "u_texCoords" g_copySurfaceShader[i].u_positionLocation = (u32)GX2GetVertexUniformVarOffset(g_copySurfaceShader[i].pVertexShader, "u_positions"); g_copySurfaceShader[i].u_texcoordLocation = (u32)GX2GetVertexUniformVarOffset(g_copySurfaceShader[i].pVertexShader, "u_texCoords"); ASSERT((g_copySurfaceShader[i].u_positionLocation != GX2_UNIFORM_VAR_INVALID_OFFSET) && (g_copySurfaceShader[i].u_texcoordLocation != GX2_UNIFORM_VAR_INVALID_OFFSET) && "Couldn't find the correct vertex shader uniforms."); // Lookup the texture locations in the vertex shaders and pixel shader. // The shader author chose the names "s_texture" g_copySurfaceShader[i].textureLocation = (u32)GX2GetPixelSamplerVarLocation(g_copySurfaceShader[i].pPixelShader, "s_texture"); } // Setup sampler GX2InitSampler(&g_copySurfaceSampler, GX2_TEX_CLAMP_CLAMP, GX2_TEX_XY_FILTER_BILINEAR); GX2InitSamplerLOD(&g_copySurfaceSampler, 0.0f, 0.0f, 0.0f); GX2InitSamplerZMFilter(&g_copySurfaceSampler, GX2_TEX_Z_FILTER_USE_XY, GX2_TEX_MIP_FILTER_POINT); ASSERT(GX2CalcFetchShaderSize(0) <= sizeof(g_GX2UTFetchShader) && "g_GX2UTFetchShader too small!\n"); GX2InitFetchShader(&fetchShader, g_GX2UTFetchShader, 0, NULL); } // Copies a region from one surface to a region of another surface // Setting the rectangular regions to a top-left of (0,0) and a bottom-right of (width, height) // will blt the entire surface without any flipping (the right and bottom are exclusive). // The rect dimensions should be relative to the mipmap level dimensions, not the base level dimensions. void GX2UTCopySurfaceRectOp(const GX2Surface *srcSurface, u32 srcMip, u32 srcSlice, GX2UTRect *srcRect, GX2Surface *dstSurface, u32 dstMip, u32 dstSlice, GX2UTRect *dstRect, void* dstAuxPtr, u32 dstAuxSize) { GX2Boolean stretchBlt = GX2_TRUE; GX2UTRect sourceRect, destinationRect; GX2Boolean srcIsCompressed; GX2Boolean dstIsCompressed; static GX2Boolean initDone = GX2_FALSE; GX2UTDebugTagIndent(__func__); if (initDone == GX2_FALSE) { // Initialize the resources needed to copy surfaces. GX2UTCopySurfaceRectInit(); initDone = GX2_TRUE; } // blt with width or height <= 0 does nothing ASSERT((srcSurface != NULL) && (srcRect != NULL) && (dstSurface != NULL) && (dstRect != NULL)); ASSERT((srcRect->bottom > srcRect->top) && (srcRect->right > srcRect->left) && "Invalid source region!"); ASSERT((dstRect->bottom > dstRect->top) && (dstRect->right > dstRect->left) && "Invalid destination region!"); // For compressed textures, we only support direct copies // that are aligned on 4x4 block boundaries srcIsCompressed = GX2SurfaceIsCompressed(srcSurface->format); dstIsCompressed = GX2SurfaceIsCompressed(dstSurface->format); if ( srcIsCompressed || dstIsCompressed ) { ASSERT(srcSurface->format == dstSurface->format); // Must be 4x4 block aligned ASSERT(srcRect->top % 4 == 0); ASSERT(srcRect->left % 4 == 0); // Must be 4x4 block aligned or cover the entire texture ASSERT(srcRect->right % 4 == 0 || srcRect->right == GX2Max(1, srcSurface->width >> srcMip)); ASSERT(srcRect->bottom % 4 == 0 || srcRect->bottom == GX2Max(1, srcSurface->height >> srcMip)); // Must be 1:1 copy ASSERT((dstRect->right - dstRect->left) == (srcRect->right - srcRect->left)); ASSERT((dstRect->top - dstRect->bottom) == (srcRect->top - srcRect->bottom)); } ASSERT(srcMip < srcSurface->numMips); ASSERT(dstMip < dstSurface->numMips); if(srcSurface->dim == GX2_SURFACE_DIM_3D) { ASSERT(srcSlice < GX2Max(1, srcSurface->depth >> srcMip)); } else { ASSERT(srcSlice < srcSurface->depth); } if(dstSurface->dim == GX2_SURFACE_DIM_3D) { ASSERT(dstSlice < GX2Max(1, dstSurface->depth >> dstMip)); } else { ASSERT(dstSlice < dstSurface->depth); } // MSAA Shader used to copy AA Surface u32 shaderIdx = (srcSurface->dim != GX2_SURFACE_DIM_3D ? srcSurface->aa : VS_SHADER_3D_INDEX); u32 srcMipWidth = GX2Max(1, srcSurface->width >> srcMip); u32 srcMipHeight = GX2Max(1, srcSurface->height >> srcMip); u32 dstMipWidth = GX2Max(1, dstSurface->width >> dstMip); u32 dstMipHeight = GX2Max(1, dstSurface->height >> dstMip); // Create shallow copy of dest surface to be used as render target static GX2ColorBuffer colorBuffer; colorBuffer.surface = *dstSurface; colorBuffer.surface.width = (dstMip ? GX2UTRoundNearestPow2(dstMipWidth) : dstMipWidth); colorBuffer.surface.height = (dstMip ? GX2UTRoundNearestPow2(dstMipHeight) : dstMipHeight); if (colorBuffer.surface.dim == GX2_SURFACE_DIM_3D) { u32 depth = (dstMip ? GX2UTRoundNearestPow2(GX2Max(1, colorBuffer.surface.depth >> dstMip)) : colorBuffer.surface.depth); colorBuffer.surface.depth = depth; } colorBuffer.surface.use = GX2_SURFACE_USE_COLOR_BUFFER_TEXTURE; if ( GX2UTIsTileModeThick(dstSurface) ) { u32 sliceRemainder = dstSlice % 4; u32 sliceStart = dstSlice - sliceRemainder; // Thick tiles are special because every 4 slices are in a single // micro-tile. GX2SetSurfaceSwizzle(&colorBuffer.surface, GX2UTGetSurfaceMipSliceSwizzle(&colorBuffer.surface, dstMip, sliceStart)); colorBuffer.surface.tileMode = GX2UTGetSurfaceMipSliceTileMode((const GX2Surface*)dstSurface, dstMip); GX2InitColorBufferPtr(&colorBuffer, GX2UTGetSurfaceMipSlicePtr((const GX2Surface*)dstSurface, dstMip, sliceStart)); // Use the HW slice logic for accessing different slices in the // micro-tile. colorBuffer.viewFirstSlice = sliceRemainder; } else { // Non-thick micro-tiles GX2SetSurfaceSwizzle(&colorBuffer.surface, GX2UTGetSurfaceMipSliceSwizzle(&colorBuffer.surface, dstMip, dstSlice)); colorBuffer.surface.tileMode = GX2UTGetSurfaceMipSliceTileMode((const GX2Surface*)dstSurface, dstMip); GX2InitColorBufferPtr(&colorBuffer, GX2UTGetSurfaceMipSlicePtr((const GX2Surface*)dstSurface, dstMip, dstSlice)); // For all other formats just use the above code to select // slices. colorBuffer.viewFirstSlice = 0; } colorBuffer.viewMip = 0; colorBuffer.viewNumSlices = 1; colorBuffer.auxPtr = dstAuxPtr; colorBuffer.auxSize = dstAuxSize; // Create shallow copy of source surface to be used as texture static GX2Texture texture; texture.surface = *srcSurface; srcMipWidth = (srcMip ? GX2UTRoundNearestPow2(srcMipWidth): srcMipWidth); //srcMipHeight = (srcMip ? GX2UTRoundNearestPow2(srcMipHeight): srcMipHeight); texture.surface.width = srcMipWidth; texture.surface.height = srcMipHeight; texture.surface.use = GX2_SURFACE_USE_TEXTURE; if (texture.surface.dim == GX2_SURFACE_DIM_3D) { u32 depth = (dstMip ? GX2UTRoundNearestPow2(GX2Max(1, texture.surface.depth >> dstMip)) : texture.surface.depth); texture.surface.depth = depth; } if(texture.surface.aa != GX2_AA_MODE_1X) { texture.surface.dim = GX2_SURFACE_DIM_2D_MSAA; } if ( GX2UTIsTileModeThick(srcSurface) ) { u32 sliceRemainder = srcSlice % 4; u32 sliceStart = srcSlice - sliceRemainder; // Reset the swizzle, tile mode and pointers to directly // show the surface GX2SetSurfaceSwizzle(&texture.surface, GX2UTGetSurfaceMipSliceSwizzle(&texture.surface, srcMip, sliceStart)); texture.surface.tileMode = GX2UTGetSurfaceMipSliceTileMode((const GX2Surface*)srcSurface, srcMip); GX2InitTexturePtrs(&texture, GX2UTGetSurfaceMipSlicePtr((const GX2Surface*)srcSurface, srcMip, sliceStart), NULL); // For all other formats just use the above code to select // slices. texture.viewFirstSlice = sliceRemainder; } else { // Reset the swizzle, tile mode and pointers to directly // show the surface GX2SetSurfaceSwizzle(&texture.surface, GX2UTGetSurfaceMipSliceSwizzle(&texture.surface, srcMip, srcSlice)); texture.surface.tileMode = GX2UTGetSurfaceMipSliceTileMode((const GX2Surface*)srcSurface, srcMip); GX2InitTexturePtrs(&texture, GX2UTGetSurfaceMipSlicePtr((const GX2Surface*)srcSurface, srcMip, srcSlice), NULL); texture.viewFirstSlice = 0; // Slice logic was done above } texture.viewFirstMip = 0; texture.viewNumMips = 1; // Only allow copying from srcMip texture.viewNumSlices = 1; texture.compSel = GX2_COMP_SEL_XYZW; /* For compressed textures, since we want a 1:1 copy * and we can't write directly to compressed textures, we will * fake the format. Its expected that the source and destination * formats are the same */ if ( srcIsCompressed ) { texture.surface.width = (texture.surface.width + 3) / 4; texture.surface.height = (texture.surface.height + 3) / 4; srcMipWidth = (srcMipWidth + 3) / 4; srcMipHeight = (srcMipHeight + 3) / 4; /* Adjust the rectangles accordingly */ sourceRect.left = srcRect->left / 4; sourceRect.right = (srcRect->right + 3) / 4; sourceRect.top = srcRect->top / 4; sourceRect.bottom = (srcRect->bottom + 3) / 4; srcRect = &sourceRect; } if ( dstIsCompressed ) { colorBuffer.surface.width = (colorBuffer.surface.width + 3) /4; colorBuffer.surface.height = (colorBuffer.surface.height + 3) /4; dstMipWidth = (dstMipWidth + 3) / 4; dstMipHeight = (dstMipHeight + 3) / 4; destinationRect.left = dstRect->left / 4; destinationRect.right = (dstRect->right + 3) / 4; destinationRect.top = dstRect->top / 4; destinationRect.bottom = (dstRect->bottom + 3) / 4; dstRect = &destinationRect; } switch (srcSurface->format) { case GX2_SURFACE_FORMAT_T_BC1_UNORM: case GX2_SURFACE_FORMAT_T_BC1_SRGB: case GX2_SURFACE_FORMAT_T_BC4_UNORM: case GX2_SURFACE_FORMAT_T_BC4_SNORM: // Use GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_UINT // with quater dimensions texture.surface.format = GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_UINT; colorBuffer.surface.format = GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_UINT; break; case GX2_SURFACE_FORMAT_T_BC2_UNORM: case GX2_SURFACE_FORMAT_T_BC2_SRGB: case GX2_SURFACE_FORMAT_T_BC3_UNORM: case GX2_SURFACE_FORMAT_T_BC3_SRGB: case GX2_SURFACE_FORMAT_T_BC5_UNORM: case GX2_SURFACE_FORMAT_T_BC5_SNORM: // Use GX2_SURFACE_FORMAT_TC_R32_G32_B32_A32_UINT // with quater dimensions texture.surface.format = GX2_SURFACE_FORMAT_TC_R32_G32_B32_A32_UINT; colorBuffer.surface.format = GX2_SURFACE_FORMAT_TC_R32_G32_B32_A32_UINT; break; } // Set shaders GX2SetFetchShader(&fetchShader); GX2SetVertexShader(g_copySurfaceShader[shaderIdx].pVertexShader); GX2SetPixelShader(g_copySurfaceShader[shaderIdx].pPixelShader); // Set the uniforms to be used by the vertex shader f32 position_base_scale[] = { -1.0f + 2.0f * (f32)dstRect->left / (f32)dstMipWidth, 1.0f - 2.0f * (f32)dstRect->top / (f32)dstMipHeight, 2.0f * (f32)(dstRect->right - dstRect->left) / (f32)dstMipWidth, -2.0f * (f32)(dstRect->bottom - dstRect->top) / (f32)dstMipHeight, }; f32 texCoord_base_scale[] = { (f32)srcRect->left / (f32)srcMipWidth, (f32)srcRect->top / (f32)srcMipHeight, (f32)(srcRect->right - srcRect->left) / (f32)srcMipWidth, (f32)(srcRect->bottom - srcRect->top) / (f32)srcMipHeight, }; GX2Boolean bUseRects; const VtxFmtF32x2 *pos_offset_data; if ((dstRect->right - dstRect->left) == (srcRect->right - srcRect->left) && (dstRect->bottom - dstRect->top) == (srcRect->bottom - srcRect->top)) { bUseRects = GX2_TRUE; stretchBlt = GX2_FALSE; pos_offset_data = COPY_SURFACE_RECT_POSITION_DATA; } else { bUseRects = GX2_FALSE; stretchBlt = GX2_TRUE; pos_offset_data = COPY_SURFACE_TRISTRIP_POSITION_DATA; } // For certain formats direct copy works better on triangle // strip primitives than rectangle primitives. if (srcSurface->format == dstSurface->format) { switch (srcSurface->format) { // MEM2->MEM2 case GX2_SURFACE_FORMAT_TC_R8_UNORM: case GX2_SURFACE_FORMAT_TC_R8_SNORM: if (SRC_IN_MEM2() && DST_IN_MEM2()) { bUseRects = GX2_FALSE; } break; // MEM1->MEM1 case GX2_SURFACE_FORMAT_TC_R32_G32_UINT: case GX2_SURFACE_FORMAT_TC_R32_G32_SINT: case GX2_SURFACE_FORMAT_TC_R32_G32_FLOAT: case GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_UNORM: case GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_UINT: case GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_SNORM: case GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_SINT: case GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_FLOAT: if (SRC_IN_MEM1() && DST_IN_MEM1()) { bUseRects = GX2_FALSE; } break; // MEM1->MEM1 & MEM1->MEM2 case GX2_SURFACE_FORMAT_TC_R32_G32_B32_A32_UINT: case GX2_SURFACE_FORMAT_TC_R32_G32_B32_A32_SINT: case GX2_SURFACE_FORMAT_TC_R32_G32_B32_A32_FLOAT: if (SRC_IN_MEM1()) { bUseRects = GX2_FALSE; } break; default: break; } if (!bUseRects) { // Use Triangle strips pos_offset_data = COPY_SURFACE_TRISTRIP_POSITION_DATA; } } for (int i = 0; i < 4; i++) { f32 slice = (texture.surface.dim == GX2_SURFACE_DIM_3D) ? (srcSlice % 4) : srcSlice; f32 pos[] = { position_base_scale[0] + position_base_scale[2] * pos_offset_data[i].texcoord[0], position_base_scale[1] + position_base_scale[3] * pos_offset_data[i].texcoord[1], 0.0, 1.0 }; f32 tex[] = { texCoord_base_scale[0] + texCoord_base_scale[2] * pos_offset_data[i].texcoord[0], texCoord_base_scale[1] + texCoord_base_scale[3] * pos_offset_data[i].texcoord[1], (slice / GX2Max(1, texture.surface.depth >> srcMip)), 1.0 }; GX2SetVertexUniformReg(g_copySurfaceShader[shaderIdx].u_positionLocation + i*4, 1*4, pos); GX2SetVertexUniformReg(g_copySurfaceShader[shaderIdx].u_texcoordLocation + i*4, 1*4, tex); } //Determine if there would be a benefit to changing the surface formats. //Only change the surface format if not doing format conversion or stretching. if((srcSurface->format == dstSurface->format) && (srcSurface->aa == dstSurface->aa) && (stretchBlt != GX2_TRUE)) { //We are not doing format conversion or stretching, so we can do format replacement. //Don't modify the original srcSurface or dstSurface, just the copies. GX2Surface *pSrcSurf = &(texture.surface); GX2Surface *pDstSurf = &(colorBuffer.surface); u32 bitsPerPixel = GX2GetSurfaceFormatBits(pSrcSurf->format); ASSERT(bitsPerPixel > 0); if(32 == bitsPerPixel) { //This can improve performance for some formats without impacting quality. pSrcSurf->format = GX2_SURFACE_FORMAT_TCS_R8_G8_B8_A8_UNORM; pDstSurf->format = GX2_SURFACE_FORMAT_TCS_R8_G8_B8_A8_UNORM; } else switch(pSrcSurf->format) // Change SNORM8/16 bit formats to UNORM/UINT because they { // do not suffer from rounding errors. case GX2_SURFACE_FORMAT_TC_R8_SNORM: pSrcSurf->format = pDstSurf->format = GX2_SURFACE_FORMAT_TC_R8_UNORM; break; case GX2_SURFACE_FORMAT_TC_R8_G8_SNORM: pSrcSurf->format = pDstSurf->format = GX2_SURFACE_FORMAT_TC_R8_G8_UNORM; break; case GX2_SURFACE_FORMAT_TC_R16_SNORM: pSrcSurf->format = pDstSurf->format = GX2_SURFACE_FORMAT_TC_R16_UINT; break; case GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_SNORM: pSrcSurf->format = pDstSurf->format = GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_UINT; break; // Need to swizzle components to get correct results. case GX2_SURFACE_FORMAT_TC_A1_B5_G5_R5_UNORM: GX2InitTextureCompSel(&texture, GX2_COMP_SEL_WZYX); break; // Testing showed the float to be faster and still accurate case GX2_SURFACE_FORMAT_TC_R32_G32_UINT: case GX2_SURFACE_FORMAT_TC_R32_G32_SINT: pSrcSurf->format = pDstSurf->format = GX2_SURFACE_FORMAT_TC_R32_G32_FLOAT; break; //Any format that is not T and C case GX2_SURFACE_FORMAT_T_R4_G4_UNORM: //8 bit //32 bit formats are handled above case GX2_SURFACE_FORMAT_D_D32_FLOAT_S8_UINT_X24: //64 bit //case GX2_SURFACE_FORMAT_T_R32_FLOAT_X8_X24: same as above case GX2_SURFACE_FORMAT_T_X32_G8_UINT_X24: switch(bitsPerPixel) { case 8: pSrcSurf->format = GX2_SURFACE_FORMAT_TC_R8_UNORM; pDstSurf->format = GX2_SURFACE_FORMAT_TC_R8_UNORM; break; case 64: pSrcSurf->format = GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_UINT; pDstSurf->format = GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_UINT; break; default: ASSERT(0 && "Unhandled bpp"); } break; } } // In-case we changed any formats GX2CalcSurfaceSizeAndAlignment(&texture.surface); GX2InitTextureRegs(&texture); GX2CalcSurfaceSizeAndAlignment(&colorBuffer.surface); GX2InitColorBufferRegs(&colorBuffer); GX2SetColorBuffer(&colorBuffer, GX2_RENDER_TARGET_0); // This convention is used for backwards compatibility with GX2. // Since we don't use viewMip, it is safe to invalidate only imagePtr GX2Invalidate((GX2InvalidateType)(GX2_INVALIDATE_COLOR_BUFFER|GX2_INVALIDATE_TEXTURE), texture.surface.imagePtr, texture.surface.imageSize); // Setup sampler if (stretchBlt) { GX2InitSamplerXYFilter(&g_copySurfaceSampler, GX2_TEX_XY_FILTER_BILINEAR, GX2_TEX_XY_FILTER_BILINEAR, GX2_TEX_ANISO_1_TO_1); } else { GX2InitSamplerXYFilter(&g_copySurfaceSampler, GX2_TEX_XY_FILTER_POINT, GX2_TEX_XY_FILTER_POINT, GX2_TEX_ANISO_1_TO_1); } // Set texture and sampler to be used by pixel shader GX2SetPixelTexture(&texture, g_copySurfaceShader[shaderIdx].textureLocation); GX2SetPixelSampler(&g_copySurfaceSampler, g_copySurfaceShader[shaderIdx].textureLocation); // Render to destination surface dimensions GX2SetViewport(0, 0, (f32)dstMipWidth, (f32)dstMipHeight, 0.0f, 1.0f); GX2SetScissor(0, 0, dstMipWidth, dstMipHeight); //Call the render function pointer if (bUseRects) { GX2Draw(GX2_PRIMITIVE_RECTS, VERTEX_COUNT); } else { GX2Draw(GX2_PRIMITIVE_TRIANGLE_STRIP, VERTEX_COUNT); } // Invalidate the color buffer output GX2Invalidate(GX2_INVALIDATE_COLOR_BUFFER, colorBuffer.surface.imagePtr, colorBuffer.surface.imageSize); GX2UTDebugTagUndent(); } //Setup all of the constant renderstate needed for the copy. void GX2UTSetCopyState(GX2Boolean enable) { if (enable) { // If your application's steady state can be set to GX2UT common state // using a small number of discrete GX2 calls, then customize here // instead of using GX2UTSetCommonState(). GX2UTSetCommonState(); // Set additional required GX2 state // Disable depth test and writes GX2SetDepthOnlyControl(GX2_FALSE, GX2_FALSE, GX2_COMPARE_ALWAYS); // GX2 default for ColorControl GX2SetColorControl(GX2_LOGIC_OP_COPY, GX2_DISABLE, GX2_DISABLE, GX2_ENABLE); // Set Complex GX2 State for our operation GX2SetSpecialState(GX2_SPECIAL_STATE_COPY, GX2_ENABLE); } else { // Disable any complex GX2 state GX2SetSpecialState(GX2_SPECIAL_STATE_COPY, GX2_DISABLE); // The purpose of the following is to return the context to GX2 default // state. If your application uses a different "steady state", then // customize GX2SetDepthStencilControl( GX2_TRUE, //depthTestEnable GX2_TRUE, //depthWriteEnable GX2_COMPARE_LESS, //depthFunc GX2_FALSE, //stencilTestEnable GX2_FALSE, //backStencilEnable GX2_COMPARE_ALWAYS, //frontStencilFunc GX2_STENCIL_REPLACE, //frontStencilZPass GX2_STENCIL_REPLACE, //frontStencilZFail GX2_STENCIL_REPLACE, //frontStencilFail GX2_COMPARE_ALWAYS, //backStencilFunc GX2_STENCIL_REPLACE, //backStencilZPass GX2_STENCIL_REPLACE, //backStencilZFail GX2_STENCIL_REPLACE);//backStencilFail } }