/*---------------------------------------------------------------------------* Copyright 2014 Nintendo. All rights reserved. These coded instructions, statements, and computer programs contain proprietary information of Nintendo of America Inc. and/or Nintendo Company Ltd., and are protected by Federal copyright law. They may not be disclosed to third parties or copied or duplicated in any form, in whole or in part, without the prior written consent of Nintendo. *---------------------------------------------------------------------------*/ #include #include #include #if defined(WIN32) || defined(WIN64) #include #include #endif #include #include #include #include #include #include //Include generated shaders #include "shaders/headers/gx2utConvertDepth.h" #define MAX(a, b) ((a) > (b) ? (a) : (b)) // ----- GX2 Shader static const GX2VertexShader* const VS_SHADERS[] = { &gx2utConvertDepth_VS,}; static const GX2PixelShader* const PS_SHADERS[] = { &gx2utConvertDepth_PS, }; static const u32 NUM_SHADERS = 1; typedef struct _ConvertDepthShader { // These variables hold the three types of shaders needed for a call to // GX2SetShaders. The vertex and pixel shaders are loaded from the // header, but since the fetch shader is generated at run-time // it must be handled slightly differently. const GX2VertexShader *pVertexShader; const GX2PixelShader *pPixelShader; // The register locations where the offset uniforms are stored for // the pixel and vertex shaders. u32 u_positionLocation; u32 u_texcoordLocation; } ConvertDepthShader; static ConvertDepthShader g_expandDepthShader[NUM_SHADERS]; static GX2FetchShader fetchShader; #define FETCH_SHADER_SIZE 32 //hard code this value for now ALIGNVAR(GX2_SHADER_ALIGNMENT) static u8 g_GX2UTFetchShader[FETCH_SHADER_SIZE]; // ----- GX2 Texture typedef struct _VtxFmtF32x2 { f32 texcoord[2]; } VtxFmtF32x2; static const VtxFmtF32x2 CONVERT_SURFACE_RECT_POSITION_DATA[] = { {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f} }; // Same for both rect and triangle strips static const u32 VERTEX_COUNT = sizeof(CONVERT_SURFACE_RECT_POSITION_DATA) / sizeof(CONVERT_SURFACE_RECT_POSITION_DATA[0]); // Initializes how surfaces will be copied static void GX2UTConvertDepthInit() { // Setup shaders u32 i; for (i = 0; i < NUM_SHADERS; ++i) { g_expandDepthShader[i].pVertexShader = VS_SHADERS[i]; g_expandDepthShader[i].pPixelShader = PS_SHADERS[i]; GX2Invalidate(GX2_INVALIDATE_CPU_SHADER, g_expandDepthShader[i].pVertexShader->shaderPtr, g_expandDepthShader[i].pVertexShader->shaderSize); GX2NotifyMemAlloc(g_expandDepthShader[i].pVertexShader->shaderPtr, g_expandDepthShader[i].pVertexShader->shaderSize, GX2_SHADER_ALIGNMENT); GX2Invalidate(GX2_INVALIDATE_CPU_SHADER, g_expandDepthShader[i].pPixelShader->shaderPtr, g_expandDepthShader[i].pPixelShader->shaderSize); GX2NotifyMemAlloc(g_expandDepthShader[i].pPixelShader->shaderPtr, g_expandDepthShader[i].pPixelShader->shaderSize, GX2_SHADER_ALIGNMENT); // Lookup the uniform locations in the vertex shader. // The shader author chose the name "u_positions" g_expandDepthShader[i].u_positionLocation = (u32)GX2GetVertexUniformVarOffset(g_expandDepthShader[i].pVertexShader, "u_positions"); ASSERT((g_expandDepthShader[i].u_positionLocation != GX2_UNIFORM_VAR_INVALID_OFFSET) && "Couldn't find the correct vertex shader uniforms."); } ASSERT(GX2CalcFetchShaderSize(0) <= sizeof(g_GX2UTFetchShader) && "g_GX2UTFetchShader too small!\n"); GX2InitFetchShader(&fetchShader, g_GX2UTFetchShader, 0, NULL); } void GX2UTConvertDepthBufferToTextureSurfaceOp( const GX2DepthBuffer* depthBuffer, GX2Surface* dstSurface, u32 dstMip, u32 dstSlice) { static GX2Boolean initDone = GX2_FALSE; GX2ColorBuffer cb; u32 expandWidth; u32 expandHeight; u32 shaderIdx = 0; GX2SurfaceDim dim = GX2_SURFACE_DIM_2D; u32 depth = depthBuffer->surface.depth; if (depthBuffer->surface.aa == GX2_AA_MODE_1X) { if (depth > 1) dim = GX2_SURFACE_DIM_2D_ARRAY; else dim = GX2_SURFACE_DIM_2D; } else { if (depth > 1) dim = GX2_SURFACE_DIM_2D_MSAA_ARRAY; else dim = GX2_SURFACE_DIM_2D_MSAA; } GX2UTDebugTagIndent(__func__); if (initDone == GX2_FALSE) { // Initialize the resources needed to expand a depth buffer GX2UTConvertDepthInit(); initDone = GX2_TRUE; } ASSERT((depthBuffer != NULL)); // GPU7 cannot copy one aa mode to the other directly, // Use GX2UTResolveAAColorBuffer to resolve after converting. ASSERT((depthBuffer->surface.aa == dstSurface->aa)); if (depthBuffer->surface.aa == GX2_AA_MODE_1X) { GX2SetSpecialState(GX2_SPECIAL_STATE_CONVERT_DEPTH, GX2_ENABLE); } else { GX2SetSpecialState(GX2_SPECIAL_STATE_CONVERT_AADEPTH, GX2_ENABLE); } // Set shaders GX2SetFetchShader(&fetchShader); GX2SetVertexShader(g_expandDepthShader[shaderIdx].pVertexShader); GX2SetPixelShader(g_expandDepthShader[shaderIdx].pPixelShader); // Set the uniforms to be used by the vertex shader f32 position_base_scale[] = { -1.0f, 1.0f, 2.0f, -2.0f, }; for (int i = 0; i < 4; i++) { f32 pos[] = { position_base_scale[0] + position_base_scale[2] * CONVERT_SURFACE_RECT_POSITION_DATA[i].texcoord[0], position_base_scale[1] + position_base_scale[3] * CONVERT_SURFACE_RECT_POSITION_DATA[i].texcoord[1], 0.0, 1.0 }; GX2SetVertexUniformReg(g_expandDepthShader[shaderIdx].u_positionLocation + i*4, 1*4, pos); } // Copy the dimensions expandWidth = MAX(1, dstSurface->width >> dstMip); expandHeight = MAX(1, dstSurface->height >> dstMip); // Invalidate the depth buffer to guarantee all writes have been flushed if ( depthBuffer->viewMip ) GX2Invalidate(GX2_INVALIDATE_DEPTH_BUFFER, depthBuffer->surface.mipPtr, depthBuffer->surface.mipSize); else GX2Invalidate(GX2_INVALIDATE_DEPTH_BUFFER, depthBuffer->surface.imagePtr, depthBuffer->surface.imageSize); for (int slice = 0; slice < depthBuffer->viewNumSlices; slice++) { // Setup color buffer // For AA Depth Convert, we need to disable the aux color buffer // while rendering AA. This is done by using GX2_AA_MODE_1X in the // call to GX2InitColorBuffer while calling GX2SetAAMode to setup // AA correctly. GX2InitColorBuffer(&cb, expandWidth, expandHeight, dstSurface->format, GX2_AA_MODE_1X); // Use hardware slice logic instead of MipLevelPtr logic GX2InitColorBufferPtr(&cb, GX2UTGetSurfaceMipSlicePtr(dstSurface, dstMip, 0)); // // Update the slice information // // The tileMode needs to be adjusted in case the incoming surface // tileMode isn't the default. cb.surface.tileMode = GX2UTGetSurfaceMipSliceTileMode(dstSurface, dstMip); cb.surface.dim = dim; cb.surface.depth = dstSurface->depth; cb.viewFirstSlice = dstSlice + slice; cb.viewNumSlices = 1; GX2CalcSurfaceSizeAndAlignment(&cb.surface); GX2InitColorBufferRegs(&cb); GX2SetColorBuffer(&cb, GX2_RENDER_TARGET_0); GX2SetAAMode(dstSurface->aa); // Bypass cmask/fmask (aux buffer) // Set the depth buffer to resolve GX2DepthBuffer db = *depthBuffer; db.viewFirstSlice = slice; db.viewNumSlices = 1; GX2InitDepthBufferRegs(&db); GX2SetDepthBuffer(&db); // Render to destination surface dimensions GX2SetViewport(0, 0, (f32)expandWidth, (f32)expandHeight, 0.0f, 1.0f); GX2SetScissor(0, 0, expandWidth, expandHeight); // Draw a full quad that covers the display GX2Draw(GX2_PRIMITIVE_RECTS, VERTEX_COUNT); } // Invalidate the color buffer to guarantee all writes have been flushed if ( depthBuffer->viewMip ) GX2Invalidate(GX2_INVALIDATE_DEPTH_BUFFER, cb.surface.mipPtr, cb.surface.mipSize); else GX2Invalidate(GX2_INVALIDATE_DEPTH_BUFFER, cb.surface.imagePtr, cb.surface.imageSize); // Disable any complex GX2 state if (depthBuffer->surface.aa == GX2_AA_MODE_1X) { GX2SetSpecialState(GX2_SPECIAL_STATE_CONVERT_DEPTH, GX2_DISABLE); } else { GX2SetSpecialState(GX2_SPECIAL_STATE_CONVERT_AADEPTH, GX2_DISABLE); } GX2UTDebugTagUndent(); } //Setup all of the constant renderstate needed for the copy. void GX2UTSetConvertDepthState(GX2Boolean enable) { if (enable) { // If your application's steady state can be set to GX2UT common state // using a small number of discrete GX2 calls, then customize here // instead of using GX2UTSetCommonState(). GX2UTSetCommonState(); // Set additional required GX2 state GX2SetDepthStencilControl(GX2_FALSE, GX2_FALSE, GX2_COMPARE_NEVER, GX2_FALSE, GX2_TRUE, GX2_COMPARE_NEVER, GX2_STENCIL_KEEP, GX2_STENCIL_KEEP, GX2_STENCIL_KEEP, GX2_COMPARE_NEVER, GX2_STENCIL_KEEP, GX2_STENCIL_KEEP, GX2_STENCIL_KEEP); // Enable color writes for the conversion GX2SetColorControl(GX2_LOGIC_OP_COPY, GX2_DISABLE, GX2_DISABLE, GX2_ENABLE); // Special GX2 state for our operation is not set until later because // we need to know about what buffers we're being asked to operate on } else { // The purpose of the following is to return the context to GX2 default // state. If your application uses a different "steady state", then // customize GX2SetDepthStencilControl( GX2_TRUE, //depthTestEnable GX2_TRUE, //depthWriteEnable GX2_COMPARE_LESS, //depthFunc GX2_FALSE, //stencilTestEnable GX2_FALSE, //backStencilEnable GX2_COMPARE_ALWAYS, //frontStencilFunc GX2_STENCIL_REPLACE, //frontStencilZPass GX2_STENCIL_REPLACE, //frontStencilZFail GX2_STENCIL_REPLACE, //frontStencilFail GX2_COMPARE_ALWAYS, //backStencilFunc GX2_STENCIL_REPLACE, //backStencilZPass GX2_STENCIL_REPLACE, //backStencilZFail GX2_STENCIL_REPLACE);//backStencilFail } }