/*---------------------------------------------------------------------------* Copyright 2010-2014 Nintendo. All rights reserved. These coded instructions, statements, and computer programs contain proprietary information of Nintendo of America Inc. and/or Nintendo Company Ltd., and are protected by Federal copyright law. They may not be disclosed to third parties or copied or duplicated in any form, in whole or in part, without the prior written consent of Nintendo. *---------------------------------------------------------------------------*/ #include #include #include #if defined(WIN32) || defined(WIN64) #include #include #endif #include #include #include #include #include #include //Include generated shaders #include "shaders/headers/gx2utClearSurfaceRect.h" /* * 0 - Simple clear shader */ static const GX2VertexShader* const VS_SHADERS[] = { &gx2utClearSurfaceRect_VS}; static const GX2PixelShader* const PS_SHADERS[] = { &gx2utClearSurfaceRect_PS}; static const u32 NUM_SHADERS = 1; typedef struct _ClearSurfaceShader { // These variables hold the three types of shaders needed for a call to // GX2SetShaders. The vertex and pixel shaders are loaded from the // header, but since the fetch shader is generated at run-time // it must be handled slightly differently. const GX2VertexShader *pVertexShader; const GX2PixelShader *pPixelShader; // The register locations where the offset uniforms are stored for // the pixel and vertex shaders. u32 u_positionLocation; u32 u_clearColorLocation; } ClearSurfaceShader; //For now, share one fetch shader buffer for all shaders, since it should be identical #define FETCH_SHADER_SIZE 32 //hard code this value for now ALIGNVAR(GX2_SHADER_ALIGNMENT) static u8 g_GX2UTFetchShader[FETCH_SHADER_SIZE]; static ClearSurfaceShader g_clearSurfaceShader[NUM_SHADERS]; static GX2FetchShader fetchShader; typedef struct _VtxFmtF32x2 { f32 position[2]; } VtxFmtF32x2; // This data works for both RECT and TRISTRIP static const VtxFmtF32x2 CLEAR_SURFACE_RECT_POSITION_DATA[] = { {0.0f, 0.0f}, {1.0f, 0.0f}, {0.0f, 1.0f}, {1.0f, 1.0f} }; static const u32 VERTEX_COUNT = sizeof(CLEAR_SURFACE_RECT_POSITION_DATA) / sizeof(CLEAR_SURFACE_RECT_POSITION_DATA[0]); // Initializes how surfaces will be copied void GX2UTClearSurfaceRectInit(void) { static GX2Boolean initDone = GX2_FALSE; if (initDone == GX2_TRUE) { //OSReport("Skipping init in GX2UTClearSurfaceRectInit\n"); return; } // Setup shaders u32 i; GX2NotifyMemAlloc(g_GX2UTFetchShader, FETCH_SHADER_SIZE, GX2_SHADER_ALIGNMENT); for (i = 0; i < NUM_SHADERS; ++i) { g_clearSurfaceShader[i].pVertexShader = VS_SHADERS[i]; g_clearSurfaceShader[i].pPixelShader = PS_SHADERS[i]; GX2Invalidate(GX2_INVALIDATE_CPU_SHADER, g_clearSurfaceShader[i].pVertexShader->shaderPtr, g_clearSurfaceShader[i].pVertexShader->shaderSize); GX2NotifyMemAlloc(g_clearSurfaceShader[i].pVertexShader->shaderPtr, g_clearSurfaceShader[i].pVertexShader->shaderSize, GX2_SHADER_ALIGNMENT); GX2Invalidate(GX2_INVALIDATE_CPU_SHADER, g_clearSurfaceShader[i].pPixelShader->shaderPtr, g_clearSurfaceShader[i].pPixelShader->shaderSize); GX2NotifyMemAlloc(g_clearSurfaceShader[i].pPixelShader->shaderPtr, g_clearSurfaceShader[i].pPixelShader->shaderSize, GX2_SHADER_ALIGNMENT); // Lookup the uniform locations in the vertex shader and pixel shader. // The shader author chose the names "u_positions", "u_depth", and "u_clearColor" g_clearSurfaceShader[i].u_positionLocation = (u32)GX2GetVertexUniformVarOffset(g_clearSurfaceShader[i].pVertexShader, "u_positions"); g_clearSurfaceShader[i].u_clearColorLocation = (u32)GX2GetPixelUniformVarOffset(g_clearSurfaceShader[i].pPixelShader, "u_clearColor"); ASSERT((g_clearSurfaceShader[i].u_positionLocation != GX2_UNIFORM_VAR_INVALID_OFFSET) && (g_clearSurfaceShader[i].u_clearColorLocation != GX2_UNIFORM_VAR_INVALID_OFFSET) && "Couldn't find the correct vertex and pixel shader uniforms."); } ASSERT(GX2CalcFetchShaderSize(0) <= sizeof(g_GX2UTFetchShader) && "g_GX2UTFetchShader too small!\n"); GX2InitFetchShader(&fetchShader, g_GX2UTFetchShader, 0, NULL); initDone = GX2_TRUE; } /// returns the bits of a floating point value as an unsigned integer static u32 FloatToBits(f32 f) { union { f32 f; u32 u; } converter; converter.f = f; return converter.u; } /// returns the bits of a floating point value as an unsigned integer static f32 BitsToFloat(u32 u) { union { f32 f; u32 u; } converter; converter.u = u; return converter.f; } /******************************************************************************* * ConvertFP32ToSmallFP * * @brief * Converts 32 bit floating point value to one with the supplied * representation. Code assume the dest format follows representation * similar to the fp32 IEEE format. * * @return * Converted value as an unsigned integer. *******************************************************************************/ u32 ConvertFP32ToSmallFP( f32 fp32, ///< Original fp32 value BOOL signBit, ///< Sign bit in destination format u32 expBits, ///< Exponent bits in destination format u32 mantBits) ///< Mantissa bits in destination format { u32 uiFp32, fp32Sign, fp32Exp, fp32Mant, fp32MantBits, fp32Bias; u32 maxExp, bias; u32 outFp; // Extract relevant values from input value uiFp32 = FloatToBits(fp32); fp32Sign = (uiFp32 & 0x80000000) >> 31; fp32Exp = (uiFp32 & 0x7F800000) >> 23; fp32Mant = uiFp32 & 0x007FFFFF; fp32MantBits = 23; fp32Bias = 127; // Compute exponent bias for destination format. This is also the max positive (and negative) // unbiased exponents in the format. bias = (1 << (expBits - 1)) - 1; // Compute max exponent reserved for NaN and Infs maxExp = (1 << expBits) - 1; if (fp32Exp == 0xff) { // Handle NaNs and Infs first. According to the DX10 spec these get converted to NaNs // and Infs in the lower precision format when available, otherwise they go to 0. We // assume the destination format has representations for NaNs and Infs except for when // there is no sign bit to represent signed NaN and Inf. if ((signBit == TRUE) || (fp32Sign == 0)) { outFp = (fp32Sign << (expBits + mantBits)) | (maxExp << mantBits) | (fp32Mant >> (fp32MantBits - mantBits)); } else { outFp = 0; } } else if ((signBit == FALSE) && (fp32Sign == 1)) { // Negative numbers go to zero if they can't be represented outFp = 0; } else if (fp32Exp > (fp32Bias + bias)) { // Too large to be represented in the destination format are made into signed MAX_FLOAT. outFp = (fp32Sign << (expBits + mantBits)) | ((maxExp - 1) << mantBits) | ((1 << mantBits) - 1); } else if (fp32Exp < (fp32Bias - (bias - 1))) { // Too small to be represented as a normalized number or it's zero u32 shift; // Shift amount is the difference between the fp32 exponent and the the minimum // exponent in the dest format. shift = fp32Bias - (bias - 1) - fp32Exp; // Large enough shifts will generate 0 if (shift > (fp32MantBits + 1)) { fp32Mant = 0; } else { // Add in hidden bit and right shift to align to new format fp32Mant = (fp32Mant | 0x00800000) >> (fp32MantBits - mantBits); fp32Mant = fp32Mant >> shift; } outFp = (fp32Sign << (expBits + mantBits)) | fp32Mant; } else { // Can be represented as a normalized number in the new format outFp = (fp32Sign << (expBits + mantBits)) | ((fp32Exp + bias - fp32Bias) << mantBits) | (fp32Mant >> (fp32MantBits - mantBits)); } // Sanity check ASSERT((outFp & ~((1 << (signBit + expBits + mantBits)) - 1)) == 0x0); return outFp; } u32 ConvertFP32ToUnorm( f32 fp32, ///< fp32 value to convert u32 numBits) ///< number of bits in destination unorm { u32 uiFp32, out, maxVal; u32 fp32Sign, fp32Exp, fp32Mant; // Extract relevant floating point parts uiFp32 = FloatToBits(fp32); fp32Sign = (uiFp32 & 0x80000000) >> 31; fp32Exp = (uiFp32 & 0x7F800000) >> 23; fp32Mant = uiFp32 & 0x007FFFFF; // Maximum representable unorm maxVal = (1 << numBits) - 1; // Handle NaNs and Infs values separately if (fp32Exp == 0xff) { // Nans and -Inf go to 0 if ((fp32Mant != 0x0) || (fp32Sign == 1)) { out = 0; } else { // +Inf goes to max representable value out = maxVal; } } else if (fp32 > 1.0f) { out = maxVal; } else if (fp32 < 0.0f) { out = 0; } else { out = static_cast((fp32 * maxVal) + 0.5f); } ASSERT(out <= maxVal); return out; } /******************************************************************************* * PackClearColor * * @brief * Pack the clear color for the given format into a 32 bit quantity. * * @return * Packed 32-bit clear value. *******************************************************************************/ u32 PackClearColor(f32 r, f32 g, f32 b, f32 a, ///< Clear color GX2SurfaceFormat format) ///< Color format { u32 clearColor = 0; if ((format == GX2_SURFACE_FORMAT_TCS_R8_G8_B8_A8_UNORM)) { u32 red, green, blue, alpha; red = ConvertFP32ToUnorm(r, 8); green = ConvertFP32ToUnorm(g, 8); blue = ConvertFP32ToUnorm(b, 8); alpha = ConvertFP32ToUnorm(a, 8); clearColor = (alpha << 24) | (blue << 16) | (green << 8) | red; } else if ((format == GX2_SURFACE_FORMAT_TCS_R10_G10_B10_A2_UNORM) || (format == GX2_SURFACE_FORMAT_TCS_A2_B10_G10_R10_UNORM)) { u32 red, green, blue, alpha; red = ConvertFP32ToUnorm(r, 10); green = ConvertFP32ToUnorm(g, 10); blue = ConvertFP32ToUnorm(b, 10); alpha = ConvertFP32ToUnorm(a, 2); if (format == GX2_SURFACE_FORMAT_TCS_R10_G10_B10_A2_UNORM) { clearColor = (alpha << 30) | (blue << 20) | (green << 10) | red; } else if (format == GX2_SURFACE_FORMAT_TCS_A2_B10_G10_R10_UNORM) { clearColor = (blue << 22) | (green << 12) | (red << 2) | alpha; } } else if (format == GX2_SURFACE_FORMAT_TC_R11_G11_B10_FLOAT) { u32 redFP11, greenFP11, blueFP10; redFP11 = ConvertFP32ToSmallFP(r, FALSE, 5, 6); greenFP11 = ConvertFP32ToSmallFP(g, FALSE, 5, 6); blueFP10 = ConvertFP32ToSmallFP(b, FALSE, 5, 5); clearColor = (blueFP10 << 22) | (greenFP11 << 11) | redFP11; } else if (format == GX2_SURFACE_FORMAT_TC_R16_G16_FLOAT) { u32 redFP16, greenFP16; redFP16 = ConvertFP32ToSmallFP(r, GX2_TRUE, 5, 10); greenFP16 = ConvertFP32ToSmallFP(g, GX2_TRUE, 5, 10); clearColor = (greenFP16 << 16) | (redFP16); } else { // Not implemented yet. ASSERT(FALSE); } return clearColor; } // Identify CB formats that can benefit by being cleared with the DB GX2Boolean ClearUsingDB(GX2ColorBuffer *colorBuffer, GX2UTRect *dstRect) { GX2Surface *dstSurface = &colorBuffer->surface; u32 dstMip = colorBuffer->viewMip; // Can't apply this optimization unless we are clearing the entire surface if ((dstRect->left != 0) || (dstRect->top != 0) || (dstRect->right != dstSurface->width >> dstMip) || (dstRect->bottom != dstSurface->height >> dstMip)) { return GX2_FALSE; } else { switch (dstSurface->format) { case GX2_SURFACE_FORMAT_TCS_R8_G8_B8_A8_UNORM: case GX2_SURFACE_FORMAT_TCS_R10_G10_B10_A2_UNORM: case GX2_SURFACE_FORMAT_TCS_A2_B10_G10_R10_UNORM: case GX2_SURFACE_FORMAT_TC_R11_G11_B10_FLOAT: case GX2_SURFACE_FORMAT_TC_R16_G16_FLOAT: case GX2_SURFACE_FORMAT_TCD_R32_FLOAT: break; default: return GX2_FALSE; // NO } } switch (dstSurface->tileMode) { case GX2_TILE_MODE_1D_TILED_THIN1: case GX2_TILE_MODE_2D_TILED_THIN1: case GX2_TILE_MODE_2D_TILED_THIN2: case GX2_TILE_MODE_2D_TILED_THIN4: case GX2_TILE_MODE_2B_TILED_THIN1: case GX2_TILE_MODE_2B_TILED_THIN2: case GX2_TILE_MODE_2B_TILED_THIN4: break; default: return GX2_FALSE; } if (dstSurface->aa != GX2_AA_MODE_1X) { return GX2_FALSE; } return GX2_TRUE; } GX2Boolean IsValidDBFloat(u32 val) { GX2Boolean valid = GX2_TRUE; // The following floating point values are not preserved by the DB when the depth value comes // from vertex Z: // - DeNorms are flushed to 0 // - NaNs (and Infs) are converted to 0 if (((val != 0x0) && ((val & 0x7F800000) == 0x0)) || // Denorms ((val & 0x7F800000) == 0x7F800000)) // NaNs & Infs { valid = GX2_FALSE; } return valid; } void GX2UTClearRectOp(GX2ColorBuffer *colorBuffer, GX2DepthBuffer *depthBuffer, f32 r, f32 g, f32 b, f32 a, f32 depthValue, u8 stencilValue, GX2ClearMode clearFlags, GX2HiStencilInfo *hiStencil, GX2UTRect *dstRect) { u32 cbFirstSlice = 0; u32 dbFirstSlice = 0; u32 numSlices = 0; u32 dstWidth, dstHeight, uDepthValue; u32 dstMip, dstSlice; GX2CompareFunction stencilFunc = GX2_COMPARE_NEVER; GX2Boolean bColorAsDepth = GX2_FALSE; GX2Boolean depthTestEnable = GX2_FALSE; GX2Boolean stencilTestEnable = GX2_DISABLE; GX2ColorBuffer cb; GX2DepthBuffer db; GX2UTDebugTagIndent(__func__); // blt with width or height <= 0 does nothing ASSERT((colorBuffer != NULL || depthBuffer != NULL) && (dstRect != NULL)); ASSERT((dstRect->bottom > dstRect->top) && (dstRect->right > dstRect->left) && "Invalid destination region!"); ASSERT(depthValue >= 0.0f && depthValue <= 1.0f); // Verify the parameters if (colorBuffer && depthBuffer) { ASSERT((colorBuffer->surface.width >> colorBuffer->viewMip) == (depthBuffer->surface.width >> depthBuffer->viewMip)); ASSERT((colorBuffer->surface.height >> colorBuffer->viewMip) == (depthBuffer->surface.height >> depthBuffer->viewMip)); ASSERT(colorBuffer->surface.aa == depthBuffer->surface.aa); ASSERT(colorBuffer->viewNumSlices == depthBuffer->viewNumSlices); } if (clearFlags & GX2_CLEAR_D_REG) { ASSERT(depthBuffer != NULL); GX2SetClearDepth(depthBuffer, depthValue); } if (clearFlags & GX2_CLEAR_S_REG) { ASSERT(depthBuffer != NULL); GX2SetClearStencil(depthBuffer, stencilValue); } // Initialize the resources needed to clear surfaces. // This function only does work the first time it's called. GX2UTClearSurfaceRectInit(); uDepthValue = FloatToBits(depthValue); // Some color-only clears can be accelerated by using the depth unit instead if (colorBuffer != NULL && depthBuffer == NULL && ClearUsingDB(colorBuffer, dstRect)) { u32 dv = 0; if (colorBuffer->surface.format == GX2_SURFACE_FORMAT_TCD_R32_FLOAT) { // only use red component of clearcolor dv = FloatToBits(r); } else { // PackColorToFp32 dv = PackClearColor(r, g, b, a, colorBuffer->surface.format); } if (IsValidDBFloat(dv)) { // DB can only handle valid floats uDepthValue = dv; dstSlice = colorBuffer->viewFirstSlice; dstMip = colorBuffer->viewMip; // Populate db and associated surface accordingly db.surface = colorBuffer->surface; db.surface.format = GX2_SURFACE_FORMAT_TCD_R32_FLOAT; db.surface.use = GX2_SURFACE_USE_DEPTH_BUFFER; db.viewMip = dstMip; db.viewFirstSlice = dstSlice; db.viewNumSlices = colorBuffer->viewNumSlices; db.hiZPtr = NULL; db.hiZSize = 0; // Instruct logic below that we are clearing depth only clearFlags = GX2_CLEAR_DEPTH; colorBuffer = NULL; depthBuffer = &db; GX2SetSpecialState(GX2_SPECIAL_STATE_CLEAR_COLOR_AS_DEPTH, GX2_ENABLE); bColorAsDepth = GX2_TRUE; } } // Only set if Color Writes are on if (colorBuffer != NULL) { // If AA, must have auxPtr + auxSize ASSERT(colorBuffer->surface.aa == GX2_AA_MODE_1X || (colorBuffer->auxPtr != NULL && colorBuffer->auxSize != 0)); //Currently we do not support clearing BC formats. Use GX2ClearColor() for this. ASSERT(!GX2SurfaceIsCompressed(colorBuffer->surface.format)); ASSERT((colorBuffer->viewFirstSlice + colorBuffer->viewNumSlices <= colorBuffer->surface.depth)); dstMip = colorBuffer->viewMip; dstWidth = GX2Max(1, colorBuffer->surface.width >> dstMip); dstHeight = GX2Max(1, colorBuffer->surface.height >> dstMip); // Create shallow copy of dest surface to be used as render target cb = *colorBuffer; cb.surface.use = GX2_SURFACE_USE_COLOR_BUFFER_TEXTURE; cb.viewNumSlices = 1; // Will reinit regs later if (cb.surface.format == GX2_SURFACE_FORMAT_TC_R16_G16_FLOAT) { // Change format to B8G8R8A8_UNORM and PackFP16ToRGBA8 cb.surface.format = GX2_SURFACE_FORMAT_TCS_R8_G8_B8_A8_UNORM; // Adjust clear color u32 packedColor; // Pack 2 channel FP16 clear color into a 32 bit quantity packedColor = PackClearColor(r, g, b, a, GX2_SURFACE_FORMAT_TC_R16_G16_FLOAT); // Extract RGBA8 values from packed color and convert to clear values a = ((packedColor >> 24) & 0xff) * (1.0f / 255.0f); b = ((packedColor >> 16) & 0xff) * (1.0f / 255.0f); g = ((packedColor >> 8) & 0xff) * (1.0f / 255.0f); r = (packedColor & 0xff) * (1.0f / 255.0f); } // Enable Color Writes GX2SetColorControl(GX2_LOGIC_OP_COPY, 0, //disable blending GX2_DISABLE, GX2_ENABLE); numSlices = colorBuffer->viewNumSlices; cbFirstSlice = cb.viewFirstSlice; } else { // Use Depth Buffer dimensions dstMip = depthBuffer->viewMip; dstWidth = GX2Max(1, depthBuffer->surface.width >> dstMip); dstHeight = GX2Max(1, depthBuffer->surface.height >> dstMip); // Disable Color Writes GX2SetColorControl(GX2_LOGIC_OP_COPY, 0, //disable blending GX2_DISABLE, GX2_DISABLE); GX2SetAAMode(depthBuffer->surface.aa); } // Render to destination surface dimensions GX2SetViewport(0, 0, (f32)dstWidth, (f32)dstHeight, 0.0f, 1.0f); GX2SetScissor(0, 0, dstWidth, dstHeight); // Only set if Depth or Stencil Writes are on if (depthBuffer != NULL) { ASSERT((depthBuffer->viewFirstSlice + depthBuffer->viewNumSlices <= depthBuffer->surface.depth)); db = *depthBuffer; numSlices = depthBuffer->viewNumSlices; db.viewNumSlices = 1; // Will reinit regs later if (clearFlags & GX2_CLEAR_DEPTH) depthTestEnable = GX2_TRUE; else depthTestEnable = GX2_FALSE; if (clearFlags & GX2_CLEAR_STENCIL) { GX2SetStencilMask(0xff, //preMaskFront 0xff, //writeMaskFront stencilValue, //refFront 0xff, //preMaskBack 0xff, //writeMaskBack stencilValue);//refBack stencilFunc = GX2_COMPARE_ALWAYS; stencilTestEnable = GX2_ENABLE; } else { stencilFunc = GX2_COMPARE_NEVER; stencilTestEnable = GX2_DISABLE; } // fast clears require HiZ and all edges on micro-tile boundaries if ((depthBuffer->hiZPtr) && !((dstRect->bottom | dstRect->top | dstRect->left | dstRect->right) & 0x7)) { GX2SetSpecialState(GX2_SPECIAL_STATE_CLEAR_HIZ, GX2_ENABLE); } dbFirstSlice = db.viewFirstSlice; } // Depth Writes GX2SetDepthStencilControl(depthTestEnable, //depthTestEnable GX2_ENABLE, //depthWriteEnable GX2_COMPARE_ALWAYS, //depthFunc stencilTestEnable, //stencilTestEnable stencilTestEnable, //backStencilEnable stencilFunc, //frontStencilFunc GX2_STENCIL_REPLACE,//frontStencilZPass GX2_STENCIL_REPLACE,//frontStencilZFail GX2_STENCIL_REPLACE,//frontStencilFail stencilFunc, //backStencilFunc GX2_STENCIL_REPLACE,//backStencilZPass GX2_STENCIL_REPLACE,//backStencilZFail GX2_STENCIL_REPLACE //backStencilFail ); if (hiStencil != NULL) GX2SetHiStencilInfo(hiStencil); // Only one clear shader u32 shaderIdx = 0; // Set shaders GX2SetFetchShader(&fetchShader); GX2SetVertexShader(g_clearSurfaceShader[shaderIdx].pVertexShader); GX2SetPixelShader(g_clearSurfaceShader[shaderIdx].pPixelShader); // Set the uniforms to be used by the vertex shader and pixel shader f32 position_base_scale[] = { (f32)dstRect->left, (f32)dstRect->top, (f32)dstRect->right - (f32)dstRect->left, (f32)dstRect->bottom - (f32)dstRect->top, }; for (int i = 0; i < 4; i++) { f32 pos[] = { position_base_scale[0] + position_base_scale[2] * CLEAR_SURFACE_RECT_POSITION_DATA[i].position[0], position_base_scale[1] + position_base_scale[3] * CLEAR_SURFACE_RECT_POSITION_DATA[i].position[1], BitsToFloat(uDepthValue), 1.0f }; GX2SetVertexUniformReg(g_clearSurfaceShader[shaderIdx].u_positionLocation + i*4, 1*4, pos); } f32 clearColor[] = {r, g, b, a}; GX2SetPixelUniformReg(g_clearSurfaceShader[shaderIdx].u_clearColorLocation, 1*4, clearColor); for (int slice = 0; slice < numSlices; slice++) { // Reset the color buffer to the next slice if (colorBuffer) { cb.viewFirstSlice = cbFirstSlice + slice; GX2InitColorBufferRegs(&cb); GX2SetColorBuffer(&cb, GX2_RENDER_TARGET_0); } // Reset the depth buffer to the next slice if (depthBuffer) { db.viewFirstSlice = dbFirstSlice + slice; GX2InitDepthBufferRegs(&db); GX2SetDepthBuffer(&db); } //Call the render function pointer GX2Draw(GX2_PRIMITIVE_RECTS, VERTEX_COUNT); } if (bColorAsDepth) { GX2SetSpecialState(GX2_SPECIAL_STATE_CLEAR_COLOR_AS_DEPTH, GX2_DISABLE); } if (colorBuffer != NULL) { if ( colorBuffer->viewMip ) GX2Invalidate(GX2_INVALIDATE_COLOR_BUFFER, colorBuffer->surface.mipPtr, colorBuffer->surface.mipSize); else GX2Invalidate(GX2_INVALIDATE_COLOR_BUFFER, colorBuffer->surface.imagePtr, colorBuffer->surface.imageSize); } if (depthBuffer != NULL) { if ( depthBuffer->viewMip ) GX2Invalidate(GX2_INVALIDATE_DEPTH_BUFFER, depthBuffer->surface.mipPtr, depthBuffer->surface.mipSize); else GX2Invalidate(GX2_INVALIDATE_DEPTH_BUFFER, depthBuffer->surface.imagePtr, depthBuffer->surface.imageSize); // Disable HiZ optimization if ((depthBuffer->hiZPtr) && !((dstRect->bottom | dstRect->top | dstRect->left | dstRect->right) & 0x7)) { GX2SetSpecialState(GX2_SPECIAL_STATE_CLEAR_HIZ, GX2_DISABLE); } } GX2UTDebugTagUndent(); } // Clears a region of the HiStencil buffer // Setting the rectangular regions to a top-left of (0,0) and a bottom-right of (width, height) // will blt the entire surface without any flipping (the right and bottom are exclusive). // Must be invoked to invalidate HiStencil when changing pretest state during a frame. void GX2UTInvalidateHiStencilRect(GX2UTRect *dstRect, GX2DepthBuffer *depthBuffer) { u32 dstWidth, dstHeight; GX2UTDebugTagIndent(__func__); //Disable state shadowing. If your app is using state shadowing, //you will need to restore the context after calling this function. GX2SetContextState(NULL); // Initialize the resources needed to clear surfaces. // This function only does work the first time it's called. GX2UTClearSurfaceRectInit(); // must have a deptBuffer w/ hiZPtr // blt with width or height <= 0 does nothing ASSERT((depthBuffer != NULL) && (depthBuffer->hiZPtr != NULL) && (dstRect != NULL)); ASSERT((dstRect->bottom > dstRect->top) && (dstRect->right > dstRect->left) && "Invalid destination region!"); // Disable Color Writes GX2SetColorControl(GX2_LOGIC_OP_COPY, 0, //disable blending GX2_DISABLE, GX2_DISABLE); // Disable Depth and Stencil Writes GX2SetDepthStencilControl(GX2_DISABLE, // depthEnable GX2_DISABLE, // depthWriteEnable GX2_COMPARE_ALWAYS, // depthFunc GX2_DISABLE, // stencilTestEnable GX2_DISABLE, // backStencilEnable GX2_COMPARE_ALWAYS, // frontStencilFunc GX2_STENCIL_KEEP, // frontStencilZPass GX2_STENCIL_KEEP, // frontStencilZFail GX2_STENCIL_KEEP, // frontStencilFail GX2_COMPARE_ALWAYS, // backStencilFunc GX2_STENCIL_KEEP, // backStencilZPass GX2_STENCIL_KEEP, // backStencilZFail GX2_STENCIL_KEEP); // backStencilFail GX2SetStencilMask(0x00, //preMaskFront 0x00, //writeMaskFront 0x00, //refFront 0x00, //preMaskBack 0x00, //writeMaskBack 0x00);//refBack dstWidth = depthBuffer->surface.width; dstHeight = depthBuffer->surface.height; // Render to destination surface dimensions GX2SetViewport(dstRect->left, dstRect->bottom, (f32)dstWidth, (f32)dstHeight, 0.0f, 1.0f); GX2SetScissor(dstRect->left, dstRect->bottom, dstWidth, dstHeight); GX2SetDepthBuffer(depthBuffer); // Create HiStencilInfo that will clear the HiStencil pretest results GX2HiStencilInfo hiStencilInfo; hiStencilInfo.state[0].function = GX2_COMPARE_ALWAYS; hiStencilInfo.state[0].reference = 0; hiStencilInfo.state[0].mask = 0xFF; hiStencilInfo.state[0].enable = GX2_FALSE; hiStencilInfo.state[1].function = GX2_COMPARE_ALWAYS; hiStencilInfo.state[1].reference = 0; hiStencilInfo.state[1].mask = 0xFF; hiStencilInfo.state[1].enable = GX2_FALSE; GX2InitHiStencilInfoRegs(&hiStencilInfo); GX2SetHiStencilInfo(&hiStencilInfo); // Only one clear shader u32 shaderIdx = 0; // Set shaders // NOTE: No fetch shader is needed for our shaders, see GLSL for details. GX2SetVertexShader(g_clearSurfaceShader[shaderIdx].pVertexShader); GX2SetPixelShader(g_clearSurfaceShader[shaderIdx].pPixelShader); // Set the uniforms to be used by the vertex shader and pixel shader f32 position_base_scale[] = { -1.0f + 2.0f * (f32)dstRect->left / (f32)dstWidth, 1.0f - 2.0f * (f32)dstRect->top / (f32)dstHeight, 2.0f * (f32)(dstRect->right - dstRect->left) / (f32)dstWidth, -2.0f * (f32)(dstRect->bottom - dstRect->top) / (f32)dstHeight, }; for (int i = 0; i < 4; i++) { f32 pos[] = { position_base_scale[0] + position_base_scale[2] * CLEAR_SURFACE_RECT_POSITION_DATA[i].position[0], position_base_scale[1] + position_base_scale[3] * CLEAR_SURFACE_RECT_POSITION_DATA[i].position[1], 0.0, 1.0 }; GX2SetVertexUniformReg(g_clearSurfaceShader[shaderIdx].u_positionLocation + i*4, 1*4, pos); } // We don't need to set clearColor or depthValue because we aren't writing those buffers //Call the render function pointer GX2Draw(GX2_PRIMITIVE_TRIANGLE_STRIP, VERTEX_COUNT); GX2UTDebugTagUndent(); } //Setup all of the constant renderstate needed for the clear void GX2UTSetClearState(GX2Boolean enable) { if (enable) { // If your application's steady state can be set to GX2UT common state // using a small number of discrete GX2 calls, then customize here // instead of using GX2UTSetCommonState() GX2UTSetCommonState(); // Enable any special GX2 state GX2SetSpecialState(GX2_SPECIAL_STATE_CLEAR, GX2_ENABLE); // That call just clobbered RasterizerClipControl } else { // Disable any special GX2 state GX2SetSpecialState(GX2_SPECIAL_STATE_CLEAR, GX2_DISABLE); // The purpose of the following is to return the context to GX2 default // state. Integration with your application's state management may // differ. The clear operation itself clobbered DepthStencilControl // and ColorControl GX2SetDepthStencilControl(GX2_TRUE, //depthTestEnable GX2_TRUE, //depthWriteEnable GX2_COMPARE_LESS, //depthFunc GX2_FALSE, //stencilTestEnable GX2_FALSE, //backStencilEnable GX2_COMPARE_ALWAYS, //frontStencilFunc GX2_STENCIL_REPLACE, //frontStencilZPass GX2_STENCIL_REPLACE, //frontStencilZFail GX2_STENCIL_REPLACE, //frontStencilFail GX2_COMPARE_ALWAYS, //backStencilFunc GX2_STENCIL_REPLACE, //backStencilZPass GX2_STENCIL_REPLACE, //backStencilZFail GX2_STENCIL_REPLACE);//backStencilFail GX2SetColorControl(GX2_LOGIC_OP_COPY, GX2_DISABLE, GX2_DISABLE, GX2_ENABLE); } } void GX2UTSetupColorAuxBufferOp(GX2ColorBuffer *colorBuffer) { if (colorBuffer->auxPtr) { u32 ctileOffset = colorBuffer->_regs[4]; // cmask_offset u32 ctileSize = colorBuffer->auxSize - ctileOffset; u8* ctilePtr = (u8*)colorBuffer->auxPtr + ctileOffset; ASSERT((ctileSize & 0x1FF) == 0 && "Invalid MSAA Color Buffer auxSize!"); GX2ColorBuffer tmpBuf; u32 width = 16; u32 height = ctileSize / 4 / width; GX2InitColorBuffer(&tmpBuf, width, height, GX2_SURFACE_FORMAT_TCS_R8_G8_B8_A8_UNORM, GX2_AA_MODE_1X); GX2InitColorBufferPtr(&tmpBuf, ctilePtr); tmpBuf.surface.tileMode = GX2_TILE_MODE_1D_TILED_THIN1; GX2CalcSurfaceSizeAndAlignment(&tmpBuf.surface); ASSERT(tmpBuf.surface.imageSize == ctileSize && "CMask Tile Size must match calculated image size!"); GX2InitColorBufferRegs(&tmpBuf); GX2UTClearOp(&tmpBuf, NULL, GX2_AUX_BUFFER_CLEAR_VALUE/255.0f, GX2_AUX_BUFFER_CLEAR_VALUE/255.0f, GX2_AUX_BUFFER_CLEAR_VALUE/255.0f, GX2_AUX_BUFFER_CLEAR_VALUE/255.0f, 0.0f, 0u, GX2_CLEAR_NONE, NULL); } }