1 /*---------------------------------------------------------------------------*
2 
3   Copyright 2014 Nintendo.  All rights reserved.
4 
5   These coded instructions, statements, and computer programs contain
6   proprietary information of Nintendo of America Inc. and/or Nintendo
7   Company Ltd., and are protected by Federal copyright law.  They may
8   not be disclosed to third parties or copied or duplicated in any form,
9   in whole or in part, without the prior written consent of Nintendo.
10 
11  *---------------------------------------------------------------------------*/
12 
13 #include <stdio.h>
14 #include <string.h>
15 #include <math.h>
16 
17 #if defined(WIN32) || defined(WIN64)
18 #include <pc/gx2.h>
19 #include <pc/demo.h>
20 #endif
21 #include <cafe/os.h>
22 #include <cafe/mem.h>
23 #include <cafe/gx2.h>
24 #include <cafe/demo.h>
25 
26 #include <cafe/gfd.h>
27 #include <cafe/gx2ut.h>
28 
29 //Include generated shaders
30 #include "shaders/headers/gx2utConvertDepth.h"
31 
// Yields the larger of a and b (b when they compare equal).
// Each argument may be evaluated more than once, so pass side-effect-free expressions.
#define MAX(a, b) (((b) < (a)) ? (a) : (b))
33 
34 // ----- GX2 Shader
35 static const GX2VertexShader* const VS_SHADERS[] = { &gx2utConvertDepth_VS,};
36 
37 static const GX2PixelShader* const PS_SHADERS[] = { &gx2utConvertDepth_PS,
38 };
39 
40 static const u32 NUM_SHADERS = 1;
41 
42 typedef struct _ConvertDepthShader {
43     // These variables hold the three types of shaders needed for a call to
44     // GX2SetShaders. The vertex and pixel shaders are loaded from the
45     // header, but since the fetch shader is generated at run-time
46     // it must be handled slightly differently.
47     const GX2VertexShader *pVertexShader;
48     const GX2PixelShader *pPixelShader;
49 
50     // The register locations where the offset uniforms are stored for
51     // the pixel and vertex shaders.
52     u32 u_positionLocation;
53     u32 u_texcoordLocation;
54 
55 } ConvertDepthShader;
56 
57 static ConvertDepthShader g_expandDepthShader[NUM_SHADERS];
58 static GX2FetchShader fetchShader;
59 
60 #define FETCH_SHADER_SIZE 32  //hard code this value for now
61 ALIGNVAR(GX2_SHADER_ALIGNMENT) static u8 g_GX2UTFetchShader[FETCH_SHADER_SIZE];
62 
63 // ----- GX2 Texture
64 
65 typedef struct _VtxFmtF32x2 {
66     f32 texcoord[2];
67 } VtxFmtF32x2;
68 
69 static const VtxFmtF32x2 CONVERT_SURFACE_RECT_POSITION_DATA[] =
70 {
71     {0.0f,  0.0f},
72     {1.0f,  0.0f},
73     {1.0f,  1.0f},
74     {0.0f,  1.0f}
75 };
76 
77 // Same for both rect and triangle strips
78 static const u32 VERTEX_COUNT = sizeof(CONVERT_SURFACE_RECT_POSITION_DATA)
79                          / sizeof(CONVERT_SURFACE_RECT_POSITION_DATA[0]);
80 
81 // Initializes how surfaces will be copied
GX2UTConvertDepthInit()82 static void GX2UTConvertDepthInit()
83 {
84     // Setup shaders
85     u32 i;
86 
87     for (i = 0; i < NUM_SHADERS; ++i)
88     {
89         g_expandDepthShader[i].pVertexShader = VS_SHADERS[i];
90         g_expandDepthShader[i].pPixelShader = PS_SHADERS[i];
91 
92         GX2Invalidate(GX2_INVALIDATE_CPU_SHADER,
93             g_expandDepthShader[i].pVertexShader->shaderPtr,
94             g_expandDepthShader[i].pVertexShader->shaderSize);
95 
96         GX2NotifyMemAlloc(g_expandDepthShader[i].pVertexShader->shaderPtr,
97             g_expandDepthShader[i].pVertexShader->shaderSize,
98             GX2_SHADER_ALIGNMENT);
99 
100         GX2Invalidate(GX2_INVALIDATE_CPU_SHADER,
101             g_expandDepthShader[i].pPixelShader->shaderPtr,
102             g_expandDepthShader[i].pPixelShader->shaderSize);
103 
104         GX2NotifyMemAlloc(g_expandDepthShader[i].pPixelShader->shaderPtr,
105             g_expandDepthShader[i].pPixelShader->shaderSize,
106             GX2_SHADER_ALIGNMENT);
107 
108         // Lookup the uniform locations in the vertex shader.
109         // The shader author chose the name "u_positions"
110         g_expandDepthShader[i].u_positionLocation =
111             (u32)GX2GetVertexUniformVarOffset(g_expandDepthShader[i].pVertexShader, "u_positions");
112         ASSERT((g_expandDepthShader[i].u_positionLocation != GX2_UNIFORM_VAR_INVALID_OFFSET)
113             && "Couldn't find the correct vertex shader uniforms.");
114 
115     }
116 
117     ASSERT(GX2CalcFetchShaderSize(0) <= sizeof(g_GX2UTFetchShader) && "g_GX2UTFetchShader too small!\n");
118     GX2InitFetchShader(&fetchShader, g_GX2UTFetchShader, 0, NULL);
119 }
120 
GX2UTConvertDepthBufferToTextureSurfaceOp(const GX2DepthBuffer * depthBuffer,GX2Surface * dstSurface,u32 dstMip,u32 dstSlice)121 void GX2UTConvertDepthBufferToTextureSurfaceOp(
122                                              const GX2DepthBuffer* depthBuffer,
123                                              GX2Surface* dstSurface,
124                                              u32 dstMip, u32 dstSlice)
125 {
126     static GX2Boolean initDone = GX2_FALSE;
127     GX2ColorBuffer cb;
128     u32 expandWidth;
129     u32 expandHeight;
130     u32 shaderIdx = 0;
131     GX2SurfaceDim dim = GX2_SURFACE_DIM_2D;
132     u32 depth = depthBuffer->surface.depth;
133 
134     if (depthBuffer->surface.aa == GX2_AA_MODE_1X)
135     {
136         if (depth > 1)
137             dim = GX2_SURFACE_DIM_2D_ARRAY;
138         else
139             dim = GX2_SURFACE_DIM_2D;
140     }
141     else
142     {
143         if (depth > 1)
144             dim = GX2_SURFACE_DIM_2D_MSAA_ARRAY;
145         else
146             dim = GX2_SURFACE_DIM_2D_MSAA;
147     }
148 
149 
150     GX2UTDebugTagIndent(__func__);
151 
152     if (initDone == GX2_FALSE)
153     {
154         // Initialize the resources needed to expand a depth buffer
155         GX2UTConvertDepthInit();
156 
157         initDone = GX2_TRUE;
158     }
159 
160     ASSERT((depthBuffer != NULL));
161 
162     // GPU7 cannot copy one aa mode to the other directly,
163     // Use GX2UTResolveAAColorBuffer to resolve after converting.
164     ASSERT((depthBuffer->surface.aa == dstSurface->aa));
165 
166 
167     if (depthBuffer->surface.aa == GX2_AA_MODE_1X)
168     {
169         GX2SetSpecialState(GX2_SPECIAL_STATE_CONVERT_DEPTH, GX2_ENABLE);
170     }
171     else
172     {
173         GX2SetSpecialState(GX2_SPECIAL_STATE_CONVERT_AADEPTH, GX2_ENABLE);
174     }
175     // Set shaders
176     GX2SetFetchShader(&fetchShader);
177     GX2SetVertexShader(g_expandDepthShader[shaderIdx].pVertexShader);
178     GX2SetPixelShader(g_expandDepthShader[shaderIdx].pPixelShader);
179 
180     // Set the uniforms to be used by the vertex shader
181     f32 position_base_scale[] =
182     {
183         -1.0f,
184          1.0f,
185          2.0f,
186         -2.0f,
187     };
188 
189     for (int i = 0; i < 4; i++)
190     {
191         f32 pos[] = {
192             position_base_scale[0] + position_base_scale[2] * CONVERT_SURFACE_RECT_POSITION_DATA[i].texcoord[0],
193             position_base_scale[1] + position_base_scale[3] * CONVERT_SURFACE_RECT_POSITION_DATA[i].texcoord[1],
194             0.0,
195             1.0
196         };
197 
198         GX2SetVertexUniformReg(g_expandDepthShader[shaderIdx].u_positionLocation + i*4, 1*4, pos);
199     }
200 
201     // Copy the dimensions
202     expandWidth = MAX(1, dstSurface->width >> dstMip);
203     expandHeight = MAX(1, dstSurface->height >> dstMip);
204 
205     // Invalidate the depth buffer to guarantee all writes have been flushed
206     if ( depthBuffer->viewMip )
207         GX2Invalidate(GX2_INVALIDATE_DEPTH_BUFFER, depthBuffer->surface.mipPtr, depthBuffer->surface.mipSize);
208     else
209         GX2Invalidate(GX2_INVALIDATE_DEPTH_BUFFER, depthBuffer->surface.imagePtr, depthBuffer->surface.imageSize);
210 
211     for (int slice = 0; slice < depthBuffer->viewNumSlices; slice++)
212     {
213         // Setup color buffer
214         // For AA Depth Convert, we need to disable the aux color buffer
215         // while rendering AA. This is done by using GX2_AA_MODE_1X in the
216         // call to GX2InitColorBuffer while calling GX2SetAAMode to setup
217         // AA correctly.
218         GX2InitColorBuffer(&cb, expandWidth, expandHeight, dstSurface->format, GX2_AA_MODE_1X);
219 
220         // Use hardware slice logic instead of MipLevelPtr logic
221         GX2InitColorBufferPtr(&cb, GX2UTGetSurfaceMipSlicePtr(dstSurface, dstMip, 0));
222 
223         //
224         // Update the slice information
225         //
226 
227         // The tileMode needs to be adjusted in case the incoming surface
228         // tileMode isn't the default.
229         cb.surface.tileMode = GX2UTGetSurfaceMipSliceTileMode(dstSurface, dstMip);
230         cb.surface.dim = dim;
231         cb.surface.depth = dstSurface->depth;
232         cb.viewFirstSlice = dstSlice + slice;
233         cb.viewNumSlices = 1;
234         GX2CalcSurfaceSizeAndAlignment(&cb.surface);
235         GX2InitColorBufferRegs(&cb);
236 
237         GX2SetColorBuffer(&cb, GX2_RENDER_TARGET_0);
238         GX2SetAAMode(dstSurface->aa); // Bypass cmask/fmask (aux buffer)
239 
240         // Set the depth buffer to resolve
241         GX2DepthBuffer db = *depthBuffer;
242         db.viewFirstSlice = slice;
243         db.viewNumSlices = 1;
244         GX2InitDepthBufferRegs(&db);
245 
246         GX2SetDepthBuffer(&db);
247 
248         // Render to destination surface dimensions
249         GX2SetViewport(0, 0, (f32)expandWidth, (f32)expandHeight, 0.0f, 1.0f);
250         GX2SetScissor(0, 0, expandWidth, expandHeight);
251 
252         // Draw a full quad that covers the display
253         GX2Draw(GX2_PRIMITIVE_RECTS, VERTEX_COUNT);
254     }
255 
256     // Invalidate the color buffer to guarantee all writes have been flushed
257     if ( depthBuffer->viewMip )
258         GX2Invalidate(GX2_INVALIDATE_DEPTH_BUFFER, cb.surface.mipPtr, cb.surface.mipSize);
259     else
260         GX2Invalidate(GX2_INVALIDATE_DEPTH_BUFFER, cb.surface.imagePtr, cb.surface.imageSize);
261 
262     // Disable any complex GX2 state
263     if (depthBuffer->surface.aa == GX2_AA_MODE_1X)
264     {
265         GX2SetSpecialState(GX2_SPECIAL_STATE_CONVERT_DEPTH, GX2_DISABLE);
266     }
267     else
268     {
269         GX2SetSpecialState(GX2_SPECIAL_STATE_CONVERT_AADEPTH, GX2_DISABLE);
270     }
271 
272     GX2UTDebugTagUndent();
273 }
274 
275 
276 //Setup all of the constant renderstate needed for the copy.
GX2UTSetConvertDepthState(GX2Boolean enable)277 void GX2UTSetConvertDepthState(GX2Boolean enable)
278 {
279     if (enable)
280     {
281         // If your application's steady state can be set to GX2UT common state
282         // using a small number of discrete GX2 calls, then customize here
283         // instead of using GX2UTSetCommonState().
284         GX2UTSetCommonState();
285 
286         // Set additional required GX2 state
287         GX2SetDepthStencilControl(GX2_FALSE,
288                                   GX2_FALSE,
289                                   GX2_COMPARE_NEVER,
290                                   GX2_FALSE,
291                                   GX2_TRUE,
292                                   GX2_COMPARE_NEVER,
293                                   GX2_STENCIL_KEEP,
294                                   GX2_STENCIL_KEEP,
295                                   GX2_STENCIL_KEEP,
296                                   GX2_COMPARE_NEVER,
297                                   GX2_STENCIL_KEEP,
298                                   GX2_STENCIL_KEEP,
299                                   GX2_STENCIL_KEEP);
300 
301         // Enable color writes for the conversion
302         GX2SetColorControl(GX2_LOGIC_OP_COPY,
303                            GX2_DISABLE,
304                            GX2_DISABLE,
305                            GX2_ENABLE);
306 
307         // Special GX2 state for our operation is not set until later because
308         // we need to know about what buffers we're being asked to operate on
309     }
310     else
311     {
312         // The purpose of the following is to return the context to GX2 default
313         // state.  If your application uses a different "steady state", then
314         // customize
315         GX2SetDepthStencilControl(
316               GX2_TRUE,            //depthTestEnable
317               GX2_TRUE,            //depthWriteEnable
318               GX2_COMPARE_LESS,    //depthFunc
319               GX2_FALSE,           //stencilTestEnable
320               GX2_FALSE,           //backStencilEnable
321               GX2_COMPARE_ALWAYS,  //frontStencilFunc
322               GX2_STENCIL_REPLACE, //frontStencilZPass
323               GX2_STENCIL_REPLACE, //frontStencilZFail
324               GX2_STENCIL_REPLACE, //frontStencilFail
325               GX2_COMPARE_ALWAYS,  //backStencilFunc
326               GX2_STENCIL_REPLACE, //backStencilZPass
327               GX2_STENCIL_REPLACE, //backStencilZFail
328               GX2_STENCIL_REPLACE);//backStencilFail
329     }
330 }
331