1 /*---------------------------------------------------------------------------*
2
3 Copyright (C) Nintendo. All rights reserved.
4
5 These coded instructions, statements, and computer programs contain
6 proprietary information of Nintendo of America Inc. and/or Nintendo
7 Company Ltd., and are protected by Federal copyright law. They may
8 not be disclosed to third parties or copied or duplicated in any form,
9 in whole or in part, without the prior written consent of Nintendo.
10
11 *---------------------------------------------------------------------------*/
12
13 #include <stdio.h>
14 #include <string.h>
15 #include <math.h>
16
17 #if defined(WIN32) || defined(WIN64)
18 #include <pc/gx2.h>
19 #include <pc/demo.h>
20 #endif
21 #include <cafe/os.h>
22 #include <cafe/mem.h>
23 #include <cafe/gx2.h>
24 #include <cafe/demo.h>
25
26 #include <cafe/gfd.h>
27 #include <cafe/gx2ut.h>
28
29 //Include generated shaders
30 #include "shaders/headers/gx2utConvertDepth.h"
31
// Yields the larger of two values. Both arguments appear twice in the
// expansion, so do not pass expressions with side effects (e.g. MAX(i++, j)).
#define MAX(a, b) (((b) < (a)) ? (a) : (b))
33
34 // ----- GX2 Shader
35 static const GX2VertexShader* const VS_SHADERS[] = { &gx2utConvertDepth_VS,};
36
37 static const GX2PixelShader* const PS_SHADERS[] = { &gx2utConvertDepth_PS,
38 };
39
40 static const u32 NUM_SHADERS = 1;
41
42 typedef struct _ConvertDepthShader {
43 // These variables hold the three types of shaders needed for a call to
44 // GX2SetShaders. The vertex and pixel shaders are loaded from the
45 // header, but since the fetch shader is generated at run-time
46 // it must be handled slightly differently.
47 const GX2VertexShader *pVertexShader;
48 const GX2PixelShader *pPixelShader;
49
50 // The register locations where the offset uniforms are stored for
51 // the pixel and vertex shaders.
52 u32 u_positionLocation;
53 u32 u_texcoordLocation;
54
55 } ConvertDepthShader;
56
57 static ConvertDepthShader g_expandDepthShader[NUM_SHADERS];
58 static GX2FetchShader fetchShader;
59
60 #define FETCH_SHADER_SIZE 32 //hard code this value for now
61 ALIGNVAR(GX2_SHADER_ALIGNMENT) static u8 g_GX2UTFetchShader[FETCH_SHADER_SIZE];
62
63 // ----- GX2 Texture
64
65 typedef struct _VtxFmtF32x2 {
66 f32 texcoord[2];
67 } VtxFmtF32x2;
68
69 static const VtxFmtF32x2 CONVERT_SURFACE_RECT_POSITION_DATA[] =
70 {
71 {0.0f, 0.0f},
72 {1.0f, 0.0f},
73 {1.0f, 1.0f},
74 {0.0f, 1.0f}
75 };
76
77 // Same for both rect and triangle strips
78 static const u32 VERTEX_COUNT = sizeof(CONVERT_SURFACE_RECT_POSITION_DATA)
79 / sizeof(CONVERT_SURFACE_RECT_POSITION_DATA[0]);
80
81 // Initializes how surfaces will be copied
GX2UTConvertDepthInit()82 static void GX2UTConvertDepthInit()
83 {
84 // Setup shaders
85 u32 i;
86
87 for (i = 0; i < NUM_SHADERS; ++i)
88 {
89 g_expandDepthShader[i].pVertexShader = VS_SHADERS[i];
90 g_expandDepthShader[i].pPixelShader = PS_SHADERS[i];
91
92 GX2Invalidate(GX2_INVALIDATE_CPU_SHADER,
93 g_expandDepthShader[i].pVertexShader->shaderPtr,
94 g_expandDepthShader[i].pVertexShader->shaderSize);
95
96 GX2NotifyMemAlloc(g_expandDepthShader[i].pVertexShader->shaderPtr,
97 g_expandDepthShader[i].pVertexShader->shaderSize,
98 GX2_SHADER_ALIGNMENT);
99
100 GX2Invalidate(GX2_INVALIDATE_CPU_SHADER,
101 g_expandDepthShader[i].pPixelShader->shaderPtr,
102 g_expandDepthShader[i].pPixelShader->shaderSize);
103
104 GX2NotifyMemAlloc(g_expandDepthShader[i].pPixelShader->shaderPtr,
105 g_expandDepthShader[i].pPixelShader->shaderSize,
106 GX2_SHADER_ALIGNMENT);
107
108 // Lookup the uniform locations in the vertex shader.
109 // The shader author chose the name "u_positions"
110 g_expandDepthShader[i].u_positionLocation =
111 (u32)GX2GetVertexUniformVarOffset(g_expandDepthShader[i].pVertexShader, "u_positions");
112 ASSERT((g_expandDepthShader[i].u_positionLocation != GX2_UNIFORM_VAR_INVALID_OFFSET)
113 && "Couldn't find the correct vertex shader uniforms.");
114
115 }
116
117 ASSERT(GX2CalcFetchShaderSize(0) <= sizeof(g_GX2UTFetchShader) && "g_GX2UTFetchShader too small!\n");
118 GX2InitFetchShader(&fetchShader, g_GX2UTFetchShader, 0, NULL);
119 }
120
GX2UTConvertDepthBufferToTextureSurfaceOp(const GX2DepthBuffer * depthBuffer,GX2Surface * dstSurface,u32 dstMip,u32 dstSlice)121 void GX2UTConvertDepthBufferToTextureSurfaceOp(
122 const GX2DepthBuffer* depthBuffer,
123 GX2Surface* dstSurface,
124 u32 dstMip, u32 dstSlice)
125 {
126 static GX2Boolean initDone = GX2_FALSE;
127 GX2ColorBuffer cb;
128 u32 expandWidth;
129 u32 expandHeight;
130 u32 shaderIdx = 0;
131 GX2SurfaceDim dim = GX2_SURFACE_DIM_2D;
132 u32 depth = depthBuffer->surface.depth;
133
134 if (depthBuffer->surface.aa == GX2_AA_MODE_1X)
135 {
136 if (depth > 1)
137 dim = GX2_SURFACE_DIM_2D_ARRAY;
138 else
139 dim = GX2_SURFACE_DIM_2D;
140 }
141 else
142 {
143 if (depth > 1)
144 dim = GX2_SURFACE_DIM_2D_MSAA_ARRAY;
145 else
146 dim = GX2_SURFACE_DIM_2D_MSAA;
147 }
148
149
150 GX2UTDebugTagIndent(__func__);
151
152 if (initDone == GX2_FALSE)
153 {
154 // Initialize the resources needed to expand a depth buffer
155 GX2UTConvertDepthInit();
156
157 initDone = GX2_TRUE;
158 }
159
160 ASSERT((depthBuffer != NULL));
161
162 // GPU7 cannot copy one aa mode to the other directly,
163 // Use GX2UTResolveAAColorBuffer to resolve after converting.
164 ASSERT((depthBuffer->surface.aa == dstSurface->aa));
165
166
167 if (depthBuffer->surface.aa == GX2_AA_MODE_1X)
168 {
169 GX2SetSpecialState(GX2_SPECIAL_STATE_CONVERT_DEPTH, GX2_ENABLE);
170 }
171 else
172 {
173 GX2SetSpecialState(GX2_SPECIAL_STATE_CONVERT_AADEPTH, GX2_ENABLE);
174 }
175 // Set shaders
176 GX2SetFetchShader(&fetchShader);
177 GX2SetVertexShader(g_expandDepthShader[shaderIdx].pVertexShader);
178 GX2SetPixelShader(g_expandDepthShader[shaderIdx].pPixelShader);
179
180 // Set the uniforms to be used by the vertex shader
181 f32 position_base_scale[] =
182 {
183 -1.0f,
184 1.0f,
185 2.0f,
186 -2.0f,
187 };
188
189 for (int i = 0; i < 4; i++)
190 {
191 f32 pos[] = {
192 position_base_scale[0] + position_base_scale[2] * CONVERT_SURFACE_RECT_POSITION_DATA[i].texcoord[0],
193 position_base_scale[1] + position_base_scale[3] * CONVERT_SURFACE_RECT_POSITION_DATA[i].texcoord[1],
194 0.0,
195 1.0
196 };
197
198 GX2SetVertexUniformReg(g_expandDepthShader[shaderIdx].u_positionLocation + i*4, 1*4, pos);
199 }
200
201 // Copy the dimensions
202 expandWidth = MAX(1, dstSurface->width >> dstMip);
203 expandHeight = MAX(1, dstSurface->height >> dstMip);
204
205 // Invalidate the depth buffer to guarantee all writes have been flushed
206 if ( depthBuffer->viewMip )
207 GX2Invalidate(GX2_INVALIDATE_DEPTH_BUFFER, depthBuffer->surface.mipPtr, depthBuffer->surface.mipSize);
208 else
209 GX2Invalidate(GX2_INVALIDATE_DEPTH_BUFFER, depthBuffer->surface.imagePtr, depthBuffer->surface.imageSize);
210
211 for (int slice = 0; slice < depthBuffer->viewNumSlices; slice++)
212 {
213 // Setup color buffer
214 // For AA Depth Convert, we need to disable the aux color buffer
215 // while rendering AA. This is done by using GX2_AA_MODE_1X in the
216 // call to GX2InitColorBuffer while calling GX2SetAAMode to setup
217 // AA correctly.
218 GX2InitColorBuffer(&cb, expandWidth, expandHeight, dstSurface->format, GX2_AA_MODE_1X);
219
220 // Use hardware slice logic instead of MipLevelPtr logic
221 GX2InitColorBufferPtr(&cb, GX2UTGetSurfaceMipSlicePtr(dstSurface, dstMip, 0));
222
223 //
224 // Update the slice information
225 //
226
227 // The tileMode needs to be adjusted in case the incoming surface
228 // tileMode isn't the default.
229 cb.surface.tileMode = GX2UTGetSurfaceMipSliceTileMode(dstSurface, dstMip);
230 cb.surface.dim = dim;
231 cb.surface.depth = dstSurface->depth;
232 cb.viewFirstSlice = dstSlice + slice;
233 cb.viewNumSlices = 1;
234 GX2CalcSurfaceSizeAndAlignment(&cb.surface);
235 GX2InitColorBufferRegs(&cb);
236
237 GX2SetColorBuffer(&cb, GX2_RENDER_TARGET_0);
238 GX2SetAAMode(dstSurface->aa); // Bypass cmask/fmask (aux buffer)
239
240 // Set the depth buffer to resolve
241 GX2DepthBuffer db = *depthBuffer;
242 db.viewFirstSlice = slice;
243 db.viewNumSlices = 1;
244 GX2InitDepthBufferRegs(&db);
245
246 GX2SetDepthBuffer(&db);
247
248 // Render to destination surface dimensions
249 GX2SetViewport(0, 0, (f32)expandWidth, (f32)expandHeight, 0.0f, 1.0f);
250 GX2SetScissor(0, 0, expandWidth, expandHeight);
251
252 // Draw a full quad that covers the display
253 GX2Draw(GX2_PRIMITIVE_RECTS, VERTEX_COUNT);
254 }
255
256 // Invalidate the color buffer to guarantee all writes have been flushed
257 if ( depthBuffer->viewMip )
258 GX2Invalidate(GX2_INVALIDATE_DEPTH_BUFFER, cb.surface.mipPtr, cb.surface.mipSize);
259 else
260 GX2Invalidate(GX2_INVALIDATE_DEPTH_BUFFER, cb.surface.imagePtr, cb.surface.imageSize);
261
262 // Disable any complex GX2 state
263 if (depthBuffer->surface.aa == GX2_AA_MODE_1X)
264 {
265 GX2SetSpecialState(GX2_SPECIAL_STATE_CONVERT_DEPTH, GX2_DISABLE);
266 }
267 else
268 {
269 GX2SetSpecialState(GX2_SPECIAL_STATE_CONVERT_AADEPTH, GX2_DISABLE);
270 }
271
272 GX2UTDebugTagUndent();
273 }
274
275
276 //Setup all of the constant renderstate needed for the copy.
GX2UTSetConvertDepthState(GX2Boolean enable)277 void GX2UTSetConvertDepthState(GX2Boolean enable)
278 {
279 if (enable)
280 {
281 // If your application's steady state can be set to GX2UT common state
282 // using a small number of discrete GX2 calls, then customize here
283 // instead of using GX2UTSetCommonState().
284 GX2UTSetCommonState();
285
286 // Set additional required GX2 state
287 GX2SetDepthStencilControl(GX2_FALSE,
288 GX2_FALSE,
289 GX2_COMPARE_NEVER,
290 GX2_FALSE,
291 GX2_TRUE,
292 GX2_COMPARE_NEVER,
293 GX2_STENCIL_KEEP,
294 GX2_STENCIL_KEEP,
295 GX2_STENCIL_KEEP,
296 GX2_COMPARE_NEVER,
297 GX2_STENCIL_KEEP,
298 GX2_STENCIL_KEEP,
299 GX2_STENCIL_KEEP);
300
301 // Enable color writes for the conversion
302 GX2SetColorControl(GX2_LOGIC_OP_COPY,
303 GX2_DISABLE,
304 GX2_DISABLE,
305 GX2_ENABLE);
306
307 // Special GX2 state for our operation is not set until later because
308 // we need to know about what buffers we're being asked to operate on
309 }
310 else
311 {
312 // The purpose of the following is to return the context to GX2 default
313 // state. If your application uses a different "steady state", then
314 // customize
315 GX2SetDepthStencilControl(
316 GX2_TRUE, //depthTestEnable
317 GX2_TRUE, //depthWriteEnable
318 GX2_COMPARE_LESS, //depthFunc
319 GX2_FALSE, //stencilTestEnable
320 GX2_FALSE, //backStencilEnable
321 GX2_COMPARE_ALWAYS, //frontStencilFunc
322 GX2_STENCIL_REPLACE, //frontStencilZPass
323 GX2_STENCIL_REPLACE, //frontStencilZFail
324 GX2_STENCIL_REPLACE, //frontStencilFail
325 GX2_COMPARE_ALWAYS, //backStencilFunc
326 GX2_STENCIL_REPLACE, //backStencilZPass
327 GX2_STENCIL_REPLACE, //backStencilZFail
328 GX2_STENCIL_REPLACE);//backStencilFail
329 }
330 }
331