1 /*---------------------------------------------------------------------------*
2
3 Copyright (C) Nintendo. All rights reserved.
4
5 These coded instructions, statements, and computer programs contain
6 proprietary information of Nintendo of America Inc. and/or Nintendo
7 Company Ltd., and are protected by Federal copyright law. They may
8 not be disclosed to third parties or copied or duplicated in any form,
9 in whole or in part, without the prior written consent of Nintendo.
10
11 *---------------------------------------------------------------------------*/
12
13 #include "types.h"
14 #include <assert.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17
18 #include "windows/gx2.h"
19
20 #include "cafe/gfd.h"
21 #include "gfdFile.h"
22
23 #define GSH_DEFAULT_FILENAME "out.gsh"
24
25 // definitions for private structures
26
27 // GFD specific Shader structures to repack structure between 32 bit and 64 bit
28
29 typedef struct _GFDVertexShaderRegs
30 {
31 u32 reg[GX2_NUM_VERTEX_SHADER_REGISTERS];
32 } GFDVertexShaderRegs;
33
34 typedef struct _GFDGeometryShaderRegs
35 {
36 u32 reg[GX2_NUM_GEOMETRY_SHADER_REGISTERS];
37 } GFDGeometryShaderRegs;
38
39 typedef struct _GFDPixelShaderRegs
40 {
41 u32 reg[GX2_NUM_PIXEL_SHADER_REGISTERS];
42 } GFDPixelShaderRegs;
43
44 typedef struct _GFDComputeShaderRegs
45 {
46 u32 reg[GX2_NUM_COMPUTE_SHADER_REGISTERS];
47 } GFDComputeShaderRegs;
48
49 typedef struct _GFDVertexShader
50 {
51 GFDVertexShaderRegs _regs;
52 u32 shaderSize;
53 u32 shaderPtr;
54 GX2ShaderMode shaderMode;
55 u32 numUniformBlocks;
56 u32 uniformBlocks;
57 u32 numUniforms;
58 u32 uniformVars;
59 u32 numInitialValues;
60 u32 initialValues;
61 u32 _numLoops;
62 u32 _loopVars;
63 u32 numSamplers;
64 u32 samplerVars;
65 u32 numAttribs;
66 u32 attribVars;
67 u32 ringItemsize;
68 u32 hasStreamOut;
69 u32 streamOutVertexStride[GX2_MAX_STREAMOUT_BUFFERS];
70 GX2RBuffer shaderProgram;
71 } GFDVertexShader;
72
73 // GFD specific Shader structures to repack structure between 32 bit and 64 bit
74
75 typedef struct _GFDGeometryShader
76 {
77 GFDGeometryShaderRegs _regs;
78 u32 shaderSize;
79 u32 shaderPtr;
80 u32 copyShaderSize;
81 u32 copyShaderPtr;
82 GX2ShaderMode shaderMode;
83 u32 numUniformBlocks;
84 u32 uniformBlocks;
85 u32 numUniforms;
86 u32 uniformVars;
87 u32 numInitialValues;
88 u32 initialValues;
89 u32 _numLoops;
90 u32 _loopVars;
91 u32 numSamplers;
92 u32 samplerVars;
93 u32 ringItemsize;
94 u32 hasStreamOut;
95 u32 streamOutVertexStride[GX2_MAX_STREAMOUT_BUFFERS];
96 GX2RBuffer shaderProgram;
97 GX2RBuffer copyShaderProgram;
98 } GFDGeometryShader;
99
100 // GFD specific Shader structures to repack structure between 32 bit and 64 bit
101
102 typedef struct _GFDPixelShader
103 {
104 GFDPixelShaderRegs _regs;
105 u32 shaderSize;
106 u32 shaderPtr;
107 GX2ShaderMode shaderMode;
108 u32 numUniformBlocks;
109 u32 uniformBlocks;
110 u32 numUniforms;
111 u32 uniformVars;
112 u32 numInitialValues;
113 u32 initialValues;
114 u32 _numLoops;
115 u32 _loopVars;
116 u32 numSamplers;
117 u32 samplerVars;
118 GX2RBuffer shaderProgram;
119 } GFDPixelShader;
120
121 typedef struct _GFDComputeShader
122 {
123 GFDComputeShaderRegs _regs;
124 u32 shaderSize;
125 u32 shaderPtr;
126 u32 numUniformBlocks;
127 u32 uniformBlocks;
128 u32 numUniforms;
129 u32 uniformVars;
130 u32 numInitialValues;
131 u32 initialValues;
132 u32 _numLoops;
133 u32 _loopVars;
134 u32 numSamplers;
135 u32 samplerVars;
136 u32 layout_size_x;
137 u32 layout_size_y;
138 u32 layout_size_z;
139 u32 Over64Mode;
140 u32 numWavesPerSIMD;
141 GX2RBuffer shaderProgram;
142 } GFDComputeShader;
143
144 // GFD specific Uniform Block structures to repack structure between 32 bit and 64 bit
145
146 typedef struct _GFDUniformBlock
147 {
148 u32 name;
149 u32 location;
150 u32 size;
151 } GFDUniformBlock;
152
153 // GFD specific Uniform Var structures to repack structure between 32 bit and 64 bit
154
155 typedef struct _GFDUniformVar
156 {
157 u32 name;
158 GX2VarType type;
159 u32 arrayCount;
160 u32 offset;
161 u32 blockIndex;
162 } GFDUniformVar;
163
164 // GFD specific Attrib Var structures to repack structure between 32 bit and 64 bit
165
166 typedef struct _GFDAttribVar
167 {
168 u32 name;
169 GX2VarType type;
170 u32 arrayCount;
171 u32 location;
172 } GFDAttribVar;
173
174 // GFD specific Sampler Var structures to repack structure between 32 bit and 64 bit
175
176 typedef struct _GFDSamplerVar
177 {
178 u32 name;
179 GX2SamplerType type;
180 u32 location;
181 } GFDSamplerVar;
182
183 typedef struct _GFDLoopVar
184 {
185 u32 reg[GX2_NUM_LOOP_VAR_U32_WORDS];
186 } GFDLoopVar;
187
188 // name conversions
189
190 // Note: These arrays must be kept in sync with the enum lists in gx2Enum.h
191 // Those lists must be kept in sync with the compiler output.
192 // The latter check is done elsewhere. The former is below.
/// Printable names for variable types; the trailing comment on each entry
/// is its index in this array.
static const char *varTypeName[] = {
    "GX2_VAR_TYPE_VOID",        //  0
    "GX2_VAR_TYPE_BOOL",        //  1
    "GX2_VAR_TYPE_INT",         //  2
    "GX2_VAR_TYPE_UINT",        //  3
    "GX2_VAR_TYPE_FLOAT",       //  4
    "GX2_VAR_TYPE_DOUBLE",      //  5
    "GX2_VAR_TYPE_DVEC2",       //  6
    "GX2_VAR_TYPE_DVEC3",       //  7
    "GX2_VAR_TYPE_DVEC4",       //  8
    "GX2_VAR_TYPE_VEC2",        //  9
    "GX2_VAR_TYPE_VEC3",        // 10
    "GX2_VAR_TYPE_VEC4",        // 11
    "GX2_VAR_TYPE_BVEC2",       // 12
    "GX2_VAR_TYPE_BVEC3",       // 13
    "GX2_VAR_TYPE_BVEC4",       // 14
    "GX2_VAR_TYPE_IVEC2",       // 15
    "GX2_VAR_TYPE_IVEC3",       // 16
    "GX2_VAR_TYPE_IVEC4",       // 17
    "GX2_VAR_TYPE_UVEC2",       // 18
    "GX2_VAR_TYPE_UVEC3",       // 19
    "GX2_VAR_TYPE_UVEC4",       // 20
    "GX2_VAR_TYPE_MAT2",        // 21
    "GX2_VAR_TYPE_MAT2X3",      // 22
    "GX2_VAR_TYPE_MAT2X4",      // 23
    "GX2_VAR_TYPE_MAT3X2",      // 24
    "GX2_VAR_TYPE_MAT3",        // 25
    "GX2_VAR_TYPE_MAT3X4",      // 26
    "GX2_VAR_TYPE_MAT4X2",      // 27
    "GX2_VAR_TYPE_MAT4X3",      // 28
    "GX2_VAR_TYPE_MAT4",        // 29
    "GX2_VAR_TYPE_DMAT2",       // 30
    "GX2_VAR_TYPE_DMAT2X3",     // 31
    "GX2_VAR_TYPE_DMAT2X4",     // 32
    "GX2_VAR_TYPE_DMAT3X2",     // 33
    "GX2_VAR_TYPE_DMAT3",       // 34
    "GX2_VAR_TYPE_DMAT3X4",     // 35
    "GX2_VAR_TYPE_DMAT4X2",     // 36
    "GX2_VAR_TYPE_DMAT4X3",     // 37
    "GX2_VAR_TYPE_DMAT4"        // 38
};
234
/// Printable names for sampler types; the trailing comment on each entry
/// is its index in this array.
static const char *samplerTypeName[] = {
    "GX2_SAMPLER_TYPE_1D",                          //  0
    "GX2_SAMPLER_TYPE_2D",                          //  1
    "GX2_SAMPLER_TYPE_2D_RECT",                     //  2
    "GX2_SAMPLER_TYPE_3D",                          //  3
    "GX2_SAMPLER_TYPE_CUBE",                        //  4
    "GX2_SAMPLER_TYPE_1D_SHADOW",                   //  5
    "GX2_SAMPLER_TYPE_2D_SHADOW",                   //  6
    "GX2_SAMPLER_TYPE_2D_RECT_SHADOW",              //  7
    "GX2_SAMPLER_TYPE_CUBE_SHADOW",                 //  8
    "GX2_SAMPLER_TYPE_1D_ARRAY",                    //  9
    "GX2_SAMPLER_TYPE_2D_ARRAY",                    // 10
    "GX2_SAMPLER_TYPE_1D_ARRAY_SHADOW",             // 11
    "GX2_SAMPLER_TYPE_2D_ARRAY_SHADOW",             // 12
    "GX2_SAMPLER_TYPE_CUBE_ARRAY",                  // 13
    "GX2_SAMPLER_TYPE_CUBE_ARRAY_SHADOW",           // 14
    "GX2_SAMPLER_TYPE_BUFFER",                      // 15
    "GX2_SAMPLER_TYPE_RENDERBUFFER",                // 16
    "GX2_SAMPLER_TYPE_2D_MS",                       // 17
    "GX2_SAMPLER_TYPE_2D_MS_ARRAY",                 // 18
    "GX2_SAMPLER_TYPE_INT_1D",                      // 19
    "GX2_SAMPLER_TYPE_INT_2D",                      // 20
    "GX2_SAMPLER_TYPE_INT_2D_RECT",                 // 21
    "GX2_SAMPLER_TYPE_INT_3D",                      // 22
    "GX2_SAMPLER_TYPE_INT_CUBE",                    // 23
    "GX2_SAMPLER_TYPE_INT_1D_ARRAY",                // 24
    "GX2_SAMPLER_TYPE_INT_2D_ARRAY",                // 25
    "GX2_SAMPLER_TYPE_INT_CUBE_ARRAY",              // 26
    "GX2_SAMPLER_TYPE_INT_BUFFER",                  // 27
    "GX2_SAMPLER_TYPE_INT_RENDERBUFFER",            // 28
    "GX2_SAMPLER_TYPE_INT_2D_MS",                   // 29
    "GX2_SAMPLER_TYPE_INT_2D_MS_ARRAY",             // 30
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_1D",             // 31
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_2D",             // 32
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_2D_RECT",        // 33
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_3D",             // 34
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_CUBE",           // 35
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_1D_ARRAY",       // 36
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_2D_ARRAY",       // 37
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_CUBE_ARRAY",     // 38
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_BUFFER",         // 39
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_RENDERBUFFER",   // 40
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_2D_MS",          // 41
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_2D_MS_ARRAY"     // 42
};
280
/// Printable names for shader modes; the trailing comment on each entry
/// is its index in this array.
static const char *shaderModeName[] = {
    "GX2_SHADER_MODE_UNIFORM_REGISTER",     // 0
    "GX2_SHADER_MODE_UNIFORM_BLOCK",        // 1
    "GX2_SHADER_MODE_GEOMETRY_SHADER"       // 2
};
286
287 /// Cleans out extra debug flags attached to offset
GFDCleanTag(u32 Offset)288 u32 GFDCleanTag(u32 Offset) {return Offset & ~GFD_TAG_MASK;}
289
290 /// Verifies offset stored in file is tagged with GFD_TAG_DAT
GFDCheckTagDAT(u32 Offset)291 BOOL GFDCheckTagDAT(u32 Offset) {return (Offset & GFD_TAG_MASK) == GFD_TAG_DAT;}
292
293 /// Verifies offset stored in file is tagged with GFD_TAG_STR
GFDCheckTagSTR(u32 Offset)294 BOOL GFDCheckTagSTR(u32 Offset) {return (Offset & GFD_TAG_MASK) == GFD_TAG_STR;}
295
296 /// A block of null terminated strings all stored in the same char array
297 /// All strings are padded with zeros at the end to be integer multiple of words (4) long.
298 typedef struct _GFDStringTable
299 {
300 u32 m_n; ///< Current number of strings
301 u32 m_nDB; ///< Current size of pDataBlock in chars
302 u32 m_maxDB; ///< Max size of data block, again in chars
303 char * m_pDB; ///< Block of data containing all the strings;
304 } GFDStringTable;
305
306 /// A table of data segments, stored contiguously in the same int array as [Size, Data] pairs.
307 /// Data Tables are very similar to String Tables. They just point arrays of data
308 /// rather than arrays null terminated strings.
309 typedef struct _GFDDataTable
310 {
311 u32 m_n; ///< Current number of data blocks in hunk (good for error checking)
312 u32 m_nDB; ///< Current size (e.g. next writeable offset) of m_pDB in chars
313 u32 m_maxDB; ///< Max size of data block, again in chars
314 char * m_pDB; ///< Block of data containing all the data
315 } GFDDataTable;
316
317 /// Create a new empty string table, with space for max strings;
GFDCreateStringTable(u32 max)318 GFDStringTable *GFDCreateStringTable(u32 max)
319 {
320 GFDStringTable *pTable = (GFDStringTable *) malloc(sizeof(GFDStringTable));
321
322 if ( pTable )
323 {
324 pTable->m_n = 0;
325
326 pTable->m_nDB = 0;
327 pTable->m_maxDB = max;
328 pTable->m_pDB = (char *) malloc(pTable->m_maxDB * sizeof(char));
329 memset(pTable->m_pDB, 0, pTable->m_maxDB * sizeof(char));
330 }
331 else
332 {
333 printf("Error! Failed to create string table!\n");
334 }
335 return pTable;
336 }
337
338 /// Frees the string table and all data associated with it
GFDDestroyStringTable(GFDStringTable * pTable)339 void GFDDestroyStringTable(GFDStringTable *pTable)
340 {
341 if(pTable)
342 {
343 free(pTable->m_pDB);
344 memset(pTable, 0, sizeof(GFDStringTable)); // paranioa to avoid any chance of reuse
345 free(pTable);
346 }
347 }
348
349 /// Grows maximum space in the string table to the new size. Will not make table smaller.
GFDGrowStringTableDB(GFDStringTable * pTable,u32 newMaxDB)350 void GFDGrowStringTableDB(GFDStringTable *pTable, u32 newMaxDB)
351 {
352 if(newMaxDB > pTable->m_maxDB) // if want more space, malloc it up, and copy old data over
353 {
354 char *pDB = (char*) malloc(newMaxDB * sizeof(char)); // create new memory
355 memset(pDB, 0, newMaxDB * sizeof(char));
356 memcpy(pDB, pTable->m_pDB, pTable->m_maxDB); // copy old data into this new location
357
358 free(pTable->m_pDB); // cleanup, and save new values
359 pTable->m_pDB = pDB;
360 pTable->m_maxDB = newMaxDB;
361 }
362 }
363
364 /// Adds null terminated string to the string table.
365 /// Returns offset into the string table of the begining of this string
GFDAddStringTable(GFDStringTable * pTable,const char * str)366 u32 GFDAddStringTable(GFDStringTable *pTable, const char *str)
367 {
368 size_t len = strlen(str);
369 // (We want our strings to be word aligned to make transfer accross 'network' easier)
370 int len_pad = (len + 1 + 3) & ~0x3; // round length up to word boundary.. (+1 for first terminating 0)
371
372 if(pTable->m_nDB + len_pad + 1 >= pTable->m_maxDB)
373 {
374 int newSize = pTable->m_nDB;
375 if(len_pad >= newSize) newSize = len_pad*2; // be safe for really long strings
376 newSize = (newSize + 0x1f) & ~0x1f; // round to nice size
377 GFDGrowStringTableDB(pTable, pTable->m_nDB + newSize); // double size if needed
378 }
379
380 // We could check here for a duplicate string already in the table,
381 // and return that index, but let's skip that optimization for now.
382
383 int n = pTable->m_n;
384 int off = pTable->m_nDB;
385
386 // copy string into the string table
387 strncpy_s(pTable->m_nDB + pTable->m_pDB, len+1, str, len+1);
388 int zero = 0; // fill trailing pad with zeros too...
389 strncpy_s(pTable->m_nDB + pTable->m_pDB + len + 1, sizeof(u32), (char*) &zero, len_pad - len-1);
390
391 // update our structs pointing to it
392 pTable->m_nDB += len_pad;
393 pTable->m_n += 1;
394
395 return off | GFD_TAG_STR;
396 }
397
398 /// Create a new empty string table, with space for max strings;
GFDCreateDataTable(u32 max)399 GFDDataTable *GFDCreateDataTable(u32 max)
400 {
401 GFDDataTable *pTable = (GFDDataTable *) malloc(sizeof(GFDDataTable));
402
403 if ( pTable )
404 {
405 pTable->m_n = 0;
406 pTable->m_nDB = 0;
407 pTable->m_maxDB = max * sizeof(u32); // assume chars are average of 8 or so in size (will grow separatly from ppStrings)
408 pTable->m_pDB = (char *) malloc(pTable->m_maxDB * sizeof(char));
409 memset(pTable->m_pDB, 0xbb, pTable->m_maxDB * sizeof(char));
410 }
411 else
412 {
413 printf("Error! Failed to create data table!\n");
414 }
415
416 return pTable;
417 }
418
419 /// Frees the string table and all data associated with it
GFDDestroyDataTable(GFDDataTable * pTable)420 void GFDDestroyDataTable(GFDDataTable *pTable)
421 {
422 if(pTable)
423 {
424 free(pTable->m_pDB);
425 memset(pTable, 0, sizeof(GFDStringTable)); // paranioa to avoid any chance of reuse
426 free(pTable);
427 }
428 }
429
430 /// Grows maximum space of data table to the new size, in bytes. Will not make table smaller.
GFDGrowDataTableDB(GFDDataTable * pTable,u32 newMaxDB)431 void GFDGrowDataTableDB(GFDDataTable *pTable, u32 newMaxDB)
432 {
433 if(newMaxDB > pTable->m_maxDB) // if want more space, malloc it up, and copy old data over
434 {
435 char *pDB = (char *) malloc(newMaxDB * sizeof(char)); // create new memory
436 memset(pDB, 0, newMaxDB * sizeof(char));
437 memcpy(pDB, pTable->m_pDB, pTable->m_maxDB); // copy old data into this new location
438
439 free(pTable->m_pDB); // cleanup, and save new values
440 pTable->m_pDB = pDB;
441 pTable->m_maxDB = newMaxDB;
442 }
443 }
444
445 /// Adds a new hunk of data to the data table, of specified length in bytes.
446 /// nBytes must be integer multiple of 4.
447 /// Returns byte offset into that table.
448 /// adds new block to our hunk table. Returns new size
GFDAddDataTable(GFDDataTable * pTable,void * data,u32 nBytes)449 u32 GFDAddDataTable(GFDDataTable *pTable, void *data, u32 nBytes)
450 {
451 assert( (nBytes & 0x3) == 0 && "nBytes must be multiple of 4");
452
453 if(pTable->m_nDB + nBytes + sizeof(u32) >= pTable->m_maxDB)
454 {
455 size_t newSize = pTable->m_nDB; // double size
456 if(nBytes >= newSize) newSize = nBytes*2; // if doubling isn't enough, make bigger
457 int finalSize = (pTable->m_nDB + newSize + 0x1f) & ~0x1f; // finally round to nearest 32 bytes
458 GFDGrowDataTableDB(pTable, finalSize);
459 }
460
461 u32 off = pTable->m_nDB; // offset is data, after the length...
462
463 memcpy(pTable->m_pDB + pTable->m_nDB, data, nBytes);
464
465 // update our structs pointing to it
466 pTable->m_nDB += nBytes;
467 pTable->m_n += 1;
468
469 return off | GFD_TAG_DAT; // OR in this silly constant to use for catching errors
470 }
471
472 //--------------------------------------------------------------------------
473
474 /// Repack a vertex shader from a 64-bit structure to a 32-bit structure.
475 /// We output a GX2VertexShader * for convenience, but it is not valid for 64-bit.
476 /// All pointers are cast to 32-bit integers. It is therefore 7*4 bytes shorter.
477 /// The return value is the resulting 32-bit structure size.
GFDRepackVertexShaderFor32Bit(GX2VertexShader * pVSin64,GFDVertexShader * pVSout32)478 u32 GFDRepackVertexShaderFor32Bit(GX2VertexShader *pVSin64, GFDVertexShader *pVSout32)
479 {
480 assert(sizeof(pVSout32->_regs) == sizeof(pVSin64->_regs));
481 memcpy(&pVSout32->_regs, pVSin64->_regs, sizeof(pVSout32->_regs));
482
483 pVSout32->shaderSize = pVSin64->shaderSize;
484 pVSout32->shaderPtr = (u32) pVSin64->shaderPtr;
485 pVSout32->shaderMode = pVSin64->shaderMode;
486 pVSout32->numUniformBlocks = pVSin64->numUniformBlocks;
487 pVSout32->uniformBlocks = (u32) pVSin64->uniformBlocks;
488 pVSout32->numUniforms = pVSin64->numUniforms;
489 pVSout32->uniformVars = (u32) pVSin64->uniformVars;
490 pVSout32->numInitialValues = pVSin64->numInitialValues;
491 pVSout32->initialValues = (u32) pVSin64->initialValues;
492 pVSout32->_numLoops = pVSin64->_numLoops;
493 pVSout32->_loopVars = (u32) pVSin64->_loopVars;
494 pVSout32->numSamplers = pVSin64->numSamplers;
495 pVSout32->samplerVars = (u32) pVSin64->samplerVars;
496 pVSout32->numAttribs = pVSin64->numAttribs;
497 pVSout32->attribVars = (u32) pVSin64->attribVars;
498 pVSout32->ringItemsize = pVSin64->ringItemsize;
499 pVSout32->hasStreamOut = (u32) pVSin64->hasStreamOut;
500
501 assert(sizeof(pVSout32->streamOutVertexStride) == sizeof(pVSin64->streamOutVertexStride));
502 memcpy(&pVSout32->streamOutVertexStride, pVSin64->streamOutVertexStride, sizeof(pVSout32->streamOutVertexStride));
503
504 pVSout32->shaderProgram = pVSin64->shaderProgram;
505
506 return sizeof(GFDVertexShader);
507 }
508
509 /// Repack a pixel shader from a 64-bit structure to a 32-bit structure.
510 /// We output a GX2PixelShader * for convenience, but it is not valid for 64-bit.
511 /// All pointers are cast to 32-bit integers. It is therefore 6*4 bytes shorter.
512 /// The return value is the resulting 32-bit structure size.
GFDRepackPixelShaderFor32Bit(GX2PixelShader * pPSin64,GFDPixelShader * pPSout32)513 u32 GFDRepackPixelShaderFor32Bit(GX2PixelShader *pPSin64, GFDPixelShader *pPSout32)
514 {
515 assert(sizeof(pPSout32->_regs) == sizeof(pPSin64->_regs));
516 memcpy(&pPSout32->_regs, pPSin64->_regs, sizeof(pPSout32->_regs));
517
518 pPSout32->shaderSize = pPSin64->shaderSize;
519 pPSout32->shaderPtr = (u32) pPSin64->shaderPtr;
520 pPSout32->shaderMode = pPSin64->shaderMode;
521 pPSout32->numUniformBlocks = pPSin64->numUniformBlocks;
522 pPSout32->uniformBlocks = (u32) pPSin64->uniformBlocks;
523 pPSout32->numUniforms = pPSin64->numUniforms;
524 pPSout32->uniformVars = (u32) pPSin64->uniformVars;
525 pPSout32->numInitialValues = pPSin64->numInitialValues;
526 pPSout32->initialValues = (u32) pPSin64->initialValues;
527 pPSout32->_numLoops = pPSin64->_numLoops;
528 pPSout32->_loopVars = (u32) pPSin64->_loopVars;
529 pPSout32->numSamplers = pPSin64->numSamplers;
530 pPSout32->samplerVars = (u32) pPSin64->samplerVars;
531 pPSout32->shaderProgram = pPSin64->shaderProgram;
532
533 return sizeof(GFDPixelShader);
534 }
535
536 /// Repack a geometry shader from a 64-bit structure to a 32-bit structure.
537 /// We output a GX2GeometryShader * for convenience, but it is not valid for 64-bit.
538 /// All pointers are cast to 32-bit integers. It is therefore 6*4 bytes shorter.
539 /// The return value is the resulting 32-bit structure size.
GFDRepackGeometryShaderFor32Bit(GX2GeometryShader * pGSin64,GFDGeometryShader * pGSout32)540 u32 GFDRepackGeometryShaderFor32Bit(GX2GeometryShader *pGSin64, GFDGeometryShader *pGSout32)
541 {
542 assert(sizeof(pGSout32->_regs) == sizeof(pGSin64->_regs));
543 memcpy(&pGSout32->_regs, pGSin64->_regs, sizeof(pGSout32->_regs));
544
545 pGSout32->shaderSize = pGSin64->shaderSize;
546 pGSout32->shaderPtr = (u32) pGSin64->shaderPtr;
547 pGSout32->copyShaderSize = pGSin64->copyShaderSize;
548 pGSout32->copyShaderPtr = (u32) pGSin64->copyShaderPtr;
549 pGSout32->shaderMode = pGSin64->shaderMode;
550 pGSout32->numUniformBlocks = pGSin64->numUniformBlocks;
551 pGSout32->uniformBlocks = (u32) pGSin64->uniformBlocks;
552 pGSout32->numUniforms = pGSin64->numUniforms;
553 pGSout32->uniformVars = (u32) pGSin64->uniformVars;
554 pGSout32->numInitialValues = pGSin64->numInitialValues;
555 pGSout32->initialValues = (u32) pGSin64->initialValues;
556 pGSout32->_numLoops = pGSin64->_numLoops;
557 pGSout32->_loopVars = (u32) pGSin64->_loopVars;
558 pGSout32->numSamplers = pGSin64->numSamplers;
559 pGSout32->samplerVars = (u32) pGSin64->samplerVars;
560 pGSout32->ringItemsize = pGSin64->ringItemsize;
561 pGSout32->hasStreamOut = (u32) pGSin64->hasStreamOut;
562
563 assert(sizeof(pGSout32->streamOutVertexStride) == sizeof(pGSin64->streamOutVertexStride));
564 memcpy(&pGSout32->streamOutVertexStride, pGSin64->streamOutVertexStride, sizeof(pGSout32->streamOutVertexStride));
565
566 pGSout32->shaderProgram = pGSin64->shaderProgram;
567 pGSout32->copyShaderProgram = pGSin64->copyShaderProgram;
568
569 return sizeof(GFDGeometryShader);
570 }
571
572 /// Repack a compute shader from a 64-bit structure to a 32-bit structure.
573 /// We output a GX2ComputeShader * for convenience, but it is not valid for 64-bit.
574 /// All pointers are cast to 32-bit integers. It is therefore 7*4 bytes shorter.
575 /// The return value is the resulting 32-bit structure size.
GFDRepackComputeShaderFor32Bit(GX2ComputeShader * pCSin64,GFDComputeShader * pCSout32)576 u32 GFDRepackComputeShaderFor32Bit(GX2ComputeShader *pCSin64, GFDComputeShader *pCSout32)
577 {
578 assert(sizeof(pCSout32->_regs) == sizeof(pCSin64->_regs));
579 memcpy(&pCSout32->_regs, pCSin64->_regs, sizeof(pCSout32->_regs));
580
581 pCSout32->shaderSize = pCSin64->shaderSize;
582 pCSout32->shaderPtr = (u32) pCSin64->shaderPtr;
583 pCSout32->numUniformBlocks = pCSin64->numUniformBlocks;
584 pCSout32->uniformBlocks = (u32) pCSin64->uniformBlocks;
585 pCSout32->numUniforms = pCSin64->numUniforms;
586 pCSout32->uniformVars = (u32) pCSin64->uniformVars;
587 pCSout32->numInitialValues = pCSin64->numInitialValues;
588 pCSout32->initialValues = (u32) pCSin64->initialValues;
589 pCSout32->_numLoops = pCSin64->_numLoops;
590 pCSout32->_loopVars = (u32) pCSin64->_loopVars;
591 pCSout32->numSamplers = pCSin64->numSamplers;
592 pCSout32->samplerVars = (u32) pCSin64->samplerVars;
593 pCSout32->layout_size_x = pCSin64->layout_size_x;
594 pCSout32->layout_size_y = pCSin64->layout_size_y;
595 pCSout32->layout_size_z = pCSin64->layout_size_z;
596 pCSout32->Over64Mode = pCSin64->Over64Mode;
597 pCSout32->numWavesPerSIMD = pCSin64->numWavesPerSIMD;
598 pCSout32->shaderProgram = pCSin64->shaderProgram;
599
600 return sizeof(GFDComputeShader);
601 }
602
603 /// Repack a uniform block array from a 64-bit structure to a 32-bit structure.
604 /// We output a GX2UniformBlock * for convenience, but it is not valid for 64-bit.
605 /// All pointers are cast to 32-bit integers. It is therefore 1*4*n bytes shorter.
606 /// The return value is the resulting 32-bit structure size.
GFDRepackUniformBlockArrayFor32Bit(GX2UniformBlock * pUBin64,GFDUniformBlock * pUBout32,u32 n)607 u32 GFDRepackUniformBlockArrayFor32Bit(GX2UniformBlock *pUBin64, GFDUniformBlock *pUBout32, u32 n)
608 {
609 for(u32 i=0; i<n; i++) {
610 pUBout32[i].name = (u32) pUBin64[i].name;
611 pUBout32[i].location = pUBin64[i].location;
612 pUBout32[i].size = pUBin64[i].size;
613 }
614 return sizeof(GFDUniformBlock)*n;
615 }
616
617 /// Repack a uniform var array from a 64-bit structure to a 32-bit structure.
618 /// We output a GX2UniformVar * for convenience, but it is not valid for 64-bit.
619 /// All pointers are cast to 32-bit integers. It is therefore 2*4*n bytes shorter.
620 /// The return value is the resulting 32-bit structure size.
GFDRepackUniformVarArrayFor32Bit(GX2UniformVar * pUVin64,GFDUniformVar * pUVout32,u32 n)621 u32 GFDRepackUniformVarArrayFor32Bit(GX2UniformVar *pUVin64, GFDUniformVar *pUVout32, u32 n)
622 {
623 for(u32 i=0; i<n; i++) {
624 pUVout32[i].name = (u32) pUVin64[i].name;
625 pUVout32[i].type = pUVin64[i].type;
626 pUVout32[i].arrayCount = pUVin64[i].arrayCount;
627 pUVout32[i].offset = pUVin64[i].offset;
628 pUVout32[i].blockIndex = pUVin64[i].blockIndex;
629 }
630 return sizeof(GFDUniformVar)*n;
631 }
632
633 /// Repack an attrib var array from a 64-bit structure to a 32-bit structure.
634 /// We output a GX2AttribVar * for convenience, but it is not valid for 64-bit.
635 /// All pointers are cast to 32-bit integers. It is therefore 1*4*n bytes shorter.
636 /// The return value is the resulting 32-bit structure size.
GFDRepackAttribVarArrayFor32Bit(GX2AttribVar * pAVin64,GFDAttribVar * pAVout32,u32 n)637 u32 GFDRepackAttribVarArrayFor32Bit(GX2AttribVar *pAVin64, GFDAttribVar *pAVout32, u32 n)
638 {
639 for(u32 i=0; i<n; i++) {
640 pAVout32[i].name = (u32) pAVin64[i].name;
641 pAVout32[i].type = pAVin64[i].type;
642 pAVout32[i].arrayCount = pAVin64[i].arrayCount;
643 pAVout32[i].location = pAVin64[i].location;
644 }
645 return sizeof(GFDAttribVar)*n;
646 }
647
648 /// Repack a (texture) sampler var array from a 64-bit structure to a 32-bit structure.
649 /// We output a GX2SamplerVar * for convenience, but it is not valid for 64-bit.
650 /// All pointers are cast to 32-bit integers. It is therefore 1*4*n bytes shorter.
651 /// The return value is the resulting 32-bit structure size.
GFDRepackSamplerVarArrayFor32Bit(GX2SamplerVar * pSVin64,GFDSamplerVar * pSVout32,u32 n)652 u32 GFDRepackSamplerVarArrayFor32Bit(GX2SamplerVar *pSVin64, GFDSamplerVar *pSVout32, u32 n)
653 {
654 for(u32 i=0; i<n; i++) {
655 pSVout32[i].name = (u32) pSVin64[i].name;
656 pSVout32[i].type = pSVin64[i].type;
657 pSVout32[i].location = pSVin64[i].location;
658 }
659 return sizeof(GFDSamplerVar)*n;
660 }
661 // ------------------------------------------------------------
662
663 /// Create the flat datablock representation of a GX2VertexShader structure
664 /// Call GFDDataTableDestroy() on returned object once doen with it.
GFDCreateBlockRelocateHeaderVSH(GX2VertexShader * pVS)665 GFDDataTable* GFDCreateBlockRelocateHeaderVSH(GX2VertexShader *pVS)
666 {
667 // Create second data structure to hold flattened, offseted version of our original shader
668 GX2VertexShader vsCopy;
669 memcpy(&vsCopy, pVS, sizeof(GX2VertexShader));
670
671 // Walk thru copy, converting all pointers to data blocks in table, and changing
672 // addresses to offsets into the data block
673
674 // Create data table to hold the structure elements
675 GFDDataTable *pDT = GFDCreateDataTable(sizeof(GX2VertexShader) + vsCopy.numUniforms * 8 + 512); // todo - pick better number
676
677 // How many pointers do we need to patch? (The 11 here is empirically determined - asserts at end if if wrong)
678 // 11 = 1 (shader itself) + 7 in GX2VertexShader + 1 (string table) + 1 (patch list) + 1 (trailer)
679 int nElements = 11 + vsCopy.numUniformBlocks + vsCopy.numUniforms + vsCopy.numSamplers + vsCopy.numAttribs;
680 int nE = 0;
681 u32 size;
682 u32 *pAddr = (u32*) malloc( nElements * sizeof(u32)); // src offsets into data block that need patching
683 u32 *pOffset = (u32*) malloc( nElements * sizeof(u32)); // dst offsets into data block for, containing offets to write
684
685 memset(pOffset, 0, nElements * sizeof(u32));
686 memset(pAddr, 0, nElements * sizeof(u32));
687
688 // 0: Store main structure itself (we'll rewrite offsets at the end).
689 // For allocation and alignment purposes, this needs to be the first hunk in the data table
690 GFDVertexShader vsCopy32;
691 size = GFDRepackVertexShaderFor32Bit(&vsCopy, &vsCopy32);
692 int oMain = nE;
693 pOffset[nE] = GFDAddDataTable(pDT, &vsCopy32, size);
694 pAddr[nE] = 0; // don't patch this location
695 nE++;
696
697 // 1: Store uniform block/buffer array
698 GFDUniformBlock *pUB = (GFDUniformBlock *) malloc(sizeof(GFDUniformBlock)*vsCopy.numUniformBlocks);
699 if ( !pUB )
700 {
701 printf("Error! Failed to allocate Uniform Block structure!\n");
702 return NULL;
703 }
704 size = GFDRepackUniformBlockArrayFor32Bit(vsCopy.uniformBlocks, pUB, vsCopy.numUniformBlocks);
705 int oUniformBuffers = nE;
706 pOffset[nE] = GFDAddDataTable(pDT, pUB, size);
707 pAddr[nE] = (vsCopy.numUniformBlocks == 0) ? 0 : pOffset[oMain] + (u32)&vsCopy32.uniformBlocks - (u32)&vsCopy32;
708 nE++;
709 free(pUB);
710
711 // 2: Store uniform array
712 GFDUniformVar *pUV = (GFDUniformVar *) malloc(sizeof(GFDUniformVar)*vsCopy.numUniforms);
713 if ( !pUV )
714 {
715 printf("Error! Failed to allocate Uniform Variable structure!\n");
716 return NULL;
717 }
718 size = GFDRepackUniformVarArrayFor32Bit(vsCopy.uniformVars, pUV, vsCopy.numUniforms);
719 int oUniforms = nE;
720 pOffset[nE] = GFDAddDataTable(pDT, pUV, size);
721 pAddr[nE] = (vsCopy.numUniforms == 0) ? 0 : pOffset[oMain] + (u32)&vsCopy32.uniformVars - (u32)&vsCopy32;
722 nE++;
723 free(pUV);
724
725 // 3: Store uniform initial values
726 // Note initial values points to uniform block, a contiguous section of GX2UniformInitialValues
727 int oUniformData = nE;
728 pOffset[nE] = GFDAddDataTable(pDT, vsCopy.initialValues, vsCopy.numInitialValues * sizeof(GX2UniformInitialValue));
729 pAddr[nE] = (vsCopy.numInitialValues == 0) ? 0 : pOffset[oMain] + (u32)&vsCopy32.initialValues - (u32)&vsCopy32;
730 nE++;
731
732 // 4: Store loop
733 int oLoops = nE;
734 pOffset[nE] = GFDAddDataTable(pDT, vsCopy._loopVars, vsCopy._numLoops * sizeof(GFDLoopVar));
735 pAddr[nE] = (vsCopy._numLoops == 0) ? 0 : pOffset[oMain] + (u32)&vsCopy32._loopVars - (u32)&vsCopy32;
736 nE++;
737
738 // 5: Store sampler descriptors
739 GFDSamplerVar *pSV = (GFDSamplerVar *) malloc(sizeof(GFDSamplerVar)*vsCopy.numSamplers);
740 if ( !pSV )
741 {
742 printf("Error! Failed to allocate Sampler Variable structure!\n");
743 return NULL;
744 }
745 size = GFDRepackSamplerVarArrayFor32Bit(vsCopy.samplerVars, pSV, vsCopy.numSamplers);
746 int oSamplers = nE;
747 pOffset[nE] = GFDAddDataTable(pDT, pSV, size);
748 pAddr[nE] = (vsCopy.numSamplers == 0) ? 0 : pOffset[oMain] + (u32)&vsCopy32.samplerVars - (u32)&vsCopy32;
749 nE++;
750 free(pSV);
751
752 // 6: Store Attributes
753 GFDAttribVar *pAV = (GFDAttribVar *) malloc(sizeof(GFDAttribVar)*vsCopy.numAttribs);
754 if ( !pAV )
755 {
756 printf("Error! Failed to allocate Attribute Variable structure!\n");
757 return NULL;
758 }
759 size = GFDRepackAttribVarArrayFor32Bit(vsCopy.attribVars, pAV, vsCopy.numAttribs);
760 int oAttrib_names = nE;
761 pOffset[nE] = GFDAddDataTable(pDT, pAV, size);
762 pAddr[nE] = (vsCopy.numAttribs == 0) ? 0 : pOffset[oMain] + (u32)&vsCopy32.attribVars - (u32)&vsCopy32;
763 nE++;
764 free(pAV);
765
766 // 7a: Create a string table to store all the strings in
767 const int kAvgCharsPerString = 12; // will auto-grow if actually biger
768 GFDStringTable *pStrTable = GFDCreateStringTable( ( vsCopy.numUniformBlocks + vsCopy.numUniforms + vsCopy.numSamplers + vsCopy.numAttribs) * kAvgCharsPerString);
769 u32 offStringTable = pDT->m_nDB; // current offset...
770
771 // s1: Store each uniform block name (in common string table), as well as pointer to the initial value in uniform block
772 for(u32 i = 0; i < vsCopy.numUniformBlocks; i++)
773 {
774 pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, vsCopy.uniformBlocks[i].name );
775 pAddr[nE] = pOffset[oUniformBuffers] + i * sizeof(GFDUniformBlock) + (u32)&vsCopy.uniformBlocks[i].name - (u32)&vsCopy.uniformBlocks[i];
776 nE++;
777 }
778
779 // s2: Store each uniform name (in common string table)
780 for(u32 i = 0; i < vsCopy.numUniforms; i++)
781 {
782 pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, vsCopy.uniformVars[i].name );
783 pAddr[nE] = pOffset[oUniforms] + i * sizeof(GFDUniformVar) + (u32)&vsCopy.uniformVars[i].name - (u32)&vsCopy.uniformVars[i];
784 nE++;
785 }
786
787 // s3: Store each sampler name (in common string table)
788 for(u32 i = 0; i < vsCopy.numSamplers; i++)
789 {
790 pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, vsCopy.samplerVars[i].name );
791 pAddr[nE] = pOffset[oSamplers] + i * sizeof(GFDSamplerVar) + (u32)&vsCopy.samplerVars[i].name - (u32)&vsCopy.samplerVars[i];
792 nE++;
793 }
794
795 // s4: Store each attrib name (in common string table)
796 for(u32 i = 0; i < vsCopy.numAttribs; i++)
797 {
798 pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, vsCopy.attribVars[i].name );
799 pAddr[nE] = pOffset[oAttrib_names] + i * sizeof(GFDAttribVar) + (u32)&vsCopy.attribVars[i].name - (u32)&vsCopy.attribVars[i];
800 nE++;
801 }
802
803 // 8: Store the string table (watch out for 1-3 bytes of padding)
804 int oStringTable = nE;
805
806 // Note, although arrays of chars don't seem to be modified to go into network order, we cache our
807 // stringtable as a block in a word array which does get byte-flipped. So let's pre-flip it here
808 // so it comes out right.
809 int nWordsStrTbl = (pStrTable->m_nDB + 0x3) / 4;
810 GFDEndianSwap8in32((u32*) pStrTable->m_pDB, nWordsStrTbl);
811 pOffset[nE] = GFDAddDataTable(pDT, pStrTable->m_pDB, nWordsStrTbl*4); // simply write out string table data
812 pAddr[nE] = 0; // don't patch this location
813 nE++;
814
815 // let's convert it back so if we read it latter, we won't have problems
816 GFDEndianSwap8in32((u32*) pStrTable->m_pDB, nWordsStrTbl);
817
818 // end0: Turn all our patch addresses to offsets
819 for(int i = 0; i < nE; i++)
820 {
821 if(pAddr[i] != 0)
822 {
823 *((u32*) (pDT->m_pDB + GFDCleanTag(pAddr[i]))) = pOffset[i];
824 }
825 }
826
827 // 9: Store the offset patch list
828 // (not *really* needed, could reconstruct if know all data types, but makes it a *lot* easier)
829 // After reading data block in at addrX, increment each location i of *(addrX + pAddr[i]) += addrX;
830 // By putting this after main, we can allocate space for it at same time and dellocate it when main goes away,
831 // without affecting the main data.
832 int oPatchTable = nE;
833 pOffset[nE] = GFDAddDataTable(pDT, pAddr, nElements * sizeof(u32)); // simply write out zeros now..
834 pAddr[nE] = 0; // don't patch this location either
835 nE++;
836
837 // 10: Finally, a small structure describing this data block.
838 GFDBlockRelocationHeader *vshTrailer = (GFDBlockRelocationHeader *) malloc( sizeof(GFDBlockRelocationHeader) );
839 memset(vshTrailer, 0, sizeof(GFDBlockRelocationHeader));
840 vshTrailer->magic = GFD_SWAP_8_IN_32(GFD_BLOCK_RELOCATION_HEADER_MAGIC);
841 vshTrailer->type = 0;
842 vshTrailer->size = sizeof(GFDBlockRelocationHeader);
843
844 int oTrailer = nE;
845
846 // Fill in our trailer and write it out
847 vshTrailer->dataSize = GFDCleanTag(pOffset[oPatchTable]) - GFDCleanTag(pOffset[oMain]); // size of the main data section (allocate this size of contiguous memory)
848 vshTrailer->dataOffset = pOffset[oMain]; // offset of the main data section in this block
849 vshTrailer->stringTableCharNumber = pStrTable->m_nDB; // number of characters in the string table
850 vshTrailer->stringTableOffset = pOffset[oStringTable]; // offset of string table in this block
851 vshTrailer->patchTableOffsetNumber = nE; // number of offsets in the patch table
852 vshTrailer->patchTableOffset = pOffset[oPatchTable] ; // offset of the patch table in this block
853
854 pOffset[nE] = GFDAddDataTable(pDT, vshTrailer, sizeof(GFDBlockRelocationHeader));
855 pAddr[nE] = 0; // don't patch this location
856 nE++;
857
858 free(vshTrailer);
859 vshTrailer = NULL;
860
861 assert(offStringTable == GFDCleanTag(pOffset[oStringTable]) && "Guess for offset table in 7a was wrong");
862 assert(nE <= nElements && "Too few offsets allocated");
863
864 free(pAddr);
865 free(pOffset);
866
867 GFDDestroyStringTable( pStrTable );
868 return pDT;
869 }
870
871 /// Create the flat datablock representation of a GX2PixelShader structure
872 /// Call GFDDataTableDestroy() on returned object once doen with it.
GFDCreateBlockRelocateHeaderPSH(GX2PixelShader * pPS)873 GFDDataTable* GFDCreateBlockRelocateHeaderPSH(GX2PixelShader *pPS)
874 {
875 // Create second data structure to hold flattened, offseted version of our original shader
876 GX2PixelShader psCopy;
877 memcpy(&psCopy, pPS, sizeof(GX2PixelShader));
878
879 // Walk thru copy, converting all pointers to data blocks in table, and changing
880 // addresses to offsets into the data block
881
882 // Create data table to hold the structure elements
883 GFDDataTable *pDT = GFDCreateDataTable(sizeof(GX2PixelShader) + psCopy.numUniforms * 8 + 1024); // todo - pick better number
884
885 // How many pointers do we need to patch? (The 10 here is empirically determined - asserts at end if if wrong)
886 // 10 = 1 (shader itself) + 6 in GX2PixelShader + 1 (string table) + 1 (patch list) + 1 (trailer)
887 int nElements = 10 + psCopy.numUniformBlocks + 1*psCopy.numUniforms + psCopy.numSamplers;
888 int nE = 0;
889 u32 size;
890 u32 *pAddr = (u32*) malloc( nElements * sizeof(u32)); // src offsets into data block that need patching
891 u32 *pOffset = (u32*) malloc( nElements * sizeof(u32)); // dst offsets into data block for, containing offets to write
892 memset(pOffset, 0, nElements * sizeof(u32));
893 memset(pAddr, 0, nElements * sizeof(u32));
894
895 // 0: Store main structure itself (we'll rewrite offsets at the end).
896 // For allocation and alignment purposes, this needs to be the first hunk in the data table
897 GFDPixelShader psCopy32;
898 size = GFDRepackPixelShaderFor32Bit(&psCopy, &psCopy32);
899 int oMain = nE;
900 pOffset[nE] = GFDAddDataTable(pDT, &psCopy32, size);
901 pAddr[nE] = 0; // don't patch this location
902 nE++;
903
904 // 1: Store uniform block array
905 GFDUniformBlock *pUB = (GFDUniformBlock *) malloc(sizeof(GFDUniformBlock)*psCopy.numUniformBlocks);
906 size = GFDRepackUniformBlockArrayFor32Bit(psCopy.uniformBlocks, pUB, psCopy.numUniformBlocks);
907 int oUniformBuffers = nE;
908 pOffset[nE] = GFDAddDataTable(pDT, pUB, size);
909 pAddr[nE] = (psCopy.numUniformBlocks == 0) ? 0 : pOffset[oMain] + (u32)&psCopy32.uniformBlocks - (u32)&psCopy32;
910 nE++;
911 free(pUB);
912
913 // 2: Store uniform array
914 GFDUniformVar *pUV = (GFDUniformVar *) malloc(sizeof(GFDUniformVar)*psCopy.numUniforms);
915 size = GFDRepackUniformVarArrayFor32Bit(psCopy.uniformVars, pUV, psCopy.numUniforms);
916 int oUniforms = nE;
917 pOffset[nE] = GFDAddDataTable(pDT, pUV, size);
918 pAddr[nE] = (psCopy.numUniforms == 0) ? 0 : pOffset[oMain] + (u32)&psCopy32.uniformVars - (u32)&psCopy32;
919 nE++;
920 free(pUV);
921
922 // 3: Store uniform initial values
923 // Note initial values points to uniform block, a contiguous section of GX2UniformInitialValues
924 int oUniformData = nE;
925 pOffset[nE] = GFDAddDataTable(pDT, psCopy.initialValues, psCopy.numInitialValues * sizeof(GX2UniformInitialValue));
926 pAddr[nE] = (psCopy.numInitialValues == 0) ? 0 : pOffset[oMain] + (u32)&psCopy32.initialValues - (u32)&psCopy32;
927 nE++;
928
929 // 4: Store loop array
930 int oLoops = nE;
931 pOffset[nE] = GFDAddDataTable(pDT, psCopy._loopVars, psCopy._numLoops * sizeof(GFDLoopVar));
932 pAddr[nE] = (psCopy._numLoops == 0) ? 0 : pOffset[oMain] + (u32)&psCopy32._loopVars - (u32)&psCopy32;
933 nE++;
934
935 // 5: Store sampler descriptors
936 GFDSamplerVar *pSV = (GFDSamplerVar *) malloc(sizeof(GFDSamplerVar)*psCopy.numSamplers);
937 size = GFDRepackSamplerVarArrayFor32Bit(psCopy.samplerVars, pSV, psCopy.numSamplers);
938 int oSamplers = nE;
939 pOffset[nE] = GFDAddDataTable(pDT, pSV, size);
940 pAddr[nE] = (psCopy.numSamplers == 0) ? 0 : pOffset[oMain] + (u32)&psCopy32.samplerVars - (u32)&psCopy32;
941 nE++;
942 free(pSV);
943
944 // 6a: Create a string table to store all the strings in
945 const int kAvgCharsPerString = 12; // will auto-grow if actually biger
946 GFDStringTable *pStrTable = GFDCreateStringTable( ( psCopy.numUniformBlocks + psCopy.numUniforms + psCopy.numSamplers) * kAvgCharsPerString);
947 u32 offStringTable = pDT->m_nDB; // current offset...
948
949 // s1: Store each uniform block name (in common string table), as well as pointer to the initial value in uniform block
950 for(u32 i = 0; i < psCopy.numUniformBlocks; i++)
951 {
952 pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, psCopy.uniformBlocks[i].name );
953 pAddr[nE] = pOffset[oUniformBuffers] + i * sizeof(GFDUniformBlock) + (u32)&psCopy.uniformBlocks[i].name - (u32)&psCopy.uniformBlocks[i];
954 nE++;
955 }
956
957 // s2: Store each uniform name (in common string table)
958 for(u32 i = 0; i < psCopy.numUniforms; i++)
959 {
960 pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, psCopy.uniformVars[i].name );
961 pAddr[nE] = pOffset[oUniforms] + i * sizeof(GFDUniformVar) + (u32)&psCopy.uniformVars[i].name - (u32)&psCopy.uniformVars[i];
962 nE++;
963 }
964
965 // s3: Store each sampler name (in common string table)
966 for(u32 i = 0; i < psCopy.numSamplers; i++)
967 {
968 pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, psCopy.samplerVars[i].name );
969 pAddr[nE] = pOffset[oSamplers] + i * sizeof(GFDSamplerVar) + (u32)&psCopy.samplerVars[i].name - (u32)&psCopy.samplerVars[i];
970 nE++;
971 }
972
973 // 7: Store the string table (watch out for 1-3 bytes of padding)
974 int oStringTable = nE;
975
976 // Note, although arrays of chars don't seem to be modified to go into network order, we cache our
977 // stringtable as a block in a word array which does get byte-flipped. So let's pre-flip it here
978 // so it comes out right.
979 int nWordsStrTbl = (pStrTable->m_nDB + 0x3) / 4;
980 GFDEndianSwap8in32((u32*) pStrTable->m_pDB, nWordsStrTbl);
981 pOffset[nE] = GFDAddDataTable(pDT, pStrTable->m_pDB, nWordsStrTbl*4); // simply write out string table data
982 pAddr[nE] = 0; // don't patch this location
983 nE++;
984
985 // let's convert it back so if we read it latter, we won't have problems
986 GFDEndianSwap8in32((u32*) pStrTable->m_pDB, nWordsStrTbl);
987
988 // end0: Turn all our patch addresses to offsets
989 for(int i = 0; i < nE; i++)
990 {
991 if(pAddr[i] != 0)
992 {
993 *((u32*) (pDT->m_pDB + GFDCleanTag(pAddr[i]))) = pOffset[i];
994 }
995 }
996
997 // 8: Store the offset patch list
998 // (not *really* needed, could reconstruct if know all data types, but makes *alot* easier)
999 // After reading data block in at addrX, increment each location i of *(addrX + pAddr[i]) += addrX;
1000 // By putting this after main, we can allocate space for it at same time and dellocate it when main goes away,
1001 // without affecting the main data.
1002 int oPatchTable = nE;
1003 pOffset[nE] = GFDAddDataTable(pDT, pAddr, nElements * sizeof(u32)); // simply write out zeros now..
1004 pAddr[nE] = 0; // don't patch this location either
1005 nE++;
1006
1007 // 9: Finally, a small structure describing this data block.
1008 GFDBlockRelocationHeader *pshTrailer = (GFDBlockRelocationHeader *) malloc( sizeof(GFDBlockRelocationHeader) );
1009 memset(pshTrailer, 0, sizeof(GFDBlockRelocationHeader));
1010 pshTrailer->magic = GFD_SWAP_8_IN_32(GFD_BLOCK_RELOCATION_HEADER_MAGIC);
1011 pshTrailer->type = 0;
1012 pshTrailer->size = sizeof(GFDBlockRelocationHeader);
1013
1014 int oTrailer = nE;
1015
1016 // Fill in our trailer and write it out
1017 pshTrailer->dataSize = GFDCleanTag(pOffset[oPatchTable]) - GFDCleanTag(pOffset[oMain]); // size of the main data section (allocate this size of contiguous memory)
1018 pshTrailer->dataOffset = pOffset[oMain]; // offset of the main data section in this block
1019 pshTrailer->stringTableCharNumber = pStrTable->m_nDB; // number of characters in the string table
1020 pshTrailer->stringTableOffset = pOffset[oStringTable]; // offset of string table in this block
1021 pshTrailer->patchTableOffsetNumber = nE; // number of offsets in the patch table
1022 pshTrailer->patchTableOffset = pOffset[oPatchTable] ; // offset of the patch table in this block
1023
1024 pOffset[nE] = GFDAddDataTable(pDT, pshTrailer, sizeof(GFDBlockRelocationHeader));
1025 pAddr[nE] = 0; // don't patch this location
1026 nE++;
1027
1028 free(pshTrailer);
1029 pshTrailer = NULL;
1030
1031 assert(offStringTable == GFDCleanTag(pOffset[oStringTable]) && "Guess for offset table in 7a was wrong");
1032 assert(nE <= nElements && "Too few offsets allocated");
1033
1034 free(pAddr);
1035 free(pOffset);
1036
1037 GFDDestroyStringTable( pStrTable );
1038 return pDT;
1039 }
1040
1041 /// Create the flat datablock representation of a GX2GeometryShader structure
1042 /// Call GFDDataTableDestroy() on returned object once doen with it.
GFDCreateBlockRelocateHeaderGSH(GX2GeometryShader * pGS)1043 GFDDataTable* GFDCreateBlockRelocateHeaderGSH(GX2GeometryShader *pGS)
1044 {
1045 // Create second data structure to hold flattened, offseted version of our original shader
1046 GX2GeometryShader gsCopy;
1047 memcpy(&gsCopy, pGS, sizeof(GX2GeometryShader));
1048
1049 // Walk thru copy, converting all pointers to data blocks in table, and changing
1050 // addresses to offsets into the data block
1051
1052 // Create data table to hold the structure elements
1053 GFDDataTable *pDT = GFDCreateDataTable(sizeof(GX2GeometryShader) + gsCopy.numUniforms * 8 + 512); // todo - pick better number
1054
1055 // How many pointers do we need to patch? (The 11 here is empirically determined - asserts at end if if wrong)
1056 // 11 = 1 (shader itself) + 7 in GX2GeometryShader + 1 (string table) + 1 (patch list) + 1 (trailer)
1057 int nElements = 11 + gsCopy.numUniformBlocks + gsCopy.numUniforms + gsCopy.numSamplers /*+ gsCopy.numAttribs */;
1058 int nE = 0;
1059 u32 size;
1060 u32 *pAddr = (u32*) malloc( nElements * sizeof(u32)); // src offsets into data block that need patching
1061 u32 *pOffset = (u32*) malloc( nElements * sizeof(u32)); // dst offsets into data block for, containing offets to write
1062 memset(pOffset, 0, nElements * sizeof(u32));
1063 memset(pAddr, 0, nElements * sizeof(u32));
1064
1065 // 0: Store main structure itself (we'll rewrite offsets at the end).
1066 // For allocation and alignment purposes, this needs to be the first hunk in the data table
1067 GFDGeometryShader gsCopy32;
1068 size = GFDRepackGeometryShaderFor32Bit(&gsCopy, &gsCopy32);
1069 int oMain = nE;
1070 pOffset[nE] = GFDAddDataTable(pDT, &gsCopy32, size);
1071 pAddr[nE] = 0; // don't patch this location
1072 nE++;
1073
1074 // 1: Store uniform block/buffer array
1075 GFDUniformBlock *pUB = (GFDUniformBlock *) malloc(sizeof(GFDUniformBlock)*gsCopy.numUniformBlocks);
1076 size = GFDRepackUniformBlockArrayFor32Bit(gsCopy.uniformBlocks, pUB, gsCopy.numUniformBlocks);
1077 int oUniformBuffers = nE;
1078 pOffset[nE] = GFDAddDataTable(pDT, pUB, size);
1079 pAddr[nE] = (gsCopy.numUniformBlocks == 0) ? 0 : pOffset[oMain] + (u32)&gsCopy32.uniformBlocks - (u32)&gsCopy32;
1080 nE++;
1081 free(pUB);
1082
1083 // 2: Store uniform array
1084 GFDUniformVar *pUV = (GFDUniformVar *) malloc(sizeof(GFDUniformVar)*gsCopy.numUniforms);
1085 size = GFDRepackUniformVarArrayFor32Bit(gsCopy.uniformVars, pUV, gsCopy.numUniforms);
1086 int oUniforms = nE;
1087 pOffset[nE] = GFDAddDataTable(pDT, pUV, size);
1088 pAddr[nE] = (gsCopy.numUniforms == 0) ? 0 : pOffset[oMain] + (u32)&gsCopy32.uniformVars - (u32)&gsCopy32;
1089 nE++;
1090 free(pUV);
1091
1092 // 3: Store uniform initial values
1093 // Note initial values points to uniform block, a contiguous section of GX2UniformInitialValues
1094 int oUniformData = nE;
1095 pOffset[nE] = GFDAddDataTable(pDT, gsCopy.initialValues, gsCopy.numInitialValues * sizeof(GX2UniformInitialValue));
1096 pAddr[nE] = (gsCopy.numInitialValues == 0) ? 0 : pOffset[oMain] + (u32)&gsCopy32.initialValues - (u32)&gsCopy32;
1097 nE++;
1098
1099 // 4: Store loop
1100 int oLoops = nE;
1101 pOffset[nE] = GFDAddDataTable(pDT, gsCopy._loopVars, gsCopy._numLoops * sizeof(GFDLoopVar));
1102 pAddr[nE] = (gsCopy._numLoops == 0) ? 0 : pOffset[oMain] + (u32)&gsCopy32._loopVars - (u32)&gsCopy32;
1103 nE++;
1104
1105 // 5: Store sampler descriptors
1106 GFDSamplerVar *pSV = (GFDSamplerVar *) malloc(sizeof(GFDSamplerVar)*gsCopy.numSamplers);
1107 size = GFDRepackSamplerVarArrayFor32Bit(gsCopy.samplerVars, pSV, gsCopy.numSamplers);
1108 int oSamplers = nE;
1109 pOffset[nE] = GFDAddDataTable(pDT, pSV, size);
1110 pAddr[nE] = (gsCopy.numSamplers == 0) ? 0 : pOffset[oMain] + (u32)&gsCopy32.samplerVars - (u32)&gsCopy32;
1111 nE++;
1112 free(pSV);
1113
1114 // 6a: Create a string table to store all the strings in
1115 const int kAvgCharsPerString = 12; // will auto-grow if actually biger
1116 GFDStringTable *pStrTable = GFDCreateStringTable( ( gsCopy.numUniformBlocks + gsCopy.numUniforms + gsCopy.numSamplers) * kAvgCharsPerString);
1117 u32 offStringTable = pDT->m_nDB; // current offset...
1118
1119
1120 // s1: Store each uniform block name (in common string table), as well as pointer to the initial value in uniform block
1121 for(u32 i = 0; i < gsCopy.numUniformBlocks; i++)
1122 {
1123 pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, gsCopy.uniformBlocks[i].name );
1124 pAddr[nE] = pOffset[oUniformBuffers] + i * sizeof(GFDUniformBlock) + (u32)&gsCopy.uniformBlocks[i].name - (u32)&gsCopy.uniformBlocks[i];
1125 nE++;
1126 }
1127
1128 // s2: Store each uniform name (in common string table)
1129 for(u32 i = 0; i < gsCopy.numUniforms; i++)
1130 {
1131 pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, gsCopy.uniformVars[i].name );
1132 pAddr[nE] = pOffset[oUniforms] + i * sizeof(GFDUniformVar) + (u32)&gsCopy.uniformVars[i].name - (u32)&gsCopy.uniformVars[i];
1133 nE++;
1134 }
1135
1136 // s3: Store each sampler name (in common string table)
1137 for(u32 i = 0; i < gsCopy.numSamplers; i++)
1138 {
1139 pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, gsCopy.samplerVars[i].name );
1140 pAddr[nE] = pOffset[oSamplers] + i * sizeof(GFDSamplerVar) + (u32)&gsCopy.samplerVars[i].name - (u32)&gsCopy.samplerVars[i];
1141 nE++;
1142 }
1143
1144 // 7: Store the string table (watch out for 1-3 bytes of padding)
1145 int oStringTable = nE;
1146
1147 // Note, although arrays of chars don't seem to be modified to go into network order, we cache our
1148 // stringtable as a block in a word array which does get byte-flipped. So let's pre-flip it here
1149 // so it comes out right.
1150 int nWordsStrTbl = (pStrTable->m_nDB + 0x3) / 4;
1151 GFDEndianSwap8in32((u32*) pStrTable->m_pDB, nWordsStrTbl);
1152 pOffset[nE] = GFDAddDataTable(pDT, pStrTable->m_pDB, nWordsStrTbl*4); // simply write out string table data
1153 pAddr[nE] = 0; // don't patch this location
1154 nE++;
1155
1156 // let's convert it back so if we read it latter, we won't have problems
1157 GFDEndianSwap8in32((u32*) pStrTable->m_pDB, nWordsStrTbl);
1158
1159 // end0: Turn all our patch addresses to offsets
1160 for(int i = 0; i < nE; i++)
1161 {
1162 if(pAddr[i] != 0)
1163 {
1164 *((u32*) (pDT->m_pDB + GFDCleanTag(pAddr[i]))) = pOffset[i];
1165 }
1166 }
1167
1168 // 8: Store the offset patch list
1169 // (not *really* needed, could reconstruct if know all data types, but makes it a *lot* easier)
1170 // After reading data block in at addrX, increment each location i of *(addrX + pAddr[i]) += addrX;
1171 // By putting this after main, we can allocate space for it at same time and dellocate it when main goes away,
1172 // without affecting the main data.
1173 int oPatchTable = nE;
1174 pOffset[nE] = GFDAddDataTable(pDT, pAddr, nElements * sizeof(u32)); // simply write out zeros now..
1175 pAddr[nE] = 0; // don't patch this location either
1176 nE++;
1177
1178 // 9: Finally, a small structure describing this data block.
1179 GFDBlockRelocationHeader *gshTrailer = (GFDBlockRelocationHeader *) malloc( sizeof(GFDBlockRelocationHeader) );
1180 memset(gshTrailer, 0, sizeof(GFDBlockRelocationHeader));
1181 gshTrailer->magic = GFD_SWAP_8_IN_32(GFD_BLOCK_RELOCATION_HEADER_MAGIC);
1182 gshTrailer->type = 0;
1183 gshTrailer->size = sizeof(GFDBlockRelocationHeader);
1184
1185 int oTrailer = nE;
1186
1187 // Fill in our trailer and write it out
1188 gshTrailer->dataSize = GFDCleanTag(pOffset[oPatchTable]) - GFDCleanTag(pOffset[oMain]); // size of the main data section (allocate this size of contiguous memory)
1189 gshTrailer->dataOffset = pOffset[oMain]; // offset of the main data section in this block
1190 gshTrailer->stringTableCharNumber = pStrTable->m_nDB; // number of characters in the string table
1191 gshTrailer->stringTableOffset = pOffset[oStringTable]; // offset of string table in this block
1192 gshTrailer->patchTableOffsetNumber = nE; // number of offsets in the patch table
1193 gshTrailer->patchTableOffset = pOffset[oPatchTable] ; // offset of the patch table in this block
1194
1195 pOffset[nE] = GFDAddDataTable(pDT, gshTrailer, sizeof(GFDBlockRelocationHeader));
1196 pAddr[nE] = 0; // don't patch this location
1197 nE++;
1198
1199 free(gshTrailer);
1200 gshTrailer = NULL;
1201
1202 assert(offStringTable == GFDCleanTag(pOffset[oStringTable]) && "Guess for offset table in 7a was wrong");
1203 assert(nE <= nElements && "Too few offsets allocated");
1204
1205 free(pAddr);
1206 free(pOffset);
1207
1208 GFDDestroyStringTable( pStrTable );
1209
1210 return pDT;
1211 }
1212
1213 /// Create the flat datablock representation of a GX2ComputeShader structure
1214 /// Call GFDDataTableDestroy() on returned object once done with it.
GFDCreateBlockRelocateHeaderCSH(GX2ComputeShader * pCS)1215 GFDDataTable* GFDCreateBlockRelocateHeaderCSH(GX2ComputeShader *pCS)
1216 {
1217 // Create second data structure to hold flattened, offseted version of our original shader
1218 GX2ComputeShader csCopy;
1219 memcpy(&csCopy, pCS, sizeof(GX2ComputeShader));
1220
1221 // Walk thru copy, converting all pointers to data blocks in table, and changing
1222 // addresses to offsets into the data block
1223
1224 // Create data table to hold the structure elements
1225 GFDDataTable *pDT = GFDCreateDataTable(sizeof(GX2ComputeShader) + csCopy.numUniformBlocks * sizeof(GX2UniformBlock) + csCopy.numUniforms * sizeof(GX2UniformVar) + csCopy.numSamplers * sizeof(GX2SamplerVar) + 512); // todo - pick a better number for string table
1226 if ( !pDT )
1227 {
1228 printf("Error! Can't create GFD Data Table!\n");
1229 return NULL;
1230 }
1231
1232 // How many pointers do we need to patch? (The value here is empirically determined - asserts at end if if wrong)
1233 // 10 = 1 (shader itself) + 6 in GX2ComputeShader + 1 (string table) + 1 (patch list) + 1 (trailer)
1234 int nElements = 10 + csCopy.numUniformBlocks + csCopy.numUniforms + csCopy.numSamplers;
1235 int nE = 0;
1236 u32 size;
1237 u32 *pAddr = (u32*) malloc( nElements * sizeof(u32)); // src offsets into data block that need patching
1238 u32 *pOffset = (u32*) malloc( nElements * sizeof(u32)); // dst offsets into data block for, containing offets to write
1239
1240 if ( !pAddr || !pOffset )
1241 {
1242 printf("Error! Memory allocation failure!\n");
1243 return NULL;
1244 }
1245 memset(pOffset, 0, nElements * sizeof(u32));
1246 memset(pAddr, 0, nElements * sizeof(u32));
1247
1248 // 0: Store main structure itself (we'll rewrite offsets at the end).
1249 // For allocation and alignment purposes, this needs to be the first hunk in the data table
1250 GFDComputeShader csCopy32;
1251 size = GFDRepackComputeShaderFor32Bit(&csCopy, &csCopy32);
1252 int oMain = nE;
1253 pOffset[nE] = GFDAddDataTable(pDT, &csCopy32, size);
1254 pAddr[nE] = 0; // don't patch this location
1255 nE++;
1256
1257 // 1: Store uniform block/buffer array
1258 GFDUniformBlock *pUB = (GFDUniformBlock *) malloc(sizeof(GFDUniformBlock)*csCopy.numUniformBlocks);
1259 if ( !pUB )
1260 {
1261 printf("Error! Failed to allocate Uniform Block structure!\n");
1262 return NULL;
1263 }
1264 size = GFDRepackUniformBlockArrayFor32Bit(csCopy.uniformBlocks, pUB, csCopy.numUniformBlocks);
1265 int oUniformBuffers = nE;
1266 pOffset[nE] = GFDAddDataTable(pDT, pUB, size);
1267 pAddr[nE] = (csCopy.numUniformBlocks == 0) ? 0 : pOffset[oMain] + (u32)&csCopy32.uniformBlocks - (u32)&csCopy32;
1268 nE++;
1269 free(pUB);
1270
1271 // 2: Store uniform array
1272 GFDUniformVar *pUV = (GFDUniformVar *) malloc(sizeof(GFDUniformVar)*csCopy.numUniforms);
1273 if ( !pUV )
1274 {
1275 printf("Error! Failed to allocate Uniform Variable structure!\n");
1276 return NULL;
1277 }
1278 size = GFDRepackUniformVarArrayFor32Bit(csCopy.uniformVars, pUV, csCopy.numUniforms);
1279 int oUniforms = nE;
1280 pOffset[nE] = GFDAddDataTable(pDT, pUV, size);
1281 pAddr[nE] = (csCopy.numUniforms == 0) ? 0 : pOffset[oMain] + (u32)&csCopy32.uniformVars - (u32)&csCopy32;
1282 nE++;
1283 free(pUV);
1284
1285 // 3: Store uniform initial values
1286 // Note initial values points to uniform block, a contiguous section of GX2UniformInitialValues
1287 int oUniformData = nE;
1288 pOffset[nE] = GFDAddDataTable(pDT, csCopy.initialValues, csCopy.numInitialValues * sizeof(GX2UniformInitialValue));
1289 pAddr[nE] = (csCopy.numInitialValues == 0) ? 0 : pOffset[oMain] + (u32)&csCopy32.initialValues - (u32)&csCopy32;
1290 nE++;
1291
1292 // 4: Store loop variables
1293 int oLoops = nE;
1294 pOffset[nE] = GFDAddDataTable(pDT, csCopy._loopVars, csCopy._numLoops * sizeof(GFDLoopVar));
1295 pAddr[nE] = (csCopy._numLoops == 0) ? 0 : pOffset[oMain] + (u32)&csCopy32._loopVars - (u32)&csCopy32;
1296 nE++;
1297
1298 // 5: Store sampler descriptors
1299 GFDSamplerVar *pSV = (GFDSamplerVar *) malloc(sizeof(GFDSamplerVar)*csCopy.numSamplers);
1300 if ( !pSV )
1301 {
1302 printf("Error! Failed to allocate Sampler Variable structure!\n");
1303 return NULL;
1304 }
1305 size = GFDRepackSamplerVarArrayFor32Bit(csCopy.samplerVars, pSV, csCopy.numSamplers);
1306 int oSamplers = nE;
1307 pOffset[nE] = GFDAddDataTable(pDT, pSV, size);
1308 pAddr[nE] = (csCopy.numSamplers == 0) ? 0 : pOffset[oMain] + (u32)&csCopy32.samplerVars - (u32)&csCopy32;
1309 nE++;
1310 free(pSV);
1311
1312
1313 // 6a: Create a string table to store all the strings in
1314 const int kAvgCharsPerString = 12; // will auto-grow if actually biger
1315 GFDStringTable *pStrTable = GFDCreateStringTable( ( csCopy.numUniformBlocks + csCopy.numUniforms + csCopy.numSamplers) * kAvgCharsPerString);
1316 u32 offStringTable = pDT->m_nDB; // current offset...
1317
1318 // s1: Store each uniform block name (in common string table), as well as pointer to the initial value in uniform block
1319 for(u32 i = 0; i < csCopy.numUniformBlocks; i++)
1320 {
1321 pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, csCopy.uniformBlocks[i].name );
1322 pAddr[nE] = pOffset[oUniformBuffers] + i * sizeof(GFDUniformBlock) + (u32)&csCopy.uniformBlocks[i].name - (u32)&csCopy.uniformBlocks[i];
1323 nE++;
1324 }
1325
1326 // s2: Store each uniform name (in common string table)
1327 for(u32 i = 0; i < csCopy.numUniforms; i++)
1328 {
1329 pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, csCopy.uniformVars[i].name );
1330 pAddr[nE] = pOffset[oUniforms] + i * sizeof(GFDUniformVar) + (u32)&csCopy.uniformVars[i].name - (u32)&csCopy.uniformVars[i];
1331 nE++;
1332 }
1333
1334 // s3: Store each sampler name (in common string table)
1335 for(u32 i = 0; i < csCopy.numSamplers; i++)
1336 {
1337 pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, csCopy.samplerVars[i].name );
1338 pAddr[nE] = pOffset[oSamplers] + i * sizeof(GFDSamplerVar) + (u32)&csCopy.samplerVars[i].name - (u32)&csCopy.samplerVars[i];
1339 nE++;
1340 }
1341
1342 // 7: Store the string table (watch out for 1-3 bytes of padding)
1343 int oStringTable = nE;
1344
1345 // Note, although arrays of chars don't seem to be modified to go into network order, we cache our
1346 // stringtable as a block in a word array which does get byte-flipped. So let's pre-flip it here
1347 // so it comes out right.
1348 int nWordsStrTbl = (pStrTable->m_nDB + 0x3) / 4;
1349 GFDEndianSwap8in32((u32*) pStrTable->m_pDB, nWordsStrTbl);
1350 pOffset[nE] = GFDAddDataTable(pDT, pStrTable->m_pDB, nWordsStrTbl*4); // simply write out string table data
1351 pAddr[nE] = 0; // don't patch this location
1352 nE++;
1353
1354 // let's convert it back so if we read it latter, we won't have problems
1355 GFDEndianSwap8in32((u32*) pStrTable->m_pDB, nWordsStrTbl);
1356
1357 // end0: Turn all our patch addresses to offsets
1358 for(int i = 0; i < nE; i++)
1359 {
1360 if(pAddr[i] != 0)
1361 {
1362 *((u32*) (pDT->m_pDB + GFDCleanTag(pAddr[i]))) = pOffset[i];
1363 }
1364 }
1365
1366 // 8: Store the offset patch list
1367 // (not *really* needed, could reconstruct if know all data types, but makes it a *lot* easier)
1368 // After reading data block in at addrX, increment each location i of *(addrX + pAddr[i]) += addrX;
1369 // By putting this after main, we can allocate space for it at same time and dellocate it when main goes away,
1370 // without affecting the main data.
1371 int oPatchTable = nE;
1372 pOffset[nE] = GFDAddDataTable(pDT, pAddr, nElements * sizeof(u32)); // simply write out zeros now..
1373 pAddr[nE] = 0; // don't patch this location either
1374 nE++;
1375
1376 // 9: Finally, a small structure describing this data block.
1377 GFDBlockRelocationHeader *vshTrailer = (GFDBlockRelocationHeader *) malloc( sizeof(GFDBlockRelocationHeader) );
1378 if ( !vshTrailer )
1379 {
1380 printf("Error! Failed to allocate Block Relocation structure!\n");
1381 return NULL;
1382 }
1383 memset(vshTrailer, 0, sizeof(GFDBlockRelocationHeader));
1384 vshTrailer->magic = GFD_SWAP_8_IN_32(GFD_BLOCK_RELOCATION_HEADER_MAGIC);
1385 vshTrailer->type = 0;
1386 vshTrailer->size = sizeof(GFDBlockRelocationHeader);
1387
1388 int oTrailer = nE;
1389
1390 // Fill in our trailer and write it out
1391 vshTrailer->dataSize = GFDCleanTag(pOffset[oPatchTable]) - GFDCleanTag(pOffset[oMain]); // size of the main data section (allocate this size of contiguous memory)
1392 vshTrailer->dataOffset = pOffset[oMain]; // offset of the main data section in this block
1393 vshTrailer->stringTableCharNumber = pStrTable->m_nDB; // number of characters in the string table
1394 vshTrailer->stringTableOffset = pOffset[oStringTable]; // offset of string table in this block
1395 vshTrailer->patchTableOffsetNumber = nE; // number of offsets in the patch table
1396 vshTrailer->patchTableOffset = pOffset[oPatchTable] ; // offset of the patch table in this block
1397
1398 pOffset[nE] = GFDAddDataTable(pDT, vshTrailer, sizeof(GFDBlockRelocationHeader));
1399 pAddr[nE] = 0; // don't patch this location
1400 nE++;
1401
1402 free(vshTrailer);
1403 vshTrailer = NULL;
1404
1405 assert(offStringTable == GFDCleanTag(pOffset[oStringTable]) && "Guess for offset table in 7a was wrong");
1406 assert(nE <= nElements && "Too few offsets allocated");
1407
1408 free(pAddr);
1409 free(pOffset);
1410
1411 GFDDestroyStringTable( pStrTable );
1412 return pDT;
1413 }
1414
1415
// Writes one vertex shader to fp as a sequence of GFD blocks:
//   1. the flattened, relocatable shader header block,
//   2. a pad block (only when alignMode is non-zero),
//   3. the shader program block.
//
// fp        - output stream; blocks are written at its current position.
// swapMode  - forwarded to GFDWriteFileGPUData for the program words.
// alignMode - when non-zero, a pad block is emitted ahead of the program block.
// pVS       - shader to write. Its shaderPtr is cleared while the header is
//             flattened and is put back before returning, on success and on
//             failure alike.
//
// Returns true when every block was written, false otherwise (including a
// NULL pVS). The temporary data table is released on every path.
bool GFDWriteFileVertexShaderBlock(FILE *fp, GFDEndianSwapMode swapMode, GFDAlignMode alignMode, GX2VertexShader *pVS)
{
    if(pVS == NULL)
        return false;

    // Get info about the actual shader program before detaching it
    u32   nBytesProg = pVS->shaderSize;
    void *pDataProg  = pVS->shaderPtr;

    // Clear the pointer while the header is flattened; the program itself is
    // written as its own block below.
    pVS->shaderPtr = NULL;

    // Convert structure into a flat, relocatable format
    GFDDataTable *pDT_VS = GFDCreateBlockRelocateHeaderVSH(pVS);
    bool ok = (NULL != pDT_VS);

    if(ok)
    {
        // Size of the flattened struct (varies with uniforms, samplers, ...)
        int nBytesVSStruct = pDT_VS->m_nDB;

        // Header for the VS struct, then the struct itself
        ok = GFDWriteFileBlockHeader(fp, GFD_BLOCK_TYPE_GX2_VSH_HEADER, nBytesVSStruct)
          && GFDWriteFilePPCData(fp, (nBytesVSStruct + 3) / 4, GFD_ELEMENT_SIZE_32, (u32 *) pDT_VS->m_pDB);
    }

    // Add pad block
    if(ok && alignMode)
    {
        long filePos = ftell(fp);
        if(filePos < 0)
        {
            // Cannot compute an alignment without a valid file position
            ok = false;
        }
        else
        {
            // Round (current offset + two block headers) up to GX2_SHADER_ALIGNMENT.
            // Presumably the two headers are the pad block's and the program
            // block's, so the program data lands aligned - confirm against
            // GFDWriteFilePadBlock. The result is always in [0, alignment-1].
            const u32 alignMask = (u32)(GX2_SHADER_ALIGNMENT - 1);
            const u32 progStart = (u32)filePos + (u32)(2 * GFD_BLOCK_HEADER_SIZE);
            const u32 padSize   = ((progStart + alignMask) & ~alignMask) - progStart;

            if(!GFDWriteFilePadBlock(fp, padSize))
                ok = false;
        }
    }

    // Header for the program block, then the program data
    if(ok)
    {
        ok = GFDWriteFileBlockHeader(fp, GFD_BLOCK_TYPE_GX2_VSH_PROGRAM, nBytesProg)
          && GFDWriteFileGPUData(fp, (nBytesProg + 3) / 4, GFD_ELEMENT_SIZE_32, swapMode, (u32 *) pDataProg);
    }

    if(NULL != pDT_VS)
        GFDDestroyDataTable(pDT_VS);

    // Restore the caller's struct no matter how we got here
    pVS->shaderPtr = pDataProg;

    return ok;
}
1471
// Writes one pixel shader to fp as a sequence of GFD blocks:
//   1. the flattened, relocatable shader header block,
//   2. a pad block (only when alignMode is non-zero),
//   3. the shader program block.
//
// fp        - output stream; blocks are written at its current position.
// swapMode  - forwarded to GFDWriteFileGPUData for the program words.
// alignMode - when non-zero, a pad block is emitted ahead of the program block.
// pPS       - shader to write. Its shaderPtr is cleared while the header is
//             flattened and is put back before returning, on success and on
//             failure alike.
//
// Returns true when every block was written, false otherwise (including a
// NULL pPS). The temporary data table is released on every path.
bool GFDWriteFilePixelShaderBlock(FILE *fp, GFDEndianSwapMode swapMode, GFDAlignMode alignMode, GX2PixelShader *pPS)
{
    if(pPS == NULL)
        return false;

    // Get info about the actual shader program before detaching it
    u32   nBytesProg = pPS->shaderSize;
    void *pDataProg  = pPS->shaderPtr;

    // Clear the pointer while the header is flattened; the program itself is
    // written as its own block below.
    pPS->shaderPtr = NULL;

    // Convert structure into a flat, relocatable format
    GFDDataTable *pDT_PS = GFDCreateBlockRelocateHeaderPSH(pPS);
    bool ok = (NULL != pDT_PS);

    if(ok)
    {
        // Size of the flattened struct (varies with uniforms, samplers, ...)
        int nBytesPSStruct = pDT_PS->m_nDB;

        // Header for the PS struct, then the struct itself
        ok = GFDWriteFileBlockHeader(fp, GFD_BLOCK_TYPE_GX2_PSH_HEADER, nBytesPSStruct)
          && GFDWriteFilePPCData(fp, (nBytesPSStruct + 3) / 4, GFD_ELEMENT_SIZE_32, (u32 *) pDT_PS->m_pDB);
    }

    // Add pad block
    if(ok && alignMode)
    {
        long filePos = ftell(fp);
        if(filePos < 0)
        {
            // Cannot compute an alignment without a valid file position
            ok = false;
        }
        else
        {
            // Round (current offset + two block headers) up to GX2_SHADER_ALIGNMENT.
            // Presumably the two headers are the pad block's and the program
            // block's, so the program data lands aligned - confirm against
            // GFDWriteFilePadBlock. The result is always in [0, alignment-1].
            const u32 alignMask = (u32)(GX2_SHADER_ALIGNMENT - 1);
            const u32 progStart = (u32)filePos + (u32)(2 * GFD_BLOCK_HEADER_SIZE);
            const u32 padSize   = ((progStart + alignMask) & ~alignMask) - progStart;

            if(!GFDWriteFilePadBlock(fp, padSize))
                ok = false;
        }
    }

    // Header for the program block, then the program data
    if(ok)
    {
        ok = GFDWriteFileBlockHeader(fp, GFD_BLOCK_TYPE_GX2_PSH_PROGRAM, nBytesProg)
          && GFDWriteFileGPUData(fp, (nBytesProg + 3) / 4, GFD_ELEMENT_SIZE_32, swapMode, (u32 *) pDataProg);
    }

    if(NULL != pDT_PS)
        GFDDestroyDataTable(pDT_PS);

    // Restore the caller's struct no matter how we got here
    pPS->shaderPtr = pDataProg;

    return ok;
}
1527
GFDWriteFileGeometryShaderBlock(FILE * fp,GFDEndianSwapMode swapMode,GFDAlignMode alignMode,GX2GeometryShader * pGS)1528 bool GFDWriteFileGeometryShaderBlock(FILE *fp, GFDEndianSwapMode swapMode, GFDAlignMode alignMode, GX2GeometryShader *pGS)
1529 {
1530 if(pGS == NULL)
1531 return false;
1532
1533 // Get info about the actual shader program
1534 u32 nBytesProg = pGS->shaderSize;
1535 void* pDataProg = pGS->shaderPtr; // Current location, we will relocate it
1536
1537 u32 nBytesCopyProg = pGS->copyShaderSize;
1538 void* pDataCopyProg = pGS->copyShaderPtr; // Current location, we will relocate it
1539
1540 // Set NULL
1541 pGS->shaderPtr = NULL;
1542 pGS->copyShaderPtr = NULL;
1543
1544 // Convert structure into a flat, relocatable format
1545 GFDDataTable *pDT_GS = GFDCreateBlockRelocateHeaderGSH(pGS);
1546
1547 if(NULL == pDT_GS)
1548 return false;
1549
1550 int nBytesGSStruct = pDT_GS->m_nDB; // How big is our shader struct (changes size due to uniforms, samplers, and other varying things)
1551
1552 // Write header for GS struct
1553 if(!GFDWriteFileBlockHeader(fp, GFD_BLOCK_TYPE_GX2_GSH_HEADER, nBytesGSStruct))
1554 return false;
1555
1556 // Write GS struct
1557 if(!GFDWriteFilePPCData(fp, (nBytesGSStruct + 3) / 4, GFD_ELEMENT_SIZE_32, (u32 *) pDT_GS->m_pDB))
1558 return false;
1559
1560 // Add pad block
1561 if(alignMode)
1562 {
1563 // Calc padding size for shader align
1564 u32 padSize = ((u32)(ftell(fp) + 2 * GFD_BLOCK_HEADER_SIZE +(GX2_SHADER_ALIGNMENT-1)) & ~(GX2_SHADER_ALIGNMENT-1)) - (u32)ftell(fp) - 2 * GFD_BLOCK_HEADER_SIZE;
1565 while(padSize < 0)
1566 padSize += GX2_SHADER_ALIGNMENT;
1567
1568 if(!GFDWriteFilePadBlock(fp, padSize))
1569 return false;
1570 }
1571
1572 // Write Header for program block
1573 if(!GFDWriteFileBlockHeader(fp, GFD_BLOCK_TYPE_GX2_GSH_PROGRAM, nBytesProg))
1574 return false;
1575
1576 // Write program data block
1577 if(!GFDWriteFileGPUData(fp, (nBytesProg + 3) / 4, GFD_ELEMENT_SIZE_32, swapMode, (u32 *) pDataProg))
1578 return false;
1579
1580 // Add pad block
1581 if(alignMode)
1582 {
1583 // Calc padding size for shader align
1584 u32 padSize = ((u32)(ftell(fp) + 2 * GFD_BLOCK_HEADER_SIZE +(GX2_SHADER_ALIGNMENT-1)) & ~(GX2_SHADER_ALIGNMENT-1)) - (u32)ftell(fp) - 2 * GFD_BLOCK_HEADER_SIZE;
1585 while(padSize < 0)
1586 padSize += GX2_SHADER_ALIGNMENT;
1587
1588 if(!GFDWriteFilePadBlock(fp, padSize))
1589 return false;
1590 }
1591
1592 // Write Header for copy program block
1593 if(!GFDWriteFileBlockHeader(fp, GFD_BLOCK_TYPE_GX2_GSH_COPY_PROGRAM, nBytesCopyProg))
1594 return false;
1595
1596 // Write copy program data block
1597 if(!GFDWriteFileGPUData(fp, (nBytesCopyProg + 3) / 4, GFD_ELEMENT_SIZE_32, swapMode, (u32 *) pDataCopyProg))
1598 return false;
1599
1600 GFDDestroyDataTable(pDT_GS);
1601
1602 // Restore
1603 pGS->shaderPtr = pDataProg;
1604 pGS->copyShaderPtr = pDataCopyProg;
1605
1606 return true;
1607 }
1608
// Writes one compute shader to fp as a sequence of GFD blocks:
//   1. the flattened, relocatable shader header block,
//   2. a pad block (only when alignMode is non-zero),
//   3. the shader program block.
//
// fp        - output stream; blocks are written at its current position.
// swapMode  - forwarded to GFDWriteFileGPUData for the program words.
// alignMode - when non-zero, a pad block is emitted ahead of the program block.
// pCS       - shader to write. Its shaderPtr is cleared while the header is
//             flattened and is put back before returning, on success and on
//             failure alike.
//
// Returns true when every block was written, false otherwise (including a
// NULL pCS). The temporary data table is released on every path.
bool GFDWriteFileComputeShaderBlock(FILE *fp, GFDEndianSwapMode swapMode, GFDAlignMode alignMode, GX2ComputeShader *pCS)
{
    if(pCS == NULL)
        return false;

    // Get info about the actual shader program before detaching it
    u32   nBytesProg = pCS->shaderSize;
    void *pDataProg  = pCS->shaderPtr;

    // Clear the pointer while the header is flattened; the program itself is
    // written as its own block below.
    pCS->shaderPtr = NULL;

    // Convert structure into a flat, relocatable format
    GFDDataTable *pDT_CS = GFDCreateBlockRelocateHeaderCSH(pCS);
    bool ok = (NULL != pDT_CS);

    if(ok)
    {
        // Size of the flattened struct (varies with uniforms, samplers, ...)
        int nBytesCSStruct = pDT_CS->m_nDB;

        // Header for the CS struct, then the struct itself
        ok = GFDWriteFileBlockHeader(fp, GFD_BLOCK_TYPE_GX2_CSH_HEADER, nBytesCSStruct)
          && GFDWriteFilePPCData(fp, (nBytesCSStruct + 3) / 4, GFD_ELEMENT_SIZE_32, (u32 *) pDT_CS->m_pDB);
    }

    // Add pad block
    if(ok && alignMode)
    {
        long filePos = ftell(fp);
        if(filePos < 0)
        {
            // Cannot compute an alignment without a valid file position
            ok = false;
        }
        else
        {
            // Round (current offset + two block headers) up to GX2_SHADER_ALIGNMENT.
            // Presumably the two headers are the pad block's and the program
            // block's, so the program data lands aligned - confirm against
            // GFDWriteFilePadBlock. The result is always in [0, alignment-1].
            const u32 alignMask = (u32)(GX2_SHADER_ALIGNMENT - 1);
            const u32 progStart = (u32)filePos + (u32)(2 * GFD_BLOCK_HEADER_SIZE);
            const u32 padSize   = ((progStart + alignMask) & ~alignMask) - progStart;

            if(!GFDWriteFilePadBlock(fp, padSize))
                ok = false;
        }
    }

    // Header for the program block, then the program data
    if(ok)
    {
        ok = GFDWriteFileBlockHeader(fp, GFD_BLOCK_TYPE_GX2_CSH_PROGRAM, nBytesProg)
          && GFDWriteFileGPUData(fp, (nBytesProg + 3) / 4, GFD_ELEMENT_SIZE_32, swapMode, (u32 *) pDataProg);
    }

    if(NULL != pDT_CS)
        GFDDestroyDataTable(pDT_CS);

    // Restore the caller's struct no matter how we got here
    pCS->shaderPtr = pDataProg;

    return ok;
}
1664
1665
GFDWriteFileShader(char * pFilename,GFDGPUVersion gpuVer,GFDEndianSwapMode swapMode,GFDAlignMode alignMode,u32 numShader,const GFDShaders * pShadersOrig)1666 bool GFD_API GFDWriteFileShader(char* pFilename, GFDGPUVersion gpuVer, GFDEndianSwapMode swapMode, GFDAlignMode alignMode, u32 numShader, const GFDShaders *pShadersOrig)
1667 {
1668 GFDShaders2 shaders = {0};
1669
1670 if (!pShadersOrig)
1671 {
1672 return false;
1673 }
1674
1675 shaders.abiVersion = GFD_DLL_ABI_VERSION;
1676 shaders.pVertexShader = pShadersOrig->pVertexShader;
1677 shaders.pGeometryShader = pShadersOrig->pGeometryShader;
1678 shaders.pPixelShader = pShadersOrig->pPixelShader;
1679
1680 return GFDWriteFileShader2(pFilename, gpuVer, swapMode, alignMode, numShader, &shaders);
1681 }
1682
GFDWriteFileShader2(char * pFilename,GFDGPUVersion gpuVer,GFDEndianSwapMode swapMode,GFDAlignMode alignMode,u32 numShader,const GFDShaders2 * pShaders)1683 bool GFD_API GFDWriteFileShader2(char* pFilename, GFDGPUVersion gpuVer, GFDEndianSwapMode swapMode, GFDAlignMode alignMode, u32 numShader, const GFDShaders2 *pShaders)
1684 {
1685 FILE *fpout = NULL;
1686 u32 count = 0;
1687
1688 if (!pShaders)
1689 {
1690 return false;
1691 }
1692
1693 if ( GFD_DLL_ABI_TYPE(pShaders->abiVersion) != GFD_DLL_ABI_TYPE(GFD_DLL_ABI_VERSION) ||
1694 GFD_DLL_ABI_VERSION_NUM(pShaders->abiVersion) > 0)
1695 {
1696 return false;
1697 }
1698
1699 if (!pFilename)
1700 {
1701 pFilename = GSH_DEFAULT_FILENAME;
1702 }
1703
1704 // Open file
1705 if(GFDOpenFile(&fpout, pFilename, "wb") != 0)
1706 {
1707 printf("Error: Can't open %s\n", pFilename);
1708 return false;
1709 }
1710
1711 // Check gpu version
1712 switch(gpuVer) {
1713 case GFD_GPU_VERSION_0:
1714 break;
1715 case GFD_GPU_VERSION_1:
1716 break;
1717 case GFD_GPU_VERSION_GPU7:
1718 break;
1719 default:
1720 printf("Warning: Unsupported GPU %d, using default\n", gpuVer);
1721 gpuVer = GFD_GPU_VERSION_GPU7;
1722 break;
1723 }
1724
1725 // Write the file header
1726 if(!GFDWriteFileHeader(fpout, gpuVer, alignMode))
1727 {
1728 printf("Error: Can't write file header\n");
1729 GFDCloseFile(fpout);
1730 return false;
1731 }
1732
1733 // Writes multiple shader blocks
1734 for (count = 0; count < numShader; count++)
1735 {
1736 if(NULL != &pShaders[count])
1737 {
1738 if(NULL != pShaders[count].pVertexShader)
1739 {
1740 if(!GFDWriteFileVertexShaderBlock(fpout, swapMode, alignMode, pShaders[count].pVertexShader))
1741 {
1742 printf("Error: Can't write vsh block.\n");
1743 GFDCloseFile(fpout);
1744 return false;
1745 }
1746 }
1747
1748 if(NULL != pShaders[count].pPixelShader)
1749 {
1750 if(!GFDWriteFilePixelShaderBlock(fpout, swapMode, alignMode, pShaders[count].pPixelShader))
1751 {
1752 printf("Error: Can't write psh block.\n");
1753 GFDCloseFile(fpout);
1754 return false;
1755 }
1756 }
1757
1758 if(NULL != pShaders[count].pGeometryShader)
1759 {
1760 if(!GFDWriteFileGeometryShaderBlock(fpout, swapMode, alignMode, pShaders[count].pGeometryShader))
1761 {
1762 printf("Error: Can't write psh block.\n");
1763 GFDCloseFile(fpout);
1764 return false;
1765 }
1766 }
1767
1768 if(NULL != pShaders[count].pComputeShader)
1769 {
1770 if(!GFDWriteFileComputeShaderBlock(fpout, swapMode, alignMode, pShaders[count].pComputeShader))
1771 {
1772 printf("Error: Can't write csh block.\n");
1773 GFDCloseFile(fpout);
1774 return false;
1775 }
1776 }
1777 }
1778 }
1779
1780 // Write an 'End' block to the file
1781 if(!GFDWriteFileBlockHeader(fpout, GFD_BLOCK_TYPE_END, 0))
1782 {
1783 printf("Error: Can't write end block header\n");
1784 GFDCloseFile(fpout);
1785 return false;
1786 }
1787
1788 GFDCloseFile(fpout);
1789 return true;
1790 }
1791
GFDAppendWriteFileShader(char * pFilename,GFDGPUVersion gpuVer,GFDEndianSwapMode swapMode,GFDAlignMode alignMode,u32 numShader,const GFDShaders * pShadersOrig)1792 bool GFD_API GFDAppendWriteFileShader(char* pFilename, GFDGPUVersion gpuVer, GFDEndianSwapMode swapMode, GFDAlignMode alignMode, u32 numShader, const GFDShaders *pShadersOrig)
1793 {
1794 GFDShaders2 shaders = {0};
1795
1796 if (!pShadersOrig)
1797 {
1798 return false;
1799 }
1800
1801 // Convert to GFDShaders2 structure
1802 shaders.abiVersion = GFD_DLL_ABI_VERSION;
1803 shaders.pVertexShader = pShadersOrig->pVertexShader;
1804 shaders.pGeometryShader = pShadersOrig->pGeometryShader;
1805 shaders.pPixelShader = pShadersOrig->pPixelShader;
1806
1807 // Call the new version with the updated
1808 return GFDAppendWriteFileShader2(pFilename, gpuVer, swapMode, alignMode, numShader, &shaders);
1809 }
1810
GFDAppendWriteFileShader2(char * pFilename,GFDGPUVersion gpuVer,GFDEndianSwapMode swapMode,GFDAlignMode alignMode,u32 numShader,const GFDShaders2 * pShaders)1811 bool GFD_API GFDAppendWriteFileShader2(char* pFilename, GFDGPUVersion gpuVer, GFDEndianSwapMode swapMode, GFDAlignMode alignMode, u32 numShader, const GFDShaders2 *pShaders)
1812 {
1813 FILE *fpout = NULL;
1814 u32 count = 0;
1815 GFDHeader fileHeader;
1816
1817 if (!pShaders)
1818 {
1819 return false;
1820 }
1821
1822 if ( GFD_DLL_ABI_TYPE(pShaders->abiVersion) != GFD_DLL_ABI_TYPE(GFD_DLL_ABI_VERSION) ||
1823 GFD_DLL_ABI_VERSION_NUM(pShaders->abiVersion) > 0)
1824 {
1825 return false;
1826 }
1827
1828 // open file
1829 if(GFDOpenFile(&fpout, pFilename, "rb+") != 0)
1830 {
1831 printf("Error: Can't open %s\n", pFilename);
1832 return false;
1833 }
1834
1835 // Read File Header
1836 if(!GFDReadFilePPCData(&fileHeader, (GFD_HEADER_SIZE + 3) / 4, GFD_ELEMENT_SIZE_32, fpout))
1837 {
1838 GFDCloseFile(fpout);
1839 printf("Error: Can't read file header.\n");
1840 return false;
1841 }
1842
1843 // check gpu version
1844 if(fileHeader.gpuVersion != gpuVer)
1845 {
1846 GFDCloseFile(fpout);
1847 printf("Error: GPU version is different.\n");
1848 return false;
1849 }
1850
1851 // check header version
1852 if(!GFDCheckHeaderMagicVersions(&fileHeader))
1853 {
1854 GFDCloseFile(fpout);
1855 printf("Error: Format version is different.\n");
1856 return false;
1857 }
1858
1859 // seeks to beginning of 'End' block
1860 fseek(fpout, -(s32)GFD_BLOCK_HEADER_SIZE, SEEK_END);
1861
1862 // append writes multiple shader blocks
1863 for (count = 0; count < numShader; count++)
1864 {
1865 if(NULL != &pShaders[count])
1866 {
1867 if(NULL != pShaders[count].pVertexShader)
1868 {
1869 if(!GFDWriteFileVertexShaderBlock(fpout, swapMode, alignMode, pShaders[count].pVertexShader))
1870 {
1871 printf("Error: Can't write vsh block.\n");
1872 GFDCloseFile(fpout);
1873 return false;
1874 }
1875 }
1876
1877 if(NULL != pShaders[count].pPixelShader)
1878 {
1879 if(!GFDWriteFilePixelShaderBlock(fpout, swapMode, alignMode, pShaders[count].pPixelShader))
1880 {
1881 printf("Error: Can't write psh block.\n");
1882 GFDCloseFile(fpout);
1883 return false;
1884 }
1885 }
1886
1887 if(NULL != pShaders[count].pGeometryShader)
1888 {
1889 if(!GFDWriteFileGeometryShaderBlock(fpout, swapMode, alignMode, pShaders[count].pGeometryShader))
1890 {
1891 printf("Error: Can't write psh block.\n");
1892 GFDCloseFile(fpout);
1893 return false;
1894 }
1895 }
1896
1897 if(NULL != pShaders[count].pComputeShader)
1898 {
1899 if(!GFDWriteFileComputeShaderBlock(fpout, swapMode, alignMode, pShaders[count].pComputeShader))
1900 {
1901 printf("Error: Can't write csh block.\n");
1902 GFDCloseFile(fpout);
1903 return false;
1904 }
1905 }
1906 }
1907 }
1908
1909 // Write an 'End' block to the file
1910 if(!GFDWriteFileBlockHeader(fpout, GFD_BLOCK_TYPE_END, 0))
1911 {
1912 printf("Error: Can't write end block header\n");
1913 GFDCloseFile(fpout);
1914 return false;
1915 }
1916
1917 GFDCloseFile(fpout);
1918 return true;
1919 }
1920
1921 // Write As Code-----
1922
// Emits a C array definition "GX2AttribVar <sName>_attrib_vars[]" to fp with
// one initializer per attribute (name, type name, array count, location),
// followed by a blank line. Writes nothing at all when nAttribs is 0.
void _GFDWriteGX2AttributesAsCode(FILE *fp, const char* sName, u32 nAttribs, GX2AttribVar *pAV)
{
    // Nothing to describe: leave the output untouched
    if (nAttribs == 0)
        return;

    fprintf(fp, "GX2AttribVar %s_attrib_vars[] = {\n", sName);

    const u32 lastIdx = nAttribs - 1;
    for (u32 idx = 0; idx < nAttribs; idx++)
    {
        // The final initializer is followed by a space instead of a comma
        const char sep = (idx == lastIdx) ? ' ' : ',';

        fprintf(fp," {\"%s\", %s, %u, %u}%c\n",
                pAV[idx].name,
                varTypeName[pAV[idx].type],
                pAV[idx].arrayCount,
                pAV[idx].location,
                sep);
    }

    fprintf(fp,"};\n");
    fprintf(fp,"\n");
}
1941
// Emits a C array definition "GX2SamplerVar <sName>_sampler_vars[]" to fp
// with one initializer per sampler (name, sampler type name, location),
// followed by a blank line. Writes nothing at all when nSamplers is 0.
void _GFDWriteGX2SamplersAsCode(FILE *fp, const char* sName, u32 nSamplers, GX2SamplerVar *pSV)
{
    // Nothing to describe: leave the output untouched
    if (nSamplers == 0)
        return;

    fprintf(fp, "GX2SamplerVar %s_sampler_vars[] = {\n", sName);

    const u32 lastIdx = nSamplers - 1;
    for (u32 idx = 0; idx < nSamplers; idx++)
    {
        // The final initializer is followed by a space instead of a comma
        const char sep = (idx == lastIdx) ? ' ' : ',';

        fprintf(fp," {\"%s\", %s, %u}%c\n",
                pSV[idx].name,
                samplerTypeName[pSV[idx].type],
                pSV[idx].location,
                sep);
    }

    fprintf(fp,"};\n");
    fprintf(fp,"\n");
}
1960
// Emits a C array definition "GX2UniformVar <sName>_uniforms[]" to fp with
// one initializer per uniform (name, type name, array count, offset, block
// index), followed by a blank line. An invalid block index is written
// symbolically as GX2_UNIFORM_BLOCK_INDEX_INVALID. Writes nothing at all when
// nUniforms is 0. ivBase is accepted but not used here.
void _GFDWriteGX2UniformsAsCode(FILE *fp, const char* sName, u32 nUniforms, GX2UniformVar *pUV, GX2UniformInitialValue *ivBase)
{
    // Nothing to describe: leave the output untouched
    if (nUniforms == 0)
        return;

    fprintf(fp, "GX2UniformVar %s_uniforms[] = {\n", sName);

    const u32 lastIdx = nUniforms - 1;
    for (u32 idx = 0; idx < nUniforms; idx++)
    {
        // Leading fields of the initializer
        fprintf(fp," {\"%s\", %s, %u, %u, ",
                pUV[idx].name,
                varTypeName[pUV[idx].type],
                pUV[idx].arrayCount,
                pUV[idx].offset);

        // Block index: numeric unless it is the "invalid" marker
        if (pUV[idx].blockIndex != GX2_UNIFORM_BLOCK_INDEX_INVALID)
            fprintf(fp,"%u, ", pUV[idx].blockIndex);
        else
            fprintf(fp,"\n GX2_UNIFORM_BLOCK_INDEX_INVALID, ");

        // The final initializer is followed by a space instead of a comma
        const char sep = (idx == lastIdx) ? ' ' : ',';
        fprintf(fp,"}%c\n", sep);
    }

    fprintf(fp,"};\n");
    fprintf(fp,"\n");
}
1984
// Emits a C array definition "GX2UniformBlock <sName>_uniform_blocks[]" to fp
// with one initializer per block (name, location, size), followed by a blank
// line. Writes nothing at all when nUniBlocks is 0.
void _GFDGX2UniformBlockAsCode(FILE *fp, const char* sName, u32 nUniBlocks, GX2UniformBlock *pUB)
{
    // Nothing to describe: leave the output untouched
    if (nUniBlocks == 0)
        return;

    fprintf(fp, "GX2UniformBlock %s_uniform_blocks[] = {\n", sName);

    const u32 lastIdx = nUniBlocks - 1;
    for (u32 idx = 0; idx < nUniBlocks; idx++)
    {
        // The final initializer is followed by a space instead of a comma
        const char sep = (idx == lastIdx) ? ' ' : ',';

        fprintf(fp," {\"%s\", %u, %u}%c\n",
                pUB[idx].name,
                pUB[idx].location,
                pUB[idx].size,
                sep);
    }

    fprintf(fp,"};\n");
    fprintf(fp,"\n");
}
2003
// Writes byteLen/4 words from ptr to fp as comma-separated "0x%08x" literals,
// four per line. The last word is followed by a space instead of a comma.
// The first line of every 64-word group ends with a "// 0x...." comment
// holding the index of that line's first word.
// fp must be non-NULL and byteLen a multiple of 4 (both asserted).
void _GFDWriteWordsAsHex(FILE *fp, u32 *ptr, u32 byteLen)
{
    assert(fp != NULL);
    assert((byteLen & 0x03) == 0);

    const u32 nWords = byteLen / 4;

    fprintf(fp, " ");
    for(u32 w = 0; w < nWords; w++)
    {
        const char sep = (w + 1 == nWords) ? ' ' : ',';
        fprintf(fp,"0x%08x%c", ptr[w], sep);

        // Only the fourth word of each line ends the line
        if((w & 3) != 3)
            continue;

        if((w & 0x3f) == 0x3)
            fprintf(fp, " // 0x%04x\n ", w-3);
        else
            fprintf(fp, "\n ");
    }
}
2022
// Writes the words at ptr to fp as a definition of the form
// "<attrib> static const u32 <name>[<byteLen/4>] = { ... };" followed by a
// blank line, using _GFDWriteWordsAsHex for the element list.
// Emits nothing when ptr is NULL or byteLen is 0.
// fp must be non-NULL and byteLen a multiple of 4 (both asserted).
void _GFDWriteWordsAsCode(FILE *fp, const char *name, u32 *ptr, u32 byteLen, const char *attrib)
{
    assert(fp != NULL);
    assert((byteLen & 0x03) == 0);

    if (ptr != NULL && byteLen != 0)
    {
        // Declaration line and opening brace
        fprintf(fp, "%s static const u32 %s[%u] =\n{\n", attrib, name, byteLen/4);

        // Element list
        _GFDWriteWordsAsHex(fp, ptr, byteLen);

        // Closing brace and trailing blank line
        fprintf(fp, "\n};\n\n");
    }
}
2038
// Builds the concatenation of src1 and src2 in dst, a buffer of max chars,
// and returns dst. Each append is limited to the space that remains in dst
// less one char for the terminator.
static char *makeName(char *dst, char *src1, char *src2, u32 max)
{
    // Start from an empty string so both steps are plain appends
    *dst = '\0';

    // strncat_s is used rather than strncat to avoid the "unsafe" warnings
    // (strncat might write size+1 characters)
    strncat_s(dst, max, src1, max-1);

    // Append the second part at the current end, within the remaining room
    const size_t used = strlen(dst);
    char *tail = dst + used;
    strncat_s(tail, max-used, src2, max-used-1);

    return dst;
}
2053
GFDWriteFileShaderAsCodeWithSource(char * pFilename,GFDEndianSwapMode swapMode,const GFDShaders2 * pShaders,const GFDShadersSrc * pShadersSrc)2054 GFD_DECLSPEC bool GFD_API GFDWriteFileShaderAsCodeWithSource(char* pFilename, GFDEndianSwapMode swapMode, const GFDShaders2 *pShaders, const GFDShadersSrc *pShadersSrc)
2055 {
2056 FILE *fpout = NULL;
2057
2058 // Get base name from filename (for structure names)
2059 char *slash1, *slash2, *dot;
2060 #define BASEMAX 256
2061 char basename[BASEMAX], tempname[BASEMAX];
2062
2063 slash1 = strrchr(pFilename, '/');
2064 slash2 = strrchr(pFilename, '\\');
2065 if (slash2 > slash1) slash1 = slash2;
2066 basename[0]=0;
2067 if (slash1) {
2068 // strncat(basename, slash1+1, BASEMAX-1);
2069 strncat_s(basename, BASEMAX, slash1+1, BASEMAX-1);
2070 } else {
2071 // strncat(basename, pFilename, BASEMAX-1);
2072 strncat_s(basename, BASEMAX, pFilename, BASEMAX-1);
2073 }
2074 dot = strrchr(basename, '.');
2075 if (dot) *dot=0;
2076
2077 if ( GFD_DLL_ABI_TYPE(pShaders->abiVersion) != GFD_DLL_ABI_TYPE(GFD_DLL_ABI_VERSION) ||
2078 GFD_DLL_ABI_VERSION_NUM(pShaders->abiVersion) > 0)
2079 {
2080 return false;
2081 }
2082
2083 // open file
2084 if(GFDOpenFile(&fpout, pFilename, "w") != 0)
2085 {
2086 printf("Error: Can't open %s\n", pFilename);
2087 return false;
2088 }
2089
2090 fprintf(fpout,"//--------------------------------------------------\n\n");
2091 fprintf(fpout,"// This file is automatically generated by gfd.\n\n");
2092 fprintf(fpout,"//--------------------------------------------------\n\n");
2093
2094 // For Vertex Shader
2095 if(NULL != pShaders->pVertexShader)
2096 {
2097 fprintf(fpout, "// ---------- %s Vertex Shader ----------\n\n", basename);
2098
2099 // First, write out initial values
2100 // (because uniforms refer to them)
2101
2102 _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_VS_initial_values", BASEMAX),
2103 (u32 *) pShaders->pVertexShader->initialValues,
2104 pShaders->pVertexShader->numInitialValues * sizeof(GX2UniformInitialValue), "");
2105
2106 // Swap endian for GPU7
2107 if(swapMode != GFD_ENDIAN_SWAP_MODE_8_IN_32)
2108 {
2109 int nElem = (pShaders->pVertexShader->shaderSize + 0x3) / 4;
2110 GFDEndianSwap8in32((u32*)pShaders->pVertexShader->shaderPtr, nElem);
2111 }
2112
2113 if ( pShadersSrc && pShadersSrc->pVertexShader )
2114 {
2115 char *next_token = NULL;
2116 char *p = strtok_s((char*)pShadersSrc->pVertexShader, "\r\n", &next_token);
2117
2118 fprintf(fpout, "// Source Vertex Shader\n");
2119 while ( p )
2120 {
2121 fprintf(fpout, "// %s\n", p);
2122 p = strtok_s(NULL, "\r\n", &next_token);
2123 }
2124 fprintf(fpout, "// End Source Vertex Shader\n");
2125 }
2126
2127 // Then, write out the shader pieces,
2128 // and finally, write the shader itself.
2129 _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_VS_shaderPtr", BASEMAX),
2130 (u32 *) pShaders->pVertexShader->shaderPtr,
2131 pShaders->pVertexShader->shaderSize, "ALIGNVAR(256)");
2132
2133 _GFDGX2UniformBlockAsCode(fpout, makeName(tempname, basename, "_VS", BASEMAX),
2134 pShaders->pVertexShader->numUniformBlocks,
2135 pShaders->pVertexShader->uniformBlocks);
2136
2137 _GFDWriteGX2UniformsAsCode(fpout, makeName(tempname, basename, "_VS", BASEMAX),
2138 pShaders->pVertexShader->numUniforms,
2139 pShaders->pVertexShader->uniformVars, pShaders->pVertexShader->initialValues);
2140
2141 _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_VS_loop_vars", BASEMAX),
2142 (u32 *) pShaders->pVertexShader->_loopVars,
2143 pShaders->pVertexShader->_numLoops * sizeof(GFDLoopVar), "");
2144
2145 _GFDWriteGX2SamplersAsCode(fpout, makeName(tempname, basename, "_VS", BASEMAX),
2146 pShaders->pVertexShader->numSamplers,
2147 pShaders->pVertexShader->samplerVars);
2148
2149 _GFDWriteGX2AttributesAsCode(fpout, makeName(tempname, basename, "_VS", BASEMAX),
2150 pShaders->pVertexShader->numAttribs,
2151 pShaders->pVertexShader->attribVars);
2152
2153 fprintf(fpout, "\n");
2154 fprintf(fpout, "\n");
2155 fprintf(fpout, "static GX2VertexShader %s_VS = {\n", basename);
2156 fprintf(fpout, " { // _regs\n");
2157 _GFDWriteWordsAsHex(fpout, (u32 *) &pShaders->pVertexShader->_regs, sizeof(GFDVertexShaderRegs));
2158 fprintf(fpout, "\n },\n");
2159
2160 fprintf(fpout, " %u,\n", pShaders->pVertexShader->shaderSize);
2161 fprintf(fpout, " (void *) %s_VS_shaderPtr,\n", basename);
2162
2163 fprintf(fpout, " %s,\n", shaderModeName[(u32)pShaders->pVertexShader->shaderMode]);
2164
2165 fprintf(fpout, " %u,\n", pShaders->pVertexShader->numUniformBlocks);
2166 if (pShaders->pVertexShader->numUniformBlocks) {
2167 fprintf(fpout, " %s_VS_uniform_blocks,\n", basename);
2168 } else {
2169 fprintf(fpout, " NULL,\n");
2170 }
2171
2172 fprintf(fpout, " %u,\n", pShaders->pVertexShader->numUniforms);
2173 if (pShaders->pVertexShader->numUniforms) {
2174 fprintf(fpout, " %s_VS_uniforms,\n", basename);
2175 } else {
2176 fprintf(fpout, " NULL,\n");
2177 }
2178
2179 fprintf(fpout, " %u,\n", pShaders->pVertexShader->numInitialValues);
2180 if (pShaders->pVertexShader->numInitialValues) {
2181 fprintf(fpout, " (GX2UniformInitialValue *) %s_VS_initial_values,\n", basename);
2182 } else {
2183 fprintf(fpout, " NULL,\n");
2184 }
2185
2186 fprintf(fpout, " %u,\n", pShaders->pVertexShader->_numLoops);
2187 if (pShaders->pVertexShader->_numLoops) {
2188 fprintf(fpout, " (GFDLoopVar *) %s_VS_loop_vars,\n", basename);
2189 } else {
2190 fprintf(fpout, " NULL,\n");
2191 }
2192
2193 fprintf(fpout, " %u,\n", pShaders->pVertexShader->numSamplers);
2194 if (pShaders->pVertexShader->numSamplers) {
2195 fprintf(fpout, " %s_VS_sampler_vars,\n", basename);
2196 } else {
2197 fprintf(fpout, " NULL,\n");
2198 }
2199
2200 fprintf(fpout, " %u,\n", pShaders->pVertexShader->numAttribs);
2201 if (pShaders->pVertexShader->numAttribs) {
2202 fprintf(fpout, " %s_VS_attrib_vars,\n", basename);
2203 } else {
2204 fprintf(fpout, " NULL,\n");
2205 }
2206
2207 fprintf(fpout, " %d,\n", (u32)pShaders->pVertexShader->ringItemsize);
2208
2209 fprintf(fpout, " (GX2Boolean)%d,\n", (u32)pShaders->pVertexShader->hasStreamOut);
2210
2211 fprintf(fpout, " { \n");
2212 _GFDWriteWordsAsHex(fpout, (u32 *) &pShaders->pVertexShader->streamOutVertexStride, sizeof(u32)*GX2_MAX_STREAMOUT_BUFFERS);
2213 fprintf(fpout, "\n },\n");
2214
2215 // end of structure
2216 fprintf(fpout, "};\n\n\n");
2217 }
2218
2219 // For Pixel Shader
2220 if(NULL != pShaders->pPixelShader )
2221 {
2222 fprintf(fpout, "// ---------- %s Pixel Shader ----------\n\n", basename);
2223
2224 // First, write out initial values
2225 // (because uniforms refer to them)
2226 _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_PS_initial_values", BASEMAX),
2227 (u32 *) pShaders->pPixelShader->initialValues,
2228 pShaders->pPixelShader->numInitialValues * sizeof(GX2UniformInitialValue), "");
2229
2230 // Swap endian for GPU7
2231 if(swapMode != GFD_ENDIAN_SWAP_MODE_8_IN_32)
2232 {
2233 int nElem = ( pShaders->pPixelShader->shaderSize + 0x3) / 4;
2234 GFDEndianSwap8in32((u32*) pShaders->pPixelShader->shaderPtr, nElem);
2235 }
2236
2237 if ( pShadersSrc && pShadersSrc->pPixelShader )
2238 {
2239 char *next_token = NULL;
2240 char *p = strtok_s((char*)pShadersSrc->pPixelShader, "\r\n", &next_token);
2241
2242 fprintf(fpout, "// Source Pixel Shader\n");
2243 while ( p )
2244 {
2245 fprintf(fpout, "// %s\n", p);
2246 p = strtok_s(NULL, "\r\n", &next_token);
2247 }
2248 fprintf(fpout, "// End Source Pixel Shader\n");
2249 }
2250
2251 // Then, write out the shader pieces,
2252 // and finally, write the shader itself.
2253 _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_PS_shaderPtr", BASEMAX),
2254 (u32 *) pShaders->pPixelShader->shaderPtr,
2255 pShaders->pPixelShader->shaderSize, "ALIGNVAR(256)");
2256
2257 _GFDGX2UniformBlockAsCode(fpout, makeName(tempname, basename, "_PS", BASEMAX),
2258 pShaders->pPixelShader->numUniformBlocks,
2259 pShaders->pPixelShader->uniformBlocks);
2260
2261 _GFDWriteGX2UniformsAsCode(fpout, makeName(tempname, basename, "_PS", BASEMAX),
2262 pShaders->pPixelShader->numUniforms,
2263 pShaders->pPixelShader->uniformVars, pShaders->pPixelShader->initialValues);
2264
2265 _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_PS_loop_vars", BASEMAX),
2266 (u32 *) pShaders->pPixelShader->_loopVars,
2267 pShaders->pPixelShader->_numLoops * sizeof(GFDLoopVar), "");
2268
2269 _GFDWriteGX2SamplersAsCode(fpout, makeName(tempname, basename, "_PS", BASEMAX),
2270 pShaders->pPixelShader->numSamplers,
2271 pShaders->pPixelShader->samplerVars);
2272
2273 fprintf(fpout, "\n");
2274 fprintf(fpout, "\n");
2275 fprintf(fpout, "static GX2PixelShader %s_PS = {\n", basename);
2276 fprintf(fpout, " { // _regs\n");
2277 _GFDWriteWordsAsHex(fpout, (u32 *) &pShaders->pPixelShader->_regs, sizeof(GFDPixelShaderRegs));
2278 fprintf(fpout, "\n },\n");
2279
2280 fprintf(fpout, " %u,\n", pShaders->pPixelShader->shaderSize);
2281 fprintf(fpout, " (void *) %s_PS_shaderPtr,\n", basename);
2282
2283 fprintf(fpout, " %s,\n", shaderModeName[(u32)pShaders->pPixelShader->shaderMode]);
2284
2285 fprintf(fpout, " %u,\n", pShaders->pPixelShader->numUniformBlocks);
2286 if (pShaders->pPixelShader->numUniformBlocks) {
2287 fprintf(fpout, " %s_PS_uniform_blocks,\n", basename);
2288 } else {
2289 fprintf(fpout, " NULL,\n");
2290 }
2291
2292 fprintf(fpout, " %u,\n", pShaders->pPixelShader->numUniforms);
2293 if (pShaders->pPixelShader->numUniforms) {
2294 fprintf(fpout, " %s_PS_uniforms,\n", basename);
2295 } else {
2296 fprintf(fpout, " NULL,\n");
2297 }
2298
2299 fprintf(fpout, " %u,\n", pShaders->pPixelShader->numInitialValues);
2300 if (pShaders->pPixelShader->numInitialValues) {
2301 fprintf(fpout, " (GX2UniformInitialValue *) %s_PS_initial_values,\n", basename);
2302 } else {
2303 fprintf(fpout, " NULL,\n");
2304 }
2305
2306 fprintf(fpout, " %u,\n", pShaders->pPixelShader->_numLoops);
2307 if (pShaders->pPixelShader->_numLoops) {
2308 fprintf(fpout, " (GFDLoopVar *) %s_PS_loop_vars,\n", basename);
2309 } else {
2310 fprintf(fpout, " NULL,\n");
2311 }
2312
2313 fprintf(fpout, " %u,\n", pShaders->pPixelShader->numSamplers);
2314 if (pShaders->pPixelShader->numSamplers) {
2315 fprintf(fpout, " %s_PS_sampler_vars,\n", basename);
2316 } else {
2317 fprintf(fpout, " NULL,\n");
2318 }
2319
2320 // end of structure
2321 fprintf(fpout, "};\n\n\n");
2322 }
2323
2324 // For Geometry Shader
2325 if(NULL != pShaders->pGeometryShader )
2326 {
2327 fprintf(fpout, "// ---------- %s Geometry Shader ----------\n\n", basename);
2328
2329 // First, write out initial values
2330 // (because uniforms refer to them)
2331 _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_GS_initial_values", BASEMAX),
2332 (u32 *) pShaders->pGeometryShader->initialValues,
2333 pShaders->pGeometryShader->numInitialValues * sizeof(GX2UniformInitialValue), "");
2334 // Swap endian for GPU7
2335 if(swapMode != GFD_ENDIAN_SWAP_MODE_8_IN_32)
2336 {
2337 int nElem = ( pShaders->pGeometryShader->shaderSize + 0x3) / 4;
2338 GFDEndianSwap8in32((u32*) pShaders->pGeometryShader->shaderPtr, nElem);
2339 }
2340
2341 if ( pShadersSrc && pShadersSrc->pGeometryShader )
2342 {
2343 char *next_token = NULL;
2344 char *p = strtok_s((char*)pShadersSrc->pGeometryShader, "\r\n", &next_token);
2345
2346 fprintf(fpout, "// Source Geometry Shader\n");
2347 while ( p )
2348 {
2349 fprintf(fpout, "// %s\n", p);
2350 p = strtok_s(NULL, "\r\n", &next_token);
2351 }
2352 fprintf(fpout, "// End Source Geometry Shader\n");
2353 }
2354
2355 // Then, write out the shader pieces,
2356 // and finally, write the shader itself.
2357 _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_GS_shaderPtr", BASEMAX),
2358 (u32 *) pShaders->pGeometryShader->shaderPtr,
2359 pShaders->pGeometryShader->shaderSize, "ALIGNVAR(256)");
2360
2361 // Swap endian for GPU7
2362 if(swapMode != GFD_ENDIAN_SWAP_MODE_8_IN_32)
2363 {
2364 int nElem = ( pShaders->pGeometryShader->copyShaderSize + 0x3) / 4;
2365 GFDEndianSwap8in32((u32*) pShaders->pGeometryShader->copyShaderPtr, nElem);
2366 }
2367
2368 _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_GS_copyShaderPtr", BASEMAX),
2369 (u32 *) pShaders->pGeometryShader->copyShaderPtr,
2370 pShaders->pGeometryShader->copyShaderSize, "ALIGNVAR(256)");
2371
2372 _GFDGX2UniformBlockAsCode(fpout, makeName(tempname, basename, "_GS", BASEMAX),
2373 pShaders->pGeometryShader->numUniformBlocks,
2374 pShaders->pGeometryShader->uniformBlocks);
2375
2376 _GFDWriteGX2UniformsAsCode(fpout, makeName(tempname, basename, "_GS", BASEMAX),
2377 pShaders->pGeometryShader->numUniforms,
2378 pShaders->pGeometryShader->uniformVars, pShaders->pGeometryShader->initialValues);
2379
2380 _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_GS_loop_vars", BASEMAX),
2381 (u32 *) pShaders->pGeometryShader->_loopVars,
2382 pShaders->pGeometryShader->_numLoops * sizeof(GFDLoopVar), "");
2383
2384 _GFDWriteGX2SamplersAsCode(fpout, makeName(tempname, basename, "_GS", BASEMAX),
2385 pShaders->pGeometryShader->numSamplers,
2386 pShaders->pGeometryShader->samplerVars);
2387
2388 fprintf(fpout, "\n");
2389 fprintf(fpout, "\n");
2390 fprintf(fpout, "static GX2GeometryShader %s_GS = {\n", basename);
2391 fprintf(fpout, " { // _regs\n");
2392 _GFDWriteWordsAsHex(fpout, (u32 *) &pShaders->pGeometryShader->_regs, sizeof(GFDGeometryShaderRegs));
2393 fprintf(fpout, "\n },\n");
2394
2395 fprintf(fpout, " %u,\n", pShaders->pGeometryShader->shaderSize);
2396 fprintf(fpout, " (void *) %s_GS_shaderPtr,\n", basename);
2397 fprintf(fpout, " %u,\n", pShaders->pGeometryShader->copyShaderSize);
2398 fprintf(fpout, " (void *) %s_GS_copyShaderPtr,\n", basename);
2399 fprintf(fpout, " %s,\n", shaderModeName[(u32)pShaders->pGeometryShader->shaderMode]);
2400
2401 fprintf(fpout, " %u,\n", pShaders->pGeometryShader->numUniformBlocks);
2402 if (pShaders->pGeometryShader->numUniformBlocks) {
2403 fprintf(fpout, " %s_GS_uniform_blocks,\n", basename);
2404 } else {
2405 fprintf(fpout, " NULL,\n");
2406 }
2407
2408 fprintf(fpout, " %u,\n", pShaders->pGeometryShader->numUniforms);
2409 if (pShaders->pGeometryShader->numUniforms) {
2410 fprintf(fpout, " %s_GS_uniforms,\n", basename);
2411 } else {
2412 fprintf(fpout, " NULL,\n");
2413 }
2414
2415 fprintf(fpout, " %u,\n", pShaders->pGeometryShader->numInitialValues);
2416 if (pShaders->pGeometryShader->numInitialValues) {
2417 fprintf(fpout, " (GX2UniformInitialValue *) %s_GS_initial_values,\n", basename);
2418 } else {
2419 fprintf(fpout, " NULL,\n");
2420 }
2421
2422 fprintf(fpout, " %u,\n", pShaders->pGeometryShader->_numLoops);
2423 if (pShaders->pGeometryShader->_numLoops) {
2424 fprintf(fpout, " (GFDLoopVar *) %s_GS_loop_vars,\n", basename);
2425 } else {
2426 fprintf(fpout, " NULL,\n");
2427 }
2428
2429 fprintf(fpout, " %u,\n", pShaders->pGeometryShader->numSamplers);
2430 if (pShaders->pGeometryShader->numSamplers) {
2431 fprintf(fpout, " %s_GS_sampler_vars,\n", basename);
2432 } else {
2433 fprintf(fpout, " NULL,\n");
2434 }
2435
2436 fprintf(fpout, " %d,\n", (u32)pShaders->pGeometryShader->ringItemsize);
2437
2438 fprintf(fpout, " (GX2Boolean)%d,\n", (u32)pShaders->pGeometryShader->hasStreamOut);
2439
2440 fprintf(fpout, " { \n");
2441 _GFDWriteWordsAsHex(fpout, (u32 *) &pShaders->pGeometryShader->streamOutVertexStride, sizeof(u32)*GX2_MAX_STREAMOUT_BUFFERS);
2442 fprintf(fpout, "\n },\n");
2443
2444 // end of structure
2445 fprintf(fpout, "};\n\n\n");
2446 }
2447
2448
2449 // For Compute Shader
2450 if(NULL != pShaders->pComputeShader)
2451 {
2452 fprintf(fpout, "// ---------- %s Compute Shader ----------\n\n", basename);
2453
2454 // Write this first since the uniforms refer to them.
2455 _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_CS_initial_values", BASEMAX),
2456 (u32 *) pShaders->pComputeShader->initialValues,
2457 pShaders->pComputeShader->numInitialValues * sizeof(GX2UniformInitialValue), "");
2458
2459 // Swap endian for GPU7
2460 if(swapMode != GFD_ENDIAN_SWAP_MODE_8_IN_32)
2461 {
2462 int nElem = (pShaders->pComputeShader->shaderSize + 0x3) / 4;
2463 GFDEndianSwap8in32((u32*)pShaders->pComputeShader->shaderPtr, nElem);
2464 }
2465
2466 if ( pShadersSrc && pShadersSrc->pComputeShader )
2467 {
2468 char *next_token = NULL;
2469 char *p = strtok_s((char*)pShadersSrc->pComputeShader, "\r\n", &next_token);
2470
2471 fprintf(fpout, "// Source Compute Shader\n");
2472 while ( p )
2473 {
2474 fprintf(fpout, "// %s\n", p);
2475 p = strtok_s(NULL, "\r\n", &next_token);
2476 }
2477 fprintf(fpout, "// End Source Compute Shader\n");
2478 }
2479
2480 // Then, write out the shader pieces,
2481 // and finally, write the shader itself.
2482 _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_CS_shaderPtr", BASEMAX),
2483 (u32 *) pShaders->pComputeShader->shaderPtr,
2484 pShaders->pComputeShader->shaderSize, "ALIGNVAR(256)");
2485
2486 _GFDGX2UniformBlockAsCode(fpout, makeName(tempname, basename, "_CS", BASEMAX),
2487 pShaders->pComputeShader->numUniformBlocks,
2488 pShaders->pComputeShader->uniformBlocks);
2489
2490 _GFDWriteGX2SamplersAsCode(fpout, makeName(tempname, basename, "_CS", BASEMAX),
2491 pShaders->pComputeShader->numSamplers,
2492 pShaders->pComputeShader->samplerVars);
2493
2494 _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_CS_loop_vars", BASEMAX),
2495 (u32 *) pShaders->pComputeShader->_loopVars,
2496 pShaders->pComputeShader->_numLoops * sizeof(GFDLoopVar), "");
2497
2498 _GFDWriteGX2UniformsAsCode(fpout, makeName(tempname, basename, "_CS", BASEMAX),
2499 pShaders->pComputeShader->numUniforms,
2500 pShaders->pComputeShader->uniformVars, pShaders->pComputeShader->initialValues);
2501
2502 fprintf(fpout, "\n");
2503 fprintf(fpout, "\n");
2504 fprintf(fpout, "static GX2ComputeShader %s_CS = {\n", basename);
2505 fprintf(fpout, " { // _regs\n");
2506 _GFDWriteWordsAsHex(fpout, (u32 *) &pShaders->pComputeShader->_regs, sizeof(GFDComputeShaderRegs));
2507 fprintf(fpout, "\n },\n");
2508
2509 fprintf(fpout, " %u,\n", pShaders->pComputeShader->shaderSize);
2510 fprintf(fpout, " (void *) %s_CS_shaderPtr,\n", basename);
2511
2512
2513 fprintf(fpout, " %u,\n", pShaders->pComputeShader->numUniformBlocks);
2514 if (pShaders->pComputeShader->numUniformBlocks) {
2515 fprintf(fpout, " %s_CS_uniform_blocks,\n", basename);
2516 } else {
2517 fprintf(fpout, " NULL,\n");
2518 }
2519
2520 fprintf(fpout, " %u,\n", pShaders->pComputeShader->numUniforms);
2521 if (pShaders->pComputeShader->numUniforms) {
2522 fprintf(fpout, " %s_CS_uniforms,\n", basename);
2523 } else {
2524 fprintf(fpout, " NULL,\n");
2525 }
2526
2527 fprintf(fpout, " %u,\n", pShaders->pComputeShader->numInitialValues);
2528 if (pShaders->pComputeShader->numInitialValues) {
2529 fprintf(fpout, " (GX2UniformInitialValue *) %s_CS_initial_values,\n", basename);
2530 } else {
2531 fprintf(fpout, " NULL,\n");
2532 }
2533
2534 fprintf(fpout, " %u,\n", pShaders->pComputeShader->_numLoops);
2535 if (pShaders->pComputeShader->_numLoops) {
2536 fprintf(fpout, " (GFDLoopVar *) %s_CS_loop_vars,\n", basename);
2537 } else {
2538 fprintf(fpout, " NULL,\n");
2539 }
2540
2541 fprintf(fpout, " %u,\n", pShaders->pComputeShader->numSamplers);
2542 if (pShaders->pComputeShader->numSamplers) {
2543 fprintf(fpout, " %s_CS_sampler_vars,\n", basename);
2544 } else {
2545 fprintf(fpout, " NULL,\n");
2546 }
2547
2548 fprintf(fpout, " %u,\n", pShaders->pComputeShader->layout_size_x);
2549 fprintf(fpout, " %u,\n", pShaders->pComputeShader->layout_size_y);
2550 fprintf(fpout, " %u,\n", pShaders->pComputeShader->layout_size_z);
2551 fprintf(fpout, " (GX2Boolean)%d,\n", (u32)pShaders->pComputeShader->Over64Mode);
2552 fprintf(fpout, " %d,\n", (u32)pShaders->pComputeShader->numWavesPerSIMD);
2553
2554 // end of structure
2555 fprintf(fpout, "};\n\n\n");
2556 }
2557
2558 GFDCloseFile(fpout);
2559
2560 return true;
2561 }
2562
GFDWriteFileShaderAsCode(char * pFilename,GFDEndianSwapMode swapMode,const GFDShaders * pShadersOrig)2563 GFD_DECLSPEC bool GFD_API GFDWriteFileShaderAsCode(char* pFilename, GFDEndianSwapMode swapMode, const GFDShaders *pShadersOrig)
2564 {
2565 GFDShaders2 shaders = {0};
2566
2567 if (!pShadersOrig)
2568 {
2569 return false;
2570 }
2571
2572 // Convert to new version of the structure
2573 shaders.abiVersion = GFD_DLL_ABI_VERSION;
2574 shaders.pVertexShader = pShadersOrig->pVertexShader;
2575 shaders.pGeometryShader = pShadersOrig->pGeometryShader;
2576 shaders.pPixelShader = pShadersOrig->pPixelShader;
2577
2578 return GFDWriteFileShaderAsCodeWithSource(pFilename, swapMode, &shaders, NULL);
2579 }
2580
GFDWriteFileShaderAsCode2(char * pFilename,GFDEndianSwapMode swapMode,const GFDShaders2 * pShaders)2581 GFD_DECLSPEC bool GFD_API GFDWriteFileShaderAsCode2(char* pFilename, GFDEndianSwapMode swapMode, const GFDShaders2 *pShaders)
2582 {
2583 if (!pShaders)
2584 {
2585 return false;
2586 }
2587
2588 if ( GFD_DLL_ABI_TYPE(pShaders->abiVersion) != GFD_DLL_ABI_TYPE(GFD_DLL_ABI_VERSION) ||
2589 GFD_DLL_ABI_VERSION_NUM(pShaders->abiVersion) > 0)
2590 {
2591 return false;
2592 }
2593
2594 return GFDWriteFileShaderAsCodeWithSource(pFilename, swapMode, pShaders, NULL);
2595 }
2596
2597