1 /*---------------------------------------------------------------------------*
2 
3   Copyright (C) Nintendo.  All rights reserved.
4 
5   These coded instructions, statements, and computer programs contain
6   proprietary information of Nintendo of America Inc. and/or Nintendo
7   Company Ltd., and are protected by Federal copyright law.  They may
8   not be disclosed to third parties or copied or duplicated in any form,
9   in whole or in part, without the prior written consent of Nintendo.
10 
11  *---------------------------------------------------------------------------*/
12 
#include "types.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "windows/gx2.h"

#include "cafe/gfd.h"
#include "gfdFile.h"
22 
23 #define GSH_DEFAULT_FILENAME "out.gsh"
24 
25 // definitions for private structures
26 
27 // GFD specific Shader structures to repack structure between 32 bit and 64 bit
28 
29 typedef struct _GFDVertexShaderRegs
30 {
31     u32 reg[GX2_NUM_VERTEX_SHADER_REGISTERS];
32 } GFDVertexShaderRegs;
33 
34 typedef struct _GFDGeometryShaderRegs
35 {
36     u32 reg[GX2_NUM_GEOMETRY_SHADER_REGISTERS];
37 } GFDGeometryShaderRegs;
38 
39 typedef struct _GFDPixelShaderRegs
40 {
41     u32 reg[GX2_NUM_PIXEL_SHADER_REGISTERS];
42 } GFDPixelShaderRegs;
43 
44 typedef struct _GFDComputeShaderRegs
45 {
46     u32 reg[GX2_NUM_COMPUTE_SHADER_REGISTERS];
47 } GFDComputeShaderRegs;
48 
49 typedef struct _GFDVertexShader
50 {
51     GFDVertexShaderRegs _regs;
52     u32                 shaderSize;
53     u32                 shaderPtr;
54     GX2ShaderMode       shaderMode;
55     u32                 numUniformBlocks;
56     u32                 uniformBlocks;
57     u32                 numUniforms;
58     u32                 uniformVars;
59     u32                 numInitialValues;
60     u32                 initialValues;
61     u32                 _numLoops;
62     u32                 _loopVars;
63     u32                 numSamplers;
64     u32                 samplerVars;
65     u32                 numAttribs;
66     u32                 attribVars;
67     u32                 ringItemsize;
68     u32                 hasStreamOut;
69     u32                 streamOutVertexStride[GX2_MAX_STREAMOUT_BUFFERS];
70     GX2RBuffer          shaderProgram;
71 } GFDVertexShader;
72 
73 // GFD specific Shader structures to repack structure between 32 bit and 64 bit
74 
75 typedef struct _GFDGeometryShader
76 {
77     GFDGeometryShaderRegs _regs;
78     u32                 shaderSize;
79     u32                 shaderPtr;
80     u32                 copyShaderSize;
81     u32                 copyShaderPtr;
82     GX2ShaderMode       shaderMode;
83     u32                 numUniformBlocks;
84     u32                 uniformBlocks;
85     u32                 numUniforms;
86     u32                 uniformVars;
87     u32                 numInitialValues;
88     u32                 initialValues;
89     u32                 _numLoops;
90     u32                 _loopVars;
91     u32                 numSamplers;
92     u32                 samplerVars;
93     u32                 ringItemsize;
94     u32                 hasStreamOut;
95     u32                 streamOutVertexStride[GX2_MAX_STREAMOUT_BUFFERS];
96     GX2RBuffer          shaderProgram;
97     GX2RBuffer          copyShaderProgram;
98 } GFDGeometryShader;
99 
100 // GFD specific Shader structures to repack structure between 32 bit and 64 bit
101 
102 typedef struct _GFDPixelShader
103 {
104     GFDPixelShaderRegs  _regs;
105     u32                 shaderSize;
106     u32                 shaderPtr;
107     GX2ShaderMode       shaderMode;
108     u32                 numUniformBlocks;
109     u32                 uniformBlocks;
110     u32                 numUniforms;
111     u32                 uniformVars;
112     u32                 numInitialValues;
113     u32                 initialValues;
114     u32                 _numLoops;
115     u32                 _loopVars;
116     u32                 numSamplers;
117     u32                 samplerVars;
118     GX2RBuffer          shaderProgram;
119 } GFDPixelShader;
120 
121 typedef struct _GFDComputeShader
122 {
123     GFDComputeShaderRegs  _regs;
124     u32                 shaderSize;
125     u32                 shaderPtr;
126     u32                 numUniformBlocks;
127     u32                 uniformBlocks;
128     u32                 numUniforms;
129     u32                 uniformVars;
130     u32                 numInitialValues;
131     u32                 initialValues;
132     u32                 _numLoops;
133     u32                 _loopVars;
134     u32                 numSamplers;
135     u32                 samplerVars;
136     u32                 layout_size_x;
137     u32                 layout_size_y;
138     u32                 layout_size_z;
139     u32                 Over64Mode;
140     u32                 numWavesPerSIMD;
141     GX2RBuffer          shaderProgram;
142 } GFDComputeShader;
143 
144 // GFD specific Uniform Block structures to repack structure between 32 bit and 64 bit
145 
146 typedef struct _GFDUniformBlock
147 {
148     u32          name;
149     u32          location;
150     u32          size;
151 } GFDUniformBlock;
152 
153 // GFD specific Uniform Var structures to repack structure between 32 bit and 64 bit
154 
155 typedef struct _GFDUniformVar
156 {
157     u32          name;
158     GX2VarType   type;
159     u32          arrayCount;
160     u32          offset;
161     u32          blockIndex;
162 } GFDUniformVar;
163 
164 // GFD specific Attrib Var structures to repack structure between 32 bit and 64 bit
165 
166 typedef struct _GFDAttribVar
167 {
168     u32          name;
169     GX2VarType   type;
170     u32          arrayCount;
171     u32          location;
172 } GFDAttribVar;
173 
174 // GFD specific Sampler Var structures to repack structure between 32 bit and 64 bit
175 
176 typedef struct _GFDSamplerVar
177 {
178     u32            name;
179     GX2SamplerType type;
180     u32            location;
181 } GFDSamplerVar;
182 
183 typedef struct _GFDLoopVar
184 {
185     u32 reg[GX2_NUM_LOOP_VAR_U32_WORDS];
186 } GFDLoopVar;
187 
188 // name conversions
189 
190 // Note: These arrays must be kept in sync with the enum lists in gx2Enum.h
191 // Those lists must be kept in sync with the compiler output.
192 // The latter check is done elsewhere.  The former is below.
/// Printable names for variable types; the array index is the numeric
/// value noted beside each group (39 entries, 0..38).
static const char *varTypeName[] = {
    // 0..5: void and scalars
    "GX2_VAR_TYPE_VOID", "GX2_VAR_TYPE_BOOL", "GX2_VAR_TYPE_INT",
    "GX2_VAR_TYPE_UINT", "GX2_VAR_TYPE_FLOAT", "GX2_VAR_TYPE_DOUBLE",

    // 6..8: double vectors
    "GX2_VAR_TYPE_DVEC2", "GX2_VAR_TYPE_DVEC3", "GX2_VAR_TYPE_DVEC4",

    // 9..11: float vectors
    "GX2_VAR_TYPE_VEC2", "GX2_VAR_TYPE_VEC3", "GX2_VAR_TYPE_VEC4",

    // 12..14: bool vectors
    "GX2_VAR_TYPE_BVEC2", "GX2_VAR_TYPE_BVEC3", "GX2_VAR_TYPE_BVEC4",

    // 15..17: int vectors
    "GX2_VAR_TYPE_IVEC2", "GX2_VAR_TYPE_IVEC3", "GX2_VAR_TYPE_IVEC4",

    // 18..20: unsigned int vectors
    "GX2_VAR_TYPE_UVEC2", "GX2_VAR_TYPE_UVEC3", "GX2_VAR_TYPE_UVEC4",

    // 21..29: float matrices
    "GX2_VAR_TYPE_MAT2",   "GX2_VAR_TYPE_MAT2X3", "GX2_VAR_TYPE_MAT2X4",
    "GX2_VAR_TYPE_MAT3X2", "GX2_VAR_TYPE_MAT3",   "GX2_VAR_TYPE_MAT3X4",
    "GX2_VAR_TYPE_MAT4X2", "GX2_VAR_TYPE_MAT4X3", "GX2_VAR_TYPE_MAT4",

    // 30..38: double matrices
    "GX2_VAR_TYPE_DMAT2",   "GX2_VAR_TYPE_DMAT2X3", "GX2_VAR_TYPE_DMAT2X4",
    "GX2_VAR_TYPE_DMAT3X2", "GX2_VAR_TYPE_DMAT3",   "GX2_VAR_TYPE_DMAT3X4",
    "GX2_VAR_TYPE_DMAT4X2", "GX2_VAR_TYPE_DMAT4X3", "GX2_VAR_TYPE_DMAT4"
};
234 
/// Printable names for sampler types; the array index is the numeric
/// value noted beside each group (43 entries, 0..42).
static const char *samplerTypeName[] = {
    // 0..4: basic samplers
    "GX2_SAMPLER_TYPE_1D", "GX2_SAMPLER_TYPE_2D", "GX2_SAMPLER_TYPE_2D_RECT",
    "GX2_SAMPLER_TYPE_3D", "GX2_SAMPLER_TYPE_CUBE",

    // 5..8: shadow samplers
    "GX2_SAMPLER_TYPE_1D_SHADOW", "GX2_SAMPLER_TYPE_2D_SHADOW",
    "GX2_SAMPLER_TYPE_2D_RECT_SHADOW", "GX2_SAMPLER_TYPE_CUBE_SHADOW",

    // 9..14: array samplers
    "GX2_SAMPLER_TYPE_1D_ARRAY", "GX2_SAMPLER_TYPE_2D_ARRAY",
    "GX2_SAMPLER_TYPE_1D_ARRAY_SHADOW", "GX2_SAMPLER_TYPE_2D_ARRAY_SHADOW",
    "GX2_SAMPLER_TYPE_CUBE_ARRAY", "GX2_SAMPLER_TYPE_CUBE_ARRAY_SHADOW",

    // 15..18: buffer and multisample samplers
    "GX2_SAMPLER_TYPE_BUFFER", "GX2_SAMPLER_TYPE_RENDERBUFFER",
    "GX2_SAMPLER_TYPE_2D_MS", "GX2_SAMPLER_TYPE_2D_MS_ARRAY",

    // 19..30: signed integer samplers
    "GX2_SAMPLER_TYPE_INT_1D", "GX2_SAMPLER_TYPE_INT_2D",
    "GX2_SAMPLER_TYPE_INT_2D_RECT", "GX2_SAMPLER_TYPE_INT_3D",
    "GX2_SAMPLER_TYPE_INT_CUBE", "GX2_SAMPLER_TYPE_INT_1D_ARRAY",
    "GX2_SAMPLER_TYPE_INT_2D_ARRAY", "GX2_SAMPLER_TYPE_INT_CUBE_ARRAY",
    "GX2_SAMPLER_TYPE_INT_BUFFER", "GX2_SAMPLER_TYPE_INT_RENDERBUFFER",
    "GX2_SAMPLER_TYPE_INT_2D_MS", "GX2_SAMPLER_TYPE_INT_2D_MS_ARRAY",

    // 31..42: unsigned integer samplers
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_1D", "GX2_SAMPLER_TYPE_UNSIGNED_INT_2D",
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_2D_RECT", "GX2_SAMPLER_TYPE_UNSIGNED_INT_3D",
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_CUBE", "GX2_SAMPLER_TYPE_UNSIGNED_INT_1D_ARRAY",
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_2D_ARRAY", "GX2_SAMPLER_TYPE_UNSIGNED_INT_CUBE_ARRAY",
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_BUFFER", "GX2_SAMPLER_TYPE_UNSIGNED_INT_RENDERBUFFER",
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_2D_MS", "GX2_SAMPLER_TYPE_UNSIGNED_INT_2D_MS_ARRAY"
};
280 
/// Printable names for shader modes, in array-index order (3 entries).
static const char *shaderModeName[] = {
    "GX2_SHADER_MODE_UNIFORM_REGISTER",  // index 0
    "GX2_SHADER_MODE_UNIFORM_BLOCK",     // index 1
    "GX2_SHADER_MODE_GEOMETRY_SHADER"    // index 2
};
286 
287 /// Cleans out extra debug flags attached to offset
GFDCleanTag(u32 Offset)288 u32 GFDCleanTag(u32 Offset)     {return Offset & ~GFD_TAG_MASK;}
289 
290 /// Verifies offset stored in file is tagged with GFD_TAG_DAT
GFDCheckTagDAT(u32 Offset)291 BOOL GFDCheckTagDAT(u32 Offset) {return (Offset & GFD_TAG_MASK) == GFD_TAG_DAT;}
292 
293 /// Verifies offset stored in file is tagged with GFD_TAG_STR
GFDCheckTagSTR(u32 Offset)294 BOOL GFDCheckTagSTR(u32 Offset) {return (Offset & GFD_TAG_MASK) == GFD_TAG_STR;}
295 
296 /// A block of null terminated strings all stored in the same char array
297 /// All strings are padded with zeros at the end to be integer multiple of words (4) long.
298 typedef struct _GFDStringTable
299 {
300     u32    m_n;      ///< Current number of strings
301     u32    m_nDB;    ///< Current size of pDataBlock in chars
302     u32    m_maxDB;  ///< Max size of data block, again in chars
303     char * m_pDB;    ///< Block of data containing all the strings;
304 } GFDStringTable;
305 
306 /// A table of data segments, stored contiguously in the same int array as [Size, Data] pairs.
307 /// Data Tables are very similar to String Tables.  They just point arrays of data
308 /// rather than arrays null terminated strings.
309 typedef struct _GFDDataTable
310 {
311     u32    m_n;      ///< Current number of data blocks in hunk (good for error checking)
312     u32    m_nDB;    ///< Current size (e.g. next writeable offset) of m_pDB in chars
313     u32    m_maxDB;  ///< Max size of data block, again in chars
314     char * m_pDB;    ///< Block of data containing all the data
315 } GFDDataTable;
316 
317 /// Create a new empty string table, with space for max strings;
GFDCreateStringTable(u32 max)318 GFDStringTable *GFDCreateStringTable(u32 max)
319 {
320     GFDStringTable *pTable = (GFDStringTable *) malloc(sizeof(GFDStringTable));
321 
322     if ( pTable )
323     {
324         pTable->m_n = 0;
325 
326         pTable->m_nDB = 0;
327         pTable->m_maxDB = max;
328         pTable->m_pDB = (char *) malloc(pTable->m_maxDB * sizeof(char));
329         memset(pTable->m_pDB, 0, pTable->m_maxDB * sizeof(char));
330     }
331     else
332     {
333         printf("Error! Failed to create string table!\n");
334     }
335     return pTable;
336 }
337 
338 /// Frees the string table and all data associated with it
GFDDestroyStringTable(GFDStringTable * pTable)339 void GFDDestroyStringTable(GFDStringTable *pTable)
340 {
341     if(pTable)
342     {
343         free(pTable->m_pDB);
344         memset(pTable, 0, sizeof(GFDStringTable));     // paranioa to avoid any chance of reuse
345         free(pTable);
346     }
347 }
348 
349 /// Grows maximum space in the string table to the new size.  Will not make table smaller.
GFDGrowStringTableDB(GFDStringTable * pTable,u32 newMaxDB)350 void GFDGrowStringTableDB(GFDStringTable *pTable, u32 newMaxDB)
351 {
352     if(newMaxDB > pTable->m_maxDB)  // if want more space, malloc it up, and copy old data over
353     {
354         char *pDB = (char*) malloc(newMaxDB * sizeof(char));     // create new memory
355         memset(pDB, 0, newMaxDB * sizeof(char));
356         memcpy(pDB, pTable->m_pDB, pTable->m_maxDB);              // copy old data into this new location
357 
358         free(pTable->m_pDB);                                    // cleanup, and save new values
359         pTable->m_pDB   = pDB;
360         pTable->m_maxDB = newMaxDB;
361     }
362 }
363 
364 /// Adds null terminated string to the string table.
365 /// Returns offset into the string table of the begining of this string
GFDAddStringTable(GFDStringTable * pTable,const char * str)366 u32 GFDAddStringTable(GFDStringTable *pTable,  const char *str)
367 {
368     size_t len = strlen(str);
369     // (We want our strings to be word aligned to make transfer accross 'network' easier)
370     int len_pad = (len + 1 + 3) & ~0x3;         // round length up to word boundary.. (+1 for first terminating 0)
371 
372     if(pTable->m_nDB + len_pad + 1 >= pTable->m_maxDB)
373     {
374         int newSize = pTable->m_nDB;
375         if(len_pad >= newSize)  newSize = len_pad*2;        // be safe for really long strings
376         newSize = (newSize + 0x1f) & ~0x1f;                 // round to nice size
377         GFDGrowStringTableDB(pTable, pTable->m_nDB + newSize);      // double size if needed
378     }
379 
380     // We could check here for a duplicate string already in the table,
381     // and return that index, but let's skip that optimization for now.
382 
383     int n   = pTable->m_n;
384     int off = pTable->m_nDB;
385 
386     // copy string into the string table
387     strncpy_s(pTable->m_nDB + pTable->m_pDB, len+1, str, len+1);
388     int zero = 0;   // fill trailing pad with zeros too...
389     strncpy_s(pTable->m_nDB + pTable->m_pDB + len + 1, sizeof(u32), (char*) &zero, len_pad - len-1);
390 
391     // update our structs pointing to it
392     pTable->m_nDB += len_pad;
393     pTable->m_n   += 1;
394 
395     return off | GFD_TAG_STR;
396 }
397 
398 /// Create a new empty string table, with space for max strings;
GFDCreateDataTable(u32 max)399 GFDDataTable *GFDCreateDataTable(u32 max)
400 {
401     GFDDataTable *pTable = (GFDDataTable *) malloc(sizeof(GFDDataTable));
402 
403     if ( pTable )
404     {
405         pTable->m_n = 0;
406         pTable->m_nDB = 0;
407         pTable->m_maxDB = max * sizeof(u32);  // assume chars are average of 8 or so in size (will grow separatly from ppStrings)
408         pTable->m_pDB = (char *) malloc(pTable->m_maxDB * sizeof(char));
409         memset(pTable->m_pDB, 0xbb, pTable->m_maxDB * sizeof(char));
410     }
411     else
412     {
413         printf("Error! Failed to create data table!\n");
414     }
415 
416     return pTable;
417 }
418 
419 /// Frees the string table and all data associated with it
GFDDestroyDataTable(GFDDataTable * pTable)420 void GFDDestroyDataTable(GFDDataTable *pTable)
421 {
422     if(pTable)
423     {
424         free(pTable->m_pDB);
425         memset(pTable, 0, sizeof(GFDStringTable));  // paranioa to avoid any chance of reuse
426         free(pTable);
427     }
428 }
429 
430 /// Grows maximum space of data table to the new size, in bytes.  Will not make table smaller.
GFDGrowDataTableDB(GFDDataTable * pTable,u32 newMaxDB)431 void GFDGrowDataTableDB(GFDDataTable *pTable, u32 newMaxDB)
432 {
433     if(newMaxDB > pTable->m_maxDB)  // if want more space, malloc it up, and copy old data over
434     {
435         char *pDB = (char *) malloc(newMaxDB * sizeof(char));  // create new memory
436         memset(pDB, 0, newMaxDB * sizeof(char));
437         memcpy(pDB, pTable->m_pDB, pTable->m_maxDB);           // copy old data into this new location
438 
439         free(pTable->m_pDB);                                   // cleanup, and save new values
440         pTable->m_pDB   = pDB;
441         pTable->m_maxDB = newMaxDB;
442     }
443 }
444 
445 /// Adds a new hunk of data to the data table, of specified length in bytes.
446 /// nBytes must be integer multiple of 4.
447 /// Returns byte offset into that table.
448 /// adds new block to our hunk table.  Returns new size
GFDAddDataTable(GFDDataTable * pTable,void * data,u32 nBytes)449 u32 GFDAddDataTable(GFDDataTable *pTable, void *data, u32 nBytes)
450 {
451     assert( (nBytes & 0x3) == 0 && "nBytes must be multiple of 4");
452 
453     if(pTable->m_nDB + nBytes + sizeof(u32) >= pTable->m_maxDB)
454     {
455         size_t newSize = pTable->m_nDB;                                // double size
456         if(nBytes >= newSize)  newSize = nBytes*2;                  // if doubling isn't enough, make bigger
457         int finalSize = (pTable->m_nDB + newSize + 0x1f) & ~0x1f;   // finally round to nearest 32 bytes
458         GFDGrowDataTableDB(pTable, finalSize);
459     }
460 
461     u32 off = pTable->m_nDB;        // offset is data, after the length...
462 
463     memcpy(pTable->m_pDB + pTable->m_nDB, data, nBytes);
464 
465     // update our structs pointing to it
466     pTable->m_nDB += nBytes;
467     pTable->m_n   += 1;
468 
469     return off | GFD_TAG_DAT;        // OR in this silly constant to use for catching errors
470 }
471 
472 //--------------------------------------------------------------------------
473 
474 /// Repack a vertex shader from a 64-bit structure to a 32-bit structure.
475 /// We output a GX2VertexShader * for convenience, but it is not valid for 64-bit.
476 /// All pointers are cast to 32-bit integers. It is therefore 7*4 bytes shorter.
477 /// The return value is the resulting 32-bit structure size.
GFDRepackVertexShaderFor32Bit(GX2VertexShader * pVSin64,GFDVertexShader * pVSout32)478 u32 GFDRepackVertexShaderFor32Bit(GX2VertexShader *pVSin64, GFDVertexShader *pVSout32)
479 {
480 	assert(sizeof(pVSout32->_regs) == sizeof(pVSin64->_regs));
481     memcpy(&pVSout32->_regs, pVSin64->_regs, sizeof(pVSout32->_regs));
482 
483     pVSout32->shaderSize       =       pVSin64->shaderSize;
484     pVSout32->shaderPtr        = (u32) pVSin64->shaderPtr;
485     pVSout32->shaderMode       =       pVSin64->shaderMode;
486     pVSout32->numUniformBlocks =       pVSin64->numUniformBlocks;
487     pVSout32->uniformBlocks    = (u32) pVSin64->uniformBlocks;
488     pVSout32->numUniforms      =       pVSin64->numUniforms;
489     pVSout32->uniformVars      = (u32) pVSin64->uniformVars;
490     pVSout32->numInitialValues =       pVSin64->numInitialValues;
491     pVSout32->initialValues    = (u32) pVSin64->initialValues;
492     pVSout32->_numLoops        =       pVSin64->_numLoops;
493     pVSout32->_loopVars        = (u32) pVSin64->_loopVars;
494     pVSout32->numSamplers      =       pVSin64->numSamplers;
495     pVSout32->samplerVars      = (u32) pVSin64->samplerVars;
496     pVSout32->numAttribs       =       pVSin64->numAttribs;
497     pVSout32->attribVars       = (u32) pVSin64->attribVars;
498     pVSout32->ringItemsize     =       pVSin64->ringItemsize;
499     pVSout32->hasStreamOut     = (u32) pVSin64->hasStreamOut;
500 
501 	assert(sizeof(pVSout32->streamOutVertexStride) == sizeof(pVSin64->streamOutVertexStride));
502     memcpy(&pVSout32->streamOutVertexStride, pVSin64->streamOutVertexStride, sizeof(pVSout32->streamOutVertexStride));
503 
504     pVSout32->shaderProgram =         pVSin64->shaderProgram;
505 
506     return sizeof(GFDVertexShader);
507 }
508 
509 /// Repack a pixel shader from a 64-bit structure to a 32-bit structure.
510 /// We output a GX2PixelShader * for convenience, but it is not valid for 64-bit.
511 /// All pointers are cast to 32-bit integers. It is therefore 6*4 bytes shorter.
512 /// The return value is the resulting 32-bit structure size.
GFDRepackPixelShaderFor32Bit(GX2PixelShader * pPSin64,GFDPixelShader * pPSout32)513 u32 GFDRepackPixelShaderFor32Bit(GX2PixelShader *pPSin64, GFDPixelShader *pPSout32)
514 {
515 	assert(sizeof(pPSout32->_regs) == sizeof(pPSin64->_regs));
516     memcpy(&pPSout32->_regs, pPSin64->_regs, sizeof(pPSout32->_regs));
517 
518     pPSout32->shaderSize       =       pPSin64->shaderSize;
519     pPSout32->shaderPtr        = (u32) pPSin64->shaderPtr;
520     pPSout32->shaderMode       =       pPSin64->shaderMode;
521     pPSout32->numUniformBlocks =       pPSin64->numUniformBlocks;
522     pPSout32->uniformBlocks    = (u32) pPSin64->uniformBlocks;
523     pPSout32->numUniforms      =       pPSin64->numUniforms;
524     pPSout32->uniformVars      = (u32) pPSin64->uniformVars;
525     pPSout32->numInitialValues =       pPSin64->numInitialValues;
526     pPSout32->initialValues    = (u32) pPSin64->initialValues;
527     pPSout32->_numLoops        =       pPSin64->_numLoops;
528     pPSout32->_loopVars        = (u32) pPSin64->_loopVars;
529     pPSout32->numSamplers      =       pPSin64->numSamplers;
530     pPSout32->samplerVars      = (u32) pPSin64->samplerVars;
531     pPSout32->shaderProgram    =       pPSin64->shaderProgram;
532 
533     return sizeof(GFDPixelShader);
534 }
535 
536 /// Repack a geometry shader from a 64-bit structure to a 32-bit structure.
537 /// We output a GX2GeometryShader * for convenience, but it is not valid for 64-bit.
538 /// All pointers are cast to 32-bit integers. It is therefore 6*4 bytes shorter.
539 /// The return value is the resulting 32-bit structure size.
GFDRepackGeometryShaderFor32Bit(GX2GeometryShader * pGSin64,GFDGeometryShader * pGSout32)540 u32 GFDRepackGeometryShaderFor32Bit(GX2GeometryShader *pGSin64, GFDGeometryShader *pGSout32)
541 {
542 	assert(sizeof(pGSout32->_regs) == sizeof(pGSin64->_regs));
543     memcpy(&pGSout32->_regs, pGSin64->_regs, sizeof(pGSout32->_regs));
544 
545     pGSout32->shaderSize       =       pGSin64->shaderSize;
546     pGSout32->shaderPtr        = (u32) pGSin64->shaderPtr;
547     pGSout32->copyShaderSize   =       pGSin64->copyShaderSize;
548     pGSout32->copyShaderPtr    = (u32) pGSin64->copyShaderPtr;
549     pGSout32->shaderMode       =       pGSin64->shaderMode;
550     pGSout32->numUniformBlocks =       pGSin64->numUniformBlocks;
551     pGSout32->uniformBlocks    = (u32) pGSin64->uniformBlocks;
552     pGSout32->numUniforms      =       pGSin64->numUniforms;
553     pGSout32->uniformVars      = (u32) pGSin64->uniformVars;
554     pGSout32->numInitialValues =       pGSin64->numInitialValues;
555     pGSout32->initialValues    = (u32) pGSin64->initialValues;
556     pGSout32->_numLoops        =       pGSin64->_numLoops;
557     pGSout32->_loopVars        = (u32) pGSin64->_loopVars;
558     pGSout32->numSamplers      =       pGSin64->numSamplers;
559     pGSout32->samplerVars      = (u32) pGSin64->samplerVars;
560     pGSout32->ringItemsize     =       pGSin64->ringItemsize;
561     pGSout32->hasStreamOut     = (u32) pGSin64->hasStreamOut;
562 
563 	assert(sizeof(pGSout32->streamOutVertexStride) == sizeof(pGSin64->streamOutVertexStride));
564     memcpy(&pGSout32->streamOutVertexStride, pGSin64->streamOutVertexStride, sizeof(pGSout32->streamOutVertexStride));
565 
566     pGSout32->shaderProgram     =       pGSin64->shaderProgram;
567     pGSout32->copyShaderProgram =       pGSin64->copyShaderProgram;
568 
569     return sizeof(GFDGeometryShader);
570 }
571 
572 /// Repack a compute shader from a 64-bit structure to a 32-bit structure.
573 /// We output a GX2ComputeShader * for convenience, but it is not valid for 64-bit.
574 /// All pointers are cast to 32-bit integers. It is therefore 7*4 bytes shorter.
575 /// The return value is the resulting 32-bit structure size.
GFDRepackComputeShaderFor32Bit(GX2ComputeShader * pCSin64,GFDComputeShader * pCSout32)576 u32 GFDRepackComputeShaderFor32Bit(GX2ComputeShader *pCSin64, GFDComputeShader *pCSout32)
577 {
578 	assert(sizeof(pCSout32->_regs) == sizeof(pCSin64->_regs));
579     memcpy(&pCSout32->_regs, pCSin64->_regs, sizeof(pCSout32->_regs));
580 
581     pCSout32->shaderSize        =       pCSin64->shaderSize;
582     pCSout32->shaderPtr         = (u32) pCSin64->shaderPtr;
583     pCSout32->numUniformBlocks  =       pCSin64->numUniformBlocks;
584     pCSout32->uniformBlocks     = (u32) pCSin64->uniformBlocks;
585     pCSout32->numUniforms       =       pCSin64->numUniforms;
586     pCSout32->uniformVars       = (u32) pCSin64->uniformVars;
587     pCSout32->numInitialValues  =       pCSin64->numInitialValues;
588     pCSout32->initialValues     = (u32) pCSin64->initialValues;
589     pCSout32->_numLoops         =       pCSin64->_numLoops;
590     pCSout32->_loopVars         = (u32) pCSin64->_loopVars;
591     pCSout32->numSamplers       =       pCSin64->numSamplers;
592     pCSout32->samplerVars       = (u32) pCSin64->samplerVars;
593     pCSout32->layout_size_x     =       pCSin64->layout_size_x;
594     pCSout32->layout_size_y     =       pCSin64->layout_size_y;
595     pCSout32->layout_size_z     =       pCSin64->layout_size_z;
596     pCSout32->Over64Mode        =       pCSin64->Over64Mode;
597     pCSout32->numWavesPerSIMD   =       pCSin64->numWavesPerSIMD;
598     pCSout32->shaderProgram     = pCSin64->shaderProgram;
599 
600     return sizeof(GFDComputeShader);
601 }
602 
603 /// Repack a uniform block array from a 64-bit structure to a 32-bit structure.
604 /// We output a GX2UniformBlock * for convenience, but it is not valid for 64-bit.
605 /// All pointers are cast to 32-bit integers. It is therefore 1*4*n bytes shorter.
606 /// The return value is the resulting 32-bit structure size.
GFDRepackUniformBlockArrayFor32Bit(GX2UniformBlock * pUBin64,GFDUniformBlock * pUBout32,u32 n)607 u32 GFDRepackUniformBlockArrayFor32Bit(GX2UniformBlock *pUBin64, GFDUniformBlock *pUBout32, u32 n)
608 {
609     for(u32 i=0; i<n; i++) {
610 		pUBout32[i].name     = (u32) pUBin64[i].name;
611         pUBout32[i].location =       pUBin64[i].location;
612         pUBout32[i].size     =       pUBin64[i].size;
613     }
614     return sizeof(GFDUniformBlock)*n;
615 }
616 
617 /// Repack a uniform var array from a 64-bit structure to a 32-bit structure.
618 /// We output a GX2UniformVar * for convenience, but it is not valid for 64-bit.
619 /// All pointers are cast to 32-bit integers. It is therefore 2*4*n bytes shorter.
620 /// The return value is the resulting 32-bit structure size.
GFDRepackUniformVarArrayFor32Bit(GX2UniformVar * pUVin64,GFDUniformVar * pUVout32,u32 n)621 u32 GFDRepackUniformVarArrayFor32Bit(GX2UniformVar *pUVin64, GFDUniformVar *pUVout32, u32 n)
622 {
623     for(u32 i=0; i<n; i++) {
624         pUVout32[i].name       = (u32) pUVin64[i].name;
625         pUVout32[i].type       =       pUVin64[i].type;
626         pUVout32[i].arrayCount =       pUVin64[i].arrayCount;
627         pUVout32[i].offset     =       pUVin64[i].offset;
628         pUVout32[i].blockIndex =       pUVin64[i].blockIndex;
629     }
630     return sizeof(GFDUniformVar)*n;
631 }
632 
633 /// Repack an attrib var array from a 64-bit structure to a 32-bit structure.
634 /// We output a GX2AttribVar * for convenience, but it is not valid for 64-bit.
635 /// All pointers are cast to 32-bit integers. It is therefore 1*4*n bytes shorter.
636 /// The return value is the resulting 32-bit structure size.
GFDRepackAttribVarArrayFor32Bit(GX2AttribVar * pAVin64,GFDAttribVar * pAVout32,u32 n)637 u32 GFDRepackAttribVarArrayFor32Bit(GX2AttribVar *pAVin64, GFDAttribVar *pAVout32, u32 n)
638 {
639     for(u32 i=0; i<n; i++) {
640         pAVout32[i].name       = (u32) pAVin64[i].name;
641         pAVout32[i].type       =       pAVin64[i].type;
642         pAVout32[i].arrayCount =       pAVin64[i].arrayCount;
643         pAVout32[i].location   =       pAVin64[i].location;
644     }
645     return sizeof(GFDAttribVar)*n;
646 }
647 
648 /// Repack a (texture) sampler var array from a 64-bit structure to a 32-bit structure.
649 /// We output a GX2SamplerVar * for convenience, but it is not valid for 64-bit.
650 /// All pointers are cast to 32-bit integers. It is therefore 1*4*n bytes shorter.
651 /// The return value is the resulting 32-bit structure size.
GFDRepackSamplerVarArrayFor32Bit(GX2SamplerVar * pSVin64,GFDSamplerVar * pSVout32,u32 n)652 u32 GFDRepackSamplerVarArrayFor32Bit(GX2SamplerVar *pSVin64, GFDSamplerVar *pSVout32, u32 n)
653 {
654     for(u32 i=0; i<n; i++) {
655         pSVout32[i].name     = (u32) pSVin64[i].name;
656         pSVout32[i].type     =       pSVin64[i].type;
657         pSVout32[i].location =       pSVin64[i].location;
658     }
659     return sizeof(GFDSamplerVar)*n;
660 }
661 // ------------------------------------------------------------
662 
/// Create the flat data-block representation of a GX2VertexShader structure.
/// Call GFDDestroyDataTable() on the returned object once done with it.
GFDCreateBlockRelocateHeaderVSH(GX2VertexShader * pVS)665 GFDDataTable* GFDCreateBlockRelocateHeaderVSH(GX2VertexShader *pVS)
666 {
667     // Create second data structure to hold flattened, offseted version of our original shader
668     GX2VertexShader vsCopy;
669     memcpy(&vsCopy,  pVS,  sizeof(GX2VertexShader));
670 
671     // Walk thru copy, converting all pointers to data blocks in table, and changing
672     // addresses to offsets into the data block
673 
674     // Create data table to hold the structure elements
675     GFDDataTable *pDT = GFDCreateDataTable(sizeof(GX2VertexShader) + vsCopy.numUniforms * 8 + 512);      // todo - pick better number
676 
677     // How many pointers do we need to patch? (The 11 here is empirically determined - asserts at end if if wrong)
678     // 11 = 1 (shader itself) + 7 in GX2VertexShader + 1 (string table) + 1 (patch list) + 1 (trailer)
679     int nElements = 11 + vsCopy.numUniformBlocks + vsCopy.numUniforms + vsCopy.numSamplers + vsCopy.numAttribs;
680     int nE = 0;
681     u32 size;
682     u32 *pAddr   = (u32*) malloc( nElements * sizeof(u32));    // src offsets into data block that need patching
683     u32 *pOffset = (u32*) malloc( nElements * sizeof(u32));    // dst offsets into data block for, containing offets to write
684 
685     memset(pOffset, 0, nElements * sizeof(u32));
686     memset(pAddr, 0,  nElements * sizeof(u32));
687 
688     // 0: Store main structure itself  (we'll rewrite offsets at the end).
689     // For allocation and alignment purposes, this needs to be the first hunk in the data table
690     GFDVertexShader vsCopy32;
691     size = GFDRepackVertexShaderFor32Bit(&vsCopy, &vsCopy32);
692     int oMain = nE;
693     pOffset[nE] = GFDAddDataTable(pDT, &vsCopy32, size);
694     pAddr[nE]   = 0;       // don't patch this location
695     nE++;
696 
697     // 1: Store uniform block/buffer array
698     GFDUniformBlock *pUB = (GFDUniformBlock *) malloc(sizeof(GFDUniformBlock)*vsCopy.numUniformBlocks);
699     if ( !pUB )
700     {
701         printf("Error! Failed to allocate Uniform Block structure!\n");
702         return NULL;
703     }
704     size = GFDRepackUniformBlockArrayFor32Bit(vsCopy.uniformBlocks, pUB, vsCopy.numUniformBlocks);
705     int oUniformBuffers = nE;
706     pOffset[nE] = GFDAddDataTable(pDT, pUB, size);
707     pAddr[nE]  = (vsCopy.numUniformBlocks == 0) ? 0 : pOffset[oMain] + (u32)&vsCopy32.uniformBlocks - (u32)&vsCopy32;
708     nE++;
709     free(pUB);
710 
711     // 2: Store uniform array
712     GFDUniformVar *pUV = (GFDUniformVar *) malloc(sizeof(GFDUniformVar)*vsCopy.numUniforms);
713     if ( !pUV )
714     {
715         printf("Error! Failed to allocate Uniform Variable structure!\n");
716         return NULL;
717     }
718     size = GFDRepackUniformVarArrayFor32Bit(vsCopy.uniformVars, pUV, vsCopy.numUniforms);
719     int oUniforms = nE;
720     pOffset[nE] = GFDAddDataTable(pDT, pUV, size);
721     pAddr[nE]  = (vsCopy.numUniforms == 0) ? 0 : pOffset[oMain] + (u32)&vsCopy32.uniformVars - (u32)&vsCopy32;
722     nE++;
723     free(pUV);
724 
725     // 3: Store uniform initial values
726     // Note initial values points to uniform block, a contiguous section of GX2UniformInitialValues
727     int oUniformData = nE;
728     pOffset[nE] = GFDAddDataTable(pDT, vsCopy.initialValues, vsCopy.numInitialValues * sizeof(GX2UniformInitialValue));
729     pAddr[nE]   = (vsCopy.numInitialValues == 0) ? 0 : pOffset[oMain] + (u32)&vsCopy32.initialValues - (u32)&vsCopy32;
730     nE++;
731 
732     // 4: Store loop
733     int oLoops = nE;
734     pOffset[nE] = GFDAddDataTable(pDT, vsCopy._loopVars, vsCopy._numLoops * sizeof(GFDLoopVar));
735     pAddr[nE]  = (vsCopy._numLoops == 0) ? 0 : pOffset[oMain] + (u32)&vsCopy32._loopVars - (u32)&vsCopy32;
736     nE++;
737 
738     // 5: Store sampler descriptors
739     GFDSamplerVar *pSV = (GFDSamplerVar *) malloc(sizeof(GFDSamplerVar)*vsCopy.numSamplers);
740     if ( !pSV )
741     {
742         printf("Error! Failed to allocate Sampler Variable structure!\n");
743         return NULL;
744     }
745     size = GFDRepackSamplerVarArrayFor32Bit(vsCopy.samplerVars, pSV, vsCopy.numSamplers);
746     int oSamplers = nE;
747     pOffset[nE] = GFDAddDataTable(pDT, pSV, size);
748     pAddr[nE]   = (vsCopy.numSamplers == 0) ? 0 : pOffset[oMain] + (u32)&vsCopy32.samplerVars - (u32)&vsCopy32;
749     nE++;
750     free(pSV);
751 
752     // 6: Store Attributes
753     GFDAttribVar *pAV = (GFDAttribVar *) malloc(sizeof(GFDAttribVar)*vsCopy.numAttribs);
754     if ( !pAV )
755     {
756         printf("Error! Failed to allocate Attribute Variable structure!\n");
757         return NULL;
758     }
759     size = GFDRepackAttribVarArrayFor32Bit(vsCopy.attribVars, pAV, vsCopy.numAttribs);
760     int oAttrib_names = nE;
761     pOffset[nE] = GFDAddDataTable(pDT, pAV, size);
762     pAddr[nE]  = (vsCopy.numAttribs == 0) ? 0 : pOffset[oMain] + (u32)&vsCopy32.attribVars - (u32)&vsCopy32;
763     nE++;
764     free(pAV);
765 
766     // 7a: Create a string table to store all the strings in
767     const int kAvgCharsPerString = 12;   // will auto-grow if actually biger
768     GFDStringTable *pStrTable = GFDCreateStringTable( ( vsCopy.numUniformBlocks + vsCopy.numUniforms + vsCopy.numSamplers + vsCopy.numAttribs) * kAvgCharsPerString);
769     u32 offStringTable = pDT->m_nDB;     // current offset...
770 
771     // s1: Store each uniform block name (in common string table), as well as pointer to the initial value in uniform block
772     for(u32 i = 0; i < vsCopy.numUniformBlocks; i++)
773     {
774         pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, vsCopy.uniformBlocks[i].name );
775         pAddr[nE]   = pOffset[oUniformBuffers] + i * sizeof(GFDUniformBlock) + (u32)&vsCopy.uniformBlocks[i].name - (u32)&vsCopy.uniformBlocks[i];
776         nE++;
777     }
778 
779     // s2: Store each uniform name (in common string table)
780     for(u32 i = 0; i < vsCopy.numUniforms; i++)
781     {
782         pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, vsCopy.uniformVars[i].name );
783         pAddr[nE]   = pOffset[oUniforms] + i * sizeof(GFDUniformVar) + (u32)&vsCopy.uniformVars[i].name - (u32)&vsCopy.uniformVars[i];
784         nE++;
785     }
786 
787     // s3: Store each sampler name (in common string table)
788     for(u32 i = 0; i < vsCopy.numSamplers; i++)
789     {
790         pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, vsCopy.samplerVars[i].name );
791         pAddr[nE]   = pOffset[oSamplers] + i * sizeof(GFDSamplerVar) + (u32)&vsCopy.samplerVars[i].name - (u32)&vsCopy.samplerVars[i];
792         nE++;
793     }
794 
795     // s4: Store each attrib name (in common string table)
796     for(u32 i = 0; i < vsCopy.numAttribs; i++)
797     {
798         pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, vsCopy.attribVars[i].name );
799         pAddr[nE]   = pOffset[oAttrib_names] + i * sizeof(GFDAttribVar) + (u32)&vsCopy.attribVars[i].name - (u32)&vsCopy.attribVars[i];
800         nE++;
801     }
802 
803     // 8: Store the string table (watch out for 1-3 bytes of padding)
804     int oStringTable = nE;
805 
806     // Note, although arrays of chars don't seem to be modified to go into network order, we cache our
807     // stringtable as a block in a word array which does get byte-flipped.  So let's pre-flip it here
808     // so it comes out right.
809     int nWordsStrTbl = (pStrTable->m_nDB + 0x3) / 4;
810     GFDEndianSwap8in32((u32*) pStrTable->m_pDB, nWordsStrTbl);
811     pOffset[nE] = GFDAddDataTable(pDT, pStrTable->m_pDB, nWordsStrTbl*4);  // simply write out string table data
812     pAddr[nE]   = 0;        // don't patch this location
813     nE++;
814 
815     // let's convert it back so if we read it latter, we won't have problems
816     GFDEndianSwap8in32((u32*) pStrTable->m_pDB, nWordsStrTbl);
817 
818     // end0: Turn all our patch addresses to offsets
819     for(int i = 0; i < nE; i++)
820     {
821         if(pAddr[i] != 0)
822         {
823             *((u32*) (pDT->m_pDB + GFDCleanTag(pAddr[i]))) = pOffset[i];
824         }
825     }
826 
827     // 9: Store the offset patch list
828     // (not *really* needed, could reconstruct if know all data types, but makes it a *lot* easier)
829     // After reading data block in at addrX, increment each location i of *(addrX + pAddr[i]) += addrX;
830     // By putting this after main, we can allocate space for it at same time and dellocate it when main goes away,
831     // without affecting the main data.
832     int oPatchTable = nE;
833     pOffset[nE] = GFDAddDataTable(pDT, pAddr, nElements * sizeof(u32));  // simply write out zeros now..
834     pAddr[nE]   = 0;        // don't patch this location either
835     nE++;
836 
837     // 10: Finally, a small structure describing this data block.
838     GFDBlockRelocationHeader *vshTrailer = (GFDBlockRelocationHeader *) malloc( sizeof(GFDBlockRelocationHeader) );
839     memset(vshTrailer, 0, sizeof(GFDBlockRelocationHeader));
840     vshTrailer->magic = GFD_SWAP_8_IN_32(GFD_BLOCK_RELOCATION_HEADER_MAGIC);
841     vshTrailer->type  = 0;
842     vshTrailer->size  = sizeof(GFDBlockRelocationHeader);
843 
844     int oTrailer = nE;
845 
846     // Fill in our trailer and write it out
847     vshTrailer->dataSize               = GFDCleanTag(pOffset[oPatchTable]) - GFDCleanTag(pOffset[oMain]);  // size of the main data section (allocate this size of contiguous memory)
848     vshTrailer->dataOffset             = pOffset[oMain];             // offset of the main data section in this block
849     vshTrailer->stringTableCharNumber  = pStrTable->m_nDB;           // number of characters in the string table
850     vshTrailer->stringTableOffset      = pOffset[oStringTable];      // offset of string table in this block
851     vshTrailer->patchTableOffsetNumber = nE;                         // number of offsets in the patch table
852     vshTrailer->patchTableOffset       = pOffset[oPatchTable] ;      // offset of the patch table in this block
853 
854     pOffset[nE] = GFDAddDataTable(pDT, vshTrailer, sizeof(GFDBlockRelocationHeader));
855     pAddr[nE] = 0;          // don't patch this location
856     nE++;
857 
858     free(vshTrailer);
859     vshTrailer = NULL;
860 
861     assert(offStringTable == GFDCleanTag(pOffset[oStringTable]) && "Guess for offset table in 7a was wrong");
862     assert(nE <= nElements && "Too few offsets allocated");
863 
864     free(pAddr);
865     free(pOffset);
866 
867     GFDDestroyStringTable( pStrTable );
868     return pDT;
869 }
870 
871 /// Create the flat datablock representation of a GX2PixelShader structure
872 /// Call GFDDataTableDestroy() on returned object once doen with it.
GFDCreateBlockRelocateHeaderPSH(GX2PixelShader * pPS)873 GFDDataTable* GFDCreateBlockRelocateHeaderPSH(GX2PixelShader *pPS)
874 {
875     // Create second data structure to hold flattened, offseted version of our original shader
876     GX2PixelShader psCopy;
877     memcpy(&psCopy,  pPS,  sizeof(GX2PixelShader));
878 
879     // Walk thru copy, converting all pointers to data blocks in table, and changing
880     // addresses to offsets into the data block
881 
882     // Create data table to hold the structure elements
883     GFDDataTable *pDT = GFDCreateDataTable(sizeof(GX2PixelShader) + psCopy.numUniforms * 8 + 1024);      // todo - pick better number
884 
885     // How many pointers do we need to patch? (The 10 here is empirically determined - asserts at end if if wrong)
886     // 10 = 1 (shader itself) + 6 in GX2PixelShader + 1 (string table) + 1 (patch list) + 1 (trailer)
887     int nElements = 10 + psCopy.numUniformBlocks + 1*psCopy.numUniforms + psCopy.numSamplers;
888     int nE = 0;
889     u32 size;
890     u32 *pAddr   = (u32*) malloc( nElements * sizeof(u32));    // src offsets into data block that need patching
891     u32 *pOffset = (u32*) malloc( nElements * sizeof(u32));    // dst offsets into data block for, containing offets to write
892     memset(pOffset, 0, nElements * sizeof(u32));
893     memset(pAddr, 0,  nElements * sizeof(u32));
894 
895     // 0: Store main structure itself  (we'll rewrite offsets at the end).
896     // For allocation and alignment purposes, this needs to be the first hunk in the data table
897     GFDPixelShader psCopy32;
898     size = GFDRepackPixelShaderFor32Bit(&psCopy, &psCopy32);
899     int oMain = nE;
900     pOffset[nE] = GFDAddDataTable(pDT, &psCopy32, size);
901     pAddr[nE]   = 0;       // don't patch this location
902     nE++;
903 
904     // 1: Store uniform block array
905     GFDUniformBlock *pUB = (GFDUniformBlock *) malloc(sizeof(GFDUniformBlock)*psCopy.numUniformBlocks);
906     size = GFDRepackUniformBlockArrayFor32Bit(psCopy.uniformBlocks, pUB, psCopy.numUniformBlocks);
907     int oUniformBuffers = nE;
908     pOffset[nE] = GFDAddDataTable(pDT, pUB, size);
909     pAddr[nE]  = (psCopy.numUniformBlocks == 0) ? 0 : pOffset[oMain] + (u32)&psCopy32.uniformBlocks - (u32)&psCopy32;
910     nE++;
911     free(pUB);
912 
913     // 2: Store uniform array
914     GFDUniformVar *pUV = (GFDUniformVar *) malloc(sizeof(GFDUniformVar)*psCopy.numUniforms);
915     size = GFDRepackUniformVarArrayFor32Bit(psCopy.uniformVars, pUV, psCopy.numUniforms);
916     int oUniforms = nE;
917     pOffset[nE] = GFDAddDataTable(pDT, pUV, size);
918     pAddr[nE]  = (psCopy.numUniforms == 0) ? 0 : pOffset[oMain] + (u32)&psCopy32.uniformVars - (u32)&psCopy32;
919     nE++;
920     free(pUV);
921 
922     // 3: Store uniform initial values
923     // Note initial values points to uniform block, a contiguous section of GX2UniformInitialValues
924     int oUniformData = nE;
925     pOffset[nE] = GFDAddDataTable(pDT, psCopy.initialValues, psCopy.numInitialValues * sizeof(GX2UniformInitialValue));
926     pAddr[nE]   = (psCopy.numInitialValues == 0) ? 0 : pOffset[oMain] + (u32)&psCopy32.initialValues - (u32)&psCopy32;
927     nE++;
928 
929     // 4: Store loop array
930     int oLoops = nE;
931     pOffset[nE] = GFDAddDataTable(pDT, psCopy._loopVars, psCopy._numLoops * sizeof(GFDLoopVar));
932     pAddr[nE]  = (psCopy._numLoops == 0) ? 0 : pOffset[oMain] + (u32)&psCopy32._loopVars - (u32)&psCopy32;
933     nE++;
934 
935     // 5: Store sampler descriptors
936     GFDSamplerVar *pSV = (GFDSamplerVar *) malloc(sizeof(GFDSamplerVar)*psCopy.numSamplers);
937     size = GFDRepackSamplerVarArrayFor32Bit(psCopy.samplerVars, pSV, psCopy.numSamplers);
938     int oSamplers = nE;
939     pOffset[nE] = GFDAddDataTable(pDT, pSV, size);
940     pAddr[nE]   = (psCopy.numSamplers == 0) ? 0 : pOffset[oMain] + (u32)&psCopy32.samplerVars - (u32)&psCopy32;
941     nE++;
942     free(pSV);
943 
944     // 6a: Create a string table to store all the strings in
945     const int kAvgCharsPerString = 12;   // will auto-grow if actually biger
946     GFDStringTable *pStrTable = GFDCreateStringTable( ( psCopy.numUniformBlocks +  psCopy.numUniforms + psCopy.numSamplers) * kAvgCharsPerString);
947     u32 offStringTable = pDT->m_nDB;     // current offset...
948 
949     // s1: Store each uniform block name (in common string table), as well as pointer to the initial value in uniform block
950     for(u32 i = 0; i < psCopy.numUniformBlocks; i++)
951     {
952         pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, psCopy.uniformBlocks[i].name );
953         pAddr[nE]   = pOffset[oUniformBuffers] + i * sizeof(GFDUniformBlock) + (u32)&psCopy.uniformBlocks[i].name - (u32)&psCopy.uniformBlocks[i];
954         nE++;
955     }
956 
957     // s2: Store each uniform name (in common string table)
958     for(u32 i = 0; i < psCopy.numUniforms; i++)
959     {
960         pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, psCopy.uniformVars[i].name );
961         pAddr[nE]   = pOffset[oUniforms] + i * sizeof(GFDUniformVar) + (u32)&psCopy.uniformVars[i].name - (u32)&psCopy.uniformVars[i];
962         nE++;
963     }
964 
965     // s3: Store each sampler name (in common string table)
966     for(u32 i = 0; i < psCopy.numSamplers; i++)
967     {
968         pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, psCopy.samplerVars[i].name );
969         pAddr[nE]   = pOffset[oSamplers] + i * sizeof(GFDSamplerVar) + (u32)&psCopy.samplerVars[i].name - (u32)&psCopy.samplerVars[i];
970         nE++;
971     }
972 
973     // 7: Store the string table (watch out for 1-3 bytes of padding)
974     int oStringTable = nE;
975 
976     // Note, although arrays of chars don't seem to be modified to go into network order, we cache our
977     // stringtable as a block in a word array which does get byte-flipped.  So let's pre-flip it here
978     // so it comes out right.
979     int nWordsStrTbl = (pStrTable->m_nDB + 0x3) / 4;
980     GFDEndianSwap8in32((u32*) pStrTable->m_pDB, nWordsStrTbl);
981     pOffset[nE] = GFDAddDataTable(pDT, pStrTable->m_pDB, nWordsStrTbl*4);  // simply write out string table data
982     pAddr[nE]   = 0;        // don't patch this location
983     nE++;
984 
985     // let's convert it back so if we read it latter, we won't have problems
986     GFDEndianSwap8in32((u32*) pStrTable->m_pDB, nWordsStrTbl);
987 
988     // end0: Turn all our patch addresses to offsets
989     for(int i = 0; i < nE; i++)
990     {
991         if(pAddr[i] != 0)
992         {
993             *((u32*) (pDT->m_pDB + GFDCleanTag(pAddr[i]))) = pOffset[i];
994         }
995     }
996 
997     // 8: Store the offset patch list
998     // (not *really* needed, could reconstruct if know all data types, but makes *alot* easier)
999     // After reading data block in at addrX, increment each location i of *(addrX + pAddr[i]) += addrX;
1000     // By putting this after main, we can allocate space for it at same time and dellocate it when main goes away,
1001     // without affecting the main data.
1002     int oPatchTable = nE;
1003     pOffset[nE] = GFDAddDataTable(pDT, pAddr, nElements * sizeof(u32));        // simply write out zeros now..
1004     pAddr[nE]   = 0;        // don't patch this location either
1005     nE++;
1006 
1007     // 9: Finally, a small structure describing this data block.
1008     GFDBlockRelocationHeader *pshTrailer = (GFDBlockRelocationHeader *) malloc( sizeof(GFDBlockRelocationHeader) );
1009     memset(pshTrailer, 0, sizeof(GFDBlockRelocationHeader));
1010     pshTrailer->magic   = GFD_SWAP_8_IN_32(GFD_BLOCK_RELOCATION_HEADER_MAGIC);
1011     pshTrailer->type    = 0;
1012     pshTrailer->size    = sizeof(GFDBlockRelocationHeader);
1013 
1014     int oTrailer = nE;
1015 
1016     // Fill in our trailer and write it out
1017     pshTrailer->dataSize               = GFDCleanTag(pOffset[oPatchTable]) - GFDCleanTag(pOffset[oMain]);  // size of the main data section (allocate this size of contiguous memory)
1018     pshTrailer->dataOffset             = pOffset[oMain];             // offset of the main data section in this block
1019     pshTrailer->stringTableCharNumber  = pStrTable->m_nDB;           // number of characters in the string table
1020     pshTrailer->stringTableOffset      = pOffset[oStringTable];      // offset of string table in this block
1021     pshTrailer->patchTableOffsetNumber = nE;                         // number of offsets in the patch table
1022     pshTrailer->patchTableOffset       = pOffset[oPatchTable] ;      // offset of the patch table in this block
1023 
1024     pOffset[nE] = GFDAddDataTable(pDT, pshTrailer, sizeof(GFDBlockRelocationHeader));
1025     pAddr[nE] = 0;          // don't patch this location
1026     nE++;
1027 
1028     free(pshTrailer);
1029     pshTrailer = NULL;
1030 
1031     assert(offStringTable == GFDCleanTag(pOffset[oStringTable]) && "Guess for offset table in 7a was wrong");
1032     assert(nE <= nElements && "Too few offsets allocated");
1033 
1034     free(pAddr);
1035     free(pOffset);
1036 
1037     GFDDestroyStringTable( pStrTable );
1038     return pDT;
1039 }
1040 
1041 /// Create the flat datablock representation of a GX2GeometryShader structure
1042 /// Call GFDDataTableDestroy() on returned object once doen with it.
GFDCreateBlockRelocateHeaderGSH(GX2GeometryShader * pGS)1043 GFDDataTable* GFDCreateBlockRelocateHeaderGSH(GX2GeometryShader *pGS)
1044 {
1045     // Create second data structure to hold flattened, offseted version of our original shader
1046     GX2GeometryShader gsCopy;
1047     memcpy(&gsCopy,  pGS,  sizeof(GX2GeometryShader));
1048 
1049     // Walk thru copy, converting all pointers to data blocks in table, and changing
1050     // addresses to offsets into the data block
1051 
1052     // Create data table to hold the structure elements
1053     GFDDataTable *pDT = GFDCreateDataTable(sizeof(GX2GeometryShader) + gsCopy.numUniforms * 8 + 512);      // todo - pick better number
1054 
1055     // How many pointers do we need to patch? (The 11 here is empirically determined - asserts at end if if wrong)
1056     // 11 = 1 (shader itself) + 7 in GX2GeometryShader + 1 (string table) + 1 (patch list) + 1 (trailer)
1057     int nElements = 11 + gsCopy.numUniformBlocks + gsCopy.numUniforms + gsCopy.numSamplers /*+ gsCopy.numAttribs */;
1058     int nE = 0;
1059     u32 size;
1060     u32 *pAddr   = (u32*) malloc( nElements * sizeof(u32));    // src offsets into data block that need patching
1061     u32 *pOffset = (u32*) malloc( nElements * sizeof(u32));    // dst offsets into data block for, containing offets to write
1062     memset(pOffset, 0, nElements * sizeof(u32));
1063     memset(pAddr, 0,  nElements * sizeof(u32));
1064 
1065     // 0: Store main structure itself  (we'll rewrite offsets at the end).
1066     // For allocation and alignment purposes, this needs to be the first hunk in the data table
1067     GFDGeometryShader gsCopy32;
1068     size = GFDRepackGeometryShaderFor32Bit(&gsCopy, &gsCopy32);
1069     int oMain = nE;
1070     pOffset[nE] = GFDAddDataTable(pDT, &gsCopy32, size);
1071     pAddr[nE]   = 0;       // don't patch this location
1072     nE++;
1073 
1074     // 1: Store uniform block/buffer array
1075     GFDUniformBlock *pUB = (GFDUniformBlock *) malloc(sizeof(GFDUniformBlock)*gsCopy.numUniformBlocks);
1076     size = GFDRepackUniformBlockArrayFor32Bit(gsCopy.uniformBlocks, pUB, gsCopy.numUniformBlocks);
1077     int oUniformBuffers = nE;
1078     pOffset[nE] = GFDAddDataTable(pDT, pUB, size);
1079     pAddr[nE]  = (gsCopy.numUniformBlocks == 0) ? 0 : pOffset[oMain] + (u32)&gsCopy32.uniformBlocks - (u32)&gsCopy32;
1080     nE++;
1081     free(pUB);
1082 
1083     // 2: Store uniform array
1084     GFDUniformVar *pUV = (GFDUniformVar *) malloc(sizeof(GFDUniformVar)*gsCopy.numUniforms);
1085     size = GFDRepackUniformVarArrayFor32Bit(gsCopy.uniformVars, pUV, gsCopy.numUniforms);
1086     int oUniforms = nE;
1087     pOffset[nE] = GFDAddDataTable(pDT, pUV, size);
1088     pAddr[nE]  = (gsCopy.numUniforms == 0) ? 0 : pOffset[oMain] + (u32)&gsCopy32.uniformVars - (u32)&gsCopy32;
1089     nE++;
1090     free(pUV);
1091 
1092     // 3: Store uniform initial values
1093     // Note initial values points to uniform block, a contiguous section of GX2UniformInitialValues
1094     int oUniformData = nE;
1095     pOffset[nE] = GFDAddDataTable(pDT, gsCopy.initialValues, gsCopy.numInitialValues * sizeof(GX2UniformInitialValue));
1096     pAddr[nE]   = (gsCopy.numInitialValues == 0) ? 0 : pOffset[oMain] + (u32)&gsCopy32.initialValues - (u32)&gsCopy32;
1097     nE++;
1098 
1099     // 4: Store loop
1100     int oLoops = nE;
1101     pOffset[nE] = GFDAddDataTable(pDT, gsCopy._loopVars, gsCopy._numLoops * sizeof(GFDLoopVar));
1102     pAddr[nE]  = (gsCopy._numLoops == 0) ? 0 : pOffset[oMain] + (u32)&gsCopy32._loopVars - (u32)&gsCopy32;
1103     nE++;
1104 
1105     // 5: Store sampler descriptors
1106     GFDSamplerVar *pSV = (GFDSamplerVar *) malloc(sizeof(GFDSamplerVar)*gsCopy.numSamplers);
1107     size = GFDRepackSamplerVarArrayFor32Bit(gsCopy.samplerVars, pSV, gsCopy.numSamplers);
1108     int oSamplers = nE;
1109     pOffset[nE] = GFDAddDataTable(pDT, pSV, size);
1110     pAddr[nE]   = (gsCopy.numSamplers == 0) ? 0 : pOffset[oMain] + (u32)&gsCopy32.samplerVars - (u32)&gsCopy32;
1111     nE++;
1112     free(pSV);
1113 
1114     // 6a: Create a string table to store all the strings in
1115     const int kAvgCharsPerString = 12;   // will auto-grow if actually biger
1116     GFDStringTable *pStrTable = GFDCreateStringTable( ( gsCopy.numUniformBlocks +  gsCopy.numUniforms + gsCopy.numSamplers) * kAvgCharsPerString);
1117     u32 offStringTable = pDT->m_nDB;     // current offset...
1118 
1119 
1120     // s1: Store each uniform block name (in common string table), as well as pointer to the initial value in uniform block
1121     for(u32 i = 0; i < gsCopy.numUniformBlocks; i++)
1122     {
1123         pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, gsCopy.uniformBlocks[i].name );
1124         pAddr[nE]   = pOffset[oUniformBuffers] + i * sizeof(GFDUniformBlock) + (u32)&gsCopy.uniformBlocks[i].name - (u32)&gsCopy.uniformBlocks[i];
1125         nE++;
1126     }
1127 
1128     // s2: Store each uniform name (in common string table)
1129     for(u32 i = 0; i < gsCopy.numUniforms; i++)
1130     {
1131         pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, gsCopy.uniformVars[i].name );
1132         pAddr[nE]   = pOffset[oUniforms] + i * sizeof(GFDUniformVar) + (u32)&gsCopy.uniformVars[i].name - (u32)&gsCopy.uniformVars[i];
1133         nE++;
1134     }
1135 
1136     // s3: Store each sampler name (in common string table)
1137     for(u32 i = 0; i < gsCopy.numSamplers; i++)
1138     {
1139         pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, gsCopy.samplerVars[i].name );
1140         pAddr[nE]   = pOffset[oSamplers] + i * sizeof(GFDSamplerVar) + (u32)&gsCopy.samplerVars[i].name - (u32)&gsCopy.samplerVars[i];
1141         nE++;
1142     }
1143 
1144     // 7: Store the string table (watch out for 1-3 bytes of padding)
1145     int oStringTable = nE;
1146 
1147     // Note, although arrays of chars don't seem to be modified to go into network order, we cache our
1148     // stringtable as a block in a word array which does get byte-flipped.  So let's pre-flip it here
1149     // so it comes out right.
1150     int nWordsStrTbl = (pStrTable->m_nDB + 0x3) / 4;
1151     GFDEndianSwap8in32((u32*) pStrTable->m_pDB, nWordsStrTbl);
1152     pOffset[nE] = GFDAddDataTable(pDT, pStrTable->m_pDB, nWordsStrTbl*4);  // simply write out string table data
1153     pAddr[nE]   = 0;        // don't patch this location
1154     nE++;
1155 
1156     // let's convert it back so if we read it latter, we won't have problems
1157     GFDEndianSwap8in32((u32*) pStrTable->m_pDB, nWordsStrTbl);
1158 
1159     // end0: Turn all our patch addresses to offsets
1160     for(int i = 0; i < nE; i++)
1161     {
1162         if(pAddr[i] != 0)
1163         {
1164             *((u32*) (pDT->m_pDB + GFDCleanTag(pAddr[i]))) = pOffset[i];
1165         }
1166     }
1167 
1168     // 8: Store the offset patch list
1169     // (not *really* needed, could reconstruct if know all data types, but makes it a *lot* easier)
1170     // After reading data block in at addrX, increment each location i of *(addrX + pAddr[i]) += addrX;
1171     // By putting this after main, we can allocate space for it at same time and dellocate it when main goes away,
1172     // without affecting the main data.
1173     int oPatchTable = nE;
1174     pOffset[nE] = GFDAddDataTable(pDT, pAddr, nElements * sizeof(u32));  // simply write out zeros now..
1175     pAddr[nE]   = 0;        // don't patch this location either
1176     nE++;
1177 
1178     // 9: Finally, a small structure describing this data block.
1179     GFDBlockRelocationHeader *gshTrailer = (GFDBlockRelocationHeader *) malloc( sizeof(GFDBlockRelocationHeader) );
1180     memset(gshTrailer, 0, sizeof(GFDBlockRelocationHeader));
1181     gshTrailer->magic = GFD_SWAP_8_IN_32(GFD_BLOCK_RELOCATION_HEADER_MAGIC);
1182     gshTrailer->type  = 0;
1183     gshTrailer->size  = sizeof(GFDBlockRelocationHeader);
1184 
1185     int oTrailer = nE;
1186 
1187     // Fill in our trailer and write it out
1188     gshTrailer->dataSize               = GFDCleanTag(pOffset[oPatchTable]) - GFDCleanTag(pOffset[oMain]);  // size of the main data section (allocate this size of contiguous memory)
1189     gshTrailer->dataOffset             = pOffset[oMain];             // offset of the main data section in this block
1190     gshTrailer->stringTableCharNumber  = pStrTable->m_nDB;           // number of characters in the string table
1191     gshTrailer->stringTableOffset      = pOffset[oStringTable];      // offset of string table in this block
1192     gshTrailer->patchTableOffsetNumber = nE;                         // number of offsets in the patch table
1193     gshTrailer->patchTableOffset       = pOffset[oPatchTable] ;      // offset of the patch table in this block
1194 
1195     pOffset[nE] = GFDAddDataTable(pDT, gshTrailer, sizeof(GFDBlockRelocationHeader));
1196     pAddr[nE] = 0;          // don't patch this location
1197     nE++;
1198 
1199     free(gshTrailer);
1200     gshTrailer = NULL;
1201 
1202     assert(offStringTable == GFDCleanTag(pOffset[oStringTable]) && "Guess for offset table in 7a was wrong");
1203     assert(nE <= nElements && "Too few offsets allocated");
1204 
1205     free(pAddr);
1206     free(pOffset);
1207 
1208     GFDDestroyStringTable( pStrTable );
1209 
1210     return pDT;
1211 }
1212 
1213 /// Create the flat datablock representation of a GX2ComputeShader structure
1214 /// Call GFDDataTableDestroy() on returned object once done with it.
GFDCreateBlockRelocateHeaderCSH(GX2ComputeShader * pCS)1215 GFDDataTable* GFDCreateBlockRelocateHeaderCSH(GX2ComputeShader *pCS)
1216 {
1217     // Create second data structure to hold flattened, offseted version of our original shader
1218     GX2ComputeShader csCopy;
1219     memcpy(&csCopy,  pCS,  sizeof(GX2ComputeShader));
1220 
1221     // Walk thru copy, converting all pointers to data blocks in table, and changing
1222     // addresses to offsets into the data block
1223 
1224     // Create data table to hold the structure elements
1225     GFDDataTable *pDT = GFDCreateDataTable(sizeof(GX2ComputeShader) + csCopy.numUniformBlocks * sizeof(GX2UniformBlock) + csCopy.numUniforms * sizeof(GX2UniformVar) + csCopy.numSamplers * sizeof(GX2SamplerVar) + 512);      // todo - pick a better number for string table
1226     if ( !pDT )
1227     {
1228         printf("Error! Can't create GFD Data Table!\n");
1229         return NULL;
1230     }
1231 
1232     // How many pointers do we need to patch? (The value here is empirically determined - asserts at end if if wrong)
1233     // 10 = 1 (shader itself) + 6 in GX2ComputeShader + 1 (string table) + 1 (patch list) + 1 (trailer)
1234     int nElements = 10 + csCopy.numUniformBlocks + csCopy.numUniforms + csCopy.numSamplers;
1235     int nE = 0;
1236     u32 size;
1237     u32 *pAddr   = (u32*) malloc( nElements * sizeof(u32));    // src offsets into data block that need patching
1238     u32 *pOffset = (u32*) malloc( nElements * sizeof(u32));    // dst offsets into data block for, containing offets to write
1239 
1240     if ( !pAddr || !pOffset )
1241     {
1242         printf("Error! Memory allocation failure!\n");
1243         return NULL;
1244     }
1245     memset(pOffset, 0, nElements * sizeof(u32));
1246     memset(pAddr, 0,  nElements * sizeof(u32));
1247 
1248     // 0: Store main structure itself  (we'll rewrite offsets at the end).
1249     // For allocation and alignment purposes, this needs to be the first hunk in the data table
1250     GFDComputeShader csCopy32;
1251     size = GFDRepackComputeShaderFor32Bit(&csCopy, &csCopy32);
1252     int oMain = nE;
1253     pOffset[nE] = GFDAddDataTable(pDT, &csCopy32, size);
1254     pAddr[nE]   = 0;       // don't patch this location
1255     nE++;
1256 
1257     // 1: Store uniform block/buffer array
1258     GFDUniformBlock *pUB = (GFDUniformBlock *) malloc(sizeof(GFDUniformBlock)*csCopy.numUniformBlocks);
1259     if ( !pUB )
1260     {
1261         printf("Error! Failed to allocate Uniform Block structure!\n");
1262         return NULL;
1263     }
1264     size = GFDRepackUniformBlockArrayFor32Bit(csCopy.uniformBlocks, pUB, csCopy.numUniformBlocks);
1265     int oUniformBuffers = nE;
1266     pOffset[nE] = GFDAddDataTable(pDT, pUB, size);
1267     pAddr[nE]  = (csCopy.numUniformBlocks == 0) ? 0 : pOffset[oMain] + (u32)&csCopy32.uniformBlocks - (u32)&csCopy32;
1268     nE++;
1269     free(pUB);
1270 
1271     // 2: Store uniform array
1272     GFDUniformVar *pUV = (GFDUniformVar *) malloc(sizeof(GFDUniformVar)*csCopy.numUniforms);
1273     if ( !pUV )
1274     {
1275         printf("Error! Failed to allocate Uniform Variable structure!\n");
1276         return NULL;
1277     }
1278     size = GFDRepackUniformVarArrayFor32Bit(csCopy.uniformVars, pUV, csCopy.numUniforms);
1279     int oUniforms = nE;
1280     pOffset[nE] = GFDAddDataTable(pDT, pUV, size);
1281     pAddr[nE]  = (csCopy.numUniforms == 0) ? 0 : pOffset[oMain] + (u32)&csCopy32.uniformVars - (u32)&csCopy32;
1282     nE++;
1283     free(pUV);
1284 
1285     // 3: Store uniform initial values
1286     // Note initial values points to uniform block, a contiguous section of GX2UniformInitialValues
1287     int oUniformData = nE;
1288     pOffset[nE] = GFDAddDataTable(pDT, csCopy.initialValues, csCopy.numInitialValues * sizeof(GX2UniformInitialValue));
1289     pAddr[nE]   = (csCopy.numInitialValues == 0) ? 0 : pOffset[oMain] + (u32)&csCopy32.initialValues - (u32)&csCopy32;
1290     nE++;
1291 
1292     // 4: Store loop variables
1293     int oLoops = nE;
1294     pOffset[nE] = GFDAddDataTable(pDT, csCopy._loopVars, csCopy._numLoops * sizeof(GFDLoopVar));
1295     pAddr[nE]  = (csCopy._numLoops == 0) ? 0 : pOffset[oMain] + (u32)&csCopy32._loopVars - (u32)&csCopy32;
1296     nE++;
1297 
1298     // 5: Store sampler descriptors
1299     GFDSamplerVar *pSV = (GFDSamplerVar *) malloc(sizeof(GFDSamplerVar)*csCopy.numSamplers);
1300     if ( !pSV )
1301     {
1302         printf("Error! Failed to allocate Sampler Variable structure!\n");
1303         return NULL;
1304     }
1305     size = GFDRepackSamplerVarArrayFor32Bit(csCopy.samplerVars, pSV, csCopy.numSamplers);
1306     int oSamplers = nE;
1307     pOffset[nE] = GFDAddDataTable(pDT, pSV, size);
1308     pAddr[nE]   = (csCopy.numSamplers == 0) ? 0 : pOffset[oMain] + (u32)&csCopy32.samplerVars - (u32)&csCopy32;
1309     nE++;
1310     free(pSV);
1311 
1312 
1313     // 6a: Create a string table to store all the strings in
1314     const int kAvgCharsPerString = 12;   // will auto-grow if actually biger
1315     GFDStringTable *pStrTable = GFDCreateStringTable( ( csCopy.numUniformBlocks + csCopy.numUniforms + csCopy.numSamplers) * kAvgCharsPerString);
1316     u32 offStringTable = pDT->m_nDB;     // current offset...
1317 
1318     // s1: Store each uniform block name (in common string table), as well as pointer to the initial value in uniform block
1319     for(u32 i = 0; i < csCopy.numUniformBlocks; i++)
1320     {
1321         pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, csCopy.uniformBlocks[i].name );
1322         pAddr[nE]   = pOffset[oUniformBuffers] + i * sizeof(GFDUniformBlock) + (u32)&csCopy.uniformBlocks[i].name - (u32)&csCopy.uniformBlocks[i];
1323         nE++;
1324     }
1325 
1326     // s2: Store each uniform name (in common string table)
1327     for(u32 i = 0; i < csCopy.numUniforms; i++)
1328     {
1329         pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, csCopy.uniformVars[i].name );
1330         pAddr[nE]   = pOffset[oUniforms] + i * sizeof(GFDUniformVar) + (u32)&csCopy.uniformVars[i].name - (u32)&csCopy.uniformVars[i];
1331         nE++;
1332     }
1333 
1334     // s3: Store each sampler name (in common string table)
1335     for(u32 i = 0; i < csCopy.numSamplers; i++)
1336     {
1337         pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, csCopy.samplerVars[i].name );
1338         pAddr[nE]   = pOffset[oSamplers] + i * sizeof(GFDSamplerVar) + (u32)&csCopy.samplerVars[i].name - (u32)&csCopy.samplerVars[i];
1339         nE++;
1340     }
1341 
1342     // 7: Store the string table (watch out for 1-3 bytes of padding)
1343     int oStringTable = nE;
1344 
1345     // Note, although arrays of chars don't seem to be modified to go into network order, we cache our
1346     // stringtable as a block in a word array which does get byte-flipped.  So let's pre-flip it here
1347     // so it comes out right.
1348     int nWordsStrTbl = (pStrTable->m_nDB + 0x3) / 4;
1349     GFDEndianSwap8in32((u32*) pStrTable->m_pDB, nWordsStrTbl);
1350     pOffset[nE] = GFDAddDataTable(pDT, pStrTable->m_pDB, nWordsStrTbl*4);  // simply write out string table data
1351     pAddr[nE]   = 0;        // don't patch this location
1352     nE++;
1353 
1354     // let's convert it back so if we read it latter, we won't have problems
1355     GFDEndianSwap8in32((u32*) pStrTable->m_pDB, nWordsStrTbl);
1356 
1357     // end0: Turn all our patch addresses to offsets
1358     for(int i = 0; i < nE; i++)
1359     {
1360         if(pAddr[i] != 0)
1361         {
1362             *((u32*) (pDT->m_pDB + GFDCleanTag(pAddr[i]))) = pOffset[i];
1363         }
1364     }
1365 
1366     // 8: Store the offset patch list
1367     // (not *really* needed, could reconstruct if know all data types, but makes it a *lot* easier)
1368     // After reading data block in at addrX, increment each location i of *(addrX + pAddr[i]) += addrX;
1369     // By putting this after main, we can allocate space for it at same time and dellocate it when main goes away,
1370     // without affecting the main data.
1371     int oPatchTable = nE;
1372     pOffset[nE] = GFDAddDataTable(pDT, pAddr, nElements * sizeof(u32));  // simply write out zeros now..
1373     pAddr[nE]   = 0;        // don't patch this location either
1374     nE++;
1375 
1376     // 9: Finally, a small structure describing this data block.
1377     GFDBlockRelocationHeader *vshTrailer = (GFDBlockRelocationHeader *) malloc( sizeof(GFDBlockRelocationHeader) );
1378     if ( !vshTrailer )
1379     {
1380         printf("Error! Failed to allocate Block Relocation structure!\n");
1381         return NULL;
1382     }
1383     memset(vshTrailer, 0, sizeof(GFDBlockRelocationHeader));
1384     vshTrailer->magic = GFD_SWAP_8_IN_32(GFD_BLOCK_RELOCATION_HEADER_MAGIC);
1385     vshTrailer->type  = 0;
1386     vshTrailer->size  = sizeof(GFDBlockRelocationHeader);
1387 
1388     int oTrailer = nE;
1389 
1390     // Fill in our trailer and write it out
1391     vshTrailer->dataSize               = GFDCleanTag(pOffset[oPatchTable]) - GFDCleanTag(pOffset[oMain]);  // size of the main data section (allocate this size of contiguous memory)
1392     vshTrailer->dataOffset             = pOffset[oMain];             // offset of the main data section in this block
1393     vshTrailer->stringTableCharNumber  = pStrTable->m_nDB;           // number of characters in the string table
1394     vshTrailer->stringTableOffset      = pOffset[oStringTable];      // offset of string table in this block
1395     vshTrailer->patchTableOffsetNumber = nE;                         // number of offsets in the patch table
1396     vshTrailer->patchTableOffset       = pOffset[oPatchTable] ;      // offset of the patch table in this block
1397 
1398     pOffset[nE] = GFDAddDataTable(pDT, vshTrailer, sizeof(GFDBlockRelocationHeader));
1399     pAddr[nE] = 0;          // don't patch this location
1400     nE++;
1401 
1402     free(vshTrailer);
1403     vshTrailer = NULL;
1404 
1405     assert(offStringTable == GFDCleanTag(pOffset[oStringTable]) && "Guess for offset table in 7a was wrong");
1406     assert(nE <= nElements && "Too few offsets allocated");
1407 
1408     free(pAddr);
1409     free(pOffset);
1410 
1411     GFDDestroyStringTable( pStrTable );
1412     return pDT;
1413 }
1414 
1415 
GFDWriteFileVertexShaderBlock(FILE * fp,GFDEndianSwapMode swapMode,GFDAlignMode alignMode,GX2VertexShader * pVS)1416 bool GFDWriteFileVertexShaderBlock(FILE *fp, GFDEndianSwapMode swapMode, GFDAlignMode alignMode, GX2VertexShader *pVS)
1417 {
1418     if(pVS == NULL)
1419         return false;
1420 
1421     // Get info about the actual shader program
1422     u32   nBytesProg = pVS->shaderSize;
1423     void* pDataProg  = pVS->shaderPtr;  // Current location, we will relocate it
1424 
1425     // Set NULL
1426     pVS->shaderPtr = NULL;
1427 
1428     // Now convert structure into a flat, relocatable format
1429     GFDDataTable *pDT_VS = GFDCreateBlockRelocateHeaderVSH(pVS);
1430 
1431     if(NULL == pDT_VS)
1432         return false;
1433 
1434     int nBytesVSStruct = pDT_VS->m_nDB;  // How big is our vertex struct (changes size due to uniforms, samplers, and other varying things)
1435 
1436     // Write header for VS struct
1437     if(!GFDWriteFileBlockHeader(fp, GFD_BLOCK_TYPE_GX2_VSH_HEADER, nBytesVSStruct))
1438         return false;
1439 
1440     // Write VS struct
1441     if(!GFDWriteFilePPCData(fp, (nBytesVSStruct + 3) / 4, GFD_ELEMENT_SIZE_32, (u32 *) pDT_VS->m_pDB))
1442         return false;
1443 
1444     // Add pad block
1445     if(alignMode)
1446     {
1447         // Calc padding size for shader align
1448         u32 padSize = ((u32)(ftell(fp) + 2 * GFD_BLOCK_HEADER_SIZE +(GX2_SHADER_ALIGNMENT-1)) & ~(GX2_SHADER_ALIGNMENT-1)) - (u32)ftell(fp) - 2 * GFD_BLOCK_HEADER_SIZE;
1449         while(padSize < 0)
1450             padSize += GX2_SHADER_ALIGNMENT;
1451 
1452         if(!GFDWriteFilePadBlock(fp, padSize))
1453             return false;
1454     }
1455 
1456     //  Write out Header for program block
1457     if(!GFDWriteFileBlockHeader(fp, GFD_BLOCK_TYPE_GX2_VSH_PROGRAM, nBytesProg))
1458         return false;
1459 
1460     // Write program data block
1461     if(!GFDWriteFileGPUData(fp, (nBytesProg + 3) / 4, GFD_ELEMENT_SIZE_32, swapMode, (u32 *) pDataProg))
1462         return false;
1463 
1464     GFDDestroyDataTable(pDT_VS);
1465 
1466     // Restore
1467     pVS->shaderPtr =  pDataProg;
1468 
1469     return true;
1470 }
1471 
GFDWriteFilePixelShaderBlock(FILE * fp,GFDEndianSwapMode swapMode,GFDAlignMode alignMode,GX2PixelShader * pPS)1472 bool GFDWriteFilePixelShaderBlock(FILE *fp, GFDEndianSwapMode swapMode, GFDAlignMode alignMode, GX2PixelShader *pPS)
1473 {
1474     if(pPS == NULL)
1475         return false;
1476 
1477    // Get info about the actual shader program
1478     u32   nBytesProg = pPS->shaderSize;
1479     void* pDataProg  = pPS->shaderPtr;  // Current location, we will relocate it
1480 
1481     // Set NULL
1482     pPS->shaderPtr = NULL;
1483 
1484     // Convert structure into a flat, relocatable format
1485     GFDDataTable *pDT_PS = GFDCreateBlockRelocateHeaderPSH(pPS);
1486 
1487     if(NULL == pDT_PS)
1488         return false;
1489 
1490     int nBytesPSStruct = pDT_PS->m_nDB;  // How big is our pixel shader struct (changes size due to uniforms, samplers, and other varying things)
1491 
1492     // Write header for PS struct
1493     if(!GFDWriteFileBlockHeader(fp, GFD_BLOCK_TYPE_GX2_PSH_HEADER, nBytesPSStruct))
1494         return false;
1495 
1496     // Write PS struct
1497     if(!GFDWriteFilePPCData(fp, (nBytesPSStruct + 3) / 4, GFD_ELEMENT_SIZE_32, (u32 *) pDT_PS->m_pDB))
1498         return false;
1499 
1500     // Add pad block
1501     if(alignMode)
1502     {
1503         // Calc padding size for shader align
1504         u32 padSize = ((u32)(ftell(fp) + 2 * GFD_BLOCK_HEADER_SIZE +(GX2_SHADER_ALIGNMENT-1)) & ~(GX2_SHADER_ALIGNMENT-1)) - (u32)ftell(fp) - 2 * GFD_BLOCK_HEADER_SIZE;
1505         while(padSize < 0)
1506             padSize += GX2_SHADER_ALIGNMENT;
1507 
1508         if(!GFDWriteFilePadBlock(fp, padSize))
1509             return false;
1510     }
1511 
1512     // Write Header for program block
1513     if(!GFDWriteFileBlockHeader(fp, GFD_BLOCK_TYPE_GX2_PSH_PROGRAM, nBytesProg))
1514         return false;
1515 
1516     // Write program data block
1517     if(!GFDWriteFileGPUData(fp, (nBytesProg + 3) / 4, GFD_ELEMENT_SIZE_32, swapMode, (u32 *) pDataProg))
1518         return false;
1519 
1520     GFDDestroyDataTable(pDT_PS);
1521 
1522     // Restore
1523     pPS->shaderPtr = pDataProg;
1524 
1525     return true;
1526 }
1527 
GFDWriteFileGeometryShaderBlock(FILE * fp,GFDEndianSwapMode swapMode,GFDAlignMode alignMode,GX2GeometryShader * pGS)1528 bool GFDWriteFileGeometryShaderBlock(FILE *fp, GFDEndianSwapMode swapMode, GFDAlignMode alignMode, GX2GeometryShader *pGS)
1529 {
1530     if(pGS == NULL)
1531         return false;
1532 
1533     // Get info about the actual shader program
1534     u32   nBytesProg = pGS->shaderSize;
1535     void* pDataProg  = pGS->shaderPtr;  // Current location, we will relocate it
1536 
1537     u32   nBytesCopyProg = pGS->copyShaderSize;
1538     void* pDataCopyProg  = pGS->copyShaderPtr;  // Current location, we will relocate it
1539 
1540     // Set NULL
1541     pGS->shaderPtr     = NULL;
1542     pGS->copyShaderPtr = NULL;
1543 
1544     // Convert structure into a flat, relocatable format
1545     GFDDataTable *pDT_GS = GFDCreateBlockRelocateHeaderGSH(pGS);
1546 
1547     if(NULL == pDT_GS)
1548         return false;
1549 
1550     int nBytesGSStruct = pDT_GS->m_nDB;  // How big is our shader struct (changes size due to uniforms, samplers, and other varying things)
1551 
1552     // Write header for GS struct
1553     if(!GFDWriteFileBlockHeader(fp, GFD_BLOCK_TYPE_GX2_GSH_HEADER, nBytesGSStruct))
1554         return false;
1555 
1556     // Write GS struct
1557     if(!GFDWriteFilePPCData(fp, (nBytesGSStruct + 3) / 4, GFD_ELEMENT_SIZE_32, (u32 *) pDT_GS->m_pDB))
1558         return false;
1559 
1560     // Add pad block
1561     if(alignMode)
1562     {
1563         // Calc padding size for shader align
1564         u32 padSize = ((u32)(ftell(fp) + 2 * GFD_BLOCK_HEADER_SIZE +(GX2_SHADER_ALIGNMENT-1)) & ~(GX2_SHADER_ALIGNMENT-1)) - (u32)ftell(fp) - 2 * GFD_BLOCK_HEADER_SIZE;
1565         while(padSize < 0)
1566             padSize += GX2_SHADER_ALIGNMENT;
1567 
1568         if(!GFDWriteFilePadBlock(fp, padSize))
1569             return false;
1570     }
1571 
1572     // Write Header for program block
1573     if(!GFDWriteFileBlockHeader(fp, GFD_BLOCK_TYPE_GX2_GSH_PROGRAM, nBytesProg))
1574         return false;
1575 
1576     // Write program data block
1577     if(!GFDWriteFileGPUData(fp, (nBytesProg + 3) / 4, GFD_ELEMENT_SIZE_32, swapMode, (u32 *) pDataProg))
1578         return false;
1579 
1580 	// Add pad block
1581 	if(alignMode)
1582     {
1583         // Calc padding size for shader align
1584         u32 padSize = ((u32)(ftell(fp) + 2 * GFD_BLOCK_HEADER_SIZE +(GX2_SHADER_ALIGNMENT-1)) & ~(GX2_SHADER_ALIGNMENT-1)) - (u32)ftell(fp) - 2 * GFD_BLOCK_HEADER_SIZE;
1585         while(padSize < 0)
1586             padSize += GX2_SHADER_ALIGNMENT;
1587 
1588         if(!GFDWriteFilePadBlock(fp, padSize))
1589             return false;
1590     }
1591 
1592     // Write Header for copy program block
1593     if(!GFDWriteFileBlockHeader(fp, GFD_BLOCK_TYPE_GX2_GSH_COPY_PROGRAM, nBytesCopyProg))
1594         return false;
1595 
1596     // Write copy program data block
1597     if(!GFDWriteFileGPUData(fp, (nBytesCopyProg + 3) / 4, GFD_ELEMENT_SIZE_32, swapMode, (u32 *) pDataCopyProg))
1598         return false;
1599 
1600     GFDDestroyDataTable(pDT_GS);
1601 
1602     // Restore
1603     pGS->shaderPtr = pDataProg;
1604     pGS->copyShaderPtr = pDataCopyProg;
1605 
1606     return true;
1607 }
1608 
GFDWriteFileComputeShaderBlock(FILE * fp,GFDEndianSwapMode swapMode,GFDAlignMode alignMode,GX2ComputeShader * pCS)1609 bool GFDWriteFileComputeShaderBlock(FILE *fp, GFDEndianSwapMode swapMode, GFDAlignMode alignMode, GX2ComputeShader *pCS)
1610 {
1611     if(pCS == NULL)
1612         return false;
1613 
1614     // Get info about the actual shader program
1615     u32   nBytesProg = pCS->shaderSize;
1616     void* pDataProg  = pCS->shaderPtr;  // Current location, we will relocate it
1617 
1618     // Set NULL
1619     pCS->shaderPtr = NULL;
1620 
1621     // Now convert structure into a flat, relocatable format
1622     GFDDataTable *pDT_CS = GFDCreateBlockRelocateHeaderCSH(pCS);
1623 
1624     if(NULL == pDT_CS)
1625         return false;
1626 
1627     int nBytesCSStruct = pDT_CS->m_nDB;  // How big is our compute struct (changes size due to uniforms, samplers, and other varying things)
1628 
1629     // Write header for CS struct
1630     if(!GFDWriteFileBlockHeader(fp, GFD_BLOCK_TYPE_GX2_CSH_HEADER, nBytesCSStruct))
1631         return false;
1632 
1633     // Write CS struct
1634     if(!GFDWriteFilePPCData(fp, (nBytesCSStruct + 3) / 4, GFD_ELEMENT_SIZE_32, (u32 *) pDT_CS->m_pDB))
1635         return false;
1636 
1637     // Add pad block
1638     if(alignMode)
1639     {
1640         // Calc padding size for shader align
1641         u32 padSize = ((u32)(ftell(fp) + 2 * GFD_BLOCK_HEADER_SIZE +(GX2_SHADER_ALIGNMENT-1)) & ~(GX2_SHADER_ALIGNMENT-1)) - (u32)ftell(fp) - 2 * GFD_BLOCK_HEADER_SIZE;
1642         while(padSize < 0)
1643             padSize += GX2_SHADER_ALIGNMENT;
1644 
1645         if(!GFDWriteFilePadBlock(fp, padSize))
1646             return false;
1647     }
1648 
1649     //  Write out Header for program block
1650     if(!GFDWriteFileBlockHeader(fp, GFD_BLOCK_TYPE_GX2_CSH_PROGRAM, nBytesProg))
1651         return false;
1652 
1653     // Write program data block
1654     if(!GFDWriteFileGPUData(fp, (nBytesProg + 3) / 4, GFD_ELEMENT_SIZE_32, swapMode, (u32 *) pDataProg))
1655         return false;
1656 
1657     GFDDestroyDataTable(pDT_CS);
1658 
1659     // Restore
1660     pCS->shaderPtr =  pDataProg;
1661 
1662     return true;
1663 }
1664 
1665 
GFDWriteFileShader(char * pFilename,GFDGPUVersion gpuVer,GFDEndianSwapMode swapMode,GFDAlignMode alignMode,u32 numShader,const GFDShaders * pShadersOrig)1666 bool GFD_API GFDWriteFileShader(char* pFilename, GFDGPUVersion gpuVer, GFDEndianSwapMode swapMode, GFDAlignMode alignMode, u32 numShader, const GFDShaders *pShadersOrig)
1667 {
1668     GFDShaders2 shaders = {0};
1669 
1670     if (!pShadersOrig)
1671     {
1672         return false;
1673     }
1674 
1675     shaders.abiVersion = GFD_DLL_ABI_VERSION;
1676     shaders.pVertexShader = pShadersOrig->pVertexShader;
1677     shaders.pGeometryShader = pShadersOrig->pGeometryShader;
1678     shaders.pPixelShader = pShadersOrig->pPixelShader;
1679 
1680     return GFDWriteFileShader2(pFilename, gpuVer, swapMode, alignMode, numShader, &shaders);
1681 }
1682 
GFDWriteFileShader2(char * pFilename,GFDGPUVersion gpuVer,GFDEndianSwapMode swapMode,GFDAlignMode alignMode,u32 numShader,const GFDShaders2 * pShaders)1683 bool GFD_API GFDWriteFileShader2(char* pFilename, GFDGPUVersion gpuVer, GFDEndianSwapMode swapMode, GFDAlignMode alignMode, u32 numShader, const GFDShaders2 *pShaders)
1684 {
1685     FILE               *fpout = NULL;
1686     u32                 count = 0;
1687 
1688     if (!pShaders)
1689     {
1690         return false;
1691     }
1692 
1693     if ( GFD_DLL_ABI_TYPE(pShaders->abiVersion) != GFD_DLL_ABI_TYPE(GFD_DLL_ABI_VERSION) ||
1694          GFD_DLL_ABI_VERSION_NUM(pShaders->abiVersion) > 0)
1695     {
1696         return false;
1697     }
1698 
1699     if (!pFilename)
1700     {
1701         pFilename = GSH_DEFAULT_FILENAME;
1702     }
1703 
1704     // Open file
1705     if(GFDOpenFile(&fpout, pFilename, "wb") != 0)
1706     {
1707         printf("Error: Can't open %s\n", pFilename);
1708         return false;
1709     }
1710 
1711     // Check gpu version
1712     switch(gpuVer) {
1713     case GFD_GPU_VERSION_0:
1714         break;
1715     case GFD_GPU_VERSION_1:
1716         break;
1717     case GFD_GPU_VERSION_GPU7:
1718         break;
1719     default:
1720         printf("Warning: Unsupported GPU %d, using default\n", gpuVer);
1721         gpuVer = GFD_GPU_VERSION_GPU7;
1722         break;
1723     }
1724 
1725     // Write the file header
1726     if(!GFDWriteFileHeader(fpout, gpuVer, alignMode))
1727     {
1728         printf("Error: Can't write file header\n");
1729         GFDCloseFile(fpout);
1730         return false;
1731     }
1732 
1733     // Writes multiple shader blocks
1734     for (count = 0; count < numShader; count++)
1735     {
1736         if(NULL != &pShaders[count])
1737         {
1738             if(NULL != pShaders[count].pVertexShader)
1739             {
1740                 if(!GFDWriteFileVertexShaderBlock(fpout, swapMode, alignMode, pShaders[count].pVertexShader))
1741                 {
1742                     printf("Error: Can't write vsh block.\n");
1743                     GFDCloseFile(fpout);
1744                     return false;
1745                 }
1746             }
1747 
1748             if(NULL != pShaders[count].pPixelShader)
1749             {
1750                 if(!GFDWriteFilePixelShaderBlock(fpout, swapMode, alignMode, pShaders[count].pPixelShader))
1751                 {
1752                     printf("Error: Can't write psh block.\n");
1753                     GFDCloseFile(fpout);
1754                     return false;
1755                 }
1756             }
1757 
1758             if(NULL != pShaders[count].pGeometryShader)
1759             {
1760                 if(!GFDWriteFileGeometryShaderBlock(fpout, swapMode, alignMode, pShaders[count].pGeometryShader))
1761                 {
1762                     printf("Error: Can't write psh block.\n");
1763                     GFDCloseFile(fpout);
1764                     return false;
1765                 }
1766             }
1767 
1768             if(NULL != pShaders[count].pComputeShader)
1769             {
1770                 if(!GFDWriteFileComputeShaderBlock(fpout, swapMode, alignMode, pShaders[count].pComputeShader))
1771                 {
1772                     printf("Error: Can't write csh block.\n");
1773                     GFDCloseFile(fpout);
1774                     return false;
1775                 }
1776             }
1777         }
1778     }
1779 
1780     // Write an 'End' block to the file
1781     if(!GFDWriteFileBlockHeader(fpout, GFD_BLOCK_TYPE_END, 0))
1782     {
1783         printf("Error: Can't write end block header\n");
1784         GFDCloseFile(fpout);
1785         return false;
1786     }
1787 
1788     GFDCloseFile(fpout);
1789     return true;
1790 }
1791 
GFDAppendWriteFileShader(char * pFilename,GFDGPUVersion gpuVer,GFDEndianSwapMode swapMode,GFDAlignMode alignMode,u32 numShader,const GFDShaders * pShadersOrig)1792 bool GFD_API GFDAppendWriteFileShader(char* pFilename, GFDGPUVersion gpuVer, GFDEndianSwapMode swapMode, GFDAlignMode alignMode, u32 numShader, const GFDShaders *pShadersOrig)
1793 {
1794     GFDShaders2 shaders = {0};
1795 
1796     if (!pShadersOrig)
1797     {
1798         return false;
1799     }
1800 
1801     // Convert to GFDShaders2 structure
1802     shaders.abiVersion = GFD_DLL_ABI_VERSION;
1803     shaders.pVertexShader = pShadersOrig->pVertexShader;
1804     shaders.pGeometryShader = pShadersOrig->pGeometryShader;
1805     shaders.pPixelShader = pShadersOrig->pPixelShader;
1806 
1807     // Call the new version with the updated
1808     return GFDAppendWriteFileShader2(pFilename, gpuVer, swapMode, alignMode, numShader, &shaders);
1809 }
1810 
GFDAppendWriteFileShader2(char * pFilename,GFDGPUVersion gpuVer,GFDEndianSwapMode swapMode,GFDAlignMode alignMode,u32 numShader,const GFDShaders2 * pShaders)1811 bool GFD_API GFDAppendWriteFileShader2(char* pFilename, GFDGPUVersion gpuVer, GFDEndianSwapMode swapMode, GFDAlignMode alignMode, u32 numShader, const GFDShaders2 *pShaders)
1812 {
1813     FILE               *fpout = NULL;
1814     u32                 count = 0;
1815     GFDHeader           fileHeader;
1816 
1817     if (!pShaders)
1818     {
1819         return false;
1820     }
1821 
1822     if ( GFD_DLL_ABI_TYPE(pShaders->abiVersion) != GFD_DLL_ABI_TYPE(GFD_DLL_ABI_VERSION) ||
1823          GFD_DLL_ABI_VERSION_NUM(pShaders->abiVersion) > 0)
1824     {
1825         return false;
1826     }
1827 
1828     // open file
1829     if(GFDOpenFile(&fpout, pFilename, "rb+") != 0)
1830     {
1831         printf("Error: Can't open %s\n", pFilename);
1832         return false;
1833     }
1834 
1835     // Read File Header
1836     if(!GFDReadFilePPCData(&fileHeader, (GFD_HEADER_SIZE + 3) / 4, GFD_ELEMENT_SIZE_32, fpout))
1837     {
1838         GFDCloseFile(fpout);
1839         printf("Error: Can't read file header.\n");
1840         return false;
1841     }
1842 
1843     // check gpu version
1844     if(fileHeader.gpuVersion != gpuVer)
1845     {
1846         GFDCloseFile(fpout);
1847         printf("Error: GPU version is different.\n");
1848         return false;
1849     }
1850 
1851     // check header version
1852     if(!GFDCheckHeaderMagicVersions(&fileHeader))
1853     {
1854         GFDCloseFile(fpout);
1855         printf("Error: Format version is different.\n");
1856         return false;
1857     }
1858 
1859     // seeks to beginning of 'End' block
1860 	fseek(fpout, -(s32)GFD_BLOCK_HEADER_SIZE, SEEK_END);
1861 
1862     // append writes multiple shader blocks
1863     for (count = 0; count < numShader; count++)
1864     {
1865         if(NULL != &pShaders[count])
1866         {
1867             if(NULL != pShaders[count].pVertexShader)
1868             {
1869                 if(!GFDWriteFileVertexShaderBlock(fpout, swapMode, alignMode, pShaders[count].pVertexShader))
1870                 {
1871                     printf("Error: Can't write vsh block.\n");
1872                     GFDCloseFile(fpout);
1873                     return false;
1874                 }
1875             }
1876 
1877             if(NULL != pShaders[count].pPixelShader)
1878             {
1879                 if(!GFDWriteFilePixelShaderBlock(fpout, swapMode, alignMode, pShaders[count].pPixelShader))
1880                 {
1881                     printf("Error: Can't write psh block.\n");
1882                     GFDCloseFile(fpout);
1883                     return false;
1884                 }
1885             }
1886 
1887             if(NULL != pShaders[count].pGeometryShader)
1888             {
1889                 if(!GFDWriteFileGeometryShaderBlock(fpout, swapMode, alignMode, pShaders[count].pGeometryShader))
1890                 {
1891                     printf("Error: Can't write psh block.\n");
1892                     GFDCloseFile(fpout);
1893                     return false;
1894                 }
1895             }
1896 
1897             if(NULL != pShaders[count].pComputeShader)
1898             {
1899                 if(!GFDWriteFileComputeShaderBlock(fpout, swapMode, alignMode, pShaders[count].pComputeShader))
1900                 {
1901                     printf("Error: Can't write csh block.\n");
1902                     GFDCloseFile(fpout);
1903                     return false;
1904                 }
1905             }
1906         }
1907     }
1908 
1909     // Write an 'End' block to the file
1910     if(!GFDWriteFileBlockHeader(fpout, GFD_BLOCK_TYPE_END, 0))
1911     {
1912         printf("Error: Can't write end block header\n");
1913         GFDCloseFile(fpout);
1914         return false;
1915     }
1916 
1917     GFDCloseFile(fpout);
1918     return true;
1919 }
1920 
1921 // Write As Code-----
1922 
// Emits a C array definition "GX2AttribVar <sName>_attrib_vars[]" to fp, with
// one initializer per attribute: name, type name (looked up in varTypeName),
// array count and location.  The array is followed by one blank line.
// Writes nothing at all when nAttribs is 0.
void _GFDWriteGX2AttributesAsCode(FILE *fp, const char* sName, u32 nAttribs,  GX2AttribVar *pAV)
{
    if (nAttribs == 0)
        return;

    fprintf(fp, "GX2AttribVar %s_attrib_vars[] = {\n", sName);

    for (u32 idx = 0; idx < nAttribs; idx++)
    {
        const GX2AttribVar *entry = pAV + idx;

        // Every entry but the last is followed by a comma
        char sep = ',';
        if (idx + 1 == nAttribs)
            sep = ' ';

        fprintf(fp,"    {\"%s\", %s, %u, %u}%c\n", entry->name,
                varTypeName[entry->type], entry->arrayCount, entry->location,
                sep);
    }

    fprintf(fp,"};\n");
    fprintf(fp,"\n");
}
1941 
// Emits a C array definition "GX2SamplerVar <sName>_sampler_vars[]" to fp,
// with one initializer per sampler: name, sampler type name (looked up in
// samplerTypeName) and location.  The array is followed by one blank line.
// Writes nothing at all when nSamplers is 0.
void _GFDWriteGX2SamplersAsCode(FILE *fp, const char* sName, u32 nSamplers, GX2SamplerVar *pSV)
{
    if (nSamplers == 0)
        return;

    fprintf(fp, "GX2SamplerVar %s_sampler_vars[] = {\n", sName);

    for (u32 idx = 0; idx < nSamplers; idx++)
    {
        const GX2SamplerVar *entry = pSV + idx;

        // Every entry but the last is followed by a comma
        char sep = ',';
        if (idx + 1 == nSamplers)
            sep = ' ';

        fprintf(fp,"    {\"%s\", %s, %u}%c\n", entry->name,
                samplerTypeName[entry->type], entry->location,
                sep);
    }

    fprintf(fp,"};\n");
    fprintf(fp,"\n");
}
1960 
// Emits a C array definition "GX2UniformVar <sName>_uniforms[]" to fp, with
// one initializer per uniform: name, type name (looked up in varTypeName),
// array count, offset and block index.  A block index equal to
// GX2_UNIFORM_BLOCK_INDEX_INVALID is written symbolically on its own line.
// The array is followed by one blank line.  Writes nothing at all when
// nUniforms is 0.  The ivBase parameter is not used by this function.
void _GFDWriteGX2UniformsAsCode(FILE *fp, const char* sName, u32 nUniforms, GX2UniformVar *pUV, GX2UniformInitialValue *ivBase)
{
    if (nUniforms == 0)
        return;

    fprintf(fp, "GX2UniformVar %s_uniforms[] = {\n", sName);

    for (u32 idx = 0; idx < nUniforms; idx++)
    {
        const GX2UniformVar *entry = pUV + idx;

        // Every entry but the last is followed by a comma
        char sep = ',';
        if (idx + 1 == nUniforms)
            sep = ' ';

        fprintf(fp,"    {\"%s\", %s, %u, %u, ", entry->name,
                varTypeName[entry->type], entry->arrayCount, entry->offset);

        if (entry->blockIndex != GX2_UNIFORM_BLOCK_INDEX_INVALID)
            fprintf(fp,"%u, ", entry->blockIndex);
        else
            fprintf(fp,"\n     GX2_UNIFORM_BLOCK_INDEX_INVALID, ");

        fprintf(fp,"}%c\n", sep);
    }

    fprintf(fp,"};\n");
    fprintf(fp,"\n");
}
1984 
// Emits a C array definition "GX2UniformBlock <sName>_uniform_blocks[]" to fp,
// with one initializer per uniform block: name, location and size.  The array
// is followed by one blank line.  Writes nothing at all when nUniBlocks is 0.
void _GFDGX2UniformBlockAsCode(FILE *fp, const char* sName, u32 nUniBlocks, GX2UniformBlock *pUB)
{
    if (nUniBlocks == 0)
        return;

    fprintf(fp, "GX2UniformBlock %s_uniform_blocks[] = {\n", sName);

    for (u32 idx = 0; idx < nUniBlocks; idx++)
    {
        const GX2UniformBlock *entry = pUB + idx;

        // Every entry but the last is followed by a comma
        char sep = ',';
        if (idx + 1 == nUniBlocks)
            sep = ' ';

        fprintf(fp,"    {\"%s\", %u, %u}%c\n", entry->name,
                entry->location, entry->size,
                sep);
    }

    fprintf(fp,"};\n");
    fprintf(fp,"\n");
}
2003 
// Writes byteLen/4 words from ptr to fp as comma-separated "0x%08x" literals,
// four per line, each line indented by four spaces.  The last word is followed
// by a space instead of a comma.  The first line of every group of 64 words
// also gets a trailing "// 0x...." comment holding the index of the first
// word on that line.
//
// fp must not be NULL and byteLen must be a multiple of 4 (both asserted).
void _GFDWriteWordsAsHex(FILE *fp, u32 *ptr, u32 byteLen)
{
    assert(fp != NULL);
    assert((byteLen & 0x03) == 0);

    const u32 nWords = byteLen / 4;

    fprintf(fp, "    ");
    for (u32 w = 0; w < nWords; w++)
    {
        const char sep = (w + 1 == nWords) ? ' ' : ',';
        fprintf(fp,"0x%08x%c", ptr[w], sep);

        // Only the fourth word of each line ends the line
        if ((w & 3) != 3)
            continue;

        if ((w & 0x3f) == 0x3)
            fprintf(fp, "    // 0x%04x\n    ", w-3);
        else
            fprintf(fp, "\n    ");
    }
}
2022 
// Emits "<attrib> static const u32 <name>[<byteLen/4>] = { ... };" to fp, with
// the array contents produced by _GFDWriteWordsAsHex, followed by a blank line.
// Writes nothing when ptr is NULL or byteLen is 0.
//
// fp must not be NULL and byteLen must be a multiple of 4 (both asserted
// before the empty-input check).
void _GFDWriteWordsAsCode(FILE *fp, const char *name, u32 *ptr, u32 byteLen, const char *attrib)
{
    assert(fp != NULL);
    assert((byteLen & 0x03) == 0);

    if (ptr != NULL && byteLen != 0)
    {
        const u32 nWords = byteLen/4;

        fprintf(fp, "%s static const u32 %s[%u] =\n{\n", attrib, name, nWords);
        _GFDWriteWordsAsHex(fp, ptr, byteLen);
        fprintf(fp, "\n};\n\n");
    }
}
2038 
// Builds the concatenation of src1 and src2 in dst, a buffer of max bytes,
// and returns dst.  Each append is limited to the space still available
// (leaving room for the terminator).
//
// strncat_s is used in place of strncat to avoid warnings about strncat being
// unsafe (because it might write size+1 characters).
static char *makeName(char *dst, char *src1, char *src2, u32 max)
{
    size_t used;

    // Start from an empty string so the first append begins at dst[0]
    *dst = '\0';
    strncat_s(dst, max, src1, max-1);

    // Append the second part right after what was stored so far
    used = strlen(dst);
    strncat_s(dst + used, max - used, src2, max - used - 1);

    return dst;
}
2053 
GFDWriteFileShaderAsCodeWithSource(char * pFilename,GFDEndianSwapMode swapMode,const GFDShaders2 * pShaders,const GFDShadersSrc * pShadersSrc)2054 GFD_DECLSPEC bool GFD_API GFDWriteFileShaderAsCodeWithSource(char* pFilename, GFDEndianSwapMode swapMode, const GFDShaders2 *pShaders, const GFDShadersSrc *pShadersSrc)
2055 {
2056     FILE *fpout = NULL;
2057 
2058     // Get base name from filename (for structure names)
2059     char *slash1, *slash2, *dot;
2060 #define BASEMAX 256
2061     char basename[BASEMAX], tempname[BASEMAX];
2062 
2063     slash1 = strrchr(pFilename, '/');
2064     slash2 = strrchr(pFilename, '\\');
2065     if (slash2 > slash1) slash1 = slash2;
2066     basename[0]=0;
2067     if (slash1) {
2068         // strncat(basename, slash1+1, BASEMAX-1);
2069         strncat_s(basename, BASEMAX, slash1+1, BASEMAX-1);
2070     } else {
2071         // strncat(basename, pFilename, BASEMAX-1);
2072         strncat_s(basename, BASEMAX, pFilename, BASEMAX-1);
2073     }
2074     dot = strrchr(basename, '.');
2075     if (dot) *dot=0;
2076 
2077     if ( GFD_DLL_ABI_TYPE(pShaders->abiVersion) != GFD_DLL_ABI_TYPE(GFD_DLL_ABI_VERSION) ||
2078          GFD_DLL_ABI_VERSION_NUM(pShaders->abiVersion) > 0)
2079     {
2080         return false;
2081     }
2082 
2083     // open file
2084     if(GFDOpenFile(&fpout, pFilename, "w") != 0)
2085     {
2086         printf("Error: Can't open %s\n", pFilename);
2087         return false;
2088     }
2089 
2090     fprintf(fpout,"//--------------------------------------------------\n\n");
2091     fprintf(fpout,"// This file is automatically generated by gfd.\n\n");
2092     fprintf(fpout,"//--------------------------------------------------\n\n");
2093 
2094     // For Vertex Shader
2095     if(NULL != pShaders->pVertexShader)
2096     {
2097         fprintf(fpout, "// ---------- %s Vertex Shader ----------\n\n", basename);
2098 
2099         // First, write out initial values
2100         // (because uniforms refer to them)
2101 
2102         _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_VS_initial_values", BASEMAX),
2103                              (u32 *) pShaders->pVertexShader->initialValues,
2104                              pShaders->pVertexShader->numInitialValues * sizeof(GX2UniformInitialValue), "");
2105 
2106         // Swap endian for GPU7
2107         if(swapMode != GFD_ENDIAN_SWAP_MODE_8_IN_32)
2108 	    {
2109             int nElem = (pShaders->pVertexShader->shaderSize + 0x3) / 4;
2110             GFDEndianSwap8in32((u32*)pShaders->pVertexShader->shaderPtr, nElem);
2111 	    }
2112 
2113 		if ( pShadersSrc && pShadersSrc->pVertexShader )
2114 		{
2115 			char *next_token = NULL;
2116 			char *p = strtok_s((char*)pShadersSrc->pVertexShader, "\r\n", &next_token);
2117 
2118 			fprintf(fpout, "// Source Vertex Shader\n");
2119 			while ( p )
2120 			{
2121 				fprintf(fpout, "// %s\n", p);
2122 				p = strtok_s(NULL, "\r\n", &next_token);
2123 			}
2124 			fprintf(fpout, "// End Source Vertex Shader\n");
2125 		}
2126 
2127         // Then, write out the shader pieces,
2128         // and finally, write the shader itself.
2129         _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_VS_shaderPtr", BASEMAX),
2130                              (u32 *) pShaders->pVertexShader->shaderPtr,
2131                              pShaders->pVertexShader->shaderSize, "ALIGNVAR(256)");
2132 
2133         _GFDGX2UniformBlockAsCode(fpout, makeName(tempname, basename, "_VS", BASEMAX),
2134                                   pShaders->pVertexShader->numUniformBlocks,
2135                                   pShaders->pVertexShader->uniformBlocks);
2136 
2137         _GFDWriteGX2UniformsAsCode(fpout, makeName(tempname, basename, "_VS", BASEMAX),
2138                                    pShaders->pVertexShader->numUniforms,
2139                                    pShaders->pVertexShader->uniformVars, pShaders->pVertexShader->initialValues);
2140 
2141         _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_VS_loop_vars", BASEMAX),
2142                              (u32 *) pShaders->pVertexShader->_loopVars,
2143                              pShaders->pVertexShader->_numLoops * sizeof(GFDLoopVar), "");
2144 
2145         _GFDWriteGX2SamplersAsCode(fpout, makeName(tempname, basename, "_VS", BASEMAX),
2146                                    pShaders->pVertexShader->numSamplers,
2147                                    pShaders->pVertexShader->samplerVars);
2148 
2149         _GFDWriteGX2AttributesAsCode(fpout, makeName(tempname, basename, "_VS", BASEMAX),
2150                                      pShaders->pVertexShader->numAttribs,
2151                                      pShaders->pVertexShader->attribVars);
2152 
2153         fprintf(fpout, "\n");
2154         fprintf(fpout, "\n");
2155         fprintf(fpout, "static GX2VertexShader %s_VS = {\n", basename);
2156         fprintf(fpout, "  { // _regs\n");
2157         _GFDWriteWordsAsHex(fpout, (u32 *) &pShaders->pVertexShader->_regs, sizeof(GFDVertexShaderRegs));
2158         fprintf(fpout, "\n  },\n");
2159 
2160         fprintf(fpout, "  %u,\n", pShaders->pVertexShader->shaderSize);
2161         fprintf(fpout, "  (void *) %s_VS_shaderPtr,\n", basename);
2162 
2163         fprintf(fpout, "  %s,\n", shaderModeName[(u32)pShaders->pVertexShader->shaderMode]);
2164 
2165         fprintf(fpout, "  %u,\n", pShaders->pVertexShader->numUniformBlocks);
2166         if (pShaders->pVertexShader->numUniformBlocks) {
2167             fprintf(fpout, "  %s_VS_uniform_blocks,\n", basename);
2168         } else {
2169             fprintf(fpout, "  NULL,\n");
2170         }
2171 
2172         fprintf(fpout, "  %u,\n", pShaders->pVertexShader->numUniforms);
2173         if (pShaders->pVertexShader->numUniforms) {
2174             fprintf(fpout, "  %s_VS_uniforms,\n", basename);
2175         } else {
2176             fprintf(fpout, "  NULL,\n");
2177         }
2178 
2179         fprintf(fpout, "  %u,\n", pShaders->pVertexShader->numInitialValues);
2180         if (pShaders->pVertexShader->numInitialValues) {
2181             fprintf(fpout, "  (GX2UniformInitialValue *) %s_VS_initial_values,\n", basename);
2182         } else {
2183             fprintf(fpout, "  NULL,\n");
2184         }
2185 
2186         fprintf(fpout, "  %u,\n", pShaders->pVertexShader->_numLoops);
2187         if (pShaders->pVertexShader->_numLoops) {
2188             fprintf(fpout, "  (GFDLoopVar *) %s_VS_loop_vars,\n", basename);
2189         } else {
2190             fprintf(fpout, "  NULL,\n");
2191         }
2192 
2193         fprintf(fpout, "  %u,\n", pShaders->pVertexShader->numSamplers);
2194         if (pShaders->pVertexShader->numSamplers) {
2195             fprintf(fpout, "  %s_VS_sampler_vars,\n", basename);
2196         } else {
2197             fprintf(fpout, "  NULL,\n");
2198         }
2199 
2200         fprintf(fpout, "  %u,\n", pShaders->pVertexShader->numAttribs);
2201         if (pShaders->pVertexShader->numAttribs) {
2202             fprintf(fpout, "  %s_VS_attrib_vars,\n", basename);
2203         } else {
2204             fprintf(fpout, "  NULL,\n");
2205         }
2206 
2207         fprintf(fpout, "  %d,\n", (u32)pShaders->pVertexShader->ringItemsize);
2208 
2209         fprintf(fpout, "  (GX2Boolean)%d,\n", (u32)pShaders->pVertexShader->hasStreamOut);
2210 
2211         fprintf(fpout, "  { \n");
2212         _GFDWriteWordsAsHex(fpout, (u32 *) &pShaders->pVertexShader->streamOutVertexStride, sizeof(u32)*GX2_MAX_STREAMOUT_BUFFERS);
2213         fprintf(fpout, "\n  },\n");
2214 
2215 		// end of structure
2216         fprintf(fpout, "};\n\n\n");
2217     }
2218 
2219     // For Pixel Shader
2220     if(NULL != pShaders->pPixelShader )
2221     {
2222         fprintf(fpout, "// ---------- %s Pixel Shader ----------\n\n", basename);
2223 
2224         // First, write out initial values
2225         // (because uniforms refer to them)
2226         _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_PS_initial_values", BASEMAX),
2227                              (u32 *) pShaders->pPixelShader->initialValues,
2228                              pShaders->pPixelShader->numInitialValues * sizeof(GX2UniformInitialValue), "");
2229 
2230         // Swap endian for GPU7
2231         if(swapMode != GFD_ENDIAN_SWAP_MODE_8_IN_32)
2232 	    {
2233             int nElem = ( pShaders->pPixelShader->shaderSize + 0x3) / 4;
2234             GFDEndianSwap8in32((u32*) pShaders->pPixelShader->shaderPtr, nElem);
2235         }
2236 
2237 		if ( pShadersSrc && pShadersSrc->pPixelShader )
2238 		{
2239 			char *next_token = NULL;
2240 			char *p = strtok_s((char*)pShadersSrc->pPixelShader, "\r\n", &next_token);
2241 
2242 			fprintf(fpout, "// Source Pixel Shader\n");
2243 			while ( p )
2244 			{
2245 				fprintf(fpout, "// %s\n", p);
2246 				p = strtok_s(NULL, "\r\n", &next_token);
2247 			}
2248 			fprintf(fpout, "// End Source Pixel Shader\n");
2249 		}
2250 
2251         // Then, write out the shader pieces,
2252         // and finally, write the shader itself.
2253         _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_PS_shaderPtr", BASEMAX),
2254                              (u32 *) pShaders->pPixelShader->shaderPtr,
2255                              pShaders->pPixelShader->shaderSize, "ALIGNVAR(256)");
2256 
2257         _GFDGX2UniformBlockAsCode(fpout, makeName(tempname, basename, "_PS", BASEMAX),
2258                                   pShaders->pPixelShader->numUniformBlocks,
2259                                   pShaders->pPixelShader->uniformBlocks);
2260 
2261         _GFDWriteGX2UniformsAsCode(fpout, makeName(tempname, basename, "_PS", BASEMAX),
2262                                    pShaders->pPixelShader->numUniforms,
2263                                    pShaders->pPixelShader->uniformVars, pShaders->pPixelShader->initialValues);
2264 
2265         _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_PS_loop_vars", BASEMAX),
2266                              (u32 *) pShaders->pPixelShader->_loopVars,
2267                              pShaders->pPixelShader->_numLoops * sizeof(GFDLoopVar), "");
2268 
2269         _GFDWriteGX2SamplersAsCode(fpout, makeName(tempname, basename, "_PS", BASEMAX),
2270                                    pShaders->pPixelShader->numSamplers,
2271                                    pShaders->pPixelShader->samplerVars);
2272 
2273         fprintf(fpout, "\n");
2274         fprintf(fpout, "\n");
2275         fprintf(fpout, "static GX2PixelShader %s_PS = {\n", basename);
2276         fprintf(fpout, "  { // _regs\n");
2277         _GFDWriteWordsAsHex(fpout, (u32 *) &pShaders->pPixelShader->_regs, sizeof(GFDPixelShaderRegs));
2278         fprintf(fpout, "\n  },\n");
2279 
2280         fprintf(fpout, "  %u,\n", pShaders->pPixelShader->shaderSize);
2281         fprintf(fpout, "  (void *) %s_PS_shaderPtr,\n", basename);
2282 
2283         fprintf(fpout, "  %s,\n", shaderModeName[(u32)pShaders->pPixelShader->shaderMode]);
2284 
2285         fprintf(fpout, "  %u,\n", pShaders->pPixelShader->numUniformBlocks);
2286         if (pShaders->pPixelShader->numUniformBlocks) {
2287             fprintf(fpout, "  %s_PS_uniform_blocks,\n", basename);
2288         } else {
2289             fprintf(fpout, "  NULL,\n");
2290         }
2291 
2292         fprintf(fpout, "  %u,\n", pShaders->pPixelShader->numUniforms);
2293         if (pShaders->pPixelShader->numUniforms) {
2294             fprintf(fpout, "  %s_PS_uniforms,\n", basename);
2295         } else {
2296             fprintf(fpout, "  NULL,\n");
2297         }
2298 
2299         fprintf(fpout, "  %u,\n", pShaders->pPixelShader->numInitialValues);
2300         if (pShaders->pPixelShader->numInitialValues) {
2301             fprintf(fpout, "  (GX2UniformInitialValue *) %s_PS_initial_values,\n", basename);
2302         } else {
2303             fprintf(fpout, "  NULL,\n");
2304         }
2305 
2306         fprintf(fpout, "  %u,\n", pShaders->pPixelShader->_numLoops);
2307         if (pShaders->pPixelShader->_numLoops) {
2308             fprintf(fpout, "  (GFDLoopVar *) %s_PS_loop_vars,\n", basename);
2309         } else {
2310             fprintf(fpout, "  NULL,\n");
2311         }
2312 
2313         fprintf(fpout, "  %u,\n", pShaders->pPixelShader->numSamplers);
2314         if (pShaders->pPixelShader->numSamplers) {
2315             fprintf(fpout, "  %s_PS_sampler_vars,\n", basename);
2316         } else {
2317             fprintf(fpout, "  NULL,\n");
2318         }
2319 
2320     	// end of structure
2321         fprintf(fpout, "};\n\n\n");
2322     }
2323 
2324     // For Geometry Shader
2325     if(NULL != pShaders->pGeometryShader )
2326     {
2327         fprintf(fpout, "// ---------- %s Geometry Shader ----------\n\n", basename);
2328 
2329         // First, write out initial values
2330         // (because uniforms refer to them)
2331         _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_GS_initial_values", BASEMAX),
2332                              (u32 *) pShaders->pGeometryShader->initialValues,
2333                              pShaders->pGeometryShader->numInitialValues * sizeof(GX2UniformInitialValue), "");
2334         // Swap endian for GPU7
2335         if(swapMode != GFD_ENDIAN_SWAP_MODE_8_IN_32)
2336 	    {
2337             int nElem = ( pShaders->pGeometryShader->shaderSize + 0x3) / 4;
2338             GFDEndianSwap8in32((u32*) pShaders->pGeometryShader->shaderPtr, nElem);
2339         }
2340 
2341 		if ( pShadersSrc && pShadersSrc->pGeometryShader )
2342 		{
2343 			char *next_token = NULL;
2344 			char *p = strtok_s((char*)pShadersSrc->pGeometryShader, "\r\n", &next_token);
2345 
2346 			fprintf(fpout, "// Source Geometry Shader\n");
2347 			while ( p )
2348 			{
2349 				fprintf(fpout, "// %s\n", p);
2350 				p = strtok_s(NULL, "\r\n", &next_token);
2351 			}
2352 			fprintf(fpout, "// End Source Geometry Shader\n");
2353 		}
2354 
2355         // Then, write out the shader pieces,
2356         // and finally, write the shader itself.
2357         _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_GS_shaderPtr", BASEMAX),
2358                              (u32 *) pShaders->pGeometryShader->shaderPtr,
2359                              pShaders->pGeometryShader->shaderSize, "ALIGNVAR(256)");
2360 
2361         // Swap endian for GPU7
2362         if(swapMode != GFD_ENDIAN_SWAP_MODE_8_IN_32)
2363 	    {
2364             int nElem = ( pShaders->pGeometryShader->copyShaderSize + 0x3) / 4;
2365             GFDEndianSwap8in32((u32*) pShaders->pGeometryShader->copyShaderPtr, nElem);
2366         }
2367 
2368         _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_GS_copyShaderPtr", BASEMAX),
2369                              (u32 *) pShaders->pGeometryShader->copyShaderPtr,
2370                              pShaders->pGeometryShader->copyShaderSize, "ALIGNVAR(256)");
2371 
2372         _GFDGX2UniformBlockAsCode(fpout, makeName(tempname, basename, "_GS", BASEMAX),
2373                                   pShaders->pGeometryShader->numUniformBlocks,
2374                                   pShaders->pGeometryShader->uniformBlocks);
2375 
2376         _GFDWriteGX2UniformsAsCode(fpout, makeName(tempname, basename, "_GS", BASEMAX),
2377                                    pShaders->pGeometryShader->numUniforms,
2378                                    pShaders->pGeometryShader->uniformVars, pShaders->pGeometryShader->initialValues);
2379 
2380         _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_GS_loop_vars", BASEMAX),
2381                              (u32 *) pShaders->pGeometryShader->_loopVars,
2382                              pShaders->pGeometryShader->_numLoops * sizeof(GFDLoopVar), "");
2383 
2384         _GFDWriteGX2SamplersAsCode(fpout, makeName(tempname, basename, "_GS", BASEMAX),
2385                                    pShaders->pGeometryShader->numSamplers,
2386                                    pShaders->pGeometryShader->samplerVars);
2387 
2388         fprintf(fpout, "\n");
2389         fprintf(fpout, "\n");
2390         fprintf(fpout, "static GX2GeometryShader %s_GS = {\n", basename);
2391         fprintf(fpout, "  { // _regs\n");
2392         _GFDWriteWordsAsHex(fpout, (u32 *) &pShaders->pGeometryShader->_regs, sizeof(GFDGeometryShaderRegs));
2393         fprintf(fpout, "\n  },\n");
2394 
2395         fprintf(fpout, "  %u,\n", pShaders->pGeometryShader->shaderSize);
2396         fprintf(fpout, "  (void *) %s_GS_shaderPtr,\n", basename);
2397         fprintf(fpout, "  %u,\n", pShaders->pGeometryShader->copyShaderSize);
2398         fprintf(fpout, "  (void *) %s_GS_copyShaderPtr,\n", basename);
2399         fprintf(fpout, "  %s,\n", shaderModeName[(u32)pShaders->pGeometryShader->shaderMode]);
2400 
2401         fprintf(fpout, "  %u,\n", pShaders->pGeometryShader->numUniformBlocks);
2402         if (pShaders->pGeometryShader->numUniformBlocks) {
2403             fprintf(fpout, "  %s_GS_uniform_blocks,\n", basename);
2404         } else {
2405             fprintf(fpout, "  NULL,\n");
2406         }
2407 
2408         fprintf(fpout, "  %u,\n", pShaders->pGeometryShader->numUniforms);
2409         if (pShaders->pGeometryShader->numUniforms) {
2410             fprintf(fpout, "  %s_GS_uniforms,\n", basename);
2411         } else {
2412             fprintf(fpout, "  NULL,\n");
2413         }
2414 
2415         fprintf(fpout, "  %u,\n", pShaders->pGeometryShader->numInitialValues);
2416         if (pShaders->pGeometryShader->numInitialValues) {
2417             fprintf(fpout, "  (GX2UniformInitialValue *) %s_GS_initial_values,\n", basename);
2418         } else {
2419             fprintf(fpout, "  NULL,\n");
2420         }
2421 
2422         fprintf(fpout, "  %u,\n", pShaders->pGeometryShader->_numLoops);
2423         if (pShaders->pGeometryShader->_numLoops) {
2424             fprintf(fpout, "  (GFDLoopVar *) %s_GS_loop_vars,\n", basename);
2425         } else {
2426             fprintf(fpout, "  NULL,\n");
2427         }
2428 
2429         fprintf(fpout, "  %u,\n", pShaders->pGeometryShader->numSamplers);
2430         if (pShaders->pGeometryShader->numSamplers) {
2431             fprintf(fpout, "  %s_GS_sampler_vars,\n", basename);
2432         } else {
2433             fprintf(fpout, "  NULL,\n");
2434         }
2435 
2436         fprintf(fpout, "  %d,\n", (u32)pShaders->pGeometryShader->ringItemsize);
2437 
2438 		fprintf(fpout, "  (GX2Boolean)%d,\n", (u32)pShaders->pGeometryShader->hasStreamOut);
2439 
2440         fprintf(fpout, "  { \n");
2441         _GFDWriteWordsAsHex(fpout, (u32 *) &pShaders->pGeometryShader->streamOutVertexStride, sizeof(u32)*GX2_MAX_STREAMOUT_BUFFERS);
2442         fprintf(fpout, "\n  },\n");
2443 
2444 		// end of structure
2445         fprintf(fpout, "};\n\n\n");
2446     }
2447 
2448 
2449     // For Compute Shader
2450     if(NULL != pShaders->pComputeShader)
2451     {
2452         fprintf(fpout, "// ---------- %s Compute Shader ----------\n\n", basename);
2453 
2454         // Write this first since the uniforms refer to them.
2455         _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_CS_initial_values", BASEMAX),
2456                              (u32 *) pShaders->pComputeShader->initialValues,
2457                              pShaders->pComputeShader->numInitialValues * sizeof(GX2UniformInitialValue), "");
2458 
2459         // Swap endian for GPU7
2460         if(swapMode != GFD_ENDIAN_SWAP_MODE_8_IN_32)
2461 	    {
2462             int nElem = (pShaders->pComputeShader->shaderSize + 0x3) / 4;
2463             GFDEndianSwap8in32((u32*)pShaders->pComputeShader->shaderPtr, nElem);
2464 	    }
2465 
2466 		if ( pShadersSrc && pShadersSrc->pComputeShader )
2467 		{
2468 			char *next_token = NULL;
2469 			char *p = strtok_s((char*)pShadersSrc->pComputeShader, "\r\n", &next_token);
2470 
2471 			fprintf(fpout, "// Source Compute Shader\n");
2472 			while ( p )
2473 			{
2474 				fprintf(fpout, "// %s\n", p);
2475 				p = strtok_s(NULL, "\r\n", &next_token);
2476 			}
2477 			fprintf(fpout, "// End Source Compute Shader\n");
2478 		}
2479 
2480         // Then, write out the shader pieces,
2481         // and finally, write the shader itself.
2482         _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_CS_shaderPtr", BASEMAX),
2483                              (u32 *) pShaders->pComputeShader->shaderPtr,
2484                              pShaders->pComputeShader->shaderSize, "ALIGNVAR(256)");
2485 
2486 		_GFDGX2UniformBlockAsCode(fpout, makeName(tempname, basename, "_CS", BASEMAX),
2487                                   pShaders->pComputeShader->numUniformBlocks,
2488                                   pShaders->pComputeShader->uniformBlocks);
2489 
2490         _GFDWriteGX2SamplersAsCode(fpout, makeName(tempname, basename, "_CS", BASEMAX),
2491                                    pShaders->pComputeShader->numSamplers,
2492                                    pShaders->pComputeShader->samplerVars);
2493 
2494         _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_CS_loop_vars", BASEMAX),
2495                              (u32 *) pShaders->pComputeShader->_loopVars,
2496                              pShaders->pComputeShader->_numLoops * sizeof(GFDLoopVar), "");
2497 
2498         _GFDWriteGX2UniformsAsCode(fpout, makeName(tempname, basename, "_CS", BASEMAX),
2499                                    pShaders->pComputeShader->numUniforms,
2500                                    pShaders->pComputeShader->uniformVars, pShaders->pComputeShader->initialValues);
2501 
2502         fprintf(fpout, "\n");
2503         fprintf(fpout, "\n");
2504         fprintf(fpout, "static GX2ComputeShader %s_CS = {\n", basename);
2505         fprintf(fpout, "  { // _regs\n");
2506         _GFDWriteWordsAsHex(fpout, (u32 *) &pShaders->pComputeShader->_regs, sizeof(GFDComputeShaderRegs));
2507         fprintf(fpout, "\n  },\n");
2508 
2509         fprintf(fpout, "  %u,\n", pShaders->pComputeShader->shaderSize);
2510         fprintf(fpout, "  (void *) %s_CS_shaderPtr,\n", basename);
2511 
2512 
2513         fprintf(fpout, "  %u,\n", pShaders->pComputeShader->numUniformBlocks);
2514         if (pShaders->pComputeShader->numUniformBlocks) {
2515             fprintf(fpout, "  %s_CS_uniform_blocks,\n", basename);
2516         } else {
2517             fprintf(fpout, "  NULL,\n");
2518         }
2519 
2520         fprintf(fpout, "  %u,\n", pShaders->pComputeShader->numUniforms);
2521         if (pShaders->pComputeShader->numUniforms) {
2522             fprintf(fpout, "  %s_CS_uniforms,\n", basename);
2523         } else {
2524             fprintf(fpout, "  NULL,\n");
2525         }
2526 
2527         fprintf(fpout, "  %u,\n", pShaders->pComputeShader->numInitialValues);
2528         if (pShaders->pComputeShader->numInitialValues) {
2529             fprintf(fpout, "  (GX2UniformInitialValue *) %s_CS_initial_values,\n", basename);
2530         } else {
2531             fprintf(fpout, "  NULL,\n");
2532         }
2533 
2534         fprintf(fpout, "  %u,\n", pShaders->pComputeShader->_numLoops);
2535         if (pShaders->pComputeShader->_numLoops) {
2536             fprintf(fpout, "  (GFDLoopVar *) %s_CS_loop_vars,\n", basename);
2537         } else {
2538             fprintf(fpout, "  NULL,\n");
2539         }
2540 
2541         fprintf(fpout, "  %u,\n", pShaders->pComputeShader->numSamplers);
2542         if (pShaders->pComputeShader->numSamplers) {
2543             fprintf(fpout, "  %s_CS_sampler_vars,\n", basename);
2544         } else {
2545             fprintf(fpout, "  NULL,\n");
2546         }
2547 
2548         fprintf(fpout, "  %u,\n", pShaders->pComputeShader->layout_size_x);
2549         fprintf(fpout, "  %u,\n", pShaders->pComputeShader->layout_size_y);
2550         fprintf(fpout, "  %u,\n", pShaders->pComputeShader->layout_size_z);
2551         fprintf(fpout, "  (GX2Boolean)%d,\n", (u32)pShaders->pComputeShader->Over64Mode);
2552         fprintf(fpout, "  %d,\n", (u32)pShaders->pComputeShader->numWavesPerSIMD);
2553 
2554 		// end of structure
2555         fprintf(fpout, "};\n\n\n");
2556     }
2557 
2558     GFDCloseFile(fpout);
2559 
2560     return true;
2561 }
2562 
GFDWriteFileShaderAsCode(char * pFilename,GFDEndianSwapMode swapMode,const GFDShaders * pShadersOrig)2563 GFD_DECLSPEC bool GFD_API GFDWriteFileShaderAsCode(char* pFilename, GFDEndianSwapMode swapMode, const GFDShaders *pShadersOrig)
2564 {
2565     GFDShaders2 shaders = {0};
2566 
2567     if (!pShadersOrig)
2568     {
2569         return false;
2570     }
2571 
2572     // Convert to new version of the structure
2573     shaders.abiVersion = GFD_DLL_ABI_VERSION;
2574     shaders.pVertexShader = pShadersOrig->pVertexShader;
2575     shaders.pGeometryShader = pShadersOrig->pGeometryShader;
2576     shaders.pPixelShader = pShadersOrig->pPixelShader;
2577 
2578     return GFDWriteFileShaderAsCodeWithSource(pFilename, swapMode, &shaders, NULL);
2579 }
2580 
GFDWriteFileShaderAsCode2(char * pFilename,GFDEndianSwapMode swapMode,const GFDShaders2 * pShaders)2581 GFD_DECLSPEC bool GFD_API GFDWriteFileShaderAsCode2(char* pFilename, GFDEndianSwapMode swapMode, const GFDShaders2 *pShaders)
2582 {
2583     if (!pShaders)
2584     {
2585         return false;
2586     }
2587 
2588     if ( GFD_DLL_ABI_TYPE(pShaders->abiVersion) != GFD_DLL_ABI_TYPE(GFD_DLL_ABI_VERSION) ||
2589          GFD_DLL_ABI_VERSION_NUM(pShaders->abiVersion) > 0)
2590     {
2591         return false;
2592     }
2593 
2594     return GFDWriteFileShaderAsCodeWithSource(pFilename, swapMode, pShaders, NULL);
2595 }
2596 
2597