/*---------------------------------------------------------------------------*

  Copyright (C) Nintendo.  All rights reserved.

  These coded instructions, statements, and computer programs contain
  proprietary information of Nintendo of America Inc. and/or Nintendo
  Company Ltd., and are protected by Federal copyright law.  They may
  not be disclosed to third parties or copied or duplicated in any form,
  in whole or in part, without the prior written consent of Nintendo.

 *---------------------------------------------------------------------------*/

#include "types.h"
// NOTE(review): the next three header names are missing in this copy of the
// file (only the bare directive survived) -- restore them from the original.
#include
#include
#include
#include "windows/gx2.h"
#include "cafe/gfd.h"
#include "gfdFile.h"

#define GSH_DEFAULT_FILENAME "out.gsh"

// definitions for private structures

// GFD specific Shader structures to repack structure between 32 bit and 64 bit.
// Each GFD* structure below mirrors a GX2* structure, with every pointer field
// declared as a u32 so that the layout does not depend on the host pointer size.

/// Register block of a vertex shader, stored as raw 32-bit words.
typedef struct _GFDVertexShaderRegs
{
    u32 reg[GX2_NUM_VERTEX_SHADER_REGISTERS];
} GFDVertexShaderRegs;

/// Register block of a geometry shader, stored as raw 32-bit words.
typedef struct _GFDGeometryShaderRegs
{
    u32 reg[GX2_NUM_GEOMETRY_SHADER_REGISTERS];
} GFDGeometryShaderRegs;

/// Register block of a pixel shader, stored as raw 32-bit words.
typedef struct _GFDPixelShaderRegs
{
    u32 reg[GX2_NUM_PIXEL_SHADER_REGISTERS];
} GFDPixelShaderRegs;

/// Register block of a compute shader, stored as raw 32-bit words.
typedef struct _GFDComputeShaderRegs
{
    u32 reg[GX2_NUM_COMPUTE_SHADER_REGISTERS];
} GFDComputeShaderRegs;

/// 32-bit image of GX2VertexShader (filled in by GFDRepackVertexShaderFor32Bit).
typedef struct _GFDVertexShader
{
    GFDVertexShaderRegs _regs;

    u32 shaderSize;
    u32 shaderPtr;          // pointer in GX2VertexShader, 32-bit value here

    GX2ShaderMode shaderMode;

    u32 numUniformBlocks;
    u32 uniformBlocks;      // pointer in GX2VertexShader, 32-bit value here

    u32 numUniforms;
    u32 uniformVars;        // pointer in GX2VertexShader, 32-bit value here

    u32 numInitialValues;
    u32 initialValues;      // pointer in GX2VertexShader, 32-bit value here

    u32 _numLoops;
    u32 _loopVars;          // pointer in GX2VertexShader, 32-bit value here

    u32 numSamplers;
    u32 samplerVars;        // pointer in GX2VertexShader, 32-bit value here

    u32 numAttribs;
    u32 attribVars;         // pointer in GX2VertexShader, 32-bit value here

    u32 ringItemsize;
    u32 hasStreamOut;
    u32 streamOutVertexStride[GX2_MAX_STREAMOUT_BUFFERS];

    GX2RBuffer shaderProgram;
} GFDVertexShader;

// GFD specific Shader structures to repack structure between 32 bit and 64 bit

/// 32-bit image of GX2GeometryShader (filled in by GFDRepackGeometryShaderFor32Bit).
typedef struct _GFDGeometryShader
{
    GFDGeometryShaderRegs _regs;

    u32 shaderSize;
    u32 shaderPtr;          // pointer in GX2GeometryShader, 32-bit value here
    u32 copyShaderSize;
    u32 copyShaderPtr;      // pointer in GX2GeometryShader, 32-bit value here

    GX2ShaderMode shaderMode;

    u32 numUniformBlocks;
    u32 uniformBlocks;      // pointer in GX2GeometryShader, 32-bit value here

    u32 numUniforms;
    u32 uniformVars;        // pointer in GX2GeometryShader, 32-bit value here

    u32 numInitialValues;
    u32 initialValues;      // pointer in GX2GeometryShader, 32-bit value here

    u32 _numLoops;
    u32 _loopVars;          // pointer in GX2GeometryShader, 32-bit value here

    u32 numSamplers;
    u32 samplerVars;        // pointer in GX2GeometryShader, 32-bit value here

    u32 ringItemsize;
    u32 hasStreamOut;
    u32 streamOutVertexStride[GX2_MAX_STREAMOUT_BUFFERS];

    GX2RBuffer shaderProgram;
    GX2RBuffer copyShaderProgram;
} GFDGeometryShader;

// GFD specific Shader structures to repack structure between 32 bit and 64 bit

/// 32-bit image of GX2PixelShader (filled in by GFDRepackPixelShaderFor32Bit).
typedef struct _GFDPixelShader
{
    GFDPixelShaderRegs _regs;

    u32 shaderSize;
    u32 shaderPtr;          // pointer in GX2PixelShader, 32-bit value here

    GX2ShaderMode shaderMode;

    u32 numUniformBlocks;
    u32 uniformBlocks;      // pointer in GX2PixelShader, 32-bit value here

    u32 numUniforms;
    u32 uniformVars;        // pointer in GX2PixelShader, 32-bit value here

    u32 numInitialValues;
    u32 initialValues;      // pointer in GX2PixelShader, 32-bit value here

    u32 _numLoops;
    u32 _loopVars;          // pointer in GX2PixelShader, 32-bit value here

    u32 numSamplers;
    u32 samplerVars;        // pointer in GX2PixelShader, 32-bit value here

    GX2RBuffer shaderProgram;
} GFDPixelShader;

/// 32-bit image of GX2ComputeShader (filled in by GFDRepackComputeShaderFor32Bit).
typedef struct _GFDComputeShader
{
    GFDComputeShaderRegs _regs;

    u32 shaderSize;
    u32 shaderPtr;          // pointer in GX2ComputeShader, 32-bit value here

    u32 numUniformBlocks;
    u32 uniformBlocks;      // pointer in GX2ComputeShader, 32-bit value here

    u32 numUniforms;
    u32 uniformVars;        // pointer in GX2ComputeShader, 32-bit value here

    u32 numInitialValues;
    u32 initialValues;      // pointer in GX2ComputeShader, 32-bit value here

    u32 _numLoops;
    u32 _loopVars;          // pointer in GX2ComputeShader, 32-bit value here

    u32 numSamplers;
    u32 samplerVars;        // pointer in GX2ComputeShader, 32-bit value here

    u32 layout_size_x;
    u32 layout_size_y;
    u32 layout_size_z;
    u32 Over64Mode;
    u32 numWavesPerSIMD;

    GX2RBuffer shaderProgram;
} GFDComputeShader;

// GFD specific Uniform Block structures to repack structure between 32 bit and 64 bit
typedef struct _GFDUniformBlock
{
    u32 name;               // patched by the block builders with the name's string-table offset
    u32 location;
    u32 size;
} GFDUniformBlock;

// GFD specific Uniform Var structures to repack structure between 32 bit and 64 bit
typedef struct _GFDUniformVar
{
    u32 name;               // patched by the block builders with the name's string-table offset
    GX2VarType type;
    u32 arrayCount;
    u32 offset;
    u32 blockIndex;
} GFDUniformVar;

// GFD specific Attrib Var structures to repack structure between 32 bit and 64 bit
typedef struct _GFDAttribVar
{
    u32 name;               // patched by the block builders with the name's string-table offset
    GX2VarType type;
    u32 arrayCount;
    u32 location;
} GFDAttribVar;

// GFD specific Sampler Var structures to repack structure between 32 bit and 64 bit
typedef struct _GFDSamplerVar
{
    u32 name;               // patched by the block builders with the name's string-table offset
    GX2SamplerType type;
    u32 location;
} GFDSamplerVar;

/// One loop variable, stored as raw 32-bit words.
typedef struct _GFDLoopVar
{
    u32 reg[GX2_NUM_LOOP_VAR_U32_WORDS];
} GFDLoopVar;

// name conversions

// Note: These arrays must be kept in sync with the enum lists in gx2Enum.h
// Those lists must be kept in sync with the compiler output.
// The latter check is done elsewhere.  The former is below.

/// Printable names for variable types; the trailing number is the array index of each entry.
static const char *varTypeName[] =
{
    "GX2_VAR_TYPE_VOID",      // 0
    "GX2_VAR_TYPE_BOOL",      // 1
    "GX2_VAR_TYPE_INT",       // 2
    "GX2_VAR_TYPE_UINT",      // 3
    "GX2_VAR_TYPE_FLOAT",     // 4
    "GX2_VAR_TYPE_DOUBLE",    // 5
    "GX2_VAR_TYPE_DVEC2",     // 6
    "GX2_VAR_TYPE_DVEC3",     // 7
    "GX2_VAR_TYPE_DVEC4",     // 8
    "GX2_VAR_TYPE_VEC2",      // 9
    "GX2_VAR_TYPE_VEC3",      // 10
    "GX2_VAR_TYPE_VEC4",      // 11
    "GX2_VAR_TYPE_BVEC2",     // 12
    "GX2_VAR_TYPE_BVEC3",     // 13
    "GX2_VAR_TYPE_BVEC4",     // 14
    "GX2_VAR_TYPE_IVEC2",     // 15
    "GX2_VAR_TYPE_IVEC3",     // 16
    "GX2_VAR_TYPE_IVEC4",     // 17
    "GX2_VAR_TYPE_UVEC2",     // 18
    "GX2_VAR_TYPE_UVEC3",     // 19
    "GX2_VAR_TYPE_UVEC4",     // 20
    "GX2_VAR_TYPE_MAT2",      // 21
    "GX2_VAR_TYPE_MAT2X3",    // 22
    "GX2_VAR_TYPE_MAT2X4",    // 23
    "GX2_VAR_TYPE_MAT3X2",    // 24
    "GX2_VAR_TYPE_MAT3",      // 25
    "GX2_VAR_TYPE_MAT3X4",    // 26
    "GX2_VAR_TYPE_MAT4X2",    // 27
    "GX2_VAR_TYPE_MAT4X3",    // 28
    "GX2_VAR_TYPE_MAT4",      // 29
    "GX2_VAR_TYPE_DMAT2",     // 30
    "GX2_VAR_TYPE_DMAT2X3",   // 31
    "GX2_VAR_TYPE_DMAT2X4",   // 32
    "GX2_VAR_TYPE_DMAT3X2",   // 33
    "GX2_VAR_TYPE_DMAT3",     // 34
    "GX2_VAR_TYPE_DMAT3X4",   // 35
    "GX2_VAR_TYPE_DMAT4X2",   // 36
    "GX2_VAR_TYPE_DMAT4X3",   // 37
    "GX2_VAR_TYPE_DMAT4"      // 38
};

/// Printable names for sampler types; the trailing number is the array index of each entry.
static const char *samplerTypeName[] =
{
    "GX2_SAMPLER_TYPE_1D",                          // 0
    "GX2_SAMPLER_TYPE_2D",                          // 1
    "GX2_SAMPLER_TYPE_2D_RECT",                     // 2
    "GX2_SAMPLER_TYPE_3D",                          // 3
    "GX2_SAMPLER_TYPE_CUBE",                        // 4
    "GX2_SAMPLER_TYPE_1D_SHADOW",                   // 5
    "GX2_SAMPLER_TYPE_2D_SHADOW",                   // 6
    "GX2_SAMPLER_TYPE_2D_RECT_SHADOW",              // 7
    "GX2_SAMPLER_TYPE_CUBE_SHADOW",                 // 8
    "GX2_SAMPLER_TYPE_1D_ARRAY",                    // 9
    "GX2_SAMPLER_TYPE_2D_ARRAY",                    // 10
    "GX2_SAMPLER_TYPE_1D_ARRAY_SHADOW",             // 11
    "GX2_SAMPLER_TYPE_2D_ARRAY_SHADOW",             // 12
    "GX2_SAMPLER_TYPE_CUBE_ARRAY",                  // 13
    "GX2_SAMPLER_TYPE_CUBE_ARRAY_SHADOW",           // 14
    "GX2_SAMPLER_TYPE_BUFFER",                      // 15
    "GX2_SAMPLER_TYPE_RENDERBUFFER",                // 16
    "GX2_SAMPLER_TYPE_2D_MS",                       // 17
    "GX2_SAMPLER_TYPE_2D_MS_ARRAY",                 // 18
    "GX2_SAMPLER_TYPE_INT_1D",                      // 19
    "GX2_SAMPLER_TYPE_INT_2D",                      // 20
    "GX2_SAMPLER_TYPE_INT_2D_RECT",                 // 21
    "GX2_SAMPLER_TYPE_INT_3D",                      // 22
    "GX2_SAMPLER_TYPE_INT_CUBE",                    // 23
    "GX2_SAMPLER_TYPE_INT_1D_ARRAY",                // 24
    "GX2_SAMPLER_TYPE_INT_2D_ARRAY",                // 25
    "GX2_SAMPLER_TYPE_INT_CUBE_ARRAY",              // 26
    "GX2_SAMPLER_TYPE_INT_BUFFER",                  // 27
    "GX2_SAMPLER_TYPE_INT_RENDERBUFFER",            // 28
    "GX2_SAMPLER_TYPE_INT_2D_MS",                   // 29
    "GX2_SAMPLER_TYPE_INT_2D_MS_ARRAY",             // 30
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_1D",             // 31
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_2D",             // 32
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_2D_RECT",        // 33
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_3D",             // 34
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_CUBE",           // 35
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_1D_ARRAY",       // 36
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_2D_ARRAY",       // 37
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_CUBE_ARRAY",     // 38
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_BUFFER",         // 39
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_RENDERBUFFER",   // 40
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_2D_MS",          // 41
    "GX2_SAMPLER_TYPE_UNSIGNED_INT_2D_MS_ARRAY"     // 42
};

/// Printable names for shader modes, in array-index order.
static const char *shaderModeName[] =
{
    "GX2_SHADER_MODE_UNIFORM_REGISTER",
    "GX2_SHADER_MODE_UNIFORM_BLOCK",
    "GX2_SHADER_MODE_GEOMETRY_SHADER"
};

/// Cleans out extra debug flags attached to offset:
/// returns Offset with every bit of GFD_TAG_MASK cleared.
u32 GFDCleanTag(u32 Offset) {return Offset & ~GFD_TAG_MASK;}

/// Verifies offset stored in file is tagged with GFD_TAG_DAT
/// (the tag GFDAddDataTable ORs into the offsets it returns).
BOOL GFDCheckTagDAT(u32 Offset) {return (Offset & GFD_TAG_MASK) == GFD_TAG_DAT;}

/// Verifies offset stored in file is tagged with GFD_TAG_STR
/// (the tag GFDAddStringTable ORs into the offsets it returns).
BOOL GFDCheckTagSTR(u32 Offset) {return (Offset & GFD_TAG_MASK) == GFD_TAG_STR;}

/// A block of null terminated strings all stored in the same char array
/// All strings are padded with zeros at the end to be integer multiple of words (4) long.
typedef struct _GFDStringTable { u32 m_n; ///< Current number of strings u32 m_nDB; ///< Current size of pDataBlock in chars u32 m_maxDB; ///< Max size of data block, again in chars char * m_pDB; ///< Block of data containing all the strings; } GFDStringTable; /// A table of data segments, stored contiguously in the same int array as [Size, Data] pairs. /// Data Tables are very similar to String Tables. They just point arrays of data /// rather than arrays null terminated strings. typedef struct _GFDDataTable { u32 m_n; ///< Current number of data blocks in hunk (good for error checking) u32 m_nDB; ///< Current size (e.g. next writeable offset) of m_pDB in chars u32 m_maxDB; ///< Max size of data block, again in chars char * m_pDB; ///< Block of data containing all the data } GFDDataTable; /// Create a new empty string table, with space for max strings; GFDStringTable *GFDCreateStringTable(u32 max) { GFDStringTable *pTable = (GFDStringTable *) malloc(sizeof(GFDStringTable)); if ( pTable ) { pTable->m_n = 0; pTable->m_nDB = 0; pTable->m_maxDB = max; pTable->m_pDB = (char *) malloc(pTable->m_maxDB * sizeof(char)); memset(pTable->m_pDB, 0, pTable->m_maxDB * sizeof(char)); } else { printf("Error! Failed to create string table!\n"); } return pTable; } /// Frees the string table and all data associated with it void GFDDestroyStringTable(GFDStringTable *pTable) { if(pTable) { free(pTable->m_pDB); memset(pTable, 0, sizeof(GFDStringTable)); // paranioa to avoid any chance of reuse free(pTable); } } /// Grows maximum space in the string table to the new size. Will not make table smaller. 
void GFDGrowStringTableDB(GFDStringTable *pTable, u32 newMaxDB) { if(newMaxDB > pTable->m_maxDB) // if want more space, malloc it up, and copy old data over { char *pDB = (char*) malloc(newMaxDB * sizeof(char)); // create new memory memset(pDB, 0, newMaxDB * sizeof(char)); memcpy(pDB, pTable->m_pDB, pTable->m_maxDB); // copy old data into this new location free(pTable->m_pDB); // cleanup, and save new values pTable->m_pDB = pDB; pTable->m_maxDB = newMaxDB; } } /// Adds null terminated string to the string table. /// Returns offset into the string table of the begining of this string u32 GFDAddStringTable(GFDStringTable *pTable, const char *str) { size_t len = strlen(str); // (We want our strings to be word aligned to make transfer accross 'network' easier) int len_pad = (len + 1 + 3) & ~0x3; // round length up to word boundary.. (+1 for first terminating 0) if(pTable->m_nDB + len_pad + 1 >= pTable->m_maxDB) { int newSize = pTable->m_nDB; if(len_pad >= newSize) newSize = len_pad*2; // be safe for really long strings newSize = (newSize + 0x1f) & ~0x1f; // round to nice size GFDGrowStringTableDB(pTable, pTable->m_nDB + newSize); // double size if needed } // We could check here for a duplicate string already in the table, // and return that index, but let's skip that optimization for now. int n = pTable->m_n; int off = pTable->m_nDB; // copy string into the string table strncpy_s(pTable->m_nDB + pTable->m_pDB, len+1, str, len+1); int zero = 0; // fill trailing pad with zeros too... 
strncpy_s(pTable->m_nDB + pTable->m_pDB + len + 1, sizeof(u32), (char*) &zero, len_pad - len-1); // update our structs pointing to it pTable->m_nDB += len_pad; pTable->m_n += 1; return off | GFD_TAG_STR; } /// Create a new empty string table, with space for max strings; GFDDataTable *GFDCreateDataTable(u32 max) { GFDDataTable *pTable = (GFDDataTable *) malloc(sizeof(GFDDataTable)); if ( pTable ) { pTable->m_n = 0; pTable->m_nDB = 0; pTable->m_maxDB = max * sizeof(u32); // assume chars are average of 8 or so in size (will grow separatly from ppStrings) pTable->m_pDB = (char *) malloc(pTable->m_maxDB * sizeof(char)); memset(pTable->m_pDB, 0xbb, pTable->m_maxDB * sizeof(char)); } else { printf("Error! Failed to create data table!\n"); } return pTable; } /// Frees the string table and all data associated with it void GFDDestroyDataTable(GFDDataTable *pTable) { if(pTable) { free(pTable->m_pDB); memset(pTable, 0, sizeof(GFDStringTable)); // paranioa to avoid any chance of reuse free(pTable); } } /// Grows maximum space of data table to the new size, in bytes. Will not make table smaller. void GFDGrowDataTableDB(GFDDataTable *pTable, u32 newMaxDB) { if(newMaxDB > pTable->m_maxDB) // if want more space, malloc it up, and copy old data over { char *pDB = (char *) malloc(newMaxDB * sizeof(char)); // create new memory memset(pDB, 0, newMaxDB * sizeof(char)); memcpy(pDB, pTable->m_pDB, pTable->m_maxDB); // copy old data into this new location free(pTable->m_pDB); // cleanup, and save new values pTable->m_pDB = pDB; pTable->m_maxDB = newMaxDB; } } /// Adds a new hunk of data to the data table, of specified length in bytes. /// nBytes must be integer multiple of 4. /// Returns byte offset into that table. /// adds new block to our hunk table. 
Returns new size u32 GFDAddDataTable(GFDDataTable *pTable, void *data, u32 nBytes) { assert( (nBytes & 0x3) == 0 && "nBytes must be multiple of 4"); if(pTable->m_nDB + nBytes + sizeof(u32) >= pTable->m_maxDB) { size_t newSize = pTable->m_nDB; // double size if(nBytes >= newSize) newSize = nBytes*2; // if doubling isn't enough, make bigger int finalSize = (pTable->m_nDB + newSize + 0x1f) & ~0x1f; // finally round to nearest 32 bytes GFDGrowDataTableDB(pTable, finalSize); } u32 off = pTable->m_nDB; // offset is data, after the length... memcpy(pTable->m_pDB + pTable->m_nDB, data, nBytes); // update our structs pointing to it pTable->m_nDB += nBytes; pTable->m_n += 1; return off | GFD_TAG_DAT; // OR in this silly constant to use for catching errors } //-------------------------------------------------------------------------- /// Repack a vertex shader from a 64-bit structure to a 32-bit structure. /// We output a GX2VertexShader * for convenience, but it is not valid for 64-bit. /// All pointers are cast to 32-bit integers. It is therefore 7*4 bytes shorter. /// The return value is the resulting 32-bit structure size. 
u32 GFDRepackVertexShaderFor32Bit(GX2VertexShader *pVSin64, GFDVertexShader *pVSout32)
{
    // Fixed-size arrays: same size on both sides, so copy them wholesale.
    assert(sizeof(pVSout32->_regs) == sizeof(pVSin64->_regs));
    memcpy(&pVSout32->_regs, pVSin64->_regs, sizeof(pVSout32->_regs));

    assert(sizeof(pVSout32->streamOutVertexStride) == sizeof(pVSin64->streamOutVertexStride));
    memcpy(&pVSout32->streamOutVertexStride, pVSin64->streamOutVertexStride, sizeof(pVSout32->streamOutVertexStride));

    // Sizes, counts and other plain values carry over unchanged.
    pVSout32->shaderSize       = pVSin64->shaderSize;
    pVSout32->shaderMode       = pVSin64->shaderMode;
    pVSout32->numUniformBlocks = pVSin64->numUniformBlocks;
    pVSout32->numUniforms      = pVSin64->numUniforms;
    pVSout32->numInitialValues = pVSin64->numInitialValues;
    pVSout32->_numLoops        = pVSin64->_numLoops;
    pVSout32->numSamplers      = pVSin64->numSamplers;
    pVSout32->numAttribs       = pVSin64->numAttribs;
    pVSout32->ringItemsize     = pVSin64->ringItemsize;
    pVSout32->hasStreamOut     = (u32) pVSin64->hasStreamOut;

    // Pointers are squeezed into 32-bit slots.
    pVSout32->shaderPtr     = (u32) pVSin64->shaderPtr;
    pVSout32->uniformBlocks = (u32) pVSin64->uniformBlocks;
    pVSout32->uniformVars   = (u32) pVSin64->uniformVars;
    pVSout32->initialValues = (u32) pVSin64->initialValues;
    pVSout32->_loopVars     = (u32) pVSin64->_loopVars;
    pVSout32->samplerVars   = (u32) pVSin64->samplerVars;
    pVSout32->attribVars    = (u32) pVSin64->attribVars;

    // The resource buffer descriptor is copied by value.
    pVSout32->shaderProgram = pVSin64->shaderProgram;

    return sizeof(GFDVertexShader);
}

/// Repack a pixel shader from a 64-bit structure to a 32-bit structure.
/// The output is a GFDPixelShader, in which every pointer field of
/// GX2PixelShader (6 of them) has been cast to a 32-bit integer.
/// The return value is the resulting 32-bit structure size.
u32 GFDRepackPixelShaderFor32Bit(GX2PixelShader *pPSin64, GFDPixelShader *pPSout32)
{
    // The register block has the same size on both sides, so copy it wholesale.
    assert(sizeof(pPSout32->_regs) == sizeof(pPSin64->_regs));
    memcpy(&pPSout32->_regs, pPSin64->_regs, sizeof(pPSout32->_regs));

    // Sizes, counts and other plain values carry over unchanged.
    pPSout32->shaderSize       = pPSin64->shaderSize;
    pPSout32->shaderMode       = pPSin64->shaderMode;
    pPSout32->numUniformBlocks = pPSin64->numUniformBlocks;
    pPSout32->numUniforms      = pPSin64->numUniforms;
    pPSout32->numInitialValues = pPSin64->numInitialValues;
    pPSout32->_numLoops        = pPSin64->_numLoops;
    pPSout32->numSamplers      = pPSin64->numSamplers;

    // Pointers are squeezed into 32-bit slots.
    pPSout32->shaderPtr     = (u32) pPSin64->shaderPtr;
    pPSout32->uniformBlocks = (u32) pPSin64->uniformBlocks;
    pPSout32->uniformVars   = (u32) pPSin64->uniformVars;
    pPSout32->initialValues = (u32) pPSin64->initialValues;
    pPSout32->_loopVars     = (u32) pPSin64->_loopVars;
    pPSout32->samplerVars   = (u32) pPSin64->samplerVars;

    // The resource buffer descriptor is copied by value.
    pPSout32->shaderProgram = pPSin64->shaderProgram;

    return sizeof(GFDPixelShader);
}

/// Repack a geometry shader from a 64-bit structure to a 32-bit structure.
/// The output is a GFDGeometryShader, in which every pointer field of
/// GX2GeometryShader (7 of them) has been cast to a 32-bit integer.
/// The return value is the resulting 32-bit structure size.
u32 GFDRepackGeometryShaderFor32Bit(GX2GeometryShader *pGSin64, GFDGeometryShader *pGSout32)
{
    // Fixed-size arrays: same size on both sides, so copy them wholesale.
    assert(sizeof(pGSout32->_regs) == sizeof(pGSin64->_regs));
    memcpy(&pGSout32->_regs, pGSin64->_regs, sizeof(pGSout32->_regs));

    assert(sizeof(pGSout32->streamOutVertexStride) == sizeof(pGSin64->streamOutVertexStride));
    memcpy(&pGSout32->streamOutVertexStride, pGSin64->streamOutVertexStride, sizeof(pGSout32->streamOutVertexStride));

    // Sizes, counts and other plain values carry over unchanged.
    pGSout32->shaderSize       = pGSin64->shaderSize;
    pGSout32->copyShaderSize   = pGSin64->copyShaderSize;
    pGSout32->shaderMode       = pGSin64->shaderMode;
    pGSout32->numUniformBlocks = pGSin64->numUniformBlocks;
    pGSout32->numUniforms      = pGSin64->numUniforms;
    pGSout32->numInitialValues = pGSin64->numInitialValues;
    pGSout32->_numLoops        = pGSin64->_numLoops;
    pGSout32->numSamplers      = pGSin64->numSamplers;
    pGSout32->ringItemsize     = pGSin64->ringItemsize;
    pGSout32->hasStreamOut     = (u32) pGSin64->hasStreamOut;

    // Pointers are squeezed into 32-bit slots.
    pGSout32->shaderPtr     = (u32) pGSin64->shaderPtr;
    pGSout32->copyShaderPtr = (u32) pGSin64->copyShaderPtr;
    pGSout32->uniformBlocks = (u32) pGSin64->uniformBlocks;
    pGSout32->uniformVars   = (u32) pGSin64->uniformVars;
    pGSout32->initialValues = (u32) pGSin64->initialValues;
    pGSout32->_loopVars     = (u32) pGSin64->_loopVars;
    pGSout32->samplerVars   = (u32) pGSin64->samplerVars;

    // The resource buffer descriptors are copied by value.
    pGSout32->shaderProgram     = pGSin64->shaderProgram;
    pGSout32->copyShaderProgram = pGSin64->copyShaderProgram;

    return sizeof(GFDGeometryShader);
}

/// Repack a compute shader from a 64-bit structure to a 32-bit structure.
/// The output is a GFDComputeShader, in which every pointer field of
/// GX2ComputeShader (6 of them) has been cast to a 32-bit integer.
/// The return value is the resulting 32-bit structure size.
u32 GFDRepackComputeShaderFor32Bit(GX2ComputeShader *pCSin64, GFDComputeShader *pCSout32) { assert(sizeof(pCSout32->_regs) == sizeof(pCSin64->_regs)); memcpy(&pCSout32->_regs, pCSin64->_regs, sizeof(pCSout32->_regs)); pCSout32->shaderSize = pCSin64->shaderSize; pCSout32->shaderPtr = (u32) pCSin64->shaderPtr; pCSout32->numUniformBlocks = pCSin64->numUniformBlocks; pCSout32->uniformBlocks = (u32) pCSin64->uniformBlocks; pCSout32->numUniforms = pCSin64->numUniforms; pCSout32->uniformVars = (u32) pCSin64->uniformVars; pCSout32->numInitialValues = pCSin64->numInitialValues; pCSout32->initialValues = (u32) pCSin64->initialValues; pCSout32->_numLoops = pCSin64->_numLoops; pCSout32->_loopVars = (u32) pCSin64->_loopVars; pCSout32->numSamplers = pCSin64->numSamplers; pCSout32->samplerVars = (u32) pCSin64->samplerVars; pCSout32->layout_size_x = pCSin64->layout_size_x; pCSout32->layout_size_y = pCSin64->layout_size_y; pCSout32->layout_size_z = pCSin64->layout_size_z; pCSout32->Over64Mode = pCSin64->Over64Mode; pCSout32->numWavesPerSIMD = pCSin64->numWavesPerSIMD; pCSout32->shaderProgram = pCSin64->shaderProgram; return sizeof(GFDComputeShader); } /// Repack a uniform block array from a 64-bit structure to a 32-bit structure. /// We output a GX2UniformBlock * for convenience, but it is not valid for 64-bit. /// All pointers are cast to 32-bit integers. It is therefore 1*4*n bytes shorter. /// The return value is the resulting 32-bit structure size. u32 GFDRepackUniformBlockArrayFor32Bit(GX2UniformBlock *pUBin64, GFDUniformBlock *pUBout32, u32 n) { for(u32 i=0; im_nDB; // current offset... 
// s1: Store each uniform block name (in common string table), as well as pointer to the initial value in uniform block for(u32 i = 0; i < vsCopy.numUniformBlocks; i++) { pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, vsCopy.uniformBlocks[i].name ); pAddr[nE] = pOffset[oUniformBuffers] + i * sizeof(GFDUniformBlock) + (u32)&vsCopy.uniformBlocks[i].name - (u32)&vsCopy.uniformBlocks[i]; nE++; } // s2: Store each uniform name (in common string table) for(u32 i = 0; i < vsCopy.numUniforms; i++) { pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, vsCopy.uniformVars[i].name ); pAddr[nE] = pOffset[oUniforms] + i * sizeof(GFDUniformVar) + (u32)&vsCopy.uniformVars[i].name - (u32)&vsCopy.uniformVars[i]; nE++; } // s3: Store each sampler name (in common string table) for(u32 i = 0; i < vsCopy.numSamplers; i++) { pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, vsCopy.samplerVars[i].name ); pAddr[nE] = pOffset[oSamplers] + i * sizeof(GFDSamplerVar) + (u32)&vsCopy.samplerVars[i].name - (u32)&vsCopy.samplerVars[i]; nE++; } // s4: Store each attrib name (in common string table) for(u32 i = 0; i < vsCopy.numAttribs; i++) { pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, vsCopy.attribVars[i].name ); pAddr[nE] = pOffset[oAttrib_names] + i * sizeof(GFDAttribVar) + (u32)&vsCopy.attribVars[i].name - (u32)&vsCopy.attribVars[i]; nE++; } // 8: Store the string table (watch out for 1-3 bytes of padding) int oStringTable = nE; // Note, although arrays of chars don't seem to be modified to go into network order, we cache our // stringtable as a block in a word array which does get byte-flipped. So let's pre-flip it here // so it comes out right. 
int nWordsStrTbl = (pStrTable->m_nDB + 0x3) / 4; GFDEndianSwap8in32((u32*) pStrTable->m_pDB, nWordsStrTbl); pOffset[nE] = GFDAddDataTable(pDT, pStrTable->m_pDB, nWordsStrTbl*4); // simply write out string table data pAddr[nE] = 0; // don't patch this location nE++; // let's convert it back so if we read it latter, we won't have problems GFDEndianSwap8in32((u32*) pStrTable->m_pDB, nWordsStrTbl); // end0: Turn all our patch addresses to offsets for(int i = 0; i < nE; i++) { if(pAddr[i] != 0) { *((u32*) (pDT->m_pDB + GFDCleanTag(pAddr[i]))) = pOffset[i]; } } // 9: Store the offset patch list // (not *really* needed, could reconstruct if know all data types, but makes it a *lot* easier) // After reading data block in at addrX, increment each location i of *(addrX + pAddr[i]) += addrX; // By putting this after main, we can allocate space for it at same time and dellocate it when main goes away, // without affecting the main data. int oPatchTable = nE; pOffset[nE] = GFDAddDataTable(pDT, pAddr, nElements * sizeof(u32)); // simply write out zeros now.. pAddr[nE] = 0; // don't patch this location either nE++; // 10: Finally, a small structure describing this data block. 
GFDBlockRelocationHeader *vshTrailer = (GFDBlockRelocationHeader *) malloc( sizeof(GFDBlockRelocationHeader) ); memset(vshTrailer, 0, sizeof(GFDBlockRelocationHeader)); vshTrailer->magic = GFD_SWAP_8_IN_32(GFD_BLOCK_RELOCATION_HEADER_MAGIC); vshTrailer->type = 0; vshTrailer->size = sizeof(GFDBlockRelocationHeader); int oTrailer = nE; // Fill in our trailer and write it out vshTrailer->dataSize = GFDCleanTag(pOffset[oPatchTable]) - GFDCleanTag(pOffset[oMain]); // size of the main data section (allocate this size of contiguous memory) vshTrailer->dataOffset = pOffset[oMain]; // offset of the main data section in this block vshTrailer->stringTableCharNumber = pStrTable->m_nDB; // number of characters in the string table vshTrailer->stringTableOffset = pOffset[oStringTable]; // offset of string table in this block vshTrailer->patchTableOffsetNumber = nE; // number of offsets in the patch table vshTrailer->patchTableOffset = pOffset[oPatchTable] ; // offset of the patch table in this block pOffset[nE] = GFDAddDataTable(pDT, vshTrailer, sizeof(GFDBlockRelocationHeader)); pAddr[nE] = 0; // don't patch this location nE++; free(vshTrailer); vshTrailer = NULL; assert(offStringTable == GFDCleanTag(pOffset[oStringTable]) && "Guess for offset table in 7a was wrong"); assert(nE <= nElements && "Too few offsets allocated"); free(pAddr); free(pOffset); GFDDestroyStringTable( pStrTable ); return pDT; } /// Create the flat datablock representation of a GX2PixelShader structure /// Call GFDDataTableDestroy() on returned object once doen with it. 
GFDDataTable* GFDCreateBlockRelocateHeaderPSH(GX2PixelShader *pPS)
{
    // Layout written to the returned data table, in this order:
    //   main GFDPixelShader | uniform blocks | uniform vars | initial values |
    //   loop vars | sampler vars | string table | patch table | trailer
    // Returns NULL (after printing an error) if memory cannot be allocated.

    // Create second data structure to hold flattened, offseted version of our original shader
    GX2PixelShader psCopy;
    memcpy(&psCopy, pPS, sizeof(GX2PixelShader));

    // Walk thru copy, converting all pointers to data blocks in table, and changing
    // addresses to offsets into the data block

    // How many pointers do we need to patch?  (The 10 here is empirically determined - asserts at end if wrong)
    // 10 = 1 (shader itself) + 6 in GX2PixelShader + 1 (string table) + 1 (patch list) + 1 (trailer)
    int nElements = 10 + psCopy.numUniformBlocks + 1*psCopy.numUniforms + psCopy.numSamplers;
    int nE = 0;
    u32 size;

    const int kAvgCharsPerString = 12;          // string table will auto-grow if actually bigger

    // Acquire every resource up front so that a single check covers all of them.
    // The scratch arrays always hold at least one element: malloc(0) may return
    // NULL, which must not be mistaken for a failure.
    GFDDataTable   *pDT       = GFDCreateDataTable(sizeof(GX2PixelShader) + psCopy.numUniforms * 8 + 1024); // todo - pick better number
    GFDStringTable *pStrTable = GFDCreateStringTable( ( psCopy.numUniformBlocks + psCopy.numUniforms + psCopy.numSamplers) * kAvgCharsPerString);
    u32 *pAddr   = (u32*) malloc( nElements * sizeof(u32));     // src offsets into data block that need patching
    u32 *pOffset = (u32*) malloc( nElements * sizeof(u32));     // dst offsets into data block, containing offsets to write
    GFDUniformBlock *pUB = (GFDUniformBlock *) malloc(sizeof(GFDUniformBlock) * (psCopy.numUniformBlocks ? psCopy.numUniformBlocks : 1));
    GFDUniformVar   *pUV = (GFDUniformVar *)   malloc(sizeof(GFDUniformVar)   * (psCopy.numUniforms      ? psCopy.numUniforms      : 1));
    GFDSamplerVar   *pSV = (GFDSamplerVar *)   malloc(sizeof(GFDSamplerVar)   * (psCopy.numSamplers      ? psCopy.numSamplers      : 1));

    if(!pDT || !pStrTable || !pAddr || !pOffset || !pUB || !pUV || !pSV)
    {
        printf("Error! Failed to allocate pixel shader relocation block!\n");
        free(pSV);
        free(pUV);
        free(pUB);
        free(pOffset);
        free(pAddr);
        GFDDestroyStringTable(pStrTable);
        GFDDestroyDataTable(pDT);
        return NULL;
    }

    memset(pOffset, 0, nElements * sizeof(u32));
    memset(pAddr,   0, nElements * sizeof(u32));

    // 0: Store main structure itself (we'll rewrite offsets at the end).
    // For allocation and alignment purposes, this needs to be the first hunk in the data table
    GFDPixelShader psCopy32;
    size = GFDRepackPixelShaderFor32Bit(&psCopy, &psCopy32);
    int oMain = nE;
    pOffset[nE] = GFDAddDataTable(pDT, &psCopy32, size);
    pAddr[nE]   = 0;                            // don't patch this location
    nE++;

    // 1: Store uniform block array
    size = GFDRepackUniformBlockArrayFor32Bit(psCopy.uniformBlocks, pUB, psCopy.numUniformBlocks);
    int oUniformBuffers = nE;
    pOffset[nE] = GFDAddDataTable(pDT, pUB, size);
    pAddr[nE]   = (psCopy.numUniformBlocks == 0) ? 0 : pOffset[oMain] + (u32)&psCopy32.uniformBlocks - (u32)&psCopy32;
    nE++;

    // 2: Store uniform array
    size = GFDRepackUniformVarArrayFor32Bit(psCopy.uniformVars, pUV, psCopy.numUniforms);
    int oUniforms = nE;
    pOffset[nE] = GFDAddDataTable(pDT, pUV, size);
    pAddr[nE]   = (psCopy.numUniforms == 0) ? 0 : pOffset[oMain] + (u32)&psCopy32.uniformVars - (u32)&psCopy32;
    nE++;

    // 3: Store uniform initial values
    // Note initial values points to uniform block, a contiguous section of GX2UniformInitialValues
    pOffset[nE] = GFDAddDataTable(pDT, psCopy.initialValues, psCopy.numInitialValues * sizeof(GX2UniformInitialValue));
    pAddr[nE]   = (psCopy.numInitialValues == 0) ? 0 : pOffset[oMain] + (u32)&psCopy32.initialValues - (u32)&psCopy32;
    nE++;

    // 4: Store loop array
    pOffset[nE] = GFDAddDataTable(pDT, psCopy._loopVars, psCopy._numLoops * sizeof(GFDLoopVar));
    pAddr[nE]   = (psCopy._numLoops == 0) ? 0 : pOffset[oMain] + (u32)&psCopy32._loopVars - (u32)&psCopy32;
    nE++;

    // 5: Store sampler descriptors
    size = GFDRepackSamplerVarArrayFor32Bit(psCopy.samplerVars, pSV, psCopy.numSamplers);
    int oSamplers = nE;
    pOffset[nE] = GFDAddDataTable(pDT, pSV, size);
    pAddr[nE]   = (psCopy.numSamplers == 0) ? 0 : pOffset[oMain] + (u32)&psCopy32.samplerVars - (u32)&psCopy32;
    nE++;

    // 6a: The string table will be appended right here, so remember where it starts.
    // (Checked by the assert at the end.)
    u32 offStringTable = pDT->m_nDB;            // current offset...

    // s1: Store each uniform block name (in common string table), and record
    //     which name field in the stored uniform block array must be patched with it
    for(u32 i = 0; i < psCopy.numUniformBlocks; i++)
    {
        pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, psCopy.uniformBlocks[i].name );
        pAddr[nE]   = pOffset[oUniformBuffers] + i * sizeof(GFDUniformBlock) + (u32)&psCopy.uniformBlocks[i].name - (u32)&psCopy.uniformBlocks[i];
        nE++;
    }

    // s2: Store each uniform name (in common string table)
    for(u32 i = 0; i < psCopy.numUniforms; i++)
    {
        pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, psCopy.uniformVars[i].name );
        pAddr[nE]   = pOffset[oUniforms] + i * sizeof(GFDUniformVar) + (u32)&psCopy.uniformVars[i].name - (u32)&psCopy.uniformVars[i];
        nE++;
    }

    // s3: Store each sampler name (in common string table)
    for(u32 i = 0; i < psCopy.numSamplers; i++)
    {
        pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, psCopy.samplerVars[i].name );
        pAddr[nE]   = pOffset[oSamplers] + i * sizeof(GFDSamplerVar) + (u32)&psCopy.samplerVars[i].name - (u32)&psCopy.samplerVars[i];
        nE++;
    }

    // 7: Store the string table (watch out for 1-3 bytes of padding)
    int oStringTable = nE;
    // Note, although arrays of chars don't seem to be modified to go into network order, we cache our
    // stringtable as a block in a word array which does get byte-flipped.  So let's pre-flip it here
    // so it comes out right.
    int nWordsStrTbl = (pStrTable->m_nDB + 0x3) / 4;
    GFDEndianSwap8in32((u32*) pStrTable->m_pDB, nWordsStrTbl);
    pOffset[nE] = GFDAddDataTable(pDT, pStrTable->m_pDB, nWordsStrTbl*4);   // simply write out string table data
    pAddr[nE]   = 0;                            // don't patch this location
    nE++;
    // let's convert it back so if we read it later, we won't have problems
    GFDEndianSwap8in32((u32*) pStrTable->m_pDB, nWordsStrTbl);

    // end0: Turn all our patch addresses to offsets:
    // write each recorded offset into the data table at its patch location
    for(int i = 0; i < nE; i++)
    {
        if(pAddr[i] != 0)
        {
            *((u32*) (pDT->m_pDB + GFDCleanTag(pAddr[i]))) = pOffset[i];
        }
    }

    // 8: Store the offset patch list
    // (not *really* needed, could reconstruct if know all data types, but makes it a *lot* easier)
    // After reading data block in at addrX, increment each location i of *(addrX + pAddr[i]) += addrX;
    // By putting this after main, we can allocate space for it at same time and deallocate it when main goes away,
    // without affecting the main data.
    int oPatchTable = nE;
    pOffset[nE] = GFDAddDataTable(pDT, pAddr, nElements * sizeof(u32));
    pAddr[nE]   = 0;                            // don't patch this location either
    nE++;

    // 9: Finally, a small structure describing this data block.
    GFDBlockRelocationHeader pshTrailer;
    memset(&pshTrailer, 0, sizeof(GFDBlockRelocationHeader));
    pshTrailer.magic = GFD_SWAP_8_IN_32(GFD_BLOCK_RELOCATION_HEADER_MAGIC);
    pshTrailer.type  = 0;
    pshTrailer.size  = sizeof(GFDBlockRelocationHeader);

    // Fill in our trailer and write it out
    pshTrailer.dataSize               = GFDCleanTag(pOffset[oPatchTable]) - GFDCleanTag(pOffset[oMain]);   // size of the main data section (allocate this size of contiguous memory)
    pshTrailer.dataOffset             = pOffset[oMain];             // offset of the main data section in this block
    pshTrailer.stringTableCharNumber  = pStrTable->m_nDB;           // number of characters in the string table
    pshTrailer.stringTableOffset      = pOffset[oStringTable];      // offset of string table in this block
    pshTrailer.patchTableOffsetNumber = nE;                         // number of offsets in the patch table
    pshTrailer.patchTableOffset       = pOffset[oPatchTable];       // offset of the patch table in this block

    pOffset[nE] = GFDAddDataTable(pDT, &pshTrailer, sizeof(GFDBlockRelocationHeader));
    pAddr[nE]   = 0;                            // don't patch this location
    nE++;

    assert(offStringTable == GFDCleanTag(pOffset[oStringTable]) && "Guess for offset table in 7a was wrong");
    assert(nE <= nElements && "Too few offsets allocated");

    free(pSV);
    free(pUV);
    free(pUB);
    free(pAddr);
    free(pOffset);
    GFDDestroyStringTable( pStrTable );

    return pDT;
}

/// Create the flat datablock representation of a GX2GeometryShader structure
/// Call GFDDestroyDataTable() on returned object once done with it.
// Flattens a GX2GeometryShader into one relocatable data table.
//
// Hunks are appended in this order: repacked 32-bit shader struct, uniform block array,
// uniform variable array, uniform initial values, loop variables, sampler array, string table,
// patch table, GFDBlockRelocationHeader trailer.  Every pointer field inside the flattened
// data is overwritten with the offset of the hunk it used to point at.
//
//   pGS      shader to flatten.  A private copy of the struct is taken and *pGS is never
//            assigned to here.  NULL yields NULL.
//   returns  the new data table, or NULL when an allocation fails (nothing is leaked in that
//            case).  The caller owns the table and releases it with GFDDestroyDataTable().
GFDDataTable* GFDCreateBlockRelocateHeaderGSH(GX2GeometryShader *pGS)
{
    // Byte offset of `member` inside `obj`.  Pointer subtraction is used instead of casting
    // each address to u32, so nothing is truncated on a 64-bit host.
#define GSH_MEMBER_OFFSET(obj, member) ((u32)((char*)&(obj).member - (char*)&(obj)))

    // Everything that needs releasing is declared and initialised before the first `goto`,
    // so the single cleanup path below is valid from any point in the function.
    const int kAvgCharsPerString = 12;      // string table will auto-grow if actually bigger

    GX2GeometryShader         gsCopy;       // shallow copy of the caller's (64-bit host) struct
    GFDGeometryShader         gsCopy32;     // repacked 32-bit layout written to the table
    GFDBlockRelocationHeader  gshTrailer;
    GFDDataTable             *pDT       = NULL;
    GFDStringTable           *pStrTable = NULL;
    GFDUniformBlock          *pUB       = NULL;
    GFDUniformVar            *pUV       = NULL;
    GFDSamplerVar            *pSV       = NULL;
    u32                      *pAddr     = NULL;   // src offsets into data block that need patching
    u32                      *pOffset   = NULL;   // dst offsets to write at those locations
    bool                      ok        = false;
    int                       nElements = 0;
    int                       nE        = 0;
    int                       nWordsStrTbl = 0;
    int                       oMain, oUniformBuffers, oUniforms, oSamplers, oStringTable, oPatchTable;
    u32                       size      = 0;
    u32                       offStringTable = 0;

    if (pGS == NULL)
        return NULL;

    memcpy(&gsCopy, pGS, sizeof(GX2GeometryShader));

    // Create data table to hold the structure elements
    pDT = GFDCreateDataTable(sizeof(GX2GeometryShader) + gsCopy.numUniforms * 8 + 512);   // todo - pick better number
    if (!pDT)
    {
        printf("Error! Can't create GFD Data Table!\n");
        goto cleanup;
    }

    // How many pointers do we need to patch?  (The 11 here is empirically determined - asserts at end if wrong)
    nElements = 11 + gsCopy.numUniformBlocks + gsCopy.numUniforms + gsCopy.numSamplers;

    // calloc gives the zero fill the original malloc+memset pair produced.  The scratch arrays
    // are sized for at least one element so that an empty array never relies on what
    // malloc(0) returns.
    pAddr   = (u32*) calloc(nElements, sizeof(u32));
    pOffset = (u32*) calloc(nElements, sizeof(u32));
    pUB = (GFDUniformBlock *) malloc(sizeof(GFDUniformBlock) * (gsCopy.numUniformBlocks ? gsCopy.numUniformBlocks : 1));
    pUV = (GFDUniformVar *)   malloc(sizeof(GFDUniformVar)   * (gsCopy.numUniforms      ? gsCopy.numUniforms      : 1));
    pSV = (GFDSamplerVar *)   malloc(sizeof(GFDSamplerVar)   * (gsCopy.numSamplers      ? gsCopy.numSamplers      : 1));
    pStrTable = GFDCreateStringTable((gsCopy.numUniformBlocks + gsCopy.numUniforms + gsCopy.numSamplers) * kAvgCharsPerString);
    if (!pAddr || !pOffset || !pUB || !pUV || !pSV || !pStrTable)
    {
        printf("Error! Memory allocation failure!\n");
        goto cleanup;
    }

    // 0: Store main structure itself (we'll rewrite offsets at the end).
    //    For allocation and alignment purposes, this needs to be the first hunk in the data table
    size = GFDRepackGeometryShaderFor32Bit(&gsCopy, &gsCopy32);
    oMain = nE;
    pOffset[nE] = GFDAddDataTable(pDT, &gsCopy32, size);
    pAddr[nE]   = 0;                                        // don't patch this location
    nE++;

    // 1: Store uniform block/buffer array
    size = GFDRepackUniformBlockArrayFor32Bit(gsCopy.uniformBlocks, pUB, gsCopy.numUniformBlocks);
    oUniformBuffers = nE;
    pOffset[nE] = GFDAddDataTable(pDT, pUB, size);
    pAddr[nE]   = (gsCopy.numUniformBlocks == 0) ? 0 : pOffset[oMain] + GSH_MEMBER_OFFSET(gsCopy32, uniformBlocks);
    nE++;

    // 2: Store uniform array
    size = GFDRepackUniformVarArrayFor32Bit(gsCopy.uniformVars, pUV, gsCopy.numUniforms);
    oUniforms = nE;
    pOffset[nE] = GFDAddDataTable(pDT, pUV, size);
    pAddr[nE]   = (gsCopy.numUniforms == 0) ? 0 : pOffset[oMain] + GSH_MEMBER_OFFSET(gsCopy32, uniformVars);
    nE++;

    // 3: Store uniform initial values
    //    Note initial values points to a contiguous section of GX2UniformInitialValues
    pOffset[nE] = GFDAddDataTable(pDT, gsCopy.initialValues, gsCopy.numInitialValues * sizeof(GX2UniformInitialValue));
    pAddr[nE]   = (gsCopy.numInitialValues == 0) ? 0 : pOffset[oMain] + GSH_MEMBER_OFFSET(gsCopy32, initialValues);
    nE++;

    // 4: Store loop variables
    pOffset[nE] = GFDAddDataTable(pDT, gsCopy._loopVars, gsCopy._numLoops * sizeof(GFDLoopVar));
    pAddr[nE]   = (gsCopy._numLoops == 0) ? 0 : pOffset[oMain] + GSH_MEMBER_OFFSET(gsCopy32, _loopVars);
    nE++;

    // 5: Store sampler descriptors
    size = GFDRepackSamplerVarArrayFor32Bit(gsCopy.samplerVars, pSV, gsCopy.numSamplers);
    oSamplers = nE;
    pOffset[nE] = GFDAddDataTable(pDT, pSV, size);
    pAddr[nE]   = (gsCopy.numSamplers == 0) ? 0 : pOffset[oMain] + GSH_MEMBER_OFFSET(gsCopy32, samplerVars);
    nE++;

    // 6: Collect every name in one string table.  The table is appended to the data block in
    //    step 7, so its offset there is the current end of the block.
    offStringTable = pDT->m_nDB;

    // s1: Store each uniform block name
    for (u32 i = 0; i < gsCopy.numUniformBlocks; i++)
    {
        pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, gsCopy.uniformBlocks[i].name);
        pAddr[nE]   = pOffset[oUniformBuffers] + i * sizeof(GFDUniformBlock) + GSH_MEMBER_OFFSET(gsCopy.uniformBlocks[i], name);
        nE++;
    }

    // s2: Store each uniform name
    for (u32 i = 0; i < gsCopy.numUniforms; i++)
    {
        pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, gsCopy.uniformVars[i].name);
        pAddr[nE]   = pOffset[oUniforms] + i * sizeof(GFDUniformVar) + GSH_MEMBER_OFFSET(gsCopy.uniformVars[i], name);
        nE++;
    }

    // s3: Store each sampler name
    for (u32 i = 0; i < gsCopy.numSamplers; i++)
    {
        pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, gsCopy.samplerVars[i].name);
        pAddr[nE]   = pOffset[oSamplers] + i * sizeof(GFDSamplerVar) + GSH_MEMBER_OFFSET(gsCopy.samplerVars[i], name);
        nE++;
    }

    // 7: Store the string table (watch out for 1-3 bytes of padding)
    //    The table is cached as a block in a word array which does get byte-flipped on output,
    //    so pre-flip it here so it comes out right, then flip it back afterwards.
    oStringTable = nE;
    nWordsStrTbl = (pStrTable->m_nDB + 0x3) / 4;
    GFDEndianSwap8in32((u32*) pStrTable->m_pDB, nWordsStrTbl);
    pOffset[nE] = GFDAddDataTable(pDT, pStrTable->m_pDB, nWordsStrTbl*4);
    pAddr[nE]   = 0;                                        // don't patch this location
    nE++;
    GFDEndianSwap8in32((u32*) pStrTable->m_pDB, nWordsStrTbl);

    // end0: Turn all our patch addresses to offsets
    for (int i = 0; i < nE; i++)
    {
        if (pAddr[i] != 0)
        {
            *((u32*) (pDT->m_pDB + GFDCleanTag(pAddr[i]))) = pOffset[i];
        }
    }

    // 8: Store the offset patch list
    //    (not *really* needed, could reconstruct if know all data types, but makes it a *lot* easier)
    //    After reading data block in at addrX, increment each location i of *(addrX + pAddr[i]) += addrX;
    //    By putting this after main, we can allocate space for it at same time and deallocate it
    //    when main goes away, without affecting the main data.
    oPatchTable = nE;
    pOffset[nE] = GFDAddDataTable(pDT, pAddr, nElements * sizeof(u32));
    pAddr[nE]   = 0;                                        // don't patch this location either
    nE++;

    // 9: Finally, a small structure describing this data block.
    memset(&gshTrailer, 0, sizeof(GFDBlockRelocationHeader));
    gshTrailer.magic = GFD_SWAP_8_IN_32(GFD_BLOCK_RELOCATION_HEADER_MAGIC);
    gshTrailer.type  = 0;
    gshTrailer.size  = sizeof(GFDBlockRelocationHeader);
    gshTrailer.dataSize               = GFDCleanTag(pOffset[oPatchTable]) - GFDCleanTag(pOffset[oMain]);   // size of the main data section
    gshTrailer.dataOffset             = pOffset[oMain];           // offset of the main data section in this block
    gshTrailer.stringTableCharNumber  = pStrTable->m_nDB;         // number of characters in the string table
    gshTrailer.stringTableOffset      = pOffset[oStringTable];    // offset of string table in this block
    gshTrailer.patchTableOffsetNumber = nE;                       // number of offsets in the patch table
    gshTrailer.patchTableOffset       = pOffset[oPatchTable];     // offset of the patch table in this block

    pOffset[nE] = GFDAddDataTable(pDT, &gshTrailer, sizeof(GFDBlockRelocationHeader));
    pAddr[nE]   = 0;                                        // don't patch this location
    nE++;

    assert(offStringTable == GFDCleanTag(pOffset[oStringTable]) && "Guess for offset table in 7a was wrong");
    assert(nE <= nElements && "Too few offsets allocated");
    ok = true;

cleanup:
    free(pSV);
    free(pUV);
    free(pUB);
    free(pAddr);
    free(pOffset);
    if (pStrTable)
        GFDDestroyStringTable(pStrTable);
    if (!ok && pDT)
    {
        GFDDestroyDataTable(pDT);
        pDT = NULL;
    }
    return pDT;

#undef GSH_MEMBER_OFFSET
}

/// Create the flat datablock representation of a GX2ComputeShader structure
/// Call GFDDestroyDataTable() on returned object once done with it.
GFDDataTable* GFDCreateBlockRelocateHeaderCSH(GX2ComputeShader *pCS) { // Create second data structure to hold flattened, offseted version of our original shader GX2ComputeShader csCopy; memcpy(&csCopy, pCS, sizeof(GX2ComputeShader)); // Walk thru copy, converting all pointers to data blocks in table, and changing // addresses to offsets into the data block // Create data table to hold the structure elements GFDDataTable *pDT = GFDCreateDataTable(sizeof(GX2ComputeShader) + csCopy.numUniformBlocks * sizeof(GX2UniformBlock) + csCopy.numUniforms * sizeof(GX2UniformVar) + csCopy.numSamplers * sizeof(GX2SamplerVar) + 512); // todo - pick a better number for string table if ( !pDT ) { printf("Error! Can't create GFD Data Table!\n"); return NULL; } // How many pointers do we need to patch? (The value here is empirically determined - asserts at end if if wrong) // 10 = 1 (shader itself) + 6 in GX2ComputeShader + 1 (string table) + 1 (patch list) + 1 (trailer) int nElements = 10 + csCopy.numUniformBlocks + csCopy.numUniforms + csCopy.numSamplers; int nE = 0; u32 size; u32 *pAddr = (u32*) malloc( nElements * sizeof(u32)); // src offsets into data block that need patching u32 *pOffset = (u32*) malloc( nElements * sizeof(u32)); // dst offsets into data block for, containing offets to write if ( !pAddr || !pOffset ) { printf("Error! Memory allocation failure!\n"); return NULL; } memset(pOffset, 0, nElements * sizeof(u32)); memset(pAddr, 0, nElements * sizeof(u32)); // 0: Store main structure itself (we'll rewrite offsets at the end). 
// For allocation and alignment purposes, this needs to be the first hunk in the data table GFDComputeShader csCopy32; size = GFDRepackComputeShaderFor32Bit(&csCopy, &csCopy32); int oMain = nE; pOffset[nE] = GFDAddDataTable(pDT, &csCopy32, size); pAddr[nE] = 0; // don't patch this location nE++; // 1: Store uniform block/buffer array GFDUniformBlock *pUB = (GFDUniformBlock *) malloc(sizeof(GFDUniformBlock)*csCopy.numUniformBlocks); if ( !pUB ) { printf("Error! Failed to allocate Uniform Block structure!\n"); return NULL; } size = GFDRepackUniformBlockArrayFor32Bit(csCopy.uniformBlocks, pUB, csCopy.numUniformBlocks); int oUniformBuffers = nE; pOffset[nE] = GFDAddDataTable(pDT, pUB, size); pAddr[nE] = (csCopy.numUniformBlocks == 0) ? 0 : pOffset[oMain] + (u32)&csCopy32.uniformBlocks - (u32)&csCopy32; nE++; free(pUB); // 2: Store uniform array GFDUniformVar *pUV = (GFDUniformVar *) malloc(sizeof(GFDUniformVar)*csCopy.numUniforms); if ( !pUV ) { printf("Error! Failed to allocate Uniform Variable structure!\n"); return NULL; } size = GFDRepackUniformVarArrayFor32Bit(csCopy.uniformVars, pUV, csCopy.numUniforms); int oUniforms = nE; pOffset[nE] = GFDAddDataTable(pDT, pUV, size); pAddr[nE] = (csCopy.numUniforms == 0) ? 0 : pOffset[oMain] + (u32)&csCopy32.uniformVars - (u32)&csCopy32; nE++; free(pUV); // 3: Store uniform initial values // Note initial values points to uniform block, a contiguous section of GX2UniformInitialValues int oUniformData = nE; pOffset[nE] = GFDAddDataTable(pDT, csCopy.initialValues, csCopy.numInitialValues * sizeof(GX2UniformInitialValue)); pAddr[nE] = (csCopy.numInitialValues == 0) ? 0 : pOffset[oMain] + (u32)&csCopy32.initialValues - (u32)&csCopy32; nE++; // 4: Store loop variables int oLoops = nE; pOffset[nE] = GFDAddDataTable(pDT, csCopy._loopVars, csCopy._numLoops * sizeof(GFDLoopVar)); pAddr[nE] = (csCopy._numLoops == 0) ? 
0 : pOffset[oMain] + (u32)&csCopy32._loopVars - (u32)&csCopy32; nE++; // 5: Store sampler descriptors GFDSamplerVar *pSV = (GFDSamplerVar *) malloc(sizeof(GFDSamplerVar)*csCopy.numSamplers); if ( !pSV ) { printf("Error! Failed to allocate Sampler Variable structure!\n"); return NULL; } size = GFDRepackSamplerVarArrayFor32Bit(csCopy.samplerVars, pSV, csCopy.numSamplers); int oSamplers = nE; pOffset[nE] = GFDAddDataTable(pDT, pSV, size); pAddr[nE] = (csCopy.numSamplers == 0) ? 0 : pOffset[oMain] + (u32)&csCopy32.samplerVars - (u32)&csCopy32; nE++; free(pSV); // 6a: Create a string table to store all the strings in const int kAvgCharsPerString = 12; // will auto-grow if actually biger GFDStringTable *pStrTable = GFDCreateStringTable( ( csCopy.numUniformBlocks + csCopy.numUniforms + csCopy.numSamplers) * kAvgCharsPerString); u32 offStringTable = pDT->m_nDB; // current offset... // s1: Store each uniform block name (in common string table), as well as pointer to the initial value in uniform block for(u32 i = 0; i < csCopy.numUniformBlocks; i++) { pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, csCopy.uniformBlocks[i].name ); pAddr[nE] = pOffset[oUniformBuffers] + i * sizeof(GFDUniformBlock) + (u32)&csCopy.uniformBlocks[i].name - (u32)&csCopy.uniformBlocks[i]; nE++; } // s2: Store each uniform name (in common string table) for(u32 i = 0; i < csCopy.numUniforms; i++) { pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, csCopy.uniformVars[i].name ); pAddr[nE] = pOffset[oUniforms] + i * sizeof(GFDUniformVar) + (u32)&csCopy.uniformVars[i].name - (u32)&csCopy.uniformVars[i]; nE++; } // s3: Store each sampler name (in common string table) for(u32 i = 0; i < csCopy.numSamplers; i++) { pOffset[nE] = offStringTable + GFDAddStringTable(pStrTable, csCopy.samplerVars[i].name ); pAddr[nE] = pOffset[oSamplers] + i * sizeof(GFDSamplerVar) + (u32)&csCopy.samplerVars[i].name - (u32)&csCopy.samplerVars[i]; nE++; } // 7: Store the string table (watch out for 1-3 bytes 
of padding) int oStringTable = nE; // Note, although arrays of chars don't seem to be modified to go into network order, we cache our // stringtable as a block in a word array which does get byte-flipped. So let's pre-flip it here // so it comes out right. int nWordsStrTbl = (pStrTable->m_nDB + 0x3) / 4; GFDEndianSwap8in32((u32*) pStrTable->m_pDB, nWordsStrTbl); pOffset[nE] = GFDAddDataTable(pDT, pStrTable->m_pDB, nWordsStrTbl*4); // simply write out string table data pAddr[nE] = 0; // don't patch this location nE++; // let's convert it back so if we read it latter, we won't have problems GFDEndianSwap8in32((u32*) pStrTable->m_pDB, nWordsStrTbl); // end0: Turn all our patch addresses to offsets for(int i = 0; i < nE; i++) { if(pAddr[i] != 0) { *((u32*) (pDT->m_pDB + GFDCleanTag(pAddr[i]))) = pOffset[i]; } } // 8: Store the offset patch list // (not *really* needed, could reconstruct if know all data types, but makes it a *lot* easier) // After reading data block in at addrX, increment each location i of *(addrX + pAddr[i]) += addrX; // By putting this after main, we can allocate space for it at same time and dellocate it when main goes away, // without affecting the main data. int oPatchTable = nE; pOffset[nE] = GFDAddDataTable(pDT, pAddr, nElements * sizeof(u32)); // simply write out zeros now.. pAddr[nE] = 0; // don't patch this location either nE++; // 9: Finally, a small structure describing this data block. GFDBlockRelocationHeader *vshTrailer = (GFDBlockRelocationHeader *) malloc( sizeof(GFDBlockRelocationHeader) ); if ( !vshTrailer ) { printf("Error! 
Failed to allocate Block Relocation structure!\n"); return NULL; } memset(vshTrailer, 0, sizeof(GFDBlockRelocationHeader)); vshTrailer->magic = GFD_SWAP_8_IN_32(GFD_BLOCK_RELOCATION_HEADER_MAGIC); vshTrailer->type = 0; vshTrailer->size = sizeof(GFDBlockRelocationHeader); int oTrailer = nE; // Fill in our trailer and write it out vshTrailer->dataSize = GFDCleanTag(pOffset[oPatchTable]) - GFDCleanTag(pOffset[oMain]); // size of the main data section (allocate this size of contiguous memory) vshTrailer->dataOffset = pOffset[oMain]; // offset of the main data section in this block vshTrailer->stringTableCharNumber = pStrTable->m_nDB; // number of characters in the string table vshTrailer->stringTableOffset = pOffset[oStringTable]; // offset of string table in this block vshTrailer->patchTableOffsetNumber = nE; // number of offsets in the patch table vshTrailer->patchTableOffset = pOffset[oPatchTable] ; // offset of the patch table in this block pOffset[nE] = GFDAddDataTable(pDT, vshTrailer, sizeof(GFDBlockRelocationHeader)); pAddr[nE] = 0; // don't patch this location nE++; free(vshTrailer); vshTrailer = NULL; assert(offStringTable == GFDCleanTag(pOffset[oStringTable]) && "Guess for offset table in 7a was wrong"); assert(nE <= nElements && "Too few offsets allocated"); free(pAddr); free(pOffset); GFDDestroyStringTable( pStrTable ); return pDT; } bool GFDWriteFileVertexShaderBlock(FILE *fp, GFDEndianSwapMode swapMode, GFDAlignMode alignMode, GX2VertexShader *pVS) { if(pVS == NULL) return false; // Get info about the actual shader program u32 nBytesProg = pVS->shaderSize; void* pDataProg = pVS->shaderPtr; // Current location, we will relocate it // Set NULL pVS->shaderPtr = NULL; // Now convert structure into a flat, relocatable format GFDDataTable *pDT_VS = GFDCreateBlockRelocateHeaderVSH(pVS); if(NULL == pDT_VS) return false; int nBytesVSStruct = pDT_VS->m_nDB; // How big is our vertex struct (changes size due to uniforms, samplers, and other varying things) // Write 
header for VS struct if(!GFDWriteFileBlockHeader(fp, GFD_BLOCK_TYPE_GX2_VSH_HEADER, nBytesVSStruct)) return false; // Write VS struct if(!GFDWriteFilePPCData(fp, (nBytesVSStruct + 3) / 4, GFD_ELEMENT_SIZE_32, (u32 *) pDT_VS->m_pDB)) return false; // Add pad block if(alignMode) { // Calc padding size for shader align u32 padSize = ((u32)(ftell(fp) + 2 * GFD_BLOCK_HEADER_SIZE +(GX2_SHADER_ALIGNMENT-1)) & ~(GX2_SHADER_ALIGNMENT-1)) - (u32)ftell(fp) - 2 * GFD_BLOCK_HEADER_SIZE; while(padSize < 0) padSize += GX2_SHADER_ALIGNMENT; if(!GFDWriteFilePadBlock(fp, padSize)) return false; } // Write out Header for program block if(!GFDWriteFileBlockHeader(fp, GFD_BLOCK_TYPE_GX2_VSH_PROGRAM, nBytesProg)) return false; // Write program data block if(!GFDWriteFileGPUData(fp, (nBytesProg + 3) / 4, GFD_ELEMENT_SIZE_32, swapMode, (u32 *) pDataProg)) return false; GFDDestroyDataTable(pDT_VS); // Restore pVS->shaderPtr = pDataProg; return true; } bool GFDWriteFilePixelShaderBlock(FILE *fp, GFDEndianSwapMode swapMode, GFDAlignMode alignMode, GX2PixelShader *pPS) { if(pPS == NULL) return false; // Get info about the actual shader program u32 nBytesProg = pPS->shaderSize; void* pDataProg = pPS->shaderPtr; // Current location, we will relocate it // Set NULL pPS->shaderPtr = NULL; // Convert structure into a flat, relocatable format GFDDataTable *pDT_PS = GFDCreateBlockRelocateHeaderPSH(pPS); if(NULL == pDT_PS) return false; int nBytesPSStruct = pDT_PS->m_nDB; // How big is our pixel shader struct (changes size due to uniforms, samplers, and other varying things) // Write header for PS struct if(!GFDWriteFileBlockHeader(fp, GFD_BLOCK_TYPE_GX2_PSH_HEADER, nBytesPSStruct)) return false; // Write PS struct if(!GFDWriteFilePPCData(fp, (nBytesPSStruct + 3) / 4, GFD_ELEMENT_SIZE_32, (u32 *) pDT_PS->m_pDB)) return false; // Add pad block if(alignMode) { // Calc padding size for shader align u32 padSize = ((u32)(ftell(fp) + 2 * GFD_BLOCK_HEADER_SIZE +(GX2_SHADER_ALIGNMENT-1)) & 
~(GX2_SHADER_ALIGNMENT-1)) - (u32)ftell(fp) - 2 * GFD_BLOCK_HEADER_SIZE; while(padSize < 0) padSize += GX2_SHADER_ALIGNMENT; if(!GFDWriteFilePadBlock(fp, padSize)) return false; } // Write Header for program block if(!GFDWriteFileBlockHeader(fp, GFD_BLOCK_TYPE_GX2_PSH_PROGRAM, nBytesProg)) return false; // Write program data block if(!GFDWriteFileGPUData(fp, (nBytesProg + 3) / 4, GFD_ELEMENT_SIZE_32, swapMode, (u32 *) pDataProg)) return false; GFDDestroyDataTable(pDT_PS); // Restore pPS->shaderPtr = pDataProg; return true; } bool GFDWriteFileGeometryShaderBlock(FILE *fp, GFDEndianSwapMode swapMode, GFDAlignMode alignMode, GX2GeometryShader *pGS) { if(pGS == NULL) return false; // Get info about the actual shader program u32 nBytesProg = pGS->shaderSize; void* pDataProg = pGS->shaderPtr; // Current location, we will relocate it u32 nBytesCopyProg = pGS->copyShaderSize; void* pDataCopyProg = pGS->copyShaderPtr; // Current location, we will relocate it // Set NULL pGS->shaderPtr = NULL; pGS->copyShaderPtr = NULL; // Convert structure into a flat, relocatable format GFDDataTable *pDT_GS = GFDCreateBlockRelocateHeaderGSH(pGS); if(NULL == pDT_GS) return false; int nBytesGSStruct = pDT_GS->m_nDB; // How big is our shader struct (changes size due to uniforms, samplers, and other varying things) // Write header for GS struct if(!GFDWriteFileBlockHeader(fp, GFD_BLOCK_TYPE_GX2_GSH_HEADER, nBytesGSStruct)) return false; // Write GS struct if(!GFDWriteFilePPCData(fp, (nBytesGSStruct + 3) / 4, GFD_ELEMENT_SIZE_32, (u32 *) pDT_GS->m_pDB)) return false; // Add pad block if(alignMode) { // Calc padding size for shader align u32 padSize = ((u32)(ftell(fp) + 2 * GFD_BLOCK_HEADER_SIZE +(GX2_SHADER_ALIGNMENT-1)) & ~(GX2_SHADER_ALIGNMENT-1)) - (u32)ftell(fp) - 2 * GFD_BLOCK_HEADER_SIZE; while(padSize < 0) padSize += GX2_SHADER_ALIGNMENT; if(!GFDWriteFilePadBlock(fp, padSize)) return false; } // Write Header for program block if(!GFDWriteFileBlockHeader(fp, 
GFD_BLOCK_TYPE_GX2_GSH_PROGRAM, nBytesProg)) return false; // Write program data block if(!GFDWriteFileGPUData(fp, (nBytesProg + 3) / 4, GFD_ELEMENT_SIZE_32, swapMode, (u32 *) pDataProg)) return false; // Add pad block if(alignMode) { // Calc padding size for shader align u32 padSize = ((u32)(ftell(fp) + 2 * GFD_BLOCK_HEADER_SIZE +(GX2_SHADER_ALIGNMENT-1)) & ~(GX2_SHADER_ALIGNMENT-1)) - (u32)ftell(fp) - 2 * GFD_BLOCK_HEADER_SIZE; while(padSize < 0) padSize += GX2_SHADER_ALIGNMENT; if(!GFDWriteFilePadBlock(fp, padSize)) return false; } // Write Header for copy program block if(!GFDWriteFileBlockHeader(fp, GFD_BLOCK_TYPE_GX2_GSH_COPY_PROGRAM, nBytesCopyProg)) return false; // Write copy program data block if(!GFDWriteFileGPUData(fp, (nBytesCopyProg + 3) / 4, GFD_ELEMENT_SIZE_32, swapMode, (u32 *) pDataCopyProg)) return false; GFDDestroyDataTable(pDT_GS); // Restore pGS->shaderPtr = pDataProg; pGS->copyShaderPtr = pDataCopyProg; return true; } bool GFDWriteFileComputeShaderBlock(FILE *fp, GFDEndianSwapMode swapMode, GFDAlignMode alignMode, GX2ComputeShader *pCS) { if(pCS == NULL) return false; // Get info about the actual shader program u32 nBytesProg = pCS->shaderSize; void* pDataProg = pCS->shaderPtr; // Current location, we will relocate it // Set NULL pCS->shaderPtr = NULL; // Now convert structure into a flat, relocatable format GFDDataTable *pDT_CS = GFDCreateBlockRelocateHeaderCSH(pCS); if(NULL == pDT_CS) return false; int nBytesCSStruct = pDT_CS->m_nDB; // How big is our compute struct (changes size due to uniforms, samplers, and other varying things) // Write header for CS struct if(!GFDWriteFileBlockHeader(fp, GFD_BLOCK_TYPE_GX2_CSH_HEADER, nBytesCSStruct)) return false; // Write CS struct if(!GFDWriteFilePPCData(fp, (nBytesCSStruct + 3) / 4, GFD_ELEMENT_SIZE_32, (u32 *) pDT_CS->m_pDB)) return false; // Add pad block if(alignMode) { // Calc padding size for shader align u32 padSize = ((u32)(ftell(fp) + 2 * GFD_BLOCK_HEADER_SIZE +(GX2_SHADER_ALIGNMENT-1)) & 
~(GX2_SHADER_ALIGNMENT-1)) - (u32)ftell(fp) - 2 * GFD_BLOCK_HEADER_SIZE; while(padSize < 0) padSize += GX2_SHADER_ALIGNMENT; if(!GFDWriteFilePadBlock(fp, padSize)) return false; } // Write out Header for program block if(!GFDWriteFileBlockHeader(fp, GFD_BLOCK_TYPE_GX2_CSH_PROGRAM, nBytesProg)) return false; // Write program data block if(!GFDWriteFileGPUData(fp, (nBytesProg + 3) / 4, GFD_ELEMENT_SIZE_32, swapMode, (u32 *) pDataProg)) return false; GFDDestroyDataTable(pDT_CS); // Restore pCS->shaderPtr = pDataProg; return true; } bool GFD_API GFDWriteFileShader(char* pFilename, GFDGPUVersion gpuVer, GFDEndianSwapMode swapMode, GFDAlignMode alignMode, u32 numShader, const GFDShaders *pShadersOrig) { GFDShaders2 shaders = {0}; if (!pShadersOrig) { return false; } shaders.abiVersion = GFD_DLL_ABI_VERSION; shaders.pVertexShader = pShadersOrig->pVertexShader; shaders.pGeometryShader = pShadersOrig->pGeometryShader; shaders.pPixelShader = pShadersOrig->pPixelShader; return GFDWriteFileShader2(pFilename, gpuVer, swapMode, alignMode, numShader, &shaders); } bool GFD_API GFDWriteFileShader2(char* pFilename, GFDGPUVersion gpuVer, GFDEndianSwapMode swapMode, GFDAlignMode alignMode, u32 numShader, const GFDShaders2 *pShaders) { FILE *fpout = NULL; u32 count = 0; if (!pShaders) { return false; } if ( GFD_DLL_ABI_TYPE(pShaders->abiVersion) != GFD_DLL_ABI_TYPE(GFD_DLL_ABI_VERSION) || GFD_DLL_ABI_VERSION_NUM(pShaders->abiVersion) > 0) { return false; } if (!pFilename) { pFilename = GSH_DEFAULT_FILENAME; } // Open file if(GFDOpenFile(&fpout, pFilename, "wb") != 0) { printf("Error: Can't open %s\n", pFilename); return false; } // Check gpu version switch(gpuVer) { case GFD_GPU_VERSION_0: break; case GFD_GPU_VERSION_1: break; case GFD_GPU_VERSION_GPU7: break; default: printf("Warning: Unsupported GPU %d, using default\n", gpuVer); gpuVer = GFD_GPU_VERSION_GPU7; break; } // Write the file header if(!GFDWriteFileHeader(fpout, gpuVer, alignMode)) { printf("Error: Can't write file 
header\n"); GFDCloseFile(fpout); return false; } // Writes multiple shader blocks for (count = 0; count < numShader; count++) { if(NULL != &pShaders[count]) { if(NULL != pShaders[count].pVertexShader) { if(!GFDWriteFileVertexShaderBlock(fpout, swapMode, alignMode, pShaders[count].pVertexShader)) { printf("Error: Can't write vsh block.\n"); GFDCloseFile(fpout); return false; } } if(NULL != pShaders[count].pPixelShader) { if(!GFDWriteFilePixelShaderBlock(fpout, swapMode, alignMode, pShaders[count].pPixelShader)) { printf("Error: Can't write psh block.\n"); GFDCloseFile(fpout); return false; } } if(NULL != pShaders[count].pGeometryShader) { if(!GFDWriteFileGeometryShaderBlock(fpout, swapMode, alignMode, pShaders[count].pGeometryShader)) { printf("Error: Can't write psh block.\n"); GFDCloseFile(fpout); return false; } } if(NULL != pShaders[count].pComputeShader) { if(!GFDWriteFileComputeShaderBlock(fpout, swapMode, alignMode, pShaders[count].pComputeShader)) { printf("Error: Can't write csh block.\n"); GFDCloseFile(fpout); return false; } } } } // Write an 'End' block to the file if(!GFDWriteFileBlockHeader(fpout, GFD_BLOCK_TYPE_END, 0)) { printf("Error: Can't write end block header\n"); GFDCloseFile(fpout); return false; } GFDCloseFile(fpout); return true; } bool GFD_API GFDAppendWriteFileShader(char* pFilename, GFDGPUVersion gpuVer, GFDEndianSwapMode swapMode, GFDAlignMode alignMode, u32 numShader, const GFDShaders *pShadersOrig) { GFDShaders2 shaders = {0}; if (!pShadersOrig) { return false; } // Convert to GFDShaders2 structure shaders.abiVersion = GFD_DLL_ABI_VERSION; shaders.pVertexShader = pShadersOrig->pVertexShader; shaders.pGeometryShader = pShadersOrig->pGeometryShader; shaders.pPixelShader = pShadersOrig->pPixelShader; // Call the new version with the updated return GFDAppendWriteFileShader2(pFilename, gpuVer, swapMode, alignMode, numShader, &shaders); } bool GFD_API GFDAppendWriteFileShader2(char* pFilename, GFDGPUVersion gpuVer, GFDEndianSwapMode 
swapMode, GFDAlignMode alignMode, u32 numShader, const GFDShaders2 *pShaders) { FILE *fpout = NULL; u32 count = 0; GFDHeader fileHeader; if (!pShaders) { return false; } if ( GFD_DLL_ABI_TYPE(pShaders->abiVersion) != GFD_DLL_ABI_TYPE(GFD_DLL_ABI_VERSION) || GFD_DLL_ABI_VERSION_NUM(pShaders->abiVersion) > 0) { return false; } // open file if(GFDOpenFile(&fpout, pFilename, "rb+") != 0) { printf("Error: Can't open %s\n", pFilename); return false; } // Read File Header if(!GFDReadFilePPCData(&fileHeader, (GFD_HEADER_SIZE + 3) / 4, GFD_ELEMENT_SIZE_32, fpout)) { GFDCloseFile(fpout); printf("Error: Can't read file header.\n"); return false; } // check gpu version if(fileHeader.gpuVersion != gpuVer) { GFDCloseFile(fpout); printf("Error: GPU version is different.\n"); return false; } // check header version if(!GFDCheckHeaderMagicVersions(&fileHeader)) { GFDCloseFile(fpout); printf("Error: Format version is different.\n"); return false; } // seeks to beginning of 'End' block fseek(fpout, -(s32)GFD_BLOCK_HEADER_SIZE, SEEK_END); // append writes multiple shader blocks for (count = 0; count < numShader; count++) { if(NULL != &pShaders[count]) { if(NULL != pShaders[count].pVertexShader) { if(!GFDWriteFileVertexShaderBlock(fpout, swapMode, alignMode, pShaders[count].pVertexShader)) { printf("Error: Can't write vsh block.\n"); GFDCloseFile(fpout); return false; } } if(NULL != pShaders[count].pPixelShader) { if(!GFDWriteFilePixelShaderBlock(fpout, swapMode, alignMode, pShaders[count].pPixelShader)) { printf("Error: Can't write psh block.\n"); GFDCloseFile(fpout); return false; } } if(NULL != pShaders[count].pGeometryShader) { if(!GFDWriteFileGeometryShaderBlock(fpout, swapMode, alignMode, pShaders[count].pGeometryShader)) { printf("Error: Can't write psh block.\n"); GFDCloseFile(fpout); return false; } } if(NULL != pShaders[count].pComputeShader) { if(!GFDWriteFileComputeShaderBlock(fpout, swapMode, alignMode, pShaders[count].pComputeShader)) { printf("Error: Can't write csh 
block.\n"); GFDCloseFile(fpout); return false; } } } } // Write an 'End' block to the file if(!GFDWriteFileBlockHeader(fpout, GFD_BLOCK_TYPE_END, 0)) { printf("Error: Can't write end block header\n"); GFDCloseFile(fpout); return false; } GFDCloseFile(fpout); return true; } // Write As Code----- void _GFDWriteGX2AttributesAsCode(FILE *fp, const char* sName, u32 nAttribs, GX2AttribVar *pAV) { if (nAttribs == 0) { return; } else { fprintf(fp, "GX2AttribVar %s_attrib_vars[] = {\n", sName); for(u32 i = 0; i < nAttribs; i++) { GX2AttribVar *pAVI = &(pAV[i]); fprintf(fp," {\"%s\", %s, %u, %u}%c\n", pAVI->name, varTypeName[pAVI->type], pAVI->arrayCount, pAVI->location, (i==nAttribs-1)?' ':','); } fprintf(fp,"};\n"); } fprintf(fp,"\n"); } void _GFDWriteGX2SamplersAsCode(FILE *fp, const char* sName, u32 nSamplers, GX2SamplerVar *pSV) { if (nSamplers == 0) { return; } else { fprintf(fp, "GX2SamplerVar %s_sampler_vars[] = {\n", sName); for(u32 i = 0; i < nSamplers; i++) { GX2SamplerVar *pSVI = &(pSV[i]); fprintf(fp," {\"%s\", %s, %u}%c\n", pSVI->name, samplerTypeName[pSVI->type], pSVI->location, (i==nSamplers-1)?' ':','); } fprintf(fp,"};\n"); } fprintf(fp,"\n"); } void _GFDWriteGX2UniformsAsCode(FILE *fp, const char* sName, u32 nUniforms, GX2UniformVar *pUV, GX2UniformInitialValue *ivBase) { if (nUniforms == 0) { return; } else { fprintf(fp, "GX2UniformVar %s_uniforms[] = {\n", sName); for(u32 i = 0; i < nUniforms; i++) { GX2UniformVar *pUVI = &(pUV[i]); fprintf(fp," {\"%s\", %s, %u, %u, ", pUVI->name, varTypeName[pUVI->type], pUVI->arrayCount, pUVI->offset); if (pUVI->blockIndex == GX2_UNIFORM_BLOCK_INDEX_INVALID) { fprintf(fp,"\n GX2_UNIFORM_BLOCK_INDEX_INVALID, "); } else { fprintf(fp,"%u, ", pUVI->blockIndex); } fprintf(fp,"}%c\n", (i==nUniforms-1)?' 
':','); } fprintf(fp,"};\n"); } fprintf(fp,"\n"); } void _GFDGX2UniformBlockAsCode(FILE *fp, const char* sName, u32 nUniBlocks, GX2UniformBlock *pUB) { if (nUniBlocks == 0) { return; } else { fprintf(fp, "GX2UniformBlock %s_uniform_blocks[] = {\n", sName); for(u32 i = 0; i < nUniBlocks; i++) { GX2UniformBlock *pUBI = &(pUB[i]); fprintf(fp," {\"%s\", %u, %u}%c\n", pUBI->name, pUBI->location, pUBI->size, (i==nUniBlocks-1)?' ':','); } fprintf(fp,"};\n"); } fprintf(fp,"\n"); } void _GFDWriteWordsAsHex(FILE *fp, u32 *ptr, u32 byteLen) { assert(fp != NULL); assert((byteLen & 0x03) == 0); fprintf(fp, " "); for(u32 j = 0; j < byteLen/4; j++) { fprintf(fp,"0x%08x%c", ptr[j], (j==byteLen/4-1)?' ':','); if((j & 3) == 3) { if((j & 0x3f) == 0x3) fprintf(fp, " // 0x%04x\n ", j-3); else fprintf(fp, "\n "); } } } void _GFDWriteWordsAsCode(FILE *fp, const char *name, u32 *ptr, u32 byteLen, const char *attrib) { assert(fp != NULL); assert((byteLen & 0x03) == 0); if (ptr == NULL || byteLen == 0) { return; } fprintf(fp, "%s static const u32 %s[%u] =\n{\n", attrib, name, byteLen/4); _GFDWriteWordsAsHex(fp, ptr, byteLen); fprintf(fp, "\n};\n\n"); } static char *makeName(char *dst, char *src1, char *src2, u32 max) { dst[0]=0; // Avoid warnings about strncat being unsafe // (because it might write size+1 characters) // strncat(dst, src1, max-1); strncat_s(dst, max, src1, max-1); size_t len=strlen(dst); // strncat(dst+len, src2, max-len-1); strncat_s(dst+len, max-len, src2, max-len-1); return dst; } GFD_DECLSPEC bool GFD_API GFDWriteFileShaderAsCodeWithSource(char* pFilename, GFDEndianSwapMode swapMode, const GFDShaders2 *pShaders, const GFDShadersSrc *pShadersSrc) { FILE *fpout = NULL; // Get base name from filename (for structure names) char *slash1, *slash2, *dot; #define BASEMAX 256 char basename[BASEMAX], tempname[BASEMAX]; slash1 = strrchr(pFilename, '/'); slash2 = strrchr(pFilename, '\\'); if (slash2 > slash1) slash1 = slash2; basename[0]=0; if (slash1) { // strncat(basename, 
slash1+1, BASEMAX-1); strncat_s(basename, BASEMAX, slash1+1, BASEMAX-1); } else { // strncat(basename, pFilename, BASEMAX-1); strncat_s(basename, BASEMAX, pFilename, BASEMAX-1); } dot = strrchr(basename, '.'); if (dot) *dot=0; if ( GFD_DLL_ABI_TYPE(pShaders->abiVersion) != GFD_DLL_ABI_TYPE(GFD_DLL_ABI_VERSION) || GFD_DLL_ABI_VERSION_NUM(pShaders->abiVersion) > 0) { return false; } // open file if(GFDOpenFile(&fpout, pFilename, "w") != 0) { printf("Error: Can't open %s\n", pFilename); return false; } fprintf(fpout,"//--------------------------------------------------\n\n"); fprintf(fpout,"// This file is automatically generated by gfd.\n\n"); fprintf(fpout,"//--------------------------------------------------\n\n"); // For Vertex Shader if(NULL != pShaders->pVertexShader) { fprintf(fpout, "// ---------- %s Vertex Shader ----------\n\n", basename); // First, write out initial values // (because uniforms refer to them) _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_VS_initial_values", BASEMAX), (u32 *) pShaders->pVertexShader->initialValues, pShaders->pVertexShader->numInitialValues * sizeof(GX2UniformInitialValue), ""); // Swap endian for GPU7 if(swapMode != GFD_ENDIAN_SWAP_MODE_8_IN_32) { int nElem = (pShaders->pVertexShader->shaderSize + 0x3) / 4; GFDEndianSwap8in32((u32*)pShaders->pVertexShader->shaderPtr, nElem); } if ( pShadersSrc && pShadersSrc->pVertexShader ) { char *next_token = NULL; char *p = strtok_s((char*)pShadersSrc->pVertexShader, "\r\n", &next_token); fprintf(fpout, "// Source Vertex Shader\n"); while ( p ) { fprintf(fpout, "// %s\n", p); p = strtok_s(NULL, "\r\n", &next_token); } fprintf(fpout, "// End Source Vertex Shader\n"); } // Then, write out the shader pieces, // and finally, write the shader itself. 
_GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_VS_shaderPtr", BASEMAX), (u32 *) pShaders->pVertexShader->shaderPtr, pShaders->pVertexShader->shaderSize, "ALIGNVAR(256)"); _GFDGX2UniformBlockAsCode(fpout, makeName(tempname, basename, "_VS", BASEMAX), pShaders->pVertexShader->numUniformBlocks, pShaders->pVertexShader->uniformBlocks); _GFDWriteGX2UniformsAsCode(fpout, makeName(tempname, basename, "_VS", BASEMAX), pShaders->pVertexShader->numUniforms, pShaders->pVertexShader->uniformVars, pShaders->pVertexShader->initialValues); _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_VS_loop_vars", BASEMAX), (u32 *) pShaders->pVertexShader->_loopVars, pShaders->pVertexShader->_numLoops * sizeof(GFDLoopVar), ""); _GFDWriteGX2SamplersAsCode(fpout, makeName(tempname, basename, "_VS", BASEMAX), pShaders->pVertexShader->numSamplers, pShaders->pVertexShader->samplerVars); _GFDWriteGX2AttributesAsCode(fpout, makeName(tempname, basename, "_VS", BASEMAX), pShaders->pVertexShader->numAttribs, pShaders->pVertexShader->attribVars); fprintf(fpout, "\n"); fprintf(fpout, "\n"); fprintf(fpout, "static GX2VertexShader %s_VS = {\n", basename); fprintf(fpout, " { // _regs\n"); _GFDWriteWordsAsHex(fpout, (u32 *) &pShaders->pVertexShader->_regs, sizeof(GFDVertexShaderRegs)); fprintf(fpout, "\n },\n"); fprintf(fpout, " %u,\n", pShaders->pVertexShader->shaderSize); fprintf(fpout, " (void *) %s_VS_shaderPtr,\n", basename); fprintf(fpout, " %s,\n", shaderModeName[(u32)pShaders->pVertexShader->shaderMode]); fprintf(fpout, " %u,\n", pShaders->pVertexShader->numUniformBlocks); if (pShaders->pVertexShader->numUniformBlocks) { fprintf(fpout, " %s_VS_uniform_blocks,\n", basename); } else { fprintf(fpout, " NULL,\n"); } fprintf(fpout, " %u,\n", pShaders->pVertexShader->numUniforms); if (pShaders->pVertexShader->numUniforms) { fprintf(fpout, " %s_VS_uniforms,\n", basename); } else { fprintf(fpout, " NULL,\n"); } fprintf(fpout, " %u,\n", pShaders->pVertexShader->numInitialValues); if 
(pShaders->pVertexShader->numInitialValues) { fprintf(fpout, " (GX2UniformInitialValue *) %s_VS_initial_values,\n", basename); } else { fprintf(fpout, " NULL,\n"); } fprintf(fpout, " %u,\n", pShaders->pVertexShader->_numLoops); if (pShaders->pVertexShader->_numLoops) { fprintf(fpout, " (GFDLoopVar *) %s_VS_loop_vars,\n", basename); } else { fprintf(fpout, " NULL,\n"); } fprintf(fpout, " %u,\n", pShaders->pVertexShader->numSamplers); if (pShaders->pVertexShader->numSamplers) { fprintf(fpout, " %s_VS_sampler_vars,\n", basename); } else { fprintf(fpout, " NULL,\n"); } fprintf(fpout, " %u,\n", pShaders->pVertexShader->numAttribs); if (pShaders->pVertexShader->numAttribs) { fprintf(fpout, " %s_VS_attrib_vars,\n", basename); } else { fprintf(fpout, " NULL,\n"); } fprintf(fpout, " %d,\n", (u32)pShaders->pVertexShader->ringItemsize); fprintf(fpout, " (GX2Boolean)%d,\n", (u32)pShaders->pVertexShader->hasStreamOut); fprintf(fpout, " { \n"); _GFDWriteWordsAsHex(fpout, (u32 *) &pShaders->pVertexShader->streamOutVertexStride, sizeof(u32)*GX2_MAX_STREAMOUT_BUFFERS); fprintf(fpout, "\n },\n"); // end of structure fprintf(fpout, "};\n\n\n"); } // For Pixel Shader if(NULL != pShaders->pPixelShader ) { fprintf(fpout, "// ---------- %s Pixel Shader ----------\n\n", basename); // First, write out initial values // (because uniforms refer to them) _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_PS_initial_values", BASEMAX), (u32 *) pShaders->pPixelShader->initialValues, pShaders->pPixelShader->numInitialValues * sizeof(GX2UniformInitialValue), ""); // Swap endian for GPU7 if(swapMode != GFD_ENDIAN_SWAP_MODE_8_IN_32) { int nElem = ( pShaders->pPixelShader->shaderSize + 0x3) / 4; GFDEndianSwap8in32((u32*) pShaders->pPixelShader->shaderPtr, nElem); } if ( pShadersSrc && pShadersSrc->pPixelShader ) { char *next_token = NULL; char *p = strtok_s((char*)pShadersSrc->pPixelShader, "\r\n", &next_token); fprintf(fpout, "// Source Pixel Shader\n"); while ( p ) { fprintf(fpout, "// 
%s\n", p); p = strtok_s(NULL, "\r\n", &next_token); } fprintf(fpout, "// End Source Pixel Shader\n"); } // Then, write out the shader pieces, // and finally, write the shader itself. _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_PS_shaderPtr", BASEMAX), (u32 *) pShaders->pPixelShader->shaderPtr, pShaders->pPixelShader->shaderSize, "ALIGNVAR(256)"); _GFDGX2UniformBlockAsCode(fpout, makeName(tempname, basename, "_PS", BASEMAX), pShaders->pPixelShader->numUniformBlocks, pShaders->pPixelShader->uniformBlocks); _GFDWriteGX2UniformsAsCode(fpout, makeName(tempname, basename, "_PS", BASEMAX), pShaders->pPixelShader->numUniforms, pShaders->pPixelShader->uniformVars, pShaders->pPixelShader->initialValues); _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_PS_loop_vars", BASEMAX), (u32 *) pShaders->pPixelShader->_loopVars, pShaders->pPixelShader->_numLoops * sizeof(GFDLoopVar), ""); _GFDWriteGX2SamplersAsCode(fpout, makeName(tempname, basename, "_PS", BASEMAX), pShaders->pPixelShader->numSamplers, pShaders->pPixelShader->samplerVars); fprintf(fpout, "\n"); fprintf(fpout, "\n"); fprintf(fpout, "static GX2PixelShader %s_PS = {\n", basename); fprintf(fpout, " { // _regs\n"); _GFDWriteWordsAsHex(fpout, (u32 *) &pShaders->pPixelShader->_regs, sizeof(GFDPixelShaderRegs)); fprintf(fpout, "\n },\n"); fprintf(fpout, " %u,\n", pShaders->pPixelShader->shaderSize); fprintf(fpout, " (void *) %s_PS_shaderPtr,\n", basename); fprintf(fpout, " %s,\n", shaderModeName[(u32)pShaders->pPixelShader->shaderMode]); fprintf(fpout, " %u,\n", pShaders->pPixelShader->numUniformBlocks); if (pShaders->pPixelShader->numUniformBlocks) { fprintf(fpout, " %s_PS_uniform_blocks,\n", basename); } else { fprintf(fpout, " NULL,\n"); } fprintf(fpout, " %u,\n", pShaders->pPixelShader->numUniforms); if (pShaders->pPixelShader->numUniforms) { fprintf(fpout, " %s_PS_uniforms,\n", basename); } else { fprintf(fpout, " NULL,\n"); } fprintf(fpout, " %u,\n", pShaders->pPixelShader->numInitialValues); 
if (pShaders->pPixelShader->numInitialValues) { fprintf(fpout, " (GX2UniformInitialValue *) %s_PS_initial_values,\n", basename); } else { fprintf(fpout, " NULL,\n"); } fprintf(fpout, " %u,\n", pShaders->pPixelShader->_numLoops); if (pShaders->pPixelShader->_numLoops) { fprintf(fpout, " (GFDLoopVar *) %s_PS_loop_vars,\n", basename); } else { fprintf(fpout, " NULL,\n"); } fprintf(fpout, " %u,\n", pShaders->pPixelShader->numSamplers); if (pShaders->pPixelShader->numSamplers) { fprintf(fpout, " %s_PS_sampler_vars,\n", basename); } else { fprintf(fpout, " NULL,\n"); } // end of structure fprintf(fpout, "};\n\n\n"); } // For Geometry Shader if(NULL != pShaders->pGeometryShader ) { fprintf(fpout, "// ---------- %s Geometry Shader ----------\n\n", basename); // First, write out initial values // (because uniforms refer to them) _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_GS_initial_values", BASEMAX), (u32 *) pShaders->pGeometryShader->initialValues, pShaders->pGeometryShader->numInitialValues * sizeof(GX2UniformInitialValue), ""); // Swap endian for GPU7 if(swapMode != GFD_ENDIAN_SWAP_MODE_8_IN_32) { int nElem = ( pShaders->pGeometryShader->shaderSize + 0x3) / 4; GFDEndianSwap8in32((u32*) pShaders->pGeometryShader->shaderPtr, nElem); } if ( pShadersSrc && pShadersSrc->pGeometryShader ) { char *next_token = NULL; char *p = strtok_s((char*)pShadersSrc->pGeometryShader, "\r\n", &next_token); fprintf(fpout, "// Source Geometry Shader\n"); while ( p ) { fprintf(fpout, "// %s\n", p); p = strtok_s(NULL, "\r\n", &next_token); } fprintf(fpout, "// End Source Geometry Shader\n"); } // Then, write out the shader pieces, // and finally, write the shader itself. 
_GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_GS_shaderPtr", BASEMAX), (u32 *) pShaders->pGeometryShader->shaderPtr, pShaders->pGeometryShader->shaderSize, "ALIGNVAR(256)"); // Swap endian for GPU7 if(swapMode != GFD_ENDIAN_SWAP_MODE_8_IN_32) { int nElem = ( pShaders->pGeometryShader->copyShaderSize + 0x3) / 4; GFDEndianSwap8in32((u32*) pShaders->pGeometryShader->copyShaderPtr, nElem); } _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_GS_copyShaderPtr", BASEMAX), (u32 *) pShaders->pGeometryShader->copyShaderPtr, pShaders->pGeometryShader->copyShaderSize, "ALIGNVAR(256)"); _GFDGX2UniformBlockAsCode(fpout, makeName(tempname, basename, "_GS", BASEMAX), pShaders->pGeometryShader->numUniformBlocks, pShaders->pGeometryShader->uniformBlocks); _GFDWriteGX2UniformsAsCode(fpout, makeName(tempname, basename, "_GS", BASEMAX), pShaders->pGeometryShader->numUniforms, pShaders->pGeometryShader->uniformVars, pShaders->pGeometryShader->initialValues); _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_GS_loop_vars", BASEMAX), (u32 *) pShaders->pGeometryShader->_loopVars, pShaders->pGeometryShader->_numLoops * sizeof(GFDLoopVar), ""); _GFDWriteGX2SamplersAsCode(fpout, makeName(tempname, basename, "_GS", BASEMAX), pShaders->pGeometryShader->numSamplers, pShaders->pGeometryShader->samplerVars); fprintf(fpout, "\n"); fprintf(fpout, "\n"); fprintf(fpout, "static GX2GeometryShader %s_GS = {\n", basename); fprintf(fpout, " { // _regs\n"); _GFDWriteWordsAsHex(fpout, (u32 *) &pShaders->pGeometryShader->_regs, sizeof(GFDGeometryShaderRegs)); fprintf(fpout, "\n },\n"); fprintf(fpout, " %u,\n", pShaders->pGeometryShader->shaderSize); fprintf(fpout, " (void *) %s_GS_shaderPtr,\n", basename); fprintf(fpout, " %u,\n", pShaders->pGeometryShader->copyShaderSize); fprintf(fpout, " (void *) %s_GS_copyShaderPtr,\n", basename); fprintf(fpout, " %s,\n", shaderModeName[(u32)pShaders->pGeometryShader->shaderMode]); fprintf(fpout, " %u,\n", 
pShaders->pGeometryShader->numUniformBlocks); if (pShaders->pGeometryShader->numUniformBlocks) { fprintf(fpout, " %s_GS_uniform_blocks,\n", basename); } else { fprintf(fpout, " NULL,\n"); } fprintf(fpout, " %u,\n", pShaders->pGeometryShader->numUniforms); if (pShaders->pGeometryShader->numUniforms) { fprintf(fpout, " %s_GS_uniforms,\n", basename); } else { fprintf(fpout, " NULL,\n"); } fprintf(fpout, " %u,\n", pShaders->pGeometryShader->numInitialValues); if (pShaders->pGeometryShader->numInitialValues) { fprintf(fpout, " (GX2UniformInitialValue *) %s_GS_initial_values,\n", basename); } else { fprintf(fpout, " NULL,\n"); } fprintf(fpout, " %u,\n", pShaders->pGeometryShader->_numLoops); if (pShaders->pGeometryShader->_numLoops) { fprintf(fpout, " (GFDLoopVar *) %s_GS_loop_vars,\n", basename); } else { fprintf(fpout, " NULL,\n"); } fprintf(fpout, " %u,\n", pShaders->pGeometryShader->numSamplers); if (pShaders->pGeometryShader->numSamplers) { fprintf(fpout, " %s_GS_sampler_vars,\n", basename); } else { fprintf(fpout, " NULL,\n"); } fprintf(fpout, " %d,\n", (u32)pShaders->pGeometryShader->ringItemsize); fprintf(fpout, " (GX2Boolean)%d,\n", (u32)pShaders->pGeometryShader->hasStreamOut); fprintf(fpout, " { \n"); _GFDWriteWordsAsHex(fpout, (u32 *) &pShaders->pGeometryShader->streamOutVertexStride, sizeof(u32)*GX2_MAX_STREAMOUT_BUFFERS); fprintf(fpout, "\n },\n"); // end of structure fprintf(fpout, "};\n\n\n"); } // For Compute Shader if(NULL != pShaders->pComputeShader) { fprintf(fpout, "// ---------- %s Compute Shader ----------\n\n", basename); // Write this first since the uniforms refer to them. 
_GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_CS_initial_values", BASEMAX), (u32 *) pShaders->pComputeShader->initialValues, pShaders->pComputeShader->numInitialValues * sizeof(GX2UniformInitialValue), ""); // Swap endian for GPU7 if(swapMode != GFD_ENDIAN_SWAP_MODE_8_IN_32) { int nElem = (pShaders->pComputeShader->shaderSize + 0x3) / 4; GFDEndianSwap8in32((u32*)pShaders->pComputeShader->shaderPtr, nElem); } if ( pShadersSrc && pShadersSrc->pComputeShader ) { char *next_token = NULL; char *p = strtok_s((char*)pShadersSrc->pComputeShader, "\r\n", &next_token); fprintf(fpout, "// Source Compute Shader\n"); while ( p ) { fprintf(fpout, "// %s\n", p); p = strtok_s(NULL, "\r\n", &next_token); } fprintf(fpout, "// End Source Compute Shader\n"); } // Then, write out the shader pieces, // and finally, write the shader itself. _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_CS_shaderPtr", BASEMAX), (u32 *) pShaders->pComputeShader->shaderPtr, pShaders->pComputeShader->shaderSize, "ALIGNVAR(256)"); _GFDGX2UniformBlockAsCode(fpout, makeName(tempname, basename, "_CS", BASEMAX), pShaders->pComputeShader->numUniformBlocks, pShaders->pComputeShader->uniformBlocks); _GFDWriteGX2SamplersAsCode(fpout, makeName(tempname, basename, "_CS", BASEMAX), pShaders->pComputeShader->numSamplers, pShaders->pComputeShader->samplerVars); _GFDWriteWordsAsCode(fpout, makeName(tempname, basename, "_CS_loop_vars", BASEMAX), (u32 *) pShaders->pComputeShader->_loopVars, pShaders->pComputeShader->_numLoops * sizeof(GFDLoopVar), ""); _GFDWriteGX2UniformsAsCode(fpout, makeName(tempname, basename, "_CS", BASEMAX), pShaders->pComputeShader->numUniforms, pShaders->pComputeShader->uniformVars, pShaders->pComputeShader->initialValues); fprintf(fpout, "\n"); fprintf(fpout, "\n"); fprintf(fpout, "static GX2ComputeShader %s_CS = {\n", basename); fprintf(fpout, " { // _regs\n"); _GFDWriteWordsAsHex(fpout, (u32 *) &pShaders->pComputeShader->_regs, sizeof(GFDComputeShaderRegs)); 
fprintf(fpout, "\n },\n"); fprintf(fpout, " %u,\n", pShaders->pComputeShader->shaderSize); fprintf(fpout, " (void *) %s_CS_shaderPtr,\n", basename); fprintf(fpout, " %u,\n", pShaders->pComputeShader->numUniformBlocks); if (pShaders->pComputeShader->numUniformBlocks) { fprintf(fpout, " %s_CS_uniform_blocks,\n", basename); } else { fprintf(fpout, " NULL,\n"); } fprintf(fpout, " %u,\n", pShaders->pComputeShader->numUniforms); if (pShaders->pComputeShader->numUniforms) { fprintf(fpout, " %s_CS_uniforms,\n", basename); } else { fprintf(fpout, " NULL,\n"); } fprintf(fpout, " %u,\n", pShaders->pComputeShader->numInitialValues); if (pShaders->pComputeShader->numInitialValues) { fprintf(fpout, " (GX2UniformInitialValue *) %s_CS_initial_values,\n", basename); } else { fprintf(fpout, " NULL,\n"); } fprintf(fpout, " %u,\n", pShaders->pComputeShader->_numLoops); if (pShaders->pComputeShader->_numLoops) { fprintf(fpout, " (GFDLoopVar *) %s_CS_loop_vars,\n", basename); } else { fprintf(fpout, " NULL,\n"); } fprintf(fpout, " %u,\n", pShaders->pComputeShader->numSamplers); if (pShaders->pComputeShader->numSamplers) { fprintf(fpout, " %s_CS_sampler_vars,\n", basename); } else { fprintf(fpout, " NULL,\n"); } fprintf(fpout, " %u,\n", pShaders->pComputeShader->layout_size_x); fprintf(fpout, " %u,\n", pShaders->pComputeShader->layout_size_y); fprintf(fpout, " %u,\n", pShaders->pComputeShader->layout_size_z); fprintf(fpout, " (GX2Boolean)%d,\n", (u32)pShaders->pComputeShader->Over64Mode); fprintf(fpout, " %d,\n", (u32)pShaders->pComputeShader->numWavesPerSIMD); // end of structure fprintf(fpout, "};\n\n\n"); } GFDCloseFile(fpout); return true; } GFD_DECLSPEC bool GFD_API GFDWriteFileShaderAsCode(char* pFilename, GFDEndianSwapMode swapMode, const GFDShaders *pShadersOrig) { GFDShaders2 shaders = {0}; if (!pShadersOrig) { return false; } // Convert to new version of the structure shaders.abiVersion = GFD_DLL_ABI_VERSION; shaders.pVertexShader = pShadersOrig->pVertexShader; 
shaders.pGeometryShader = pShadersOrig->pGeometryShader; shaders.pPixelShader = pShadersOrig->pPixelShader; return GFDWriteFileShaderAsCodeWithSource(pFilename, swapMode, &shaders, NULL); } GFD_DECLSPEC bool GFD_API GFDWriteFileShaderAsCode2(char* pFilename, GFDEndianSwapMode swapMode, const GFDShaders2 *pShaders) { if (!pShaders) { return false; } if ( GFD_DLL_ABI_TYPE(pShaders->abiVersion) != GFD_DLL_ABI_TYPE(GFD_DLL_ABI_VERSION) || GFD_DLL_ABI_VERSION_NUM(pShaders->abiVersion) > 0) { return false; } return GFDWriteFileShaderAsCodeWithSource(pFilename, swapMode, pShaders, NULL); }