1 /*---------------------------------------------------------------------------*
2   Project:  Horizon
3   File:     gr_ShaderLite.cpp
4 
5   Copyright (C)2009-2012 Nintendo Co., Ltd.  All rights reserved.
6 
7   These coded instructions, statements, and computer programs contain
8   proprietary information of Nintendo of America Inc. and/or Nintendo
9   Company Ltd., and are protected by Federal copyright law.  They may
10   not be disclosed to third parties or copied or duplicated in any form,
11   in whole or in part, without the prior written consent of Nintendo.
12 
13   $Rev: 47306 $
14  *---------------------------------------------------------------------------*/
15 
16 #include <nn/gr/CTR/gr_ShaderLite.h>
17 
18 namespace nn
19 {
20     namespace gr
21     {
22         namespace CTR
23         {
ShaderLite(void)24             ShaderLite::ShaderLite( void ) :
25             m_VtxShaderIndex( 0 ),
26             m_GeoShaderIndex( - 1 ),
27             m_ExeImageInfoNum( 0 ),
28             m_InstructionCount( 0 ),
29             m_SwizzleCount( 0 ),
30             m_DrawMode( PICA_DATA_DRAW_TRIANGLES ),
31             m_VtxShaderBoolMapUniform( 0 ),
32             m_GeoShaderBoolMapUniform( 0 ),
33             m_GeometryShaderNum( 0 )
34             {
35             }
36 
GetRequiredBufferSize(const void * shader_binary)37             u32 ShaderLite::GetRequiredBufferSize( const void* shader_binary )
38             {
39                 const bit32* binary = reinterpret_cast< const bit32* >( shader_binary );
40                 NN_GR_ASSERT( binary != NULL );
41 
42                 NN_GR_ASSERT( *binary == 0x424C5644 ); // DVLB
43                 ++binary;
44 
45                 NN_GR_ASSERT( *binary < EXE_IMAGE_MAX );
46 
47                 // Number of execution images in the shader binary.
48                 m_ExeImageInfoNum = *binary;
49                 ++binary;
50 
51                 // Saves the pointer to the execution images in the shader binary
52                 for ( s32 i = 0; i < m_ExeImageInfoNum; ++i )
53                 {
54                     m_ExeImageInfo[ i ] =
55                         reinterpret_cast< const ExeImageInfo* >( (u8*)shader_binary + *binary );
56                     NN_GR_ASSERT( m_ExeImageInfo[ i ]->signature == 0x454c5644 ); // DVLP
57                     ++binary;
58                 }
59 
60                 NN_GR_ASSERT( *binary == 0x504C5644 ); // DVLP
61                 ++binary;
62                 ++binary;
63                 ++binary;       // instruction
64                 ++binary;       // instruction count
65                 ++binary;       // swizzle
66 
67                 m_SwizzleCount = *binary;
68                 NN_GR_ASSERT( m_SwizzleCount <= SWIZZLE_PATTERN_MAX );
69 
70                 return CalculateBufferSize_();
71             }
72 
SetupBinary(const void * shader_binary,const s32 vtx_shader_index,const s32 geo_shader_index,bit32 * buffer)73             void ShaderLite::SetupBinary( const void* shader_binary, const s32 vtx_shader_index, const s32 geo_shader_index, bit32* buffer )
74             {
75                 const bit32* binary = reinterpret_cast< const bit32* >( shader_binary );
76                 NN_GR_ASSERT( binary != NULL );
77 
78                 NN_GR_ASSERT( *binary == 0x424C5644 ); // DVLB
79                 ++binary;
80 
81                 NN_GR_ASSERT( *binary < EXE_IMAGE_MAX );
82 
83                 // Number of execution images in the shader binary.
84                 m_ExeImageInfoNum = *binary;
85                 ++binary;
86 
87                 m_VtxShaderBoolMapUniform = 0;
88                 m_GeoShaderBoolMapUniform = 0;
89 
90                 // Saves the pointer to the execution images in the shader binary
91                 for ( s32 i = 0; i < m_ExeImageInfoNum; ++i )
92                 {
93                     m_ExeImageInfo[ i ] =
94                         reinterpret_cast< const ExeImageInfo* >( (u8*)shader_binary + *binary );
95                     NN_GR_ASSERT( m_ExeImageInfo[ i ]->signature == 0x454c5644 ); // DVLP
96                     ++binary;
97                 }
98 
99                 const bit32* package_info = binary;
100                 NN_GR_ASSERT( *binary == 0x504C5644 ); // DVLP
101                 ++binary;
102                 ++binary;
103 
104                 m_Instruction = reinterpret_cast< const bit32* >( (u8*)package_info + *binary );
105                 ++binary;
106 
107                 m_InstructionCount = *binary;
108                 ++binary;
109 
110                 const bit32* swizzle = reinterpret_cast< const bit32* >( (u8*)package_info + *binary );
111                 ++binary;
112 
113                 m_SwizzleCount = *binary;
114                 NN_GR_ASSERT( m_SwizzleCount <= SWIZZLE_PATTERN_MAX );
115                 ++binary;
116 
117                 // Buffer allocation
118                 NN_NULL_ASSERT(buffer);
119                 AssignBuffer_( buffer );
120 
121                 for ( u32 i = 0; i < m_SwizzleCount; i++ )
122                 {
123                     m_Swizzle[ i ] = swizzle[ i * 2 ] ;
124                 }
125 
126                 PicaDataDrawMode drawMode     = m_DrawMode;
127 
128                 MakeShaderConstCommandCache_();
129                 MakeRgCmdOffsetCache_();
130 
131                 SetShaderIndex( vtx_shader_index, geo_shader_index );
132                 MakeShaderOutAttrCommandCache_();
133 
134                 if ( ! IsEnableGeoShader() )
135                 {
136                     m_DrawMode                = drawMode;
137                 }
138             }
139 
140             //------------------------------------------------------------------------
141 
SetShaderIndex(const s32 vtx_shader_index,const s32 geo_shader_index)142             void ShaderLite::SetShaderIndex( const s32 vtx_shader_index, const s32 geo_shader_index )
143             {
144                 // Vertex shader and geometry shader test
145                 CheckVtxShaderIndex_( vtx_shader_index );
146                 // Geometry shader test
147                 CheckGeoShaderIndex_( geo_shader_index );
148 
149                 m_VtxShaderIndex     = vtx_shader_index;
150                 m_GeoShaderIndex     = geo_shader_index;
151                 m_CombShaderIndex    = m_CmdCacheOutAttrIndexArray[m_VtxShaderIndex + 1] + m_CmdCacheOutAttrIndexArray[m_GeoShaderIndex + 1];
152 
153                 if ( IsEnableGeoShader() )
154                 {
155                     m_DrawMode = PICA_DATA_DRAW_GEOMETRY_PRIMITIVE;
156                 }
157 
158             }
159 
160             //------------------------------------------------------------------------
161 
MakeShaderConstCommandCache_(void)162             void ShaderLite::MakeShaderConstCommandCache_(void)
163             {
164                 u32 offset = 0;
165                 for ( s32 shader_index = 0; shader_index < m_ExeImageInfoNum; shader_index++ )
166                 {
167                     // Create constant register command cache.
168                     MakeConstRgCommand_( &m_CmdCacheConstArray[ offset ], shader_index );
169                     offset += m_CmdCacheConstNumArray[ shader_index ];
170 
171                     NN_GR_ASSERT( m_CmdCacheConstNumArray[ shader_index ] <= CONST_REG_COMMAND_MAX );
172                 }
173             }
174 
175             //------------------------------------------------------------------------
176 
MakeShaderOutAttrCommandCache_(void)177             void ShaderLite::MakeShaderOutAttrCommandCache_( void )
178             {
179                 u8  shader_idx = 0;
180                 s32 tmp        = m_GeoShaderIndex;
181                 u32 offset     = 0;
182                 for ( u32 vert_idx = 0; vert_idx < m_ExeImageInfoNum; vert_idx++ )
183                 {
184                     if( !m_ExeImageInfo[vert_idx]->isGeoShader )
185                     {
186                         m_GeoShaderIndex = -1;
187                         // Generate a command cache for output attributes. (No geometry shader)
188                         MakeOutAttrCommand_( &m_CmdCacheOutAttrArray[ offset ],
189                                              vert_idx,
190                                              m_GeoShaderIndex );
191                         offset += m_CmdCacheOutAttrNumArray[ shader_idx ];
192                         shader_idx++;
193 
194                         for ( u32 geo_idx = 0; geo_idx < m_ExeImageInfoNum; geo_idx++ )
195                         {
196                             if( m_ExeImageInfo[geo_idx]->isGeoShader )
197                             {
198                                 m_GeoShaderIndex = geo_idx;
199                                 // Generate a command cache for output attributes. (With geometry shader)
200                                 MakeOutAttrCommand_( &m_CmdCacheOutAttrArray[ offset ],
201                                                      vert_idx,
202                                                      m_GeoShaderIndex );
203                                 offset += m_CmdCacheOutAttrNumArray[ shader_idx ];
204                                 shader_idx++;
205                             }
206                         }
207                     }
208                 }
209                 m_GeoShaderIndex = tmp;
210             }
211 
212             //------------------------------------------------------------------------
213 
MakeFullCommand(bit32 * command) const214             bit32* ShaderLite::MakeFullCommand( bit32* command ) const
215             {
216                 // Generate commands such as those for configuring geometry shader use
217                 {
218                     command = MakePrepareCommand( command );
219                 }
220 
221                 // Generate geometry shader commands
222                 if ( IsEnableGeoShader() )
223                 {
224                     command = MakeGeoProgramCommand( command );
225                     command = MakeGeoSwizzleCommand( command );
226                     command = MakeGeoConstRgCommand( command );
227                     command = MakeGeoBoolMapCommand( command );
228                 }
229 
230                 // Generate vertex shader commands
231                 {
232                     command = MakeVtxProgramCommand( command );
233                     command = MakeVtxSwizzleCommand( command );
234                     command = MakeVtxConstRgCommand( command );
235                     command = MakeVtxBoolMapCommand( command );
236                 }
237 
238                 // Generate commands related to output attributes (outmap)
239                 {
240                     command = MakeOutAttrCommand( command );
241                 }
242 
243                 return command;
244             }
245 
246             //------------------------------------------------------------------------
247 
MakeDisableCommand(bit32 * command)248             bit32* ShaderLite::MakeDisableCommand( bit32* command )
249             {
250                 const bool isEnableGeometryShader = false;
251                 const PicaDataDrawMode drawMode   = PICA_DATA_DRAW_TRIANGLES;
252 
253                 command = MakeShaderModeCommand_( command,
254                                                   isEnableGeometryShader,
255                                                   drawMode );
256 
257                 return command;
258             }
259 
260             //------------------------------------------------------------------------
261 
MakeShaderCommand(bit32 * command,const bool isMakePrepareCommand) const262             bit32* ShaderLite::MakeShaderCommand( bit32* command, const bool isMakePrepareCommand ) const
263             {
264                 // Generate commands such as those for configuring geometry shader use
265                 if ( isMakePrepareCommand )
266                 {
267                     command = MakePrepareCommand( command );
268                 }
269 
270                 // Generate commands for the geometry shader constant registers
271                 if ( IsEnableGeoShader() )
272                 {
273                     command = MakeGeoConstRgCommand( command );
274                     command = MakeGeoBoolMapCommand( command );
275                 }
276 
277                 // Generate commands for the vertex shader constant registers
278                 {
279                     command = MakeVtxConstRgCommand( command );
280                     command = MakeVtxBoolMapCommand( command );
281                 }
282 
283                 // Generate commands related to output attributes (outmap)
284                 {
285                     command = MakeOutAttrCommand( command );
286                 }
287 
288                 return command;
289             }
290 
291             //------------------------------------------------------------------------
292 
MakePrepareCommand(bit32 * command) const293             bit32* ShaderLite::MakePrepareCommand( bit32* command ) const
294             {
295                 bool isEnableGeoShader    = IsEnableGeoShader();
296                 PicaDataDrawMode drawMode = m_DrawMode;
297 
298                 command =
299                     MakeShaderModeCommand_( command,
300                                             isEnableGeoShader,
301                                             drawMode );
302 
303                 return command;
304             }
305 
306             //------------------------------------------------------------------------
307 
MakeVtxProgramCommand(bit32 * command) const308             bit32* ShaderLite::MakeVtxProgramCommand( bit32* command ) const
309             {
310                 s32 shader_index   = GetVtxShaderIndex();
311                 bit32 reg_addr     = PICA_REG_VS_PROG_ADDR;        // 0x2cb
312                 bit32 reg_load     = PICA_REG_VS_PROG_DATA0;       // 0x2cc
313                 bit32 reg_end      = PICA_REG_VS_PROG_UPDATE_END;  // 0x2bf
314 
315                 { // Set the program code load address
316                     *command++ = 0;
317                     *command++ = PICA_CMD_HEADER_SINGLE( reg_addr );
318                 }
319 
320                 { // Load program code
321                     NN_GR_ASSERT( 0 <= shader_index && shader_index < m_ExeImageInfoNum );
322 
323                     const ExeImageInfo* exe_info = m_ExeImageInfo[ shader_index ];
324 
325                     NN_UNUSED_VAR( exe_info );
326 
327                     u32 instructionCount = m_InstructionCount;
328                     if ( instructionCount > 512 )
329                     {
330                         instructionCount = 512;
331                     }
332 
333                     command = MakeLoadCommand_( command, reg_load,
334                                                 m_Instruction,
335                                                 m_InstructionCount < 512 ? m_InstructionCount : 512 );
336                 }
337 
338                 { // Notification that program update finished
339                     *command++ = 1;
340                     *command++ = PICA_CMD_HEADER_SINGLE( reg_end );
341                 }
342 
343                 return command;
344             }
345 
346             //------------------------------------------------------------------------
347 
MakeGeoProgramCommand(bit32 * command) const348             bit32* ShaderLite::MakeGeoProgramCommand( bit32* command ) const
349             {
350                 s32 shader_index   = GetGeoShaderIndex();
351                 bit32 reg_addr     = PICA_REG_GS_PROG_ADDR;        // 0x29b
352                 bit32 reg_load     = PICA_REG_GS_PROG_DATA0;       // 0x29c
353                 bit32 reg_end      = PICA_REG_GS_PROG_UPDATE_END;  // 0x28f
354 
355                 { // Set the program code load address
356                     *command++ = 0;
357                     *command++ = PICA_CMD_HEADER_SINGLE( reg_addr );
358                 }
359 
360                 { // Load program code
361                     NN_GR_ASSERT( ( 0 <= shader_index ) &&
362                                   ( shader_index < m_ExeImageInfoNum ) );
363 
364                     const ExeImageInfo* exe_info = m_ExeImageInfo[ shader_index ];
365 
366                     NN_UNUSED_VAR( exe_info );
367 
368                     command = MakeLoadCommand_( command, reg_load,
369                                                 m_Instruction,
370                                                 m_InstructionCount );
371                 }
372 
373                 { // Notification that program update finished
374                     *command++ = 1;
375                     *command++ = PICA_CMD_HEADER_SINGLE( reg_end );
376                 }
377 
378                 return command;
379             }
380 
381             //------------------------------------------------------------------------
382 
MakeShaderModeCommand_(bit32 * command,const bool isEnableGeoShader,const PicaDataDrawMode drawMode)383             bit32* ShaderLite::MakeShaderModeCommand_( bit32* command,
384                                                    const bool isEnableGeoShader,
385                                                    const PicaDataDrawMode drawMode )
386             {
387                 // Set to 0x25e[9:8]
388                 {
389                     if ( isEnableGeoShader )
390                     {
391                         *command++ = PICA_DATA_DRAW_GEOMETRY_PRIMITIVE << 8;
392                     }
393                     else
394                     {
395                         *command++ = drawMode << 8;
396                     }
397                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_VS_OUT_REG_NUM3, 2 );
398                 }
399 
400                 // Dummy command to 0x251
401                 {
402                     command = MakeDummyCommand_( command, PICA_REG_VS_OUT_REG_NUM2, DUMMY_DATA_NUM_251 );
403                 }
404 
405                 // Dummy command to 0x200
406                 {
407                     command = MakeDummyCommand_( command, PICA_REG_VERTEX_ATTR_ARRAYS_BASE_ADDR, DUMMY_DATA_NUM_200 );
408                 }
409 
410                 // 0x229[1:0] Sets the geometry shader to enabled or disabled
411                 {
412                     *command++ = isEnableGeoShader ? 2 : 0;
413                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_DRAW_MODE0, 1 );
414                 }
415 
416                 // Dummy command to 0x200
417                 {
418                     command = MakeDummyCommand_( command, PICA_REG_VERTEX_ATTR_ARRAYS_BASE_ADDR, DUMMY_DATA_NUM_200 );
419                 }
420 
421                 // 0x244 Whether the geometry shader settings are shared with the vertex shader
422                 {
423                     *command++ = isEnableGeoShader ? 1 : 0;
424                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_VS_COM_MODE, 1 );
425                 }
426 
427                 return command;
428             }
429 
430             //------------------------------------------------------------------------
431 
CheckVtxShaderIndex_(const s32 vtx_shader_index)432             void ShaderLite::CheckVtxShaderIndex_( const s32 vtx_shader_index )
433             {
434                 NN_UNUSED_VAR( vtx_shader_index );
435 
436                 // Vertex shader index range test
437                 NN_GR_ASSERT( ( 0 <= vtx_shader_index ) && ( vtx_shader_index < GetShaderNum() ) );
438 
439                 // Geometry shader integrity test
440                 NN_GR_ASSERT( ! m_ExeImageInfo[ vtx_shader_index ]->isGeoShader );
441             }
442 
443             //------------------------------------------------------------------------
444 
CheckGeoShaderIndex_(const s32 geo_shader_index)445             void ShaderLite::CheckGeoShaderIndex_( const s32 geo_shader_index )
446             {
447                 NN_UNUSED_VAR( geo_shader_index );
448 
449                 // Geometry shader index range test
450                 NN_GR_ASSERT( m_GeoShaderIndex < GetShaderNum() );
451 
452                 // Geometry shader integrity test
453                 if ( geo_shader_index > - 1 )
454                 {
455                     NN_GR_ASSERT( m_ExeImageInfo[ geo_shader_index ]->isGeoShader );
456                 }
457             }
458 
459             //------------------------------------------------------------------------
460 
MakeConstRgCommand_(bit32 * command,const s32 shader_index)461             bit32* ShaderLite::MakeConstRgCommand_( bit32* command,
462                                                 const s32 shader_index )
463             {
464                 bit32  reg_float     = PICA_REG_VS_FLOAT_ADDR; // 0x2c0
465                 bit32  reg_integer   = PICA_REG_VS_INT0;       // 0x2b1
466                 bit32* boolMap       = &m_VtxShaderBoolMapUniform;
467 
468                 bool is_geometry_shader = m_ExeImageInfo[ shader_index ]->isGeoShader;
469                 if ( is_geometry_shader )
470                 {
471                     reg_float    = PICA_REG_GS_FLOAT_ADDR; // 0x290
472                     reg_integer  = PICA_REG_GS_INT0;       // 0x281
473                     boolMap      = &m_GeoShaderBoolMapUniform;
474                 }
475 
476                 // Program information
477                 NN_GR_ASSERT( ( 0 <= shader_index ) &&
478                               ( shader_index < m_ExeImageInfoNum ) );
479                 const ExeImageInfo* exe_info = m_ExeImageInfo[ shader_index ];
480 
481                 // Constant register information
482                 struct SetupInfo
483                 {
484                     u16 type;
485                     u16 index;
486                     bit32 value[4];
487                 };
488 
489                 const SetupInfo* setupInfo =
490                     reinterpret_cast< const SetupInfo* >(
491                         reinterpret_cast< const u8* >( exe_info ) + exe_info->setupOffset );
492 
493                 // Constant register command generation
494                 for ( s32 i = 0; i < exe_info->setupCount; ++i )
495                 {
496                     const SetupInfo& info = setupInfo[ i ];
497                     const bit32* value = info.value;
498 
499                     switch ( info.type )
500                     {
501                     case 0 : // bool register setup
502                         *boolMap |= ( info.value[ 0 ] << info.index ) & ( 1 << info.index );
503                         break;
504 
505                     case 1 : // integer register setup
506                         *command++ = value[ 0 ] | value[ 1 ] <<  8 | value[ 2 ] << 16 | value[ 3 ] << 24;
507                         *command++ = PICA_CMD_HEADER_SINGLE( reg_integer + info.index );
508                         break;
509 
510                     case 2 : // float register setup
511                         *command++ = info.index;
512                         *command++ = PICA_CMD_HEADER_BURSTSEQ( reg_float, 4 );
513                         *command++ = ( value[ 3 ] <<  8 & 0xffffff00 ) | ( value[ 2 ] >> 16 & 0x000000ff );
514                         *command++ = ( value[ 2 ] << 16 & 0xffff0000 ) | ( value[ 1 ] >>  8 & 0x0000ffff );
515                         *command++ = ( value[ 1 ] << 24 & 0xff000000 ) | ( value[ 0 ] >>  0 & 0x00ffffff );
516                         *command++ = PADDING_DATA; // Padding
517                         break;
518                     }
519                 }
520 
521                 return command;
522             }
523 
524             //------------------------------------------------------------------------
525 
MakeOutAttrCommand_(bit32 * command,const s32 vtx_shader_index,const s32 geo_shader_index)526             bit32* ShaderLite::MakeOutAttrCommand_( bit32* command,
527                                                 const s32 vtx_shader_index,
528                                                 const s32 geo_shader_index )
529             {
530                 s32 shader_index = vtx_shader_index;
531 
532                 bool is_geometry_shader = false;
533                 if ( 0 <= GetGeoShaderIndex() )
534                 {
535                     is_geometry_shader = true;
536 
537                     shader_index = geo_shader_index;
538                 }
539 
540                 NN_GR_ASSERT(  0 <= shader_index && shader_index < m_ExeImageInfoNum );
541 
542                 // Set output attributes
543                 // When the geometry shader is enabled,
544                 // The output attributes configure the geometry shader
545 
546                 const s32 OUT_ATTR_INDEX_MAX     = 7;
547                 const s32 OUT_ATTR_DIMENTION_MAX = 4;
548                 const s32 OUT_ATTR_BUFFER_MAX    = 16 * 4;
549                 const s32 VS_OUT_ATTR_INDEX_MAX  = 16;
550 
551                 // Output attribute information
552                 struct OutmapInfo
553                 {
554                     u16 type;
555                     u16 index;
556                     u16 mask;
557                     u16 reserve;
558                 };
559 
560                 u32 outNum  = 0;
561                 bit32 useTex  = 0;
562                 bit32 clock   = 0;
563                 bit32 outMask = 0;
564                 bit32 attr[ OUT_ATTR_INDEX_MAX ];
565 
566                 // Get output attribute settings
567                 {
568                     // Program information
569                     const ExeImageInfo* exe_info = m_ExeImageInfo[ shader_index ];
570 
571                     // Actual output attribute information
572                     OutmapInfo outmap_buffer[ OUT_ATTR_BUFFER_MAX ];
573                     s32 outMapBufferCount = 0;
574 
575                     // Merge
576                     if ( is_geometry_shader && exe_info->outputMaps )
577                     {
578                         bit32 gs_copy_mask = 0;
579                         bit32 vs_copy_mask = 0;
580                         // GS output attribute information
581                         const OutmapInfo* outmapInfo =
582                             reinterpret_cast< const OutmapInfo* >(
583                                 reinterpret_cast< const u8* >( exe_info ) + exe_info->outMapOffset );
584                         // VS program information
585                         NN_GR_ASSERT(  0 <= vtx_shader_index && vtx_shader_index < m_ExeImageInfoNum );
586                         const ExeImageInfo* vtx_exe_info = m_ExeImageInfo[ vtx_shader_index ];
587 
588                         // VS output attribute information
589                         const OutmapInfo* vtxOutmapInfo =
590                             reinterpret_cast< const OutmapInfo* >(
591                                 reinterpret_cast< const u8* >( vtx_exe_info ) + vtx_exe_info->outMapOffset );
592 
593                         // Merge all output attributes other than generic output attributes that are defined by both VS and GS
594                         NN_GR_ASSERT( outMapBufferCount < OUT_ATTR_BUFFER_MAX );
595                         for( s32 g = 0; g < exe_info->outMapCount; ++g )
596                         {
597                             // Exclude GS generic attributes (value 9)
598                             if ( ( outmapInfo[ g ].type >= 0 ) &&
599                                  ( outmapInfo[ g ].type <  9 ) &&
600                                  ( outmapInfo[ g ].type != 7 ) )
601                             {
602                                 for( s32 v = 0; v < vtx_exe_info->outMapCount; ++v )
603                                 {
604                                     // Exclude VS generic attributes (value 9)
605                                     if ( ( vtxOutmapInfo[ v ].type >= 0 ) &&
606                                          ( vtxOutmapInfo[ g ].type <  9 ) &&
607                                          ( vtxOutmapInfo[ g ].type != 7 ) )
608                                     {
609                                         if ( outmapInfo[ g ].type == vtxOutmapInfo[ v ].type )
610                                         {
611                                             NN_GR_ASSERT( outMapBufferCount < OUT_ATTR_INDEX_MAX );
612                                             outmap_buffer[ outMapBufferCount ].type = outmapInfo[ g ].type;
613                                             outmap_buffer[ outMapBufferCount ].index = outMapBufferCount;
614                                             outmap_buffer[ outMapBufferCount ].mask = outmapInfo[ g ].mask;
615                                             gs_copy_mask |= 1 << g;
616                                             vs_copy_mask |= 1 << v;
617                                             ++outMapBufferCount;
618                                         }
619                                     }
620                                 }
621                             }
622                         }
623 
624                         // Next, sets the output attributes defined with only GS
625                         for( s32 g = 0; g < exe_info->outMapCount; ++g )
626                         {
627                             if ( ( !( gs_copy_mask & ( 1 << g ) )) &&
628                                  ( outmapInfo[ g ].type >= 0  )    &&
629                                  ( outmapInfo[ g ].type < 9 )      &&
630                                  ( outmapInfo[ g ].type != 7 ) )
631                             {
632                                 NN_GR_ASSERT( outMapBufferCount < OUT_ATTR_BUFFER_MAX );
633                                 outmap_buffer[ outMapBufferCount ].type = outmapInfo[ g ].type;
634                                 outmap_buffer[ outMapBufferCount ].index = outMapBufferCount;
635                                 outmap_buffer[ outMapBufferCount ].mask = outmapInfo[ g ].mask;
636                                 ++outMapBufferCount;
637                             }
638                         }
639 
640                         // Last, sets the output attributes defined with only VS
641                         for( s32 v = 0; v < vtx_exe_info->outMapCount; ++v )
642                         {
643                             if ( ( !( vs_copy_mask & ( 1 << v ) ) ) &&
644                                  ( vtxOutmapInfo[ v ].type >= 0 )   &&
645                                  ( vtxOutmapInfo[ v ].type < 9  )   &&
646                                  ( vtxOutmapInfo[ v ].type != 7 ) )
647                             {
648                                 NN_GR_ASSERT( outMapBufferCount < OUT_ATTR_BUFFER_MAX );
649                                 outmap_buffer[ outMapBufferCount ].type = vtxOutmapInfo[ v ].type;
650                                 outmap_buffer[ outMapBufferCount ].index = outMapBufferCount;
651                                 outmap_buffer[ outMapBufferCount ].mask = vtxOutmapInfo[ v ].mask;
652                                 ++outMapBufferCount;
653                             }
654                         }
655                     }
656                     else
657                     {
658                         // Output attribute information
659                         const OutmapInfo* outmapInfo =
660                             reinterpret_cast< const OutmapInfo* >(
661                                 reinterpret_cast< const u8* >( exe_info ) + exe_info->outMapOffset );
662                         // Copy when not merging
663                         for( s32 i = 0; i < exe_info->outMapCount; ++i )
664                         {
665                             outmap_buffer[ i ] = outmapInfo[ i ];
666                         }
667                         outMapBufferCount = exe_info->outMapCount;
668                     }
669 
670                     for ( s32 index = 0; index < OUT_ATTR_INDEX_MAX; ++index )
671                     {
672                         attr[ index ] = 0x1f1f1f1f;
673                         for ( s32 i = 0; i <outMapBufferCount; ++i )
674                         {
675                             bit32 c = 0;
676                             for ( s32 j = 0; outmap_buffer[ i ].index == index && j < OUT_ATTR_DIMENTION_MAX; ++j )
677                             {
678                                 if ( ( outmap_buffer[ i ].mask & ( 1 << j ) ) == 0 ) continue;
679 
680                                 s32 value = 0x1f;
681                                 switch ( outmap_buffer[ i ].type )
682                                 {
683                                 case 0  :            value = 0x00 + c++; if (c == 2) clock |= 1 <<  0; break; // position
684                                 case 1  :            value = 0x04 + c++;             clock |= 1 << 24; break; // quaternion
685                                 case 2  :            value = 0x08 + c++;             clock |= 1 <<  1; break; // color
686                                 case 3  : if (c < 2) value = 0x0c + c++; useTex = 1; clock |= 1 <<  8; break; // texcoord0
687                                 case 4  :            value = 0x10;       useTex = 1; clock |= 1 << 16; break; // texcoord0w
688                                 case 5  : if (c < 2) value = 0x0e + c++; useTex = 1; clock |= 1 <<  9; break; // texcoord1
689                                 case 6  : if (c < 2) value = 0x16 + c++; useTex = 1; clock |= 1 << 10; break; // texcoord2
690                                 case 8  : if (c < 3) value = 0x12 + c++;             clock |= 1 << 24; break; // view
691                                 }
692                                 attr[ index ] = attr[ index ] & ~( 0xff << ( j * 8 ) ) | value << ( j * 8 );
693                             }
694                         }
695                         if ( attr[ index ] != 0x1f1f1f1f )
696                         {
697                             outMask |= ( 1 << index );
698                             ++outNum;
699                         }
700                     }
701                 }
702 
703                 if ( is_geometry_shader )
704                 {
705                     // Gets the vertex shader output attribute settings
706                     //m_DrawMode = PICA_DATA_DRAW_GEOMETRY_PRIMITIVE;
707 
708                     u32 vtxOutNum = 0;
709                     bit32 vtxOutMask = 0;
710                     bit32 vtxAttr[ VS_OUT_ATTR_INDEX_MAX ];
711 
712                     // Program information
713                     const ExeImageInfo* exe_info = m_ExeImageInfo[ vtx_shader_index ];
714                     const OutmapInfo* outmapInfo =
715                         reinterpret_cast< const OutmapInfo* >(
716                             reinterpret_cast< const u8* >( exe_info ) + exe_info->outMapOffset );
717 
718                     for ( s32 index = 0; index < VS_OUT_ATTR_INDEX_MAX; ++index )
719                     {
720                         vtxAttr[ index ] = 0x1f1f1f1f;
721                         for ( s32 i = 0; i < exe_info->outMapCount; ++i )
722                         {
723                             u32 c = 0;
724                             for ( s32 j = 0; outmapInfo[ i ].index == index && j < OUT_ATTR_DIMENTION_MAX; ++j )
725                             {
726                                 if ( ( outmapInfo[ i ].mask & ( 1 << j ) ) == 0 ) continue;
727 
728                                 s32 value = 0x1f;
729                                 switch ( outmapInfo[ i ].type )
730                                 {
731                                 case 0  :            value = 0x00 + c++; break; // position
732                                 case 1  :            value = 0x04 + c++; break; // Quaternion
733                                 case 2  :            value = 0x08 + c++; break; // color
734                                 case 3  : if (c < 2) value = 0x0c + c++; break; // texcoord0
735                                 case 4  :            value = 0x10;       break; // texcoord0w
736                                 case 5  : if (c < 2) value = 0x0e + c++; break; // texcoord1
737                                 case 6  : if (c < 2) value = 0x16 + c++; break; // texcoord2
738                                 case 8  : if (c < 3) value = 0x12 + c++; break; // view
739                                 case 9  : value = 0xff;
740                                 }
741                                 vtxAttr[ index ] = vtxAttr[ index ] & ~( 0xff << ( j * 8 ) ) | value << ( j * 8 );
742                             }
743                         }
744                         if ( vtxAttr[ index ] != 0x1f1f1f1f )
745                         {
746                             vtxOutMask |= ( 1 << index );
747                             ++vtxOutNum;
748                         }
749                     }
750 
751                     bit32 gsDataMode = m_ExeImageInfo[ geo_shader_index ]->gsDataMode;
752 
753                     // GL_GEOMETRY_PRIMITIVE_DMP
754 
755                     // 0x229 [31:31],
756                     // For data mode 1
757                     if ( gsDataMode == 1 )
758                     {
759                         *command++ = 0x80000000;
760                     }
761                     // For data mode 0 and data mode 2
762                     else
763                     {
764                         *command++ = 0x00000000;
765                     }
766                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_DRAW_MODE0, 0xa );
767 
768                     // 0x253
769                     *command++ = 0x00000000;
770                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_DRAW_MODE1, 0x3 );
771 
772                     // 0x289 Vertex shader output count
773                     *command++ = 0x08000000 | (gsDataMode == 0 ? 0x0000 : 0x0100) | vtxOutNum - 1;
774                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_GS_ATTR_NUM, 0xb );
775 
776                     // 0x28a Geometry shader main label address
777                     *command++ = 0x7fff0000 | m_ExeImageInfo[ geo_shader_index ]->mainAddress;
778                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_GS_START_ADDR );
779 
780                     // 0x28d Vertex shader output mask
781                     *command++ = outMask;
782                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_GS_OUT_REG_MASK );
783 
784                     // 0x2ba Vertex shader main label address
785                     *command++ = 0x7fff0000 | m_ExeImageInfo[ vtx_shader_index ]->mainAddress;
786                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_START_ADDR );
787 
788                     // 0x2bd Vertex shader output register mask
789                     *command++ = vtxOutMask;
790                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_REG_MASK );
791 
792                     // 0x251 Vertex shader output count
793                     *command++ = vtxOutNum - 1;
794                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_REG_NUM2 );
795 
796                     // 0x28b Assumes that the vertex shader output is made to match the geometry shader input
797                     *command++ = 0x76543210;
798                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_GS_ATTR_IN_REG_MAP0 ); // 0x28b
799 
800                     // 0x28c
801                     *command++ = 0xfedcba98;
802                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_GS_ATTR_IN_REG_MAP1 );
803 
804                     // 0x254
805                     if ( ( gsDataMode == 1 ) &&
806                          ( m_ExeImageInfo[ geo_shader_index ]->gsVertexNum != 0 ) )
807                     {
808                         *command++ = m_ExeImageInfo[ geo_shader_index ]->gsVertexNum - 1;
809                         *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_GS_MISC_REG1, 0x1 );
810                     }
811 
812                     // Value for 0x252
813                     if ( gsDataMode == 2 )
814                     {
815                         gsDataMode |= 0x01                                                       << 24;
816                         gsDataMode |= ( m_ExeImageInfo[ geo_shader_index ]->gsVertexStartIndex ) << 16;
817                         gsDataMode |= ( vtxOutNum - 1 )                                          << 12;
818                         gsDataMode |= ( m_ExeImageInfo[ geo_shader_index ]->gsVertexNum - 1 )    <<  8;
819                     }
820 
821                     // 0x252 Data mode specification
822                     *command++ = gsDataMode;
823                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_GS_MISC_REG0 );
824 
825                     // 0x24a
826                     *command++ = vtxOutNum - 1;
827                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_REG_NUM1 );
828                 }
829                 else
830                 {
831                     // 0x229 [31:31]
832                     *command++ = 0x0;
833                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_DRAW_MODE0, 0x8 );
834 
835                     // 0x253 [0:0]
836                     *command++ = 0x0;
837                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_DRAW_MODE1, 0x1 );
838 
839                     // 0x289 [31:24], [15:8], [3:0] Vertex shader mode setting
840                     *command++ = 0xa0000000;
841                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_GS_ATTR_NUM, 0xb );
842 
843                     // 0x2ba Vertex shader main label address
844                     *command++ = 0x7fff0000 | m_ExeImageInfo[ vtx_shader_index ]->mainAddress;
845                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_START_ADDR );
846 
847                     // 0x2bd Vertex shader output register mask
848                     *command++ = outMask;
849                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_REG_MASK );
850 
851                     // 0x251
852                     *command++ = outNum - 1;
853                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_REG_NUM2 );
854 
855                     // 0x252
856                     *command++ = 0;
857                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_GS_MISC_REG0 );
858 
859                     // 0x24a
860                     *command++ = outNum - 1;
861                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_REG_NUM1 );
862                 }
863 
864                 {
865                     // 0x25e [3:0]
866                     *command++ = outNum - 1;
867                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_VS_OUT_REG_NUM3, 0x1 );
868 
869                     // 0x04f
870                     *command++ = outNum;
871                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_REG_NUM0 );
872 
873                     // Output attribute command
874                     outNum = 0;
875                     for ( s32 index = 0; index < OUT_ATTR_INDEX_MAX; ++index )
876                     {
877                         if ( attr[ index ] != 0x1f1f1f1f )
878                         {
879                             *command++ = attr[ index ];
880                             *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_ATTR0 + outNum );
881                             ++outNum;
882                         }
883                     }
884                     for ( s32 index = outNum; index < OUT_ATTR_INDEX_MAX; ++index )
885                     {
886                         *command++ = attr[ index ];
887                         *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_ATTR0 + index );
888                     }
889                 }
890 
891                 // 0x064 Sets whether to use texture coordinates
892                 *command++ = useTex;
893                 *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_ATTR_MODE );
894 
895                 // 0x06f Sets output attribute clock control
896                 *command++ = clock;
897                 *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_ATTR_CLK );
898 
899                 if ( is_geometry_shader )
900                 {
901                     // 0x25e
902                     *command++ = 0;
903                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_VS_OUT_REG_NUM3, 8 );
904                 }
905 
906                 return command;
907             }
908 
909             //------------------------------------------------------------------------
910 
MakeLoadCommand_(bit32 * command,const bit32 load_reg,const bit32 * src_buffer_ptr,const u32 src_data_num) const911             bit32* ShaderLite::MakeLoadCommand_( bit32* command,
912                                              const bit32  load_reg,
913                                              const bit32* src_buffer_ptr,
914                                              const u32  src_data_num ) const
915             {
916                 const s32 WRITE_MAX = 128;
917 
918                 u32 rest = src_data_num;
919 
920                 while ( true )
921                 {
922                     if ( rest <= WRITE_MAX )
923                     {
924                         *command++ = *src_buffer_ptr++;
925                         *command++ = PICA_CMD_HEADER_BURST( load_reg, rest );
926                         std::memcpy( command, src_buffer_ptr, ( rest - 1 ) * sizeof( bit32 ) );
927                         command += rest - 1;
928 
929                         if ( ( rest & 1 ) == 0 ) *command++ = PADDING_DATA; // Padding
930                         break;
931                     }
932                     else
933                     {
934                         *command++ = *src_buffer_ptr++;
935                         *command++ = PICA_CMD_HEADER_BURST( load_reg, WRITE_MAX );
936                         std::memcpy( command, src_buffer_ptr, ( WRITE_MAX - 1 ) * sizeof( bit32 ) );
937 
938                         command += WRITE_MAX - 1;
939                         src_buffer_ptr += WRITE_MAX - 1;
940 
941                         rest -= WRITE_MAX;
942                         if ( ( WRITE_MAX & 1 ) == 0 ) *command++ = PADDING_DATA; // Padding
943                     }
944                 }
945 
946                 return command;
947             }
948 
949             //------------------------------------------------------------------------
950 
MakeDummyCommand_(bit32 * command,const bit32 load_reg,const u32 dataNum)951             bit32* ShaderLite::MakeDummyCommand_( bit32* command,
952                                               const bit32 load_reg,
953                                               const u32 dataNum )
954             {
955                 *command++ = 0;
956                 *command++ = PICA_CMD_HEADER_BURST_BE( load_reg, dataNum, 0 );
957 
958                 for ( s32 i = 0; i < dataNum - ( dataNum & 1 ); ++i)
959                 {
960                     *command++ = PADDING_DATA;
961                 }
962 
963                 return command;
964             }
965 
966             //------------------------------------------------------------------------
967 
SearchBindSymbol(BindSymbol * symbol,const char * name) const968             bool ShaderLite::SearchBindSymbol( BindSymbol* symbol,
969                                            const char* name ) const
970             {
971                 const s32 shader_index = ( symbol->shaderType == BindSymbol::SHADER_TYPE_GEOMETRY )
972                     ? GetGeoShaderIndex() : GetVtxShaderIndex();
973 
974                 NN_GR_ASSERT(  0 <= shader_index && shader_index < m_ExeImageInfoNum );
975 
976                 // Program information
977                 const ExeImageInfo* exe_info = m_ExeImageInfo[ shader_index ];
978 
979                 // Symbol information
980                 struct BindSymbolInfo { u32 nameIndex; u32 regIndex; };
981                 const BindSymbolInfo* bind_symbol_info =
982                     reinterpret_cast< const BindSymbolInfo* >(
983                         reinterpret_cast< const u8* >( exe_info ) + exe_info->bindSymbolOffset );
984 
985                 // String information
986                 const char* string =
987                     reinterpret_cast< const char* >(
988                         reinterpret_cast< const u8* >( exe_info ) + exe_info->stringOffset );
989 
990                 // Search
991                 u32 namelen = std::strlen( name );
992                 for ( s32 i = 0; i < exe_info->bindSymbolCount; ++i )
993                 {
994                     const BindSymbolInfo& info = bind_symbol_info[ i ];
995 
996                     if ( std::strncmp( name, &string[ info.nameIndex ], namelen ) != 0 ) continue;
997                     if ( string[ info.nameIndex + namelen ] != '\0' &&  string[ info.nameIndex + namelen ] != '.' ) continue;
998 
999                     symbol->name  = &string[ info.nameIndex ];
1000                     symbol->start = (info.regIndex & 0x0000ffff);
1001                     symbol->end   = (info.regIndex & 0xffff0000) >> 16;
1002 
1003                     if ( 136 <= symbol->start )
1004                     {
1005                         return false;
1006                     }
1007                     else if ( 120 <= symbol->start )
1008                     {
1009                         symbol->start -= 120;
1010                         symbol->end   -= 120;
1011 
1012                         return symbol->symbolType == BindSymbol::SYMBOL_TYPE_BOOL;
1013                     }
1014                     else if ( 112 <= symbol->start )
1015                     {
1016                         symbol->start -= 112;
1017                         symbol->end   -= 112;
1018 
1019                         return symbol->symbolType == BindSymbol::SYMBOL_TYPE_INTEGER;
1020                     }
1021                     else if ( 16 <= symbol->start )
1022                     {
1023                         symbol->start -= 16;
1024                         symbol->end   -= 16;
1025 
1026                         return symbol->symbolType == BindSymbol::SYMBOL_TYPE_FLOAT;
1027                     }
1028                     else
1029                     {
1030                         return symbol->symbolType == BindSymbol::SYMBOL_TYPE_INPUT;
1031                     }
1032                 }
1033 
1034                 return false;
1035             }
1036 
1037             //------------------------------------------------------------------------
1038 
SearchBindSymbolNum(const BindSymbol::ShaderType shader_type,const BindSymbol::SymbolType symbol_type) const1039             u32 ShaderLite::SearchBindSymbolNum(
1040                 const BindSymbol::ShaderType shader_type,
1041                 const BindSymbol::SymbolType symbol_type ) const
1042             {
1043                 const s32 shader_index = ( shader_type == BindSymbol::SHADER_TYPE_GEOMETRY ) ? GetGeoShaderIndex() : GetVtxShaderIndex();
1044                 NN_GR_ASSERT(  0 <= shader_index && shader_index < m_ExeImageInfoNum );
1045 
1046                 // Program information
1047                 const ExeImageInfo* exe_info = m_ExeImageInfo[ shader_index ];
1048 
1049                 // Output attribute information
1050                 struct BindSymbolInfo { u32 nameIndex; u32 regIndex; };
1051                 const BindSymbolInfo* bind_symbol_info =
1052                     reinterpret_cast< const BindSymbolInfo* >(
1053                         reinterpret_cast< const u8* >( exe_info ) + exe_info->bindSymbolOffset );
1054 
1055                 s32 num = 0;
1056 
1057                 // Search
1058                 for ( s32 i = 0; i < exe_info->bindSymbolCount; ++i )
1059                 {
1060                     s32 regStart = bind_symbol_info[ i ].regIndex & 0x0000ffff;
1061 
1062                     if ( 120 <= regStart && regStart < 136 &&
1063                          symbol_type == BindSymbol::SYMBOL_TYPE_BOOL )
1064                     {
1065                         ++num;
1066                     }
1067                     else if ( 112 <= regStart && regStart < 115 &&
1068                               symbol_type == BindSymbol::SYMBOL_TYPE_INTEGER )
1069                     {
1070                         ++num;
1071                     }
1072                     else if ( 16 <= regStart && regStart < 111 &&
1073                               symbol_type == BindSymbol::SYMBOL_TYPE_FLOAT )
1074                     {
1075                         ++num;
1076                     }
1077                     else if ( regStart <  15 &&
1078                               symbol_type == BindSymbol::SYMBOL_TYPE_INPUT )
1079                     {
1080                         ++num;
1081                     }
1082                 }
1083 
1084                 return num;
1085             }
1086 
1087             //------------------------------------------------------------------------
1088 
SearchBindSymbol(BindSymbol * symbol,const u8 symbol_index) const1089             bool ShaderLite::SearchBindSymbol(
1090                 BindSymbol* symbol,
1091                 const u8 symbol_index ) const
1092             {
1093                 const BindSymbol::ShaderType shaderType = symbol->shaderType;
1094 
1095                 const s32 shader_index = ( shaderType == BindSymbol::SHADER_TYPE_GEOMETRY )
1096                     ? GetGeoShaderIndex() : GetVtxShaderIndex();
1097 
1098                 NN_GR_ASSERT(  0 <= shader_index && shader_index < m_ExeImageInfoNum );
1099 
1100                 // Program information
1101                 const ExeImageInfo* exe_info = m_ExeImageInfo[ shader_index ];
1102 
1103                 // Symbol information
1104                 struct BindSymbolInfo { u32 nameIndex; u32 regIndex; };
1105                 const BindSymbolInfo* bind_symbol_info =
1106                     reinterpret_cast< const BindSymbolInfo* >(
1107                         reinterpret_cast< const u8* >( exe_info ) + exe_info->bindSymbolOffset );
1108 
1109                 // String information
1110                 const char* string =
1111                     reinterpret_cast< const char* >(
1112                         reinterpret_cast< const u8* >( exe_info ) + exe_info->stringOffset );
1113 
1114                 s32 index = -1;
1115 
1116                 // Search
1117                 for ( s32 i = 0; i < exe_info->bindSymbolCount; ++i )
1118                 {
1119                     s32 regStart = bind_symbol_info[ i ].regIndex & 0x0000ffff;
1120 
1121                     // Bool registers
1122                     if ( 120 <= regStart && regStart < 136 )
1123                     {
1124                         ++index;
1125                     }
1126                     // Integer registers
1127                     else if ( 112 <= regStart && regStart < 115 )
1128                     {
1129                         ++index;
1130                     }
1131                     // Constant registers
1132                     else if ( 16 <= regStart && regStart < 111 )
1133                     {
1134                         ++index;
1135                     }
1136                     // Input registers
1137                     else if ( regStart <  15 )
1138                     {
1139                         ++index;
1140                     }
1141                     else
1142                     {
1143                         return false;
1144                     }
1145 
1146                     if ( index == symbol_index )
1147                     {
1148                         const BindSymbolInfo& info = bind_symbol_info[ i ];
1149 
1150                         symbol->name  = &string[ info.nameIndex ];
1151                         symbol->start = (info.regIndex & 0x0000ffff);
1152                         symbol->end   = (info.regIndex & 0xffff0000) >> 16;
1153 
1154                         if ( 120 <= symbol->start )
1155                         {
1156                             symbol->start -= 120;
1157                             symbol->end   -= 120;
1158 
1159                             return symbol->symbolType == BindSymbol::SYMBOL_TYPE_BOOL;
1160                         }
1161                         else if ( 112 <= symbol->start )
1162                         {
1163                             symbol->start -= 112;
1164                             symbol->end -= 112;
1165 
1166                             return symbol->symbolType == BindSymbol::SYMBOL_TYPE_INTEGER;
1167                         }
1168                         else if (  16 <= symbol->start )
1169                         {
1170                             symbol->start -=  16;
1171                             symbol->end -=  16;
1172 
1173                             return symbol->symbolType == BindSymbol::SYMBOL_TYPE_FLOAT;
1174                         }
1175                         else
1176                         {
1177                             return symbol->symbolType == BindSymbol::SYMBOL_TYPE_INPUT;
1178                         }
1179                     }
1180                 }
1181 
1182                 return false;
1183             }
1184 
1185             //------------------------------------------------------------------------
1186 
MakeRgCmdOffsetCache_(void)1187             void ShaderLite::MakeRgCmdOffsetCache_(void)
1188             {
1189                 // Offset value calculation for constant registers
1190                 m_ConstRgCmdOffsetArray[0] = 0;
1191                 for(int i = 1; i < m_ExeImageInfoNum; i++ )
1192                 {
1193                     m_ConstRgCmdOffsetArray[i] = m_CmdCacheConstNumArray[i - 1] + m_ConstRgCmdOffsetArray[i - 1];
1194                 }
1195 
1196                 // Calculates index array required in the output attribute command cache
1197                 // Based on this array, the index when combining the vertex shader and geometry shader is calculated
1198 
1199                 // When m_GeoShaderIndex = -1
1200                 m_CmdCacheOutAttrIndexArray[0] = 0;
1201 
1202                 u8 vtx_index = 0;
1203                 u8 geo_index = 1;
1204                 for(u32 idx = 1; idx < m_ExeImageInfoNum + 1; idx++)
1205                 {
1206                     if(!m_ExeImageInfo[idx - 1]->isGeoShader)
1207                     {
1208                         m_CmdCacheOutAttrIndexArray[idx] = vtx_index;
1209                         vtx_index += (m_GeometryShaderNum + 1);
1210                     }
1211                     else
1212                     {
1213                         m_CmdCacheOutAttrIndexArray[idx] = geo_index;
1214                         geo_index++;
1215                     }
1216                 }
1217 
1218                 // Calculates the offset value required in the output attribute command cache
1219                 u32 combination_num = (m_ExeImageInfoNum - m_GeometryShaderNum) * (m_GeometryShaderNum + 1);
1220 
1221                 m_CmdCacheOutAttrOffsetArray[0] = 0;
1222                 for(u32 i = 1; i < combination_num; i++)
1223                 {
1224                     m_CmdCacheOutAttrOffsetArray[i] = m_CmdCacheOutAttrNumArray[i - 1] + m_CmdCacheOutAttrOffsetArray[i - 1];
1225                 }
1226             }
1227 
1228             //------------------------------------------------------------------------
CalculateBufferSize_() const1229             u32 ShaderLite::CalculateBufferSize_() const
1230             {
1231                 u32 size = 0;
1232 
1233                 // m_Swizzle
1234                 size += m_SwizzleCount * sizeof(bit32);
1235 
1236                 // m_CmdCacheConstNumArray
1237                 size += m_ExeImageInfoNum * sizeof(u32);
1238 
1239                 // m_CmdCacheConstArray
1240                 int num = 0;
1241                 for ( u8 i = 0; i < m_ExeImageInfoNum; i++ )
1242                 {
1243                     // Program information
1244                     NN_GR_ASSERT( ( 0 <= i ) &&
1245                                   ( i < m_ExeImageInfoNum ) );
1246                     const ExeImageInfo* exe_info = m_ExeImageInfo[ i ];
1247 
1248                     // Constant register information
1249                     struct SetupInfo
1250                     {
1251                         u16 type;
1252                         u16 index;
1253                         bit32 value[4];
1254                     };
1255 
1256                     const SetupInfo* setupInfo =
1257                         reinterpret_cast< const SetupInfo* >(
1258                             reinterpret_cast< const u8* >( exe_info ) + exe_info->setupOffset );
1259 
1260                     // Constant register command count
1261                     for ( u32 j = 0; j < exe_info->setupCount; ++j )
1262                     {
1263                         const SetupInfo& info = setupInfo[ j ];
1264 
1265                         switch ( info.type )
1266                         {
1267                         case 0 : // bool register setup
1268                             break;
1269 
1270                         case 1 : // integer register setup
1271                             num += 2;
1272                             break;
1273 
1274                         case 2 : // float register setup
1275                             num += 6;
1276                             break;
1277                         }
1278                     }
1279                 }
1280 
1281                 size += num * sizeof(bit32);
1282 
1283                 // Vertex shader and geometry shader combination count
1284                 u32 num_geo = 0;
1285                 for(u8 i = 0; i < m_ExeImageInfoNum; i++)
1286                 {
1287                     num_geo += m_ExeImageInfo[i]->isGeoShader;
1288                 }
1289                 u32 num_combination = (m_ExeImageInfoNum - num_geo) * (num_geo + 1);
1290 
1291                 // m_CmdCacheOutAttrNumArray
1292                 size += num_combination * sizeof(u32);
1293 
1294                 // m_CmdCacheOutAttrArray
1295                 for(u8 i = 0; i < m_ExeImageInfoNum; i++)
1296                 {
1297                     if( !m_ExeImageInfo[i]->isGeoShader )
1298                     {
1299                         // Operation on the vertex shader alone
1300                         size += 38 * sizeof(u32);
1301                         // Combined with the geometry shader
1302                         for(int j = 0; j < m_ExeImageInfoNum; j++)
1303                         {
1304                             if( m_ExeImageInfo[j]->isGeoShader )
1305                             {
1306                                 size += (m_ExeImageInfo[j]->gsDataMode == 1) ? 50 * sizeof(u32): 48 * sizeof(u32);
1307                             }
1308                         }
1309                     }
1310                 }
1311 
1312                 // m_ConstRgCmdOffsetArray
1313                 size += m_ExeImageInfoNum * sizeof(u32);
1314 
1315                 // m_CmdCacheOutAttrIndexArray
1316                 size += (m_ExeImageInfoNum + 1) * sizeof(u32);
1317 
1318                 // m_CmdCacheOutAttrOffsetArray
1319                 size += num_combination * sizeof(u32);
1320 
1321                 return size;
1322             }
1323 
AssignBuffer_(bit32 * buf)1324             void ShaderLite::AssignBuffer_(bit32* buf)
1325             {
1326                 // m_Swizzle
1327                 m_Swizzle = buf;
1328                 buf += m_SwizzleCount;
1329 
1330                 // m_CmdCacheConstNumArray
1331                 m_CmdCacheConstNumArray = buf;
1332                 buf += m_ExeImageInfoNum;
1333 
1334                 // m_CmdCacheConstArray
1335                 m_CmdCacheConstArray = buf;
1336                 int num = 0;
1337                 for ( u8 i = 0; i < m_ExeImageInfoNum; i++ )
1338                 {
1339                     // Program information
1340                     NN_GR_ASSERT( ( 0 <= i ) &&
1341                                   ( i < m_ExeImageInfoNum ) );
1342                     const ExeImageInfo* exe_info = m_ExeImageInfo[ i ];
1343 
1344                     // Constant register information
1345                     struct SetupInfo
1346                     {
1347                         u16 type;
1348                         u16 index;
1349                         bit32 value[4];
1350                     };
1351 
1352                     const SetupInfo* setupInfo =
1353                         reinterpret_cast< const SetupInfo* >(
1354                             reinterpret_cast< const u8* >( exe_info ) + exe_info->setupOffset );
1355 
1356                     // Constant register command count
1357                     for ( u32 j = 0; j < exe_info->setupCount; ++j )
1358                     {
1359                         const SetupInfo& info = setupInfo[ j ];
1360 
1361                         switch ( info.type )
1362                         {
1363                         case 0 : // bool register setup
1364                             break;
1365 
1366                         case 1 : // integer register setup
1367                             num += 2;
1368                             break;
1369 
1370                         case 2 : // float register setup
1371                             num += 6;
1372                             break;
1373                         }
1374                     }
1375 
1376                     m_CmdCacheConstNumArray[i] = num;
1377                     buf += num;
1378                     num = 0;
1379                 }
1380 
1381                 // Vertex shader and geometry shader combination count
1382                 m_GeometryShaderNum = 0;
1383                 for(u8 i = 0; i < m_ExeImageInfoNum; i++)
1384                 {
1385                     m_GeometryShaderNum += m_ExeImageInfo[i]->isGeoShader;
1386                 }
1387                 u32 combination_num = (m_ExeImageInfoNum - m_GeometryShaderNum) * (m_GeometryShaderNum + 1);
1388 
1389                 // m_CmdCacheOutAttrNumArray
1390                 m_CmdCacheOutAttrNumArray = buf;
1391                 u8 idx = 0;
1392                 for(u8 i = 0; i < m_ExeImageInfoNum; i++)
1393                 {
1394                     if( !m_ExeImageInfo[i]->isGeoShader )
1395                     {
1396                         // Considering operating on the vertex shader alone
1397                         m_CmdCacheOutAttrNumArray[idx] = 38;
1398                         idx++;
1399                         // Considering combining with all the geometry shaders
1400                         for(int j = 0; j < m_ExeImageInfoNum; j++)
1401                         {
1402                             if( m_ExeImageInfo[j]->isGeoShader )
1403                             {
1404                                 m_CmdCacheOutAttrNumArray[idx] = (m_ExeImageInfo[j]->gsDataMode == 1) ? 50 : 48;
1405                                 idx++;
1406                             }
1407                         }
1408                     }
1409                 }
1410                 buf += combination_num;
1411 
1412                 // m_CmdCacheOutAttrArray
1413                 m_CmdCacheOutAttrArray = buf;
1414                 for(u8 i = 0; i < combination_num; i++)
1415                 {
1416                     buf += m_CmdCacheOutAttrNumArray[i];
1417                 }
1418 
1419                 // m_ConstRgCmdOffsetArray
1420                 m_ConstRgCmdOffsetArray = buf;
1421                 buf += m_ExeImageInfoNum;
1422 
1423                 // m_CmdCacheOutAttrIndexArray
1424                 m_CmdCacheOutAttrIndexArray = buf;
1425                 buf += m_ExeImageInfoNum + 1;
1426 
1427                 // m_CmdCacheOutAttrOffsetArray
1428                 m_CmdCacheOutAttrOffsetArray = buf;
1429 
1430             }
1431 
1432         } //namespace CTR
1433     } //namespace gr
1434 } //namespace nn
1435