1 /*---------------------------------------------------------------------------*
2   Project:  Horizon
3   File:     gr_Shader.cpp
4 
5   Copyright (C)2009-2012 Nintendo Co., Ltd.  All rights reserved.
6 
7   These coded instructions, statements, and computer programs contain
8   proprietary information of Nintendo of America Inc. and/or Nintendo
9   Company Ltd., and are protected by Federal copyright law.  They may
10   not be disclosed to third parties or copied or duplicated in any form,
11   in whole or in part, without the prior written consent of Nintendo.
12 
13   $Rev: 49968 $
14  *---------------------------------------------------------------------------*/
15 
16 #include <nn/gr/CTR/gr_Shader.h>
17 
18 namespace nn
19 {
20     namespace gr
21     {
22         namespace CTR
23         {
Shader(void)24             Shader::Shader( void ) :
25             m_VtxShaderIndex( 0 ),
26             m_GeoShaderIndex( - 1 ),
27             m_ExeImageInfoNum( 0 ),
28             m_InstructionCount( 0 ),
29             m_SwizzleCount( 0 ),
30             m_DrawMode( PICA_DATA_DRAW_TRIANGLES ),
31             m_VtxShaderBoolMapUniform( 0 ),
32             m_GeoShaderBoolMapUniform( 0 ),
33             m_CmdCacheOutAttrNum( 0 )
34             {
35                 for ( s32 shader_index = 0; shader_index < EXE_IMAGE_MAX; shader_index++ )
36                 {
37                     m_CmdCacheConstNumArray[ shader_index ] = 0;
38                 }
39             }
40 
SetupBinary(const void * shader_binary,const s32 vtx_shader_index,const s32 geo_shader_index)41             void Shader::SetupBinary( const void* shader_binary, const s32 vtx_shader_index, const s32 geo_shader_index )
42             {
43                 const bit32* binary = reinterpret_cast< const bit32* >( shader_binary );
44                 NN_GR_ASSERT( binary != NULL );
45 
46                 NN_GR_ASSERT( *binary == 0x424C5644 ); // DVLB
47                 ++binary;
48 
49                 NN_GR_ASSERT( *binary < EXE_IMAGE_MAX );
50 
51                 // Number of execution images in the shader binary.
52                 m_ExeImageInfoNum = *binary;
53                 ++binary;
54 
55                 m_VtxShaderBoolMapUniform = 0;
56                 m_GeoShaderBoolMapUniform = 0;
57 
58                 // Saves the pointer to the execution images in the shader binary
59                 for ( s32 i = 0; i < m_ExeImageInfoNum; ++i )
60                 {
61                     m_ExeImageInfo[ i ] =
62                         reinterpret_cast< const ExeImageInfo* >( (u8*)shader_binary + *binary );
63                     NN_GR_ASSERT( m_ExeImageInfo[ i ]->signature == 0x454c5644 ); // DVLP
64                     ++binary;
65                 }
66 
67                 const bit32* package_info = binary;
68                 NN_GR_ASSERT( *binary == 0x504C5644 ); // DVLP
69                 ++binary;
70                 ++binary;
71 
72                 m_Instruction = reinterpret_cast< const bit32* >( (u8*)package_info + *binary );
73                 ++binary;
74 
75                 m_InstructionCount = *binary;
76                 ++binary;
77 
78                 const bit32* swizzle = reinterpret_cast< const bit32* >( (u8*)package_info + *binary );
79                 ++binary;
80 
81                 m_SwizzleCount = *binary;
82                 NN_GR_ASSERT( m_SwizzleCount < SWIZZLE_PATTERN_MAX );
83                 ++binary;
84 
85                 for ( s32 i = 0; i < m_SwizzleCount; i++ )
86                 {
87                     m_Swizzle[ i ] = swizzle[ i * 2 ] ;
88                 }
89 
90                 PicaDataDrawMode drawMode     = m_DrawMode;
91 
92                 MakeShaderConstCommandCache_();
93                 SetShaderIndex( vtx_shader_index, geo_shader_index );
94 
95                 if ( ! IsEnableGeoShader() )
96                 {
97                     m_DrawMode                = drawMode;
98                 }
99             }
100 
101             //------------------------------------------------------------------------
102 
SetShaderIndex(const s32 vtx_shader_index,const s32 geo_shader_index)103             void Shader::SetShaderIndex( const s32 vtx_shader_index, const s32 geo_shader_index )
104             {
105                 // Test for the vertex shade and geometry shader
106                 CheckVtxShaderIndex_( vtx_shader_index );
107                 // Geometry shader test
108                 CheckGeoShaderIndex_( geo_shader_index );
109 
110                 m_VtxShaderIndex = vtx_shader_index;
111                 m_GeoShaderIndex = geo_shader_index;
112 
113                 if ( IsEnableGeoShader() )
114                 {
115                     m_DrawMode = PICA_DATA_DRAW_GEOMETRY_PRIMITIVE;
116                 }
117 
118                 MakeShaderOutAttrCommandCache_();
119             }
120 
121             //------------------------------------------------------------------------
122 
MakeShaderConstCommandCache_(void)123             void Shader::MakeShaderConstCommandCache_( void )
124             {
125                 for ( s32 shader_index = 0; shader_index < m_ExeImageInfoNum; shader_index++ )
126                 {
127                     // Generates command cache for the constant register.
128                     m_CmdCacheConstNumArray[ shader_index ] =
129                         MakeConstRgCommand_( m_CmdCacheConstArray[ shader_index ],
130                                              shader_index ) -
131                                              m_CmdCacheConstArray[ shader_index ];
132 
133                     NN_GR_ASSERT( m_CmdCacheConstNumArray[ shader_index ] <= CONST_REG_COMMAND_MAX );
134                 }
135             }
136 
137             //------------------------------------------------------------------------
138 
MakeShaderOutAttrCommandCache_(void)139             void Shader::MakeShaderOutAttrCommandCache_( void )
140             {
141                 // Generates command cache for output attributes.
142                 m_CmdCacheOutAttrNum =
143                     MakeOutAttrCommand_( m_CmdCacheOutAttrArray,
144                                          m_VtxShaderIndex,
145                                          m_GeoShaderIndex )
146                     - m_CmdCacheOutAttrArray;
147             }
148 
149             //------------------------------------------------------------------------
150 
MakeFullCommand(bit32 * command) const151             bit32* Shader::MakeFullCommand( bit32* command ) const
152             {
153                 // Generates configuration commands using the geometry shader
154                 {
155                     command = MakePrepareCommand( command );
156                 }
157 
158                 // Generates geometry shader commands
159                 if ( IsEnableGeoShader() )
160                 {
161                     command = MakeGeoProgramCommand( command );
162                     command = MakeGeoSwizzleCommand( command );
163                     command = MakeGeoConstRgCommand( command );
164                     command = MakeGeoBoolMapCommand( command );
165                 }
166 
167                 // Generates vertex shader commands
168                 {
169                     command = MakeVtxProgramCommand( command );
170                     command = MakeVtxSwizzleCommand( command );
171                     command = MakeVtxConstRgCommand( command );
172                     command = MakeVtxBoolMapCommand( command );
173                 }
174 
175                 // Generates output attribute (outmap) related commands
176                 {
177                     command = MakeOutAttrCommand( command );
178                 }
179 
180                 return command;
181             }
182 
183             //------------------------------------------------------------------------
184 
MakeDisableCommand(bit32 * command)185             bit32* Shader::MakeDisableCommand( bit32* command )
186             {
187                 const bool isEnableGeometryShader = false;
188                 const PicaDataDrawMode drawMode   = PICA_DATA_DRAW_TRIANGLES;
189 
190                 command = MakeShaderModeCommand_( command,
191                                                   isEnableGeometryShader,
192                                                   drawMode );
193 
194                 return command;
195             }
196 
197             //------------------------------------------------------------------------
198 
MakeShaderCommand(bit32 * command,const bool isMakePrepareCommand) const199             bit32* Shader::MakeShaderCommand( bit32* command, const bool isMakePrepareCommand ) const
200             {
201                 // Generates configuration commands using the geometry shader
202                 if ( isMakePrepareCommand )
203                 {
204                     command = MakePrepareCommand( command );
205                 }
206 
207                 // Generates commands for the geometry shader constant register
208                 if ( IsEnableGeoShader() )
209                 {
210                     command = MakeGeoConstRgCommand( command );
211                     command = MakeGeoBoolMapCommand( command );
212                 }
213 
214                 // Generates commands for the vertex shader constant register
215                 {
216                     command = MakeVtxConstRgCommand( command );
217                     command = MakeVtxBoolMapCommand( command );
218                 }
219 
220                 // Generates output attribute (outmap) related commands
221                 {
222                     command = MakeOutAttrCommand( command );
223                 }
224 
225                 return command;
226             }
227 
228             //------------------------------------------------------------------------
229 
MakePrepareCommand(bit32 * command) const230             bit32* Shader::MakePrepareCommand( bit32* command ) const
231             {
232                 bool isEnableGeoShader    = IsEnableGeoShader();
233                 PicaDataDrawMode drawMode = m_DrawMode;
234 
235                 command =
236                     MakeShaderModeCommand_( command,
237                                             isEnableGeoShader,
238                                             drawMode );
239 
240                 return command;
241             }
242 
243             //------------------------------------------------------------------------
244 
MakeVtxProgramCommand(bit32 * command) const245             bit32* Shader::MakeVtxProgramCommand( bit32* command ) const
246             {
247                 s32 shader_index   = GetVtxShaderIndex();
248                 bit32 reg_addr     = PICA_REG_VS_PROG_ADDR;        // 0x2cb
249                 bit32 reg_load     = PICA_REG_VS_PROG_DATA0;       // 0x2cc
250                 bit32 reg_end      = PICA_REG_VS_PROG_UPDATE_END;  // 0x2bf
251 
252                 { // Sets the program code load address
253                     *command++ = 0;
254                     *command++ = PICA_CMD_HEADER_SINGLE( reg_addr );
255                 }
256 
257                 { // Loads program code
258                     NN_GR_ASSERT( 0 <= shader_index && shader_index < m_ExeImageInfoNum );
259 
260                     const ExeImageInfo* exe_info = m_ExeImageInfo[ shader_index ];
261 
262                     NN_UNUSED_VAR( exe_info );
263 
264                     u32 instructionCount = m_InstructionCount;
265                     if ( instructionCount > 512 )
266                     {
267                         instructionCount = 512;
268                     }
269 
270                     command = MakeLoadCommand_( command, reg_load,
271                                                 m_Instruction,
272                                                 m_InstructionCount < 512 ? m_InstructionCount : 512 );
273                 }
274 
275                 { // Completes notification program updates
276                     *command++ = 1;
277                     *command++ = PICA_CMD_HEADER_SINGLE( reg_end );
278                 }
279 
280                 return command;
281             }
282 
283             //------------------------------------------------------------------------
284 
MakeGeoProgramCommand(bit32 * command) const285             bit32* Shader::MakeGeoProgramCommand( bit32* command ) const
286             {
287                 s32 shader_index   = GetGeoShaderIndex();
288                 bit32 reg_addr     = PICA_REG_GS_PROG_ADDR;        // 0x29b
289                 bit32 reg_load     = PICA_REG_GS_PROG_DATA0;       // 0x29c
290                 bit32 reg_end      = PICA_REG_GS_PROG_UPDATE_END;  // 0x28f
291 
292                 { // Sets the program code load address
293                     *command++ = 0;
294                     *command++ = PICA_CMD_HEADER_SINGLE( reg_addr );
295                 }
296 
297                 { // Loads program code
298                     NN_GR_ASSERT( ( 0 <= shader_index ) &&
299                                   ( shader_index < m_ExeImageInfoNum ) );
300 
301                     const ExeImageInfo* exe_info = m_ExeImageInfo[ shader_index ];
302 
303                     NN_UNUSED_VAR( exe_info );
304 
305                     command = MakeLoadCommand_( command, reg_load,
306                                                 m_Instruction,
307                                                 m_InstructionCount );
308                 }
309 
310                 { // Completes notification program updates
311                     *command++ = 1;
312                     *command++ = PICA_CMD_HEADER_SINGLE( reg_end );
313                 }
314 
315                 return command;
316             }
317 
318             //------------------------------------------------------------------------
319 
MakeShaderModeCommand_(bit32 * command,const bool isEnableGeoShader,const PicaDataDrawMode drawMode)320             bit32* Shader::MakeShaderModeCommand_( bit32* command,
321                                                    const bool isEnableGeoShader,
322                                                    const PicaDataDrawMode drawMode )
323             {
324                 // Sets to 0x25e[9:8]
325                 {
326                     if ( isEnableGeoShader )
327                     {
328                         *command++ = PICA_DATA_DRAW_GEOMETRY_PRIMITIVE << 8;
329                     }
330                     else
331                     {
332                         *command++ = drawMode << 8;
333                     }
334                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_VS_OUT_REG_NUM3, 2 );
335                 }
336 
337                 // Dummy command to 0x251
338                 {
339                     command = MakeDummyCommand_( command, PICA_REG_VS_OUT_REG_NUM2, DUMMY_DATA_NUM_251 );
340                 }
341 
342                 // Dummy command to 0x200
343                 {
344                     command = MakeDummyCommand_( command, PICA_REG_VERTEX_ATTR_ARRAYS_BASE_ADDR, DUMMY_DATA_NUM_200 );
345                 }
346 
347                 // 0x229[1:0] Sets whether to enable or disable the geometry shader
348                 {
349                     *command++ = isEnableGeoShader ? 2 : 0;
350                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_DRAW_MODE0, 1 );
351                 }
352 
353                 // Dummy command to 0x200
354                 {
355                     command = MakeDummyCommand_( command, PICA_REG_VERTEX_ATTR_ARRAYS_BASE_ADDR, DUMMY_DATA_NUM_200 );
356                 }
357 
358                 // 0x244 Sets whether to share geometry shader settings with the vertex shader
359                 {
360                     *command++ = isEnableGeoShader ? 1 : 0;
361                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_VS_COM_MODE, 1 );
362                 }
363 
364                 return command;
365             }
366 
367             //------------------------------------------------------------------------
368 
// Validates a vertex shader index. All checks are asserts; nothing is
// returned and no state is changed.
//
// vtx_shader_index : index of the execution image to be used as the vertex shader.
void Shader::CheckVtxShaderIndex_( const s32 vtx_shader_index )
{
    // The parameter is only referenced inside the asserts below.
    NN_UNUSED_VAR( vtx_shader_index );

    // Vertex shader index range test
    NN_GR_ASSERT( ( 0 <= vtx_shader_index ) && ( vtx_shader_index < GetShaderNum() ) );

    // Shader binary compatibility test: the image must not be a geometry shader
    NN_GR_ASSERT( ! m_ExeImageInfo[ vtx_shader_index ]->isGeoShader );
}
379 
380             //------------------------------------------------------------------------
381 
CheckGeoShaderIndex_(const s32 geo_shader_index)382             void Shader::CheckGeoShaderIndex_( const s32 geo_shader_index )
383             {
384                 NN_UNUSED_VAR( geo_shader_index );
385 
386                 // Geometry shader index range test
387                 NN_GR_ASSERT( m_GeoShaderIndex < GetShaderNum() );
388 
389                 // Shader binary compatibility test
390                 if ( geo_shader_index > - 1 )
391                 {
392                     NN_GR_ASSERT( m_ExeImageInfo[ geo_shader_index ]->isGeoShader );
393                 }
394             }
395 
396             //------------------------------------------------------------------------
397 
MakeConstRgCommand_(bit32 * command,const s32 shader_index)398             bit32* Shader::MakeConstRgCommand_( bit32* command,
399                                                 const s32 shader_index )
400             {
401                 bit32  reg_float     = PICA_REG_VS_FLOAT_ADDR; // 0x2c0
402                 bit32  reg_integer   = PICA_REG_VS_INT0;       // 0x2b1
403                 bit32* boolMap       = &m_VtxShaderBoolMapUniform;
404 
405                 bool is_geometry_shader = m_ExeImageInfo[ shader_index ]->isGeoShader;
406                 if ( is_geometry_shader )
407                 {
408                     reg_float    = PICA_REG_GS_FLOAT_ADDR; // 0x290
409                     reg_integer  = PICA_REG_GS_INT0;       // 0x281
410                     boolMap      = &m_GeoShaderBoolMapUniform;
411                 }
412 
413                 // Program information
414                 NN_GR_ASSERT( ( 0 <= shader_index ) &&
415                               ( shader_index < m_ExeImageInfoNum ) );
416                 const ExeImageInfo* exe_info = m_ExeImageInfo[ shader_index ];
417 
418                 // Constant register information
419                 struct SetupInfo
420                 {
421                     u16 type;
422                     u16 index;
423                     bit32 value[4];
424                 };
425 
426                 const SetupInfo* setupInfo =
427                     reinterpret_cast< const SetupInfo* >(
428                         reinterpret_cast< const u8* >( exe_info ) + exe_info->setupOffset );
429 
430                 // Constant register command generation
431                 for ( s32 i = 0; i < exe_info->setupCount; ++i )
432                 {
433                     const SetupInfo& info = setupInfo[ i ];
434                     const bit32* value = info.value;
435 
436                     switch ( info.type )
437                     {
438                     case 0 : // bool register setup
439                         *boolMap |= ( info.value[ 0 ] << info.index ) & ( 1 << info.index );
440                         break;
441 
442                     case 1 : // integer register setup
443                         *command++ = value[ 0 ] | value[ 1 ] <<  8 | value[ 2 ] << 16 | value[ 3 ] << 24;
444                         *command++ = PICA_CMD_HEADER_SINGLE( reg_integer + info.index );
445                         break;
446 
447                     case 2 : // float register setup
448                         *command++ = info.index;
449                         *command++ = PICA_CMD_HEADER_BURSTSEQ( reg_float, 4 );
450                         *command++ = ( value[ 3 ] <<  8 & 0xffffff00 ) | ( value[ 2 ] >> 16 & 0x000000ff );
451                         *command++ = ( value[ 2 ] << 16 & 0xffff0000 ) | ( value[ 1 ] >>  8 & 0x0000ffff );
452                         *command++ = ( value[ 1 ] << 24 & 0xff000000 ) | ( value[ 0 ] >>  0 & 0x00ffffff );
453                         *command++ = PADDING_DATA; // Padding
454                         break;
455                     }
456                 }
457 
458                 return command;
459             }
460 
461             //------------------------------------------------------------------------
462 
MakeOutAttrCommand_(bit32 * command,const s32 vtx_shader_index,const s32 geo_shader_index)463             bit32* Shader::MakeOutAttrCommand_( bit32* command,
464                                                 const s32 vtx_shader_index,
465                                                 const s32 geo_shader_index )
466             {
467                 s32 shader_index = vtx_shader_index;
468 
469                 bool is_geometry_shader = false;
470                 if ( 0 <= GetGeoShaderIndex() )
471                 {
472                     is_geometry_shader = true;
473 
474                     shader_index = geo_shader_index;
475                 }
476 
477                 NN_GR_ASSERT(  0 <= shader_index && shader_index < m_ExeImageInfoNum );
478 
479                 // Output attribute settings
480                 // When the geometry shader is enabled, output attributes perform settings for the geometry shader
481                 //
482 
483                 const s32 OUT_ATTR_INDEX_MAX     = 7;
484                 const s32 OUT_ATTR_DIMENTION_MAX = 4;
485                 const s32 OUT_ATTR_BUFFER_MAX    = 16 * 4;
486                 const s32 VS_OUT_ATTR_INDEX_MAX  = 16;
487 
488                 // Output attribute information
489                 struct OutmapInfo
490                 {
491                     u16 type;
492                     u16 index;
493                     u16 mask;
494                     u16 reserve;
495                 };
496 
497                 u32 outNum  = 0;
498                 bit32 useTex  = 0;
499                 bit32 clock   = 0;
500                 bit32 outMask = 0;
501                 bit32 attr[ OUT_ATTR_INDEX_MAX ];
502 
503                 // Find output attribute settings
504                 {
505                     // Program information
506                     const ExeImageInfo* exe_info = m_ExeImageInfo[ shader_index ];
507 
508                     // Actual output attribute information
509                     OutmapInfo outmap_buffer[ OUT_ATTR_BUFFER_MAX ];
510                     s32 outMapBufferCount = 0;
511 
512                     // Merge
513                     if ( is_geometry_shader && exe_info->outputMaps )
514                     {
515                         bit32 gs_copy_mask = 0;
516                         bit32 vs_copy_mask = 0;
517                         // GS output attribute information
518                         const OutmapInfo* outmapInfo =
519                             reinterpret_cast< const OutmapInfo* >(
520                                 reinterpret_cast< const u8* >( exe_info ) + exe_info->outMapOffset );
521                         // VS program information
522                         NN_GR_ASSERT(  0 <= vtx_shader_index && vtx_shader_index < m_ExeImageInfoNum );
523                         const ExeImageInfo* vtx_exe_info = m_ExeImageInfo[ vtx_shader_index ];
524 
525                         // VS output attribute information
526                         const OutmapInfo* vtxOutmapInfo =
527                             reinterpret_cast< const OutmapInfo* >(
528                                 reinterpret_cast< const u8* >( vtx_exe_info ) + vtx_exe_info->outMapOffset );
529 
530                         // Merge output attributes, other than generic, defined by both VS and GS
531                         NN_GR_ASSERT( outMapBufferCount < OUT_ATTR_BUFFER_MAX );
532                         for( s32 g = 0; g < exe_info->outMapCount; ++g )
533                         {
534                             // Excludes GS generic attributes (value 9)
535                             if ( ( outmapInfo[ g ].type >= 0 ) &&
536                                  ( outmapInfo[ g ].type <  9 ) &&
537                                  ( outmapInfo[ g ].type != 7 ) )
538                             {
539                                 for( s32 v = 0; v < vtx_exe_info->outMapCount; ++v )
540                                 {
541                                     // Excludes VS generic attributes (value 9)
542                                     if ( ( vtxOutmapInfo[ v ].type >= 0 ) &&
543                                          ( vtxOutmapInfo[ g ].type <  9 ) &&
544                                          ( vtxOutmapInfo[ g ].type != 7 ) )
545                                     {
546                                         if ( outmapInfo[ g ].type == vtxOutmapInfo[ v ].type )
547                                         {
548                                             NN_GR_ASSERT( outMapBufferCount < OUT_ATTR_INDEX_MAX );
549                                             outmap_buffer[ outMapBufferCount ].type = outmapInfo[ g ].type;
550                                             outmap_buffer[ outMapBufferCount ].index = outMapBufferCount;
551                                             outmap_buffer[ outMapBufferCount ].mask = outmapInfo[ g ].mask;
552                                             gs_copy_mask |= 1 << g;
553                                             vs_copy_mask |= 1 << v;
554                                             ++outMapBufferCount;
555                                         }
556                                     }
557                                 }
558                             }
559                         }
560 
561                         // Next, sets output attributes defined only by GS
562                         for( s32 g = 0; g < exe_info->outMapCount; ++g )
563                         {
564                             if ( ( !( gs_copy_mask & ( 1 << g ) )) &&
565                                  ( outmapInfo[ g ].type >= 0  )    &&
566                                  ( outmapInfo[ g ].type < 9 )      &&
567                                  ( outmapInfo[ g ].type != 7 ) )
568                             {
569                                 NN_GR_ASSERT( outMapBufferCount < OUT_ATTR_BUFFER_MAX );
570                                 outmap_buffer[ outMapBufferCount ].type = outmapInfo[ g ].type;
571                                 outmap_buffer[ outMapBufferCount ].index = outMapBufferCount;
572                                 outmap_buffer[ outMapBufferCount ].mask = outmapInfo[ g ].mask;
573                                 ++outMapBufferCount;
574                             }
575                         }
576 
577                         // Last, sets output attributes defined only by VS
578                         for( s32 v = 0; v < vtx_exe_info->outMapCount; ++v )
579                         {
580                             if ( ( !( vs_copy_mask & ( 1 << v ) ) ) &&
581                                  ( vtxOutmapInfo[ v ].type >= 0 )   &&
582                                  ( vtxOutmapInfo[ v ].type < 9  )   &&
583                                  ( vtxOutmapInfo[ v ].type != 7 ) )
584                             {
585                                 NN_GR_ASSERT( outMapBufferCount < OUT_ATTR_BUFFER_MAX );
586                                 outmap_buffer[ outMapBufferCount ].type = vtxOutmapInfo[ v ].type;
587                                 outmap_buffer[ outMapBufferCount ].index = outMapBufferCount;
588                                 outmap_buffer[ outMapBufferCount ].mask = vtxOutmapInfo[ v ].mask;
589                                 ++outMapBufferCount;
590                             }
591                         }
592                     }
593                     else
594                     {
595                         // Output attribute information
596                         const OutmapInfo* outmapInfo =
597                             reinterpret_cast< const OutmapInfo* >(
598                                 reinterpret_cast< const u8* >( exe_info ) + exe_info->outMapOffset );
599                         // Copies when not merging.
600                         for( s32 i = 0; i < exe_info->outMapCount; ++i )
601                         {
602                             outmap_buffer[ i ] = outmapInfo[ i ];
603                         }
604                         outMapBufferCount = exe_info->outMapCount;
605                     }
606 
607                     for ( s32 index = 0; index < OUT_ATTR_INDEX_MAX; ++index )
608                     {
609                         attr[ index ] = 0x1f1f1f1f;
610                         for ( s32 i = 0; i <outMapBufferCount; ++i )
611                         {
612                             bit32 c = 0;
613                             for ( s32 j = 0; outmap_buffer[ i ].index == index && j < OUT_ATTR_DIMENTION_MAX; ++j )
614                             {
615                                 if ( ( outmap_buffer[ i ].mask & ( 1 << j ) ) == 0 ) continue;
616 
617                                 s32 value = 0x1f;
618                                 switch ( outmap_buffer[ i ].type )
619                                 {
620                                 case 0  :            value = 0x00 + c++; if (c == 2) clock |= 1 <<  0; break; // Position
621                                 case 1  :            value = 0x04 + c++;             clock |= 1 << 24; break; // Quaternion
622                                 case 2  :            value = 0x08 + c++;             clock |= 1 <<  1; break; // color
623                                 case 3  : if (c < 2) value = 0x0c + c++; useTex = 1; clock |= 1 <<  8; break; // texcoord0
624                                 case 4  :            value = 0x10;       useTex = 1; clock |= 1 << 16; break; // texcoord0w
625                                 case 5  : if (c < 2) value = 0x0e + c++; useTex = 1; clock |= 1 <<  9; break; // texcoord1
626                                 case 6  : if (c < 2) value = 0x16 + c++; useTex = 1; clock |= 1 << 10; break; // texcoord2
627                                 case 8  : if (c < 3) value = 0x12 + c++;             clock |= 1 << 24; break; // view
628                                 }
629                                 attr[ index ] = attr[ index ] & ~( 0xff << ( j * 8 ) ) | value << ( j * 8 );
630                             }
631                         }
632                         if ( attr[ index ] != 0x1f1f1f1f )
633                         {
634                             outMask |= ( 1 << index );
635                             ++outNum;
636                         }
637                     }
638                 }
639 
640                 if ( is_geometry_shader )
641                 {
642                     // Finds vertex shader output attribute settings
643                     //m_DrawMode = PICA_DATA_DRAW_GEOMETRY_PRIMITIVE;
644 
645                     u32 vtxOutNum = 0;
646                     bit32 vtxOutMask = 0;
647                     bit32 vtxAttr[ VS_OUT_ATTR_INDEX_MAX ];
648 
649                     // Program information
650                     const ExeImageInfo* exe_info = m_ExeImageInfo[ vtx_shader_index ];
651                     const OutmapInfo* outmapInfo =
652                         reinterpret_cast< const OutmapInfo* >(
653                             reinterpret_cast< const u8* >( exe_info ) + exe_info->outMapOffset );
654 
655                     for ( s32 index = 0; index < VS_OUT_ATTR_INDEX_MAX; ++index )
656                     {
657                         vtxAttr[ index ] = 0x1f1f1f1f;
658                         for ( s32 i = 0; i < exe_info->outMapCount; ++i )
659                         {
660                             u32 c = 0;
661                             for ( s32 j = 0; outmapInfo[ i ].index == index && j < OUT_ATTR_DIMENTION_MAX; ++j )
662                             {
663                                 if ( ( outmapInfo[ i ].mask & ( 1 << j ) ) == 0 ) continue;
664 
665                                 s32 value = 0x1f;
666                                 switch ( outmapInfo[ i ].type )
667                                 {
668                                 case 0  :            value = 0x00 + c++; break; // Position
669                                 case 1  :            value = 0x04 + c++; break; // Quaternion
670                                 case 2  :            value = 0x08 + c++; break; // color
671                                 case 3  : if (c < 2) value = 0x0c + c++; break; // texcoord0
672                                 case 4  :            value = 0x10;       break; // texcoord0w
673                                 case 5  : if (c < 2) value = 0x0e + c++; break; // texcoord1
674                                 case 6  : if (c < 2) value = 0x16 + c++; break; // texcoord2
675                                 case 8  : if (c < 3) value = 0x12 + c++; break; // view
676                                 case 9  : value = 0xff;
677                                 }
678                                 vtxAttr[ index ] = vtxAttr[ index ] & ~( 0xff << ( j * 8 ) ) | value << ( j * 8 );
679                             }
680                         }
681                         if ( vtxAttr[ index ] != 0x1f1f1f1f )
682                         {
683                             vtxOutMask |= ( 1 << index );
684                             ++vtxOutNum;
685                         }
686                     }
687 
688                     bit32 gsDataMode = m_ExeImageInfo[ geo_shader_index ]->gsDataMode;
689 
690                     // GL_GEOMETRY_PRIMITIVE_DMP
691 
692                     // 0x229 [31:31],
693                     // For data mode 1
694                     if ( gsDataMode == 1 )
695                     {
696                         *command++ = 0x80000000;
697                     }
698                     // For data mode 0 and data mode 2
699                     else
700                     {
701                         *command++ = 0x00000000;
702                     }
703                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_DRAW_MODE0, 0xa );
704 
705                     // 0x253
706                     *command++ = 0x00000000;
707                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_DRAW_MODE1, 0x3 );
708 
709                     // 0x289 Vertex shader output count
710                     *command++ = 0x08000000 | (gsDataMode == 0 ? 0x0000 : 0x0100) | vtxOutNum - 1;
711                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_GS_ATTR_NUM, 0xb );
712 
713                     // 0x28a Geometry shader main label address
714                     *command++ = 0x7fff0000 | m_ExeImageInfo[ geo_shader_index ]->mainAddress;
715                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_GS_START_ADDR );
716 
717                     // 0x28d Vertex shader output mask
718                     *command++ = outMask;
719                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_GS_OUT_REG_MASK );
720 
721                     // 0x2ba Vertex shader main label address
722                     *command++ = 0x7fff0000 | m_ExeImageInfo[ vtx_shader_index ]->mainAddress;
723                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_START_ADDR );
724 
725                     // 0x2bd Vertex shader output register mask
726                     *command++ = vtxOutMask;
727                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_REG_MASK );
728 
729                     // 0x251 Vertex shader output count
730                     *command++ = vtxOutNum - 1;
731                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_REG_NUM2 );
732 
733                     // 0x28b It is assumed that the vertex shader output matches the geometry shader input
734                     *command++ = 0x76543210;
735                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_GS_ATTR_IN_REG_MAP0 ); // 0x28b
736 
737                     // 0x28c
738                     *command++ = 0xfedcba98;
739                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_GS_ATTR_IN_REG_MAP1 );
740 
741                     // 0x254
742                     if ( ( gsDataMode == 1 ) &&
743                          ( m_ExeImageInfo[ geo_shader_index ]->gsPatchSize != 0 ) )
744                     {
745                         *command++ = m_ExeImageInfo[ geo_shader_index ]->gsPatchSize - 1;
746                         *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_GS_MISC_REG1, 0x1 );
747                     }
748 
749                     // Value for 0x252
750                     if ( gsDataMode == 2 )
751                     {
752                         gsDataMode |= 0x01                                                       << 24;
753                         gsDataMode |= ( m_ExeImageInfo[ geo_shader_index ]->gsVertexStartIndex ) << 16;
754                         gsDataMode |= ( vtxOutNum - 1 )                                          << 12;
755                         gsDataMode |= ( m_ExeImageInfo[ geo_shader_index ]->gsVertexNum - 1 )    <<  8;
756                     }
757 
758                     // 0x252 data mode specification
759                     *command++ = gsDataMode;
760                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_GS_MISC_REG0 );
761 
762                     // 0x24a
763                     *command++ = vtxOutNum - 1;
764                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_REG_NUM1 );
765                 }
766                 else
767                 {
768                     // 0x229 [31:31]
769                     *command++ = 0x0;
770                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_DRAW_MODE0, 0x8 );
771 
772                     // 0x253 [0:0]
773                     *command++ = 0x0;
774                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_DRAW_MODE1, 0x1 );
775 
776                     // 0x289 [31:24], [15:8], [3:0] Geometry shader mode settings
777                     *command++ = 0xa0000000;
778                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_GS_ATTR_NUM, 0xb );
779 
780                     // 0x2ba Vertex shader main label address
781                     *command++ = 0x7fff0000 | m_ExeImageInfo[ vtx_shader_index ]->mainAddress;
782                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_START_ADDR );
783 
784                     // 0x2bd Vertex shader output register mask
785                     *command++ = outMask;
786                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_REG_MASK );
787 
788                     // 0x251
789                     *command++ = outNum - 1;
790                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_REG_NUM2 );
791 
792                     // 0x252
793                     *command++ = 0;
794                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_GS_MISC_REG0 );
795 
796                     // 0x24a
797                     *command++ = outNum - 1;
798                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_REG_NUM1 );
799                 }
800 
801                 {
802                     // 0x25e [3:0]
803                     *command++ = outNum - 1;
804                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_VS_OUT_REG_NUM3, 0x1 );
805 
806                     // 0x04f
807                     *command++ = outNum;
808                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_REG_NUM0 );
809 
810                     // Output attribute command
811                     outNum = 0;
812                     for ( s32 index = 0; index < OUT_ATTR_INDEX_MAX; ++index )
813                     {
814                         if ( attr[ index ] != 0x1f1f1f1f )
815                         {
816                             *command++ = attr[ index ];
817                             *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_ATTR0 + outNum );
818                             ++outNum;
819                         }
820                     }
821                     for ( s32 index = outNum; index < OUT_ATTR_INDEX_MAX; ++index )
822                     {
823                         *command++ = attr[ index ];
824                         *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_ATTR0 + index );
825                     }
826                 }
827 
828                 // 0x064 Sets whether to use texture coordinates
829                 *command++ = useTex;
830                 *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_ATTR_MODE );
831 
832                 // 0x06f Sets output attribute clock control
833                 *command++ = clock;
834                 *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_ATTR_CLK );
835 
836                 if ( is_geometry_shader )
837                 {
838                     // 0x25e
839                     *command++ = 0;
840                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_VS_OUT_REG_NUM3, 8 );
841                 }
842 
843                 return command;
844             }
845 
846             //------------------------------------------------------------------------
847 
MakeLoadCommand_(bit32 * command,const bit32 load_reg,const bit32 * src_buffer_ptr,const u32 src_data_num) const848             bit32* Shader::MakeLoadCommand_( bit32* command,
849                                              const bit32  load_reg,
850                                              const bit32* src_buffer_ptr,
851                                              const u32  src_data_num ) const
852             {
853                 const s32 WRITE_MAX = 128;
854 
855                 u32 rest = src_data_num;
856 
857                 while ( true )
858                 {
859                     if ( rest <= WRITE_MAX )
860                     {
861                         *command++ = *src_buffer_ptr++;
862                         *command++ = PICA_CMD_HEADER_BURST( load_reg, rest );
863                         std::memcpy( command, src_buffer_ptr, ( rest - 1 ) * sizeof( bit32 ) );
864                         command += rest - 1;
865 
866                         if ( ( rest & 1 ) == 0 ) *command++ = PADDING_DATA; // Padding
867                         break;
868                     }
869                     else
870                     {
871                         *command++ = *src_buffer_ptr++;
872                         *command++ = PICA_CMD_HEADER_BURST( load_reg, WRITE_MAX );
873                         std::memcpy( command, src_buffer_ptr, ( WRITE_MAX - 1 ) * sizeof( bit32 ) );
874 
875                         command += WRITE_MAX - 1;
876                         src_buffer_ptr += WRITE_MAX - 1;
877 
878                         rest -= WRITE_MAX;
879                         if ( ( WRITE_MAX & 1 ) == 0 ) *command++ = PADDING_DATA; // Padding
880                     }
881                 }
882 
883                 return command;
884             }
885 
886             //------------------------------------------------------------------------
887 
MakeDummyCommand_(bit32 * command,const bit32 load_reg,const u32 dataNum)888             bit32* Shader::MakeDummyCommand_( bit32* command,
889                                               const bit32 load_reg,
890                                               const u32 dataNum )
891             {
892                 *command++ = 0;
893                 *command++ = PICA_CMD_HEADER_BURST_BE( load_reg, dataNum, 0 );
894 
895                 for ( s32 i = 0; i < dataNum - ( dataNum & 1 ); ++i)
896                 {
897                     *command++ = PADDING_DATA;
898                 }
899 
900                 return command;
901             }
902 
903             //------------------------------------------------------------------------
904 
SearchBindSymbol(BindSymbol * symbol,const char * name) const905             bool Shader::SearchBindSymbol( BindSymbol* symbol,
906                                            const char* name ) const
907             {
908                 const s32 shader_index = ( symbol->shaderType == BindSymbol::SHADER_TYPE_GEOMETRY )
909                     ? GetGeoShaderIndex() : GetVtxShaderIndex();
910 
911                 NN_GR_ASSERT(  0 <= shader_index && shader_index < m_ExeImageInfoNum );
912 
913                 // Program information
914                 const ExeImageInfo* exe_info = m_ExeImageInfo[ shader_index ];
915 
916                 // Symbol information
917                 struct BindSymbolInfo { u32 nameIndex; u32 regIndex; };
918                 const BindSymbolInfo* bind_symbol_info =
919                     reinterpret_cast< const BindSymbolInfo* >(
920                         reinterpret_cast< const u8* >( exe_info ) + exe_info->bindSymbolOffset );
921 
922                 // Text string information
923                 const char* string =
924                     reinterpret_cast< const char* >(
925                         reinterpret_cast< const u8* >( exe_info ) + exe_info->stringOffset );
926 
927                 // Search
928                 u32 namelen = std::strlen( name );
929                 for ( s32 i = 0; i < exe_info->bindSymbolCount; ++i )
930                 {
931                     const BindSymbolInfo& info = bind_symbol_info[ i ];
932 
933                     if ( std::strncmp( name, &string[ info.nameIndex ], namelen ) != 0 ) continue;
934                     if ( string[ info.nameIndex + namelen ] != '\0' &&  string[ info.nameIndex + namelen ] != '.' ) continue;
935 
936                     symbol->name  = &string[ info.nameIndex ];
937                     symbol->start = (info.regIndex & 0x0000ffff);
938                     symbol->end   = (info.regIndex & 0xffff0000) >> 16;
939 
940                     if ( 136 <= symbol->start )
941                     {
942                         return false;
943                     }
944                     else if ( 120 <= symbol->start )
945                     {
946                         symbol->start -= 120;
947                         symbol->end   -= 120;
948 
949                         return symbol->symbolType == BindSymbol::SYMBOL_TYPE_BOOL;
950                     }
951                     else if ( 112 <= symbol->start )
952                     {
953                         symbol->start -= 112;
954                         symbol->end   -= 112;
955 
956                         return symbol->symbolType == BindSymbol::SYMBOL_TYPE_INTEGER;
957                     }
958                     else if ( 16 <= symbol->start )
959                     {
960                         symbol->start -= 16;
961                         symbol->end   -= 16;
962 
963                         return symbol->symbolType == BindSymbol::SYMBOL_TYPE_FLOAT;
964                     }
965                     else
966                     {
967                         return symbol->symbolType == BindSymbol::SYMBOL_TYPE_INPUT;
968                     }
969                 }
970 
971                 return false;
972             }
973 
974             //------------------------------------------------------------------------
975 
SearchBindSymbolNum(const BindSymbol::ShaderType shader_type,const BindSymbol::SymbolType symbol_type) const976             u32 Shader::SearchBindSymbolNum(
977                 const BindSymbol::ShaderType shader_type,
978                 const BindSymbol::SymbolType symbol_type ) const
979             {
980                 const s32 shader_index = ( shader_type == BindSymbol::SHADER_TYPE_GEOMETRY ) ? GetGeoShaderIndex() : GetVtxShaderIndex();
981                 NN_GR_ASSERT(  0 <= shader_index && shader_index < m_ExeImageInfoNum );
982 
983                 // Program information
984                 const ExeImageInfo* exe_info = m_ExeImageInfo[ shader_index ];
985 
986                 // Output attribute information
987                 struct BindSymbolInfo { u32 nameIndex; u32 regIndex; };
988                 const BindSymbolInfo* bind_symbol_info =
989                     reinterpret_cast< const BindSymbolInfo* >(
990                         reinterpret_cast< const u8* >( exe_info ) + exe_info->bindSymbolOffset );
991 
992                 s32 num = 0;
993 
994                 // Search
995                 for ( s32 i = 0; i < exe_info->bindSymbolCount; ++i )
996                 {
997                     s32 regStart = bind_symbol_info[ i ].regIndex & 0x0000ffff;
998 
999                     if ( 120 <= regStart && regStart < 136 &&
1000                          symbol_type == BindSymbol::SYMBOL_TYPE_BOOL )
1001                     {
1002                         ++num;
1003                     }
1004                     else if ( 112 <= regStart && regStart < 115 &&
1005                               symbol_type == BindSymbol::SYMBOL_TYPE_INTEGER )
1006                     {
1007                         ++num;
1008                     }
1009                     else if ( 16 <= regStart && regStart < 111 &&
1010                               symbol_type == BindSymbol::SYMBOL_TYPE_FLOAT )
1011                     {
1012                         ++num;
1013                     }
1014                     else if ( regStart <  15 &&
1015                               symbol_type == BindSymbol::SYMBOL_TYPE_INPUT )
1016                     {
1017                         ++num;
1018                     }
1019                 }
1020 
1021                 return num;
1022             }
1023 
1024             //------------------------------------------------------------------------
1025 
SearchBindSymbol(BindSymbol * symbol,const u8 symbol_index) const1026             bool Shader::SearchBindSymbol(
1027                 BindSymbol* symbol,
1028                 const u8 symbol_index ) const
1029             {
1030                 const BindSymbol::ShaderType shaderType = symbol->shaderType;
1031 
1032                 const s32 shader_index = ( shaderType == BindSymbol::SHADER_TYPE_GEOMETRY )
1033                     ? GetGeoShaderIndex() : GetVtxShaderIndex();
1034 
1035                 NN_GR_ASSERT(  0 <= shader_index && shader_index < m_ExeImageInfoNum );
1036 
1037                 // Program information
1038                 const ExeImageInfo* exe_info = m_ExeImageInfo[ shader_index ];
1039 
1040                 // Symbol information
1041                 struct BindSymbolInfo { u32 nameIndex; u32 regIndex; };
1042                 const BindSymbolInfo* bind_symbol_info =
1043                     reinterpret_cast< const BindSymbolInfo* >(
1044                         reinterpret_cast< const u8* >( exe_info ) + exe_info->bindSymbolOffset );
1045 
1046                 // Text string information
1047                 const char* string =
1048                     reinterpret_cast< const char* >(
1049                         reinterpret_cast< const u8* >( exe_info ) + exe_info->stringOffset );
1050 
1051                 s32 index = -1;
1052 
1053                 // Search
1054                 for ( s32 i = 0; i < exe_info->bindSymbolCount; ++i )
1055                 {
1056                     s32 regStart = bind_symbol_info[ i ].regIndex & 0x0000ffff;
1057 
1058                     // Bool registers
1059                     if ( 120 <= regStart && regStart < 136 )
1060                     {
1061                         ++index;
1062                     }
1063                     // Integer registers
1064                     else if ( 112 <= regStart && regStart < 115 )
1065                     {
1066                         ++index;
1067                     }
1068                     // Constant registers
1069                     else if ( 16 <= regStart && regStart < 111 )
1070                     {
1071                         ++index;
1072                     }
1073                     // Input registers
1074                     else if ( regStart <  15 )
1075                     {
1076                         ++index;
1077                     }
1078                     else
1079                     {
1080                         return false;
1081                     }
1082 
1083                     if ( index == symbol_index )
1084                     {
1085                         const BindSymbolInfo& info = bind_symbol_info[ i ];
1086 
1087                         symbol->name  = &string[ info.nameIndex ];
1088                         symbol->start = (info.regIndex & 0x0000ffff);
1089                         symbol->end   = (info.regIndex & 0xffff0000) >> 16;
1090 
1091                         if ( 120 <= symbol->start )
1092                         {
1093                             symbol->start -= 120;
1094                             symbol->end   -= 120;
1095 
1096                             return symbol->symbolType == BindSymbol::SYMBOL_TYPE_BOOL;
1097                         }
1098                         else if ( 112 <= symbol->start )
1099                         {
1100                             symbol->start -= 112;
1101                             symbol->end -= 112;
1102 
1103                             return symbol->symbolType == BindSymbol::SYMBOL_TYPE_INTEGER;
1104                         }
1105                         else if (  16 <= symbol->start )
1106                         {
1107                             symbol->start -=  16;
1108                             symbol->end -=  16;
1109 
1110                             return symbol->symbolType == BindSymbol::SYMBOL_TYPE_FLOAT;
1111                         }
1112                         else
1113                         {
1114                             return symbol->symbolType == BindSymbol::SYMBOL_TYPE_INPUT;
1115                         }
1116                     }
1117                 }
1118 
1119                 return false;
1120             }
1121 
1122         } //namespace CTR
1123     } //namespace gr
1124 } //namespace nn
1125