1 /*---------------------------------------------------------------------------*
2   Project:  Horizon
3   File:     gr_Shader.cpp
4   Copyright (C)2010 Nintendo Co., Ltd.  All rights reserved.
5   These coded instructions, statements, and computer programs contain
6   proprietary information of Nintendo of America Inc. and/or Nintendo
7   Company Ltd., and are protected by Federal copyright law. They may
8   not be disclosed to third parties or copied or duplicated in any form,
9   in whole or in part, without the prior written consent of Nintendo.
10   $Rev: 34829 $
11  *---------------------------------------------------------------------------
12 
13 
14 */
15 
16 #include <nn/gr/CTR/gr_Shader.h>
17 
18 namespace nn
19 {
20     namespace gr
21     {
22         namespace CTR
23         {
Shader(void)24             Shader::Shader( void ) :
25             m_VtxShaderIndex( 0 ),
26             m_GeoShaderIndex( - 1 ),
27             m_ExeImageInfoNum( 0 ),
28             m_InstructionCount( 0 ),
29             m_SwizzleCount( 0 ),
30             m_DrawMode( PICA_DATA_DRAW_TRIANGLES ),
31             m_VtxShaderBoolMapUniform( 0 ),
32             m_GeoShaderBoolMapUniform( 0 ),
33             m_CmdCacheOutAttrNum( 0 )
34             {
35                 for ( s32 shader_index = 0; shader_index < EXE_IMAGE_MAX; shader_index++ )
36                 {
37                     m_CmdCacheConstNumArray[ shader_index ] = 0;
38                 }
39             }
40 
SetupBinary(const void * shader_binary,const s32 vtx_shader_index,const s32 geo_shader_index)41             void Shader::SetupBinary( const void* shader_binary, const s32 vtx_shader_index, const s32 geo_shader_index )
42             {
43                 const bit32* binary = reinterpret_cast< const bit32* >( shader_binary );
44                 NN_GR_ASSERT( binary != NULL );
45 
46                 NN_GR_ASSERT( *binary == 0x424C5644 ); // DVLB
47                 ++binary;
48 
49                 NN_GR_ASSERT( *binary < EXE_IMAGE_MAX );
50 
51                 // Number of execution images in the shader binary.
52                 m_ExeImageInfoNum = *binary;
53                 ++binary;
54 
55                 m_VtxShaderBoolMapUniform = 0;
56                 m_GeoShaderBoolMapUniform = 0;
57 
58                 // Saves the pointer to the execution images in the shader binary
59                 for ( s32 i = 0; i < m_ExeImageInfoNum; ++i )
60                 {
61                     m_ExeImageInfo[ i ] =
62                         reinterpret_cast< const ExeImageInfo* >( (u8*)shader_binary + *binary );
63                     NN_GR_ASSERT( m_ExeImageInfo[ i ]->signature == 0x454c5644 ); // DVLP
64                     ++binary;
65                 }
66 
67                 const bit32* package_info = binary;
68                 NN_GR_ASSERT( *binary == 0x504C5644 ); // DVLP
69                 ++binary;
70                 ++binary;
71 
72                 m_Instruction = reinterpret_cast< const bit32* >( (u8*)package_info + *binary );
73                 ++binary;
74 
75                 m_InstructionCount = *binary;
76                 ++binary;
77 
78                 const bit32* swizzle = reinterpret_cast< const bit32* >( (u8*)package_info + *binary );
79                 ++binary;
80 
81                 m_SwizzleCount = *binary;
82                 NN_GR_ASSERT( m_SwizzleCount < SWIZZLE_PATTERN_MAX );
83                 ++binary;
84 
85                 for ( s32 i = 0; i < m_SwizzleCount; i++ )
86                 {
87                     m_Swizzle[ i ] = swizzle[ i * 2 ] ;
88                 }
89 
90                 PicaDataDrawMode drawMode     = m_DrawMode;
91 
92                 MakeShaderConstCommandCache_();
93                 SetShaderIndex( vtx_shader_index, geo_shader_index );
94 
95                 if ( ! IsEnableGeoShader() )
96                 {
97                     m_DrawMode                = drawMode;
98                 }
99             }
100 
101             //------------------------------------------------------------------------
102 
SetShaderIndex(const s32 vtx_shader_index,const s32 geo_shader_index)103             void Shader::SetShaderIndex( const s32 vtx_shader_index, const s32 geo_shader_index )
104             {
105                 // Test for the vertex shade and geometry shader
106                 CheckVtxShaderIndex_( vtx_shader_index );
107                 // Geometry shader test
108                 CheckGeoShaderIndex_( geo_shader_index );
109 
110                 m_VtxShaderIndex = vtx_shader_index;
111                 m_GeoShaderIndex = geo_shader_index;
112 
113                 if ( IsEnableGeoShader() )
114                 {
115                     m_DrawMode = PICA_DATA_DRAW_GEOMETRY_PRIMITIVE;
116                 }
117 
118                 MakeShaderOutAttrCommandCache_();
119             }
120 
121             //------------------------------------------------------------------------
122 
MakeShaderConstCommandCache_(void)123             void Shader::MakeShaderConstCommandCache_( void )
124             {
125                 for ( s32 shader_index = 0; shader_index < m_ExeImageInfoNum; shader_index++ )
126                 {
127                     // Generates command cache for the constant register.
128                     m_CmdCacheConstNumArray[ shader_index ] =
129                         MakeConstRgCommand_( m_CmdCacheConstArray[ shader_index ],
130                                              shader_index ) -
131                                              m_CmdCacheConstArray[ shader_index ];
132 
133                     NN_GR_ASSERT( m_CmdCacheConstNumArray[ shader_index ] <= CONST_REG_COMMAND_MAX );
134                 }
135             }
136 
137             //------------------------------------------------------------------------
138 
MakeShaderOutAttrCommandCache_(void)139             void Shader::MakeShaderOutAttrCommandCache_( void )
140             {
141                 // Generates command cache for output attributes.
142                 m_CmdCacheOutAttrNum =
143                     MakeOutAttrCommand_( m_CmdCacheOutAttrArray,
144                                          m_VtxShaderIndex,
145                                          m_GeoShaderIndex )
146                     - m_CmdCacheOutAttrArray;
147             }
148 
149             //------------------------------------------------------------------------
150 
MakeFullCommand(bit32 * command) const151             bit32* Shader::MakeFullCommand( bit32* command ) const
152             {
153                 // Generates configuration commands using the geometry shader
154                 {
155                     command = MakePrepareCommand( command );
156                 }
157 
158                 // Generates geometry shader commands
159                 if ( IsEnableGeoShader() )
160                 {
161                     command = MakeGeoProgramCommand( command );
162                     command = MakeGeoSwizzleCommand( command );
163                     command = MakeGeoConstRgCommand( command );
164                     command = MakeGeoBoolMapCommand( command );
165                 }
166 
167                 // Generates vertex shader commands
168                 {
169                     command = MakeVtxProgramCommand( command );
170                     command = MakeVtxSwizzleCommand( command );
171                     command = MakeVtxConstRgCommand( command );
172                     command = MakeVtxBoolMapCommand( command );
173                 }
174 
175                 // Generates output attribute (outmap) related commands
176                 {
177                     command = MakeOutAttrCommand( command );
178                 }
179 
180                 return command;
181             }
182 
183             //------------------------------------------------------------------------
184 
MakeDisableCommand(bit32 * command)185             bit32* Shader::MakeDisableCommand( bit32* command )
186             {
187                 const bool isEnableGeometryShader = false;
188                 const PicaDataDrawMode drawMode   = PICA_DATA_DRAW_TRIANGLES;
189 
190                 command = MakeShaderModeCommand_( command,
191                                                   isEnableGeometryShader,
192                                                   drawMode );
193 
194                 return command;
195             }
196 
197             //------------------------------------------------------------------------
198 
MakeShaderCommand(bit32 * command,const bool isMakePrepareCommand) const199             bit32* Shader::MakeShaderCommand( bit32* command, const bool isMakePrepareCommand ) const
200             {
201                 // Generates configuration commands using the geometry shader
202                 if ( isMakePrepareCommand )
203                 {
204                     command = MakePrepareCommand( command );
205                 }
206 
207                 // Generates commands for the geometry shader constant register
208                 if ( IsEnableGeoShader() )
209                 {
210                     command = MakeGeoConstRgCommand( command );
211                     command = MakeGeoBoolMapCommand( command );
212                 }
213 
214                 // Generates commands for the vertex shader constant register
215                 {
216                     command = MakeVtxConstRgCommand( command );
217                     command = MakeVtxBoolMapCommand( command );
218                 }
219 
220                 // Generates output attribute (outmap) related commands
221                 {
222                     command = MakeOutAttrCommand( command );
223                 }
224 
225                 return command;
226             }
227 
228             //------------------------------------------------------------------------
229 
MakePrepareCommand(bit32 * command) const230             bit32* Shader::MakePrepareCommand( bit32* command ) const
231             {
232                 bool isEnableGeoShader    = IsEnableGeoShader();
233                 PicaDataDrawMode drawMode = m_DrawMode;
234 
235                 command =
236                     MakeShaderModeCommand_( command,
237                                             isEnableGeoShader,
238                                             drawMode );
239 
240                 return command;
241             }
242 
243             //------------------------------------------------------------------------
244 
MakeVtxProgramCommand(bit32 * command) const245             bit32* Shader::MakeVtxProgramCommand( bit32* command ) const
246             {
247                 s32 shader_index   = GetVtxShaderIndex();
248                 bit32 reg_addr     = PICA_REG_VS_PROG_ADDR;        // 0x2cb
249                 bit32 reg_load     = PICA_REG_VS_PROG_DATA0;       // 0x2cc
250                 bit32 reg_end      = PICA_REG_VS_PROG_UPDATE_END;  // 0x2bf
251 
252                 { // Sets the program code load address
253                     *command++ = 0;
254                     *command++ = PICA_CMD_HEADER_SINGLE( reg_addr );
255                 }
256 
257                 { // Loads program code
258                     NN_GR_ASSERT( 0 <= shader_index && shader_index < m_ExeImageInfoNum );
259 
260                     const ExeImageInfo* exe_info = m_ExeImageInfo[ shader_index ];
261 
262                     NN_UNUSED_VAR( exe_info );
263 
264                     u32 instructionCount = m_InstructionCount;
265                     if ( instructionCount > 512 )
266                     {
267                         instructionCount = 512;
268                     }
269 
270                     command = MakeLoadCommand_( command, reg_load,
271                                                 m_Instruction,
272                                                 m_InstructionCount < 512 ? m_InstructionCount : 512 );
273                 }
274 
275                 { // Completes notification program updates
276                     *command++ = 1;
277                     *command++ = PICA_CMD_HEADER_SINGLE( reg_end );
278                 }
279 
280                 return command;
281             }
282 
283             //------------------------------------------------------------------------
284 
MakeGeoProgramCommand(bit32 * command) const285             bit32* Shader::MakeGeoProgramCommand( bit32* command ) const
286             {
287                 s32 shader_index   = GetGeoShaderIndex();
288                 bit32 reg_addr     = PICA_REG_GS_PROG_ADDR;        // 0x29b
289                 bit32 reg_load     = PICA_REG_GS_PROG_DATA0;       // 0x29c
290                 bit32 reg_end      = PICA_REG_GS_PROG_UPDATE_END;  // 0x28f
291 
292                 { // Sets the program code load address
293                     *command++ = 0;
294                     *command++ = PICA_CMD_HEADER_SINGLE( reg_addr );
295                 }
296 
297                 { // Loads program code
298                     NN_GR_ASSERT( ( 0 <= shader_index ) &&
299                                   ( shader_index < m_ExeImageInfoNum ) );
300 
301                     const ExeImageInfo* exe_info = m_ExeImageInfo[ shader_index ];
302 
303                     NN_UNUSED_VAR( exe_info );
304 
305                     command = MakeLoadCommand_( command, reg_load,
306                                                 m_Instruction,
307                                                 m_InstructionCount );
308                 }
309 
310                 { // Completes notification program updates
311                     *command++ = 1;
312                     *command++ = PICA_CMD_HEADER_SINGLE( reg_end );
313                 }
314 
315                 return command;
316             }
317 
318             //------------------------------------------------------------------------
319 
MakeShaderModeCommand_(bit32 * command,const bool isEnableGeoShader,const PicaDataDrawMode drawMode)320             bit32* Shader::MakeShaderModeCommand_( bit32* command,
321                                                    const bool isEnableGeoShader,
322                                                    const PicaDataDrawMode drawMode )
323             {
324                 // Sets to 0x25e[9:8]
325                 {
326                     if ( isEnableGeoShader )
327                     {
328                         *command++ = PICA_DATA_DRAW_GEOMETRY_PRIMITIVE << 8;
329                     }
330                     else
331                     {
332                         *command++ = drawMode << 8;
333                     }
334                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_VS_OUT_REG_NUM3, 2 );
335                 }
336 
337                 // Dummy command to 0x251
338                 {
339                     command = MakeDummyCommand_( command, PICA_REG_VS_OUT_REG_NUM2, DUMMY_DATA_NUM_251 );
340                 }
341 
342                 // Dummy command to 0x200
343                 {
344                     command = MakeDummyCommand_( command, PICA_REG_VERTEX_ATTR_ARRAYS_BASE_ADDR, DUMMY_DATA_NUM_200 );
345                 }
346 
347                 // 0x229[1:0] Sets whether to enable or disable the geometry shader
348                 {
349                     *command++ = isEnableGeoShader ? 2 : 0;
350                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_DRAW_MODE0, 1 );
351                 }
352 
353                 // Dummy command to 0x200
354                 {
355                     command = MakeDummyCommand_( command, PICA_REG_VERTEX_ATTR_ARRAYS_BASE_ADDR, DUMMY_DATA_NUM_200 );
356                 }
357 
358                 // 0x244 Sets whether to share geometry shader settings with the vertex shader
359                 {
360                     *command++ = isEnableGeoShader ? 1 : 0;
361                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_VS_COM_MODE, 1 );
362                 }
363 
364                 return command;
365             }
366 
367             //------------------------------------------------------------------------
368 
CheckVtxShaderIndex_(const s32 vtx_shader_index)369             void Shader::CheckVtxShaderIndex_( const s32 vtx_shader_index )
370             {
371                 NN_UNUSED_VAR( vtx_shader_index );
372 
373                 // Vertex shader index range test
374                 NN_GR_ASSERT( ( 0 <= vtx_shader_index ) && ( vtx_shader_index < GetShaderNum() ) );
375 
376                 // Shader binary compatibility test
377                 NN_GR_ASSERT( ! m_ExeImageInfo[ vtx_shader_index ]->isGeoShader );
378             }
379 
380             //------------------------------------------------------------------------
381 
CheckGeoShaderIndex_(const s32 geo_shader_index)382             void Shader::CheckGeoShaderIndex_( const s32 geo_shader_index )
383             {
384                 NN_UNUSED_VAR( geo_shader_index );
385 
386                 // Geometry shader index range test
387                 NN_GR_ASSERT( m_GeoShaderIndex < GetShaderNum() );
388 
389                 // Shader binary compatibility test
390                 if ( geo_shader_index > - 1 )
391                 {
392                     NN_GR_ASSERT( m_ExeImageInfo[ geo_shader_index ]->isGeoShader );
393                 }
394             }
395 
396             //------------------------------------------------------------------------
397 
MakeConstRgCommand_(bit32 * command,const s32 shader_index)398             bit32* Shader::MakeConstRgCommand_( bit32* command,
399                                                 const s32 shader_index )
400             {
401                 bit32  reg_float     = PICA_REG_VS_FLOAT_ADDR; // 0x2c0
402                 bit32  reg_integer   = PICA_REG_VS_INT0;       // 0x2b1
403                 bit32* boolMap       = &m_VtxShaderBoolMapUniform;
404 
405                 bool is_geometry_shader = m_ExeImageInfo[ shader_index ]->isGeoShader;
406                 if ( is_geometry_shader )
407                 {
408                     reg_float    = PICA_REG_GS_FLOAT_ADDR; // 0x290
409                     reg_integer  = PICA_REG_GS_INT0;       // 0x281
410                     boolMap      = &m_GeoShaderBoolMapUniform;
411                 }
412 
413                 // Program information
414                 NN_GR_ASSERT( ( 0 <= shader_index ) &&
415                               ( shader_index < m_ExeImageInfoNum ) );
416                 const ExeImageInfo* exe_info = m_ExeImageInfo[ shader_index ];
417 
418                 // Constant register information
419                 struct SetupInfo
420                 {
421                     u16 type;
422                     u16 index;
423                     bit32 value[4];
424                 };
425 
426                 const SetupInfo* setupInfo =
427                     reinterpret_cast< const SetupInfo* >(
428                         reinterpret_cast< const u8* >( exe_info ) + exe_info->setupOffset );
429 
430                 // Constant register command generation
431                 for ( s32 i = 0; i < exe_info->setupCount; ++i )
432                 {
433                     const SetupInfo& info = setupInfo[ i ];
434                     const bit32* value = info.value;
435 
436                     switch ( info.type )
437                     {
438                     case 0 : // bool register setup
439                         *boolMap |= ( info.value[ 0 ] << info.index ) & ( 1 << info.index );
440                         break;
441 
442                     case 1 : // integer register setup
443                         *command++ = value[ 0 ] | value[ 1 ] <<  8 | value[ 2 ] << 16 | value[ 3 ] << 24;
444                         *command++ = PICA_CMD_HEADER_SINGLE( reg_integer + info.index );
445                         break;
446 
447                     case 2 : // float register setup
448                         *command++ = info.index;
449                         *command++ = PICA_CMD_HEADER_BURSTSEQ( reg_float, 4 );
450                         *command++ = ( value[ 3 ] <<  8 & 0xffffff00 ) | ( value[ 2 ] >> 16 & 0x000000ff );
451                         *command++ = ( value[ 2 ] << 16 & 0xffff0000 ) | ( value[ 1 ] >>  8 & 0x0000ffff );
452                         *command++ = ( value[ 1 ] << 24 & 0xff000000 ) | ( value[ 0 ] >>  0 & 0x00ffffff );
453                         *command++ = PADDING_DATA; // Padding
454                         break;
455                     }
456                 }
457 
458                 return command;
459             }
460 
461             //------------------------------------------------------------------------
462 
MakeOutAttrCommand_(bit32 * command,const s32 vtx_shader_index,const s32 geo_shader_index)463             bit32* Shader::MakeOutAttrCommand_( bit32* command,
464                                                 const s32 vtx_shader_index,
465                                                 const s32 geo_shader_index )
466             {
467                 s32 shader_index = vtx_shader_index;
468 
469                 bool is_geometry_shader = false;
470                 if ( 0 <= GetGeoShaderIndex() )
471                 {
472                     is_geometry_shader = true;
473 
474                     shader_index = geo_shader_index;
475                 }
476 
477                 NN_GR_ASSERT(  0 <= shader_index && shader_index < m_ExeImageInfoNum );
478 
479                 // Output attribute settings
480                 // When the geometry shader is enabled, output attributes perform settings for the geometry shader
481                 //
482 
483                 const s32 OUT_ATTR_INDEX_MAX     = 7;
484                 const s32 OUT_ATTR_DIMENTION_MAX = 4;
485                 const s32 OUT_ATTR_BUFFER_MAX    = 16 * 4;
486                 const s32 VS_OUT_ATTR_INDEX_MAX  = 16;
487 
488                 // Output attribute information
489                 struct OutmapInfo
490                 {
491                     u16 type;
492                     u16 index;
493                     u16 mask;
494                     u16 reserve;
495                 };
496 
497                 u32 outNum  = 0;
498                 bit32 useTex  = 0;
499                 bit32 clock   = 0;
500                 bit32 outMask = 0;
501                 bit32 attr[ OUT_ATTR_INDEX_MAX ];
502 
503                 // Find output attribute settings
504                 {
505                     // Program information
506                     const ExeImageInfo* exe_info = m_ExeImageInfo[ shader_index ];
507 
508                     // Actual output attribute information
509                     OutmapInfo outmap_buffer[ OUT_ATTR_BUFFER_MAX ];
510                     s32 outMapBufferCount = 0;
511 
512                     // Merge
513                     if ( is_geometry_shader && exe_info->outputMaps )
514                     {
515                         bit32 gs_copy_mask = 0;
516                         bit32 vs_copy_mask = 0;
517                         // GS output attribute information
518                         const OutmapInfo* outmapInfo =
519                             reinterpret_cast< const OutmapInfo* >(
520                                 reinterpret_cast< const u8* >( exe_info ) + exe_info->outMapOffset );
521                         // VS program information
522                         NN_GR_ASSERT(  0 <= vtx_shader_index && vtx_shader_index < m_ExeImageInfoNum );
523                         const ExeImageInfo* vtx_exe_info = m_ExeImageInfo[ vtx_shader_index ];
524 
525                         // VS output attribute information
526                         const OutmapInfo* vtxOutmapInfo =
527                             reinterpret_cast< const OutmapInfo* >(
528                                 reinterpret_cast< const u8* >( vtx_exe_info ) + vtx_exe_info->outMapOffset );
529 
530                         // Merge output attributes, other than generic, defined by both VS and GS
531                         NN_GR_ASSERT( outMapBufferCount < OUT_ATTR_BUFFER_MAX );
532                         for( s32 g = 0; g < exe_info->outMapCount; ++g )
533                         {
534                             // Excludes GS generic attributes (value 9)
535                             if ( ( outmapInfo[ g ].type >= 0 ) &&
536                                  ( outmapInfo[ g ].type <  9 ) &&
537                                  ( outmapInfo[ g ].type != 7 ) )
538                             {
539                                 for( s32 v = 0; v < vtx_exe_info->outMapCount; ++v )
540                                 {
541                                     // Excludes VS generic attributes (value 9)
542                                     if ( ( vtxOutmapInfo[ v ].type >= 0 ) &&
543                                          ( vtxOutmapInfo[ g ].type <  9 ) &&
544                                          ( vtxOutmapInfo[ g ].type != 7 ) )
545                                     {
546                                         if ( outmapInfo[ g ].type == vtxOutmapInfo[ v ].type )
547                                         {
548                                             NN_GR_ASSERT( outMapBufferCount < OUT_ATTR_INDEX_MAX );
549                                             outmap_buffer[ outMapBufferCount ].type = outmapInfo[ g ].type;
550                                             outmap_buffer[ outMapBufferCount ].index = outMapBufferCount;
551                                             outmap_buffer[ outMapBufferCount ].mask = outmapInfo[ g ].mask;
552                                             gs_copy_mask |= 1 << g;
553                                             vs_copy_mask |= 1 << v;
554                                             ++outMapBufferCount;
555                                         }
556                                     }
557                                 }
558                             }
559                         }
560 
561                         // Next, sets output attributes defined only by GS
562                         for( s32 g = 0; g < exe_info->outMapCount; ++g )
563                         {
564                             if ( ( !( gs_copy_mask & ( 1 << g ) )) &&
565                                  ( outmapInfo[ g ].type >= 0  )    &&
566                                  ( outmapInfo[ g ].type < 9 )      &&
567                                  ( outmapInfo[ g ].type != 7 ) )
568                             {
569                                 NN_GR_ASSERT( outMapBufferCount < OUT_ATTR_BUFFER_MAX );
570                                 outmap_buffer[ outMapBufferCount ].type = outmapInfo[ g ].type;
571                                 outmap_buffer[ outMapBufferCount ].index = outMapBufferCount;
572                                 outmap_buffer[ outMapBufferCount ].mask = outmapInfo[ g ].mask;
573                                 ++outMapBufferCount;
574                             }
575                         }
576 
577                         // Last, sets output attributes defined only by VS
578                         for( s32 v = 0; v < vtx_exe_info->outMapCount; ++v )
579                         {
580                             if ( ( !( vs_copy_mask & ( 1 << v ) ) ) &&
581                                  ( vtxOutmapInfo[ v ].type >= 0 )   &&
582                                  ( vtxOutmapInfo[ v ].type < 9  )   &&
583                                  ( vtxOutmapInfo[ v ].type != 7 ) )
584                             {
585                                 NN_GR_ASSERT( outMapBufferCount < OUT_ATTR_BUFFER_MAX );
586                                 outmap_buffer[ outMapBufferCount ].type = vtxOutmapInfo[ v ].type;
587                                 outmap_buffer[ outMapBufferCount ].index = outMapBufferCount;
588                                 outmap_buffer[ outMapBufferCount ].mask = vtxOutmapInfo[ v ].mask;
589                                 ++outMapBufferCount;
590                             }
591                         }
592                     }
593                     else
594                     {
595                         // Output attribute information
596                         const OutmapInfo* outmapInfo =
597                             reinterpret_cast< const OutmapInfo* >(
598                                 reinterpret_cast< const u8* >( exe_info ) + exe_info->outMapOffset );
599                         // Copies when not merging.
600                         for( s32 i = 0; i < exe_info->outMapCount; ++i )
601                         {
602                             outmap_buffer[ i ] = outmapInfo[ i ];
603                         }
604                         outMapBufferCount = exe_info->outMapCount;
605                     }
606 
607                     for ( s32 index = 0; index < OUT_ATTR_INDEX_MAX; ++index )
608                     {
609                         attr[ index ] = 0x1f1f1f1f;
610                         for ( s32 i = 0; i <outMapBufferCount; ++i )
611                         {
612                             bit32 c = 0;
613                             for ( s32 j = 0; outmap_buffer[ i ].index == index && j < OUT_ATTR_DIMENTION_MAX; ++j )
614                             {
615                                 if ( ( outmap_buffer[ i ].mask & ( 1 << j ) ) == 0 ) continue;
616 
617                                 s32 value = 0x1f;
618                                 switch ( outmap_buffer[ i ].type )
619                                 {
620                                 case 0  :            value = 0x00 + c++; if (c == 2) clock |= 1 <<  0; break; // Position
621                                 case 1  :            value = 0x04 + c++;             clock |= 1 << 24; break; // Quaternion
622                                 case 2  :            value = 0x08 + c++;             clock |= 1 <<  1; break; // color
623                                 case 3  : if (c < 2) value = 0x0c + c++; useTex = 1; clock |= 1 <<  8; break; // texcoord0
624                                 case 4  :            value = 0x10;       useTex = 1; clock |= 3 << 16; break; // texcoord0w
625                                 case 5  : if (c < 2) value = 0x0e + c++; useTex = 1; clock |= 1 <<  9; break; // texcoord1
626                                 case 6  : if (c < 2) value = 0x16 + c++; useTex = 1; clock |= 1 << 10; break; // texcoord2
627                                 case 8  : if (c < 3) value = 0x12 + c++;             clock |= 1 << 24; break; // view
628                                 }
629                                 attr[ index ] = attr[ index ] & ~( 0xff << ( j * 8 ) ) | value << ( j * 8 );
630                             }
631                         }
632                         if ( attr[ index ] != 0x1f1f1f1f )
633                         {
634                             outMask |= ( 1 << index );
635                             ++outNum;
636                         }
637                     }
638                 }
639 
640                 if ( is_geometry_shader )
641                 {
642                     // Finds vertex shader output attribute settings
643                     //m_DrawMode = PICA_DATA_DRAW_GEOMETRY_PRIMITIVE;
644 
645                     u32 vtxOutNum = 0;
646                     bit32 vtxOutMask = 0;
647                     bit32 vtxAttr[ VS_OUT_ATTR_INDEX_MAX ];
648 
649                     // Program information
650                     const ExeImageInfo* exe_info = m_ExeImageInfo[ vtx_shader_index ];
651                     const OutmapInfo* outmapInfo =
652                         reinterpret_cast< const OutmapInfo* >(
653                             reinterpret_cast< const u8* >( exe_info ) + exe_info->outMapOffset );
654 
655                     for ( s32 index = 0; index < VS_OUT_ATTR_INDEX_MAX; ++index )
656                     {
657                         vtxAttr[ index ] = 0x1f1f1f1f;
658                         for ( s32 i = 0; i < exe_info->outMapCount; ++i )
659                         {
660                             u32 c = 0;
661                             for ( s32 j = 0; outmapInfo[ i ].index == index && j < OUT_ATTR_DIMENTION_MAX; ++j )
662                             {
663                                 if ( ( outmapInfo[ i ].mask & ( 1 << j ) ) == 0 ) continue;
664 
665                                 s32 value = 0x1f;
666                                 switch ( outmapInfo[ i ].type )
667                                 {
668                                 case 0  :            value = 0x00 + c++; break; // Position
669                                 case 1  :            value = 0x04 + c++; break; // Quaternion
670                                 case 2  :            value = 0x08 + c++; break; // color
671                                 case 3  : if (c < 2) value = 0x0c + c++; break; // texcoord0
672                                 case 4  :            value = 0x10;       break; // texcoord0w
673                                 case 5  : if (c < 2) value = 0x0e + c++; break; // texcoord1
674                                 case 6  : if (c < 2) value = 0x16 + c++; break; // texcoord2
675                                 case 8  : if (c < 3) value = 0x12 + c++; break; // view
676                                 case 9  : value = 0xff;
677                                 }
678                                 vtxAttr[ index ] = vtxAttr[ index ] & ~( 0xff << ( j * 8 ) ) | value << ( j * 8 );
679                             }
680                         }
681                         if ( vtxAttr[ index ] != 0x1f1f1f1f )
682                         {
683                             vtxOutMask |= ( 1 << index );
684                             ++vtxOutNum;
685                         }
686                     }
687 
688                     bit32 gsDataMode = m_ExeImageInfo[ geo_shader_index ]->gsDataMode;
689 
690                     // GL_GEOMETRY_PRIMITIVE_DMP
691 
692                     // 0x229 [31:31],
693                     // For data mode 1
694                     if ( gsDataMode == 1 )
695                     {
696                         *command++ = 0x80000000;
697                     }
698                     // For data mode 0 and data mode 2
699                     else
700                     {
701                         *command++ = 0x00000000;
702                     }
703                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_DRAW_MODE0, 0xa );
704 
705                     // 0x253
706                     *command++ = 0x00000000;
707                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_DRAW_MODE1, 0x3 );
708 
709                     // 0x289 Vertex shader output count
710                     *command++ = 0x08000000 | (gsDataMode == 0 ? 0x0000 : 0x0100) | vtxOutNum - 1;
711                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_GS_ATTR_NUM, 0xb );
712 
713                     // 0x28a Geometry shader main label address
714                     *command++ = 0x7fff0000 | m_ExeImageInfo[ geo_shader_index ]->mainAddress;
715                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_GS_START_ADDR );
716 
717                     // 0x28d Vertex shader output mask
718                     *command++ = outMask;
719                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_GS_OUT_REG_MASK );
720 
721                     // 0x2ba Vertex shader main label address
722                     *command++ = 0x7fff0000 | m_ExeImageInfo[ vtx_shader_index ]->mainAddress;
723                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_START_ADDR );
724 
725                     // 0x2bd Vertex shader output register mask
726                     *command++ = vtxOutMask;
727                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_REG_MASK );
728 
729                     // 0x251 Vertex shader output count
730                     *command++ = vtxOutNum - 1;
731                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_REG_NUM2 );
732 
733                     // 0x28b It is assumed that the vertex shader output matches the geometry shader input
734                     *command++ = 0x76543210;
735                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_GS_ATTR_IN_REG_MAP0 ); // 0x28b
736 
737                     // 0x28c
738                     *command++ = 0xfedcba98;
739                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_GS_ATTR_IN_REG_MAP1 );
740 
741                     // 0x254
742                     if ( ( gsDataMode == 1 ) &&
743                          ( m_ExeImageInfo[ geo_shader_index ]->gsVertexNum != 0 ) )
744                     {
745                         *command++ = m_ExeImageInfo[ geo_shader_index ]->gsVertexNum - 1;
746                         *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_GS_MISC_REG1, 0x1 );
747                     }
748 
749                     // Value for 0x252
750                     if ( gsDataMode == 2 )
751                     {
752                         gsDataMode |= 0x01                                                       << 24;
753                         gsDataMode |= ( m_ExeImageInfo[ geo_shader_index ]->gsVertexStartIndex ) << 16;
754                         gsDataMode |= ( vtxOutNum - 1 )                                          << 12;
755                         gsDataMode |= ( m_ExeImageInfo[ geo_shader_index ]->gsVertexNum - 1 )    <<  8;
756                     }
757 
758                     // 0x252 data mode specification
759                     *command++ = gsDataMode;
760                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_GS_MISC_REG0 );
761 
762                     // 0x24a
763                     *command++ = vtxOutNum - 1;
764                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_REG_NUM1 );
765                 }
766                 else
767                 {
768                     // 0x229 [31:31]
769                     *command++ = 0x0;
770                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_DRAW_MODE0, 0x8 );
771 
772                     // 0x253 [0:0]
773                     *command++ = 0x0;
774                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_DRAW_MODE1, 0x1 );
775 
776                     // 0x289 [31:24], [15:8], [3:0] Geometry shader mode settings
777                     *command++ = 0xa0000000;
778                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_GS_ATTR_NUM, 0xb );
779 
780                     // 0x2ba Vertex shader main label address
781                     *command++ = 0x7fff0000 | m_ExeImageInfo[ vtx_shader_index ]->mainAddress;
782                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_START_ADDR );
783 
784                     // 0x2bd Vertex shader output register mask
785                     *command++ = outMask;
786                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_REG_MASK );
787 
788                     // 0x251
789                     *command++ = outNum - 1;
790                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_REG_NUM2 );
791 
792                     // 0x252
793                     *command++ = 0;
794                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_GS_MISC_REG0 );
795 
796                     // 0x24a
797                     *command++ = outNum - 1;
798                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_REG_NUM1 );
799                 }
800 
801                 {
802                     // 0x25e [3:0]
803                     *command++ = outNum - 1;
804                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_VS_OUT_REG_NUM3, 0x1 );
805 
806                     // 0x04f
807                     *command++ = outNum;
808                     *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_REG_NUM0 );
809 
810                     // Output attribute command
811                     outNum = 0;
812                     for ( s32 index = 0; index < OUT_ATTR_INDEX_MAX; ++index )
813                     {
814                         if ( attr[ index ] != 0x1f1f1f1f )
815                         {
816                             *command++ = attr[ index ];
817                             *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_ATTR0 + outNum );
818                             ++outNum;
819                         }
820                     }
821                     for ( s32 index = outNum; index < OUT_ATTR_INDEX_MAX; ++index )
822                     {
823                         *command++ = attr[ index ];
824                         *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_ATTR0 + index );
825                     }
826                 }
827 
828                 // 0x064 Sets whether to use texture coordinates
829                 *command++ = useTex;
830                 *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_ATTR_MODE );
831 
832                 // 0x06f Sets output attribute clock control
833                 *command++ = clock;
834                 *command++ = PICA_CMD_HEADER_SINGLE( PICA_REG_VS_OUT_ATTR_CLK );
835 
836                 if ( is_geometry_shader )
837                 {
838                     // 0x25e
839                     *command++ = 0;
840                     *command++ = PICA_CMD_HEADER_SINGLE_BE( PICA_REG_VS_OUT_REG_NUM3, 8 );
841                 }
842 
843                 return command;
844             }
845 
846             //------------------------------------------------------------------------
847 
MakeLoadCommand_(bit32 * command,const bit32 load_reg,const bit32 * src_buffer_ptr,const u32 src_data_num) const848             bit32* Shader::MakeLoadCommand_( bit32* command,
849                                              const bit32  load_reg,
850                                              const bit32* src_buffer_ptr,
851                                              const u32  src_data_num ) const
852             {
853                 const s32 WRITE_MAX = 128;
854 
855                 u32 rest = src_data_num;
856 
857                 while ( true )
858                 {
859                     if ( rest <= WRITE_MAX )
860                     {
861                         *command++ = *src_buffer_ptr++;
862                         *command++ = PICA_CMD_HEADER_BURST( load_reg, rest );
863                         std::memcpy( command, src_buffer_ptr, ( rest - 1 ) * sizeof( bit32 ) );
864                         command += rest - 1;
865 
866                         if ( ( rest & 1 ) == 0 ) *command++ = PADDING_DATA; // Padding
867                         break;
868                     }
869                     else
870                     {
871                         *command++ = *src_buffer_ptr++;
872                         *command++ = PICA_CMD_HEADER_BURST( load_reg, WRITE_MAX );
873                         std::memcpy( command, src_buffer_ptr, ( WRITE_MAX - 1 ) * sizeof( bit32 ) );
874 
875                         command += WRITE_MAX - 1;
876                         src_buffer_ptr += WRITE_MAX - 1;
877 
878                         rest -= WRITE_MAX;
879                         if ( ( WRITE_MAX & 1 ) == 0 ) *command++ = PADDING_DATA; // Padding
880                     }
881                 }
882 
883                 return command;
884             }
885 
886             //------------------------------------------------------------------------
887 
MakeDummyCommand_(bit32 * command,const bit32 load_reg,const u32 dataNum)888             bit32* Shader::MakeDummyCommand_( bit32* command,
889                                               const bit32 load_reg,
890                                               const u32 dataNum )
891             {
892                 *command++ = 0;
893                 *command++ = PICA_CMD_HEADER_BURST_BE( load_reg, dataNum, 0 );
894 
895                 for ( s32 i = 0; i < dataNum - ( dataNum & 1 ); ++i)
896                 {
897                     *command++ = PADDING_DATA;
898                 }
899 
900                 return command;
901             }
902 
903             //------------------------------------------------------------------------
904 
SearchBindSymbol(BindSymbol * symbol,const char * name) const905             bool Shader::SearchBindSymbol( BindSymbol* symbol,
906                                            const char* name ) const
907             {
908                 const s32 shader_index = ( symbol->shaderType == BindSymbol::SHADER_TYPE_GEOMETRY )
909                     ? GetGeoShaderIndex() : GetVtxShaderIndex();
910 
911                 NN_GR_ASSERT(  0 <= shader_index && shader_index < m_ExeImageInfoNum );
912 
913                 // Program information
914                 const ExeImageInfo* exe_info = m_ExeImageInfo[ shader_index ];
915 
916                 // Symbol information
917                 struct BindSymbolInfo { u32 nameIndex; u32 regIndex; };
918                 const BindSymbolInfo* bind_symbol_info =
919                     reinterpret_cast< const BindSymbolInfo* >(
920                         reinterpret_cast< const u8* >( exe_info ) + exe_info->bindSymbolOffset );
921 
922                 // Text string information
923                 const char* string =
924                     reinterpret_cast< const char* >(
925                         reinterpret_cast< const u8* >( exe_info ) + exe_info->stringOffset );
926 
927                 // Search
928                 u32 namelen = std::strlen( name );
929                 for ( s32 i = 0; i < exe_info->bindSymbolCount; ++i )
930                 {
931                     const BindSymbolInfo& info = bind_symbol_info[ i ];
932 
933                     if ( std::strncmp( name, &string[ info.nameIndex ], namelen ) != 0 ) continue;
934                     if ( string[ info.nameIndex + namelen ] != '\0' &&  string[ info.nameIndex + namelen ] != '.' ) continue;
935 
936                     symbol->name  = &string[ info.nameIndex ];
937                     symbol->start = (info.regIndex & 0x0000ffff);
938                     symbol->end   = (info.regIndex & 0xffff0000) >> 16;
939 
940                     if ( 136 <= symbol->start )
941                     {
942                         return false;
943                     }
944                     else if ( 120 <= symbol->start )
945                     {
946                         symbol->start -= 120;
947                         symbol->end   -= 120;
948 
949                         return symbol->symbolType == BindSymbol::SYMBOL_TYPE_BOOL;
950                     }
951                     else if ( 112 <= symbol->start )
952                     {
953                         symbol->start -= 112;
954                         symbol->end   -= 112;
955 
956                         return symbol->symbolType == BindSymbol::SYMBOL_TYPE_INTEGER;
957                     }
958                     else if ( 16 <= symbol->start )
959                     {
960                         symbol->start -= 16;
961                         symbol->end   -= 16;
962 
963                         return symbol->symbolType == BindSymbol::SYMBOL_TYPE_FLOAT;
964                     }
965                     else
966                     {
967                         return symbol->symbolType == BindSymbol::SYMBOL_TYPE_INPUT;
968                     }
969                 }
970 
971                 return false;
972             }
973 
974             //------------------------------------------------------------------------
975 
SearchBindSymbolNum(const BindSymbol::ShaderType shader_type,const BindSymbol::SymbolType symbol_type) const976             u32 Shader::SearchBindSymbolNum(
977                 const BindSymbol::ShaderType shader_type,
978                 const BindSymbol::SymbolType symbol_type ) const
979             {
980                 const s32 shader_index = ( shader_type == BindSymbol::SHADER_TYPE_GEOMETRY ) ? GetGeoShaderIndex() : GetVtxShaderIndex();
981                 NN_GR_ASSERT(  0 <= shader_index && shader_index < m_ExeImageInfoNum );
982 
983                 // Program information
984                 const ExeImageInfo* exe_info = m_ExeImageInfo[ shader_index ];
985 
986                 // Output attribute information
987                 struct BindSymbolInfo { u32 nameIndex; u32 regIndex; };
988                 const BindSymbolInfo* bind_symbol_info =
989                     reinterpret_cast< const BindSymbolInfo* >(
990                         reinterpret_cast< const u8* >( exe_info ) + exe_info->bindSymbolOffset );
991 
992                 s32 num = 0;
993 
994                 // Search
995                 for ( s32 i = 0; i < exe_info->bindSymbolCount; ++i )
996                 {
997                     s32 regStart = bind_symbol_info[ i ].regIndex & 0x0000ffff;
998 
999                     if ( 120 <= regStart && regStart < 136 &&
1000                          symbol_type == BindSymbol::SYMBOL_TYPE_BOOL )
1001                     {
1002                         ++num;
1003                     }
1004                     else if ( 112 <= regStart && regStart < 115 &&
1005                               symbol_type == BindSymbol::SYMBOL_TYPE_INTEGER )
1006                     {
1007                         ++num;
1008                     }
1009                     else if ( 16 <= regStart && regStart < 111 &&
1010                               symbol_type == BindSymbol::SYMBOL_TYPE_FLOAT )
1011                     {
1012                         ++num;
1013                     }
1014                     else if ( regStart <  15 &&
1015                               symbol_type == BindSymbol::SYMBOL_TYPE_INPUT )
1016                     {
1017                         ++num;
1018                     }
1019                 }
1020 
1021                 return num;
1022             }
1023 
1024             //------------------------------------------------------------------------
1025 
SearchBindSymbol(BindSymbol * symbol,const u8 symbol_index) const1026             bool Shader::SearchBindSymbol(
1027                 BindSymbol* symbol,
1028                 const u8 symbol_index ) const
1029             {
1030                 const BindSymbol::ShaderType shaderType = symbol->shaderType;
1031 
1032                 const s32 shader_index = ( shaderType == BindSymbol::SHADER_TYPE_GEOMETRY )
1033                     ? GetGeoShaderIndex() : GetVtxShaderIndex();
1034 
1035                 NN_GR_ASSERT(  0 <= shader_index && shader_index < m_ExeImageInfoNum );
1036 
1037                 // Program information
1038                 const ExeImageInfo* exe_info = m_ExeImageInfo[ shader_index ];
1039 
1040                 // Symbol information
1041                 struct BindSymbolInfo { u32 nameIndex; u32 regIndex; };
1042                 const BindSymbolInfo* bind_symbol_info =
1043                     reinterpret_cast< const BindSymbolInfo* >(
1044                         reinterpret_cast< const u8* >( exe_info ) + exe_info->bindSymbolOffset );
1045 
1046                 // Text string information
1047                 const char* string =
1048                     reinterpret_cast< const char* >(
1049                         reinterpret_cast< const u8* >( exe_info ) + exe_info->stringOffset );
1050 
1051                 s32 index = -1;
1052 
1053                 // Search
1054                 for ( s32 i = 0; i < exe_info->bindSymbolCount; ++i )
1055                 {
1056                     s32 regStart = bind_symbol_info[ i ].regIndex & 0x0000ffff;
1057 
1058                     // Bool registers
1059                     if ( 120 <= regStart && regStart < 136 )
1060                     {
1061                         ++index;
1062                     }
1063                     // Integer registers
1064                     else if ( 112 <= regStart && regStart < 115 )
1065                     {
1066                         ++index;
1067                     }
1068                     // Constant registers
1069                     else if ( 16 <= regStart && regStart < 111 )
1070                     {
1071                         ++index;
1072                     }
1073                     // Input registers
1074                     else if ( regStart <  15 )
1075                     {
1076                         ++index;
1077                     }
1078                     else
1079                     {
1080                         return false;
1081                     }
1082 
1083                     if ( index == symbol_index )
1084                     {
1085                         const BindSymbolInfo& info = bind_symbol_info[ i ];
1086 
1087                         symbol->name  = &string[ info.nameIndex ];
1088                         symbol->start = (info.regIndex & 0x0000ffff);
1089                         symbol->end   = (info.regIndex & 0xffff0000) >> 16;
1090 
1091                         if ( 120 <= symbol->start )
1092                         {
1093                             symbol->start -= 120;
1094                             symbol->end   -= 120;
1095 
1096                             return symbol->symbolType == BindSymbol::SYMBOL_TYPE_BOOL;
1097                         }
1098                         else if ( 112 <= symbol->start )
1099                         {
1100                             symbol->start -= 112;
1101                             symbol->end -= 112;
1102 
1103                             return symbol->symbolType == BindSymbol::SYMBOL_TYPE_INTEGER;
1104                         }
1105                         else if (  16 <= symbol->start )
1106                         {
1107                             symbol->start -=  16;
1108                             symbol->end -=  16;
1109 
1110                             return symbol->symbolType == BindSymbol::SYMBOL_TYPE_FLOAT;
1111                         }
1112                         else
1113                         {
1114                             return symbol->symbolType == BindSymbol::SYMBOL_TYPE_INPUT;
1115                         }
1116                     }
1117                 }
1118 
1119                 return false;
1120             }
1121 
1122         } //namespace CTR
1123     } //namespace gr
1124 } //namespace nn
1125