1 /*---------------------------------------------------------------------------*
2   Project: matrix vector Library
3   File:    mtx.c
4 
5   Copyright 1998-2006 Nintendo.  All rights reserved.
6 
7   These coded instructions, statements, and computer programs contain
8   proprietary information of Nintendo of America Inc. and/or Nintendo
9   Company Ltd., and are protected by Federal copyright law.  They may
10   not be disclosed to third parties or copied or duplicated in any form,
11   in whole or in part, without the prior written consent of Nintendo.
12 
13 
14   $Log: mtx.c,v $
15   Revision 1.3  02/20/2006 04:25:42  mitu
16   changed include path from dolphin/ to revolution/.
17 
18   Revision 1.2  12/30/2005 06:01:51  hirose
19   Temporary workaround for CW3.0 Alpha3 problem.
20 
21 
22     30    02/06/26 13:49 Hirose
23     Fixed a register allocation problem with DEBUG build.
24 
25     29    02/06/20 11:21 Hirose
26     Added MTXConcatArray.
27 
28     28    02/04/11 13:10 Hirose
29     const type specifier support. (worked by Hiratsu@IRD)
30 
31     27    02/02/18 9:27 Hirose
32     Workaround for floating-point precision mismatch issue.
33 
34     26    9/13/01 4:28p Hirose
35     Removed unnecessary possibilities of zero division exception.
36 
37     25    8/28/01 3:48p Hirose
38     Removed one refinement process in the calculation of square root.
39 
40     24    7/30/01 10:21p Hirose
41     Changes for function definitions.
42 
43     23    7/23/01 8:45p Hirose
44     Now all model matrix functions and matrix-vector functions have PS
45     version.
46 
47     22    7/07/01 7:37p Hirose
48     added PS versions of MTXTrans* / MTXScale* functions.
49 
50     21    3/29/01 8:23p Hirose
51     fixed assertion messages
52 
53     20    3/29/01 3:06p Hirose
54     added MTXInvXpose
55 
56     19    3/16/01 11:43p Hirose
57     added PSMTXInverse
58 
59     18    2/22/01 11:58p Hirose
60     Moved some sections into other files.
61     Added some new Paired-Single functions. Etc.
62 
63     17    7/31/00 2:07p Carl
64     Removed redundant verify in MTXRotRad.
65     Minor potential optimization in VECNormalize.
66 
67     16    7/21/00 4:19p Carl
68     Removed as many divides as possible.
69 
70     15    7/21/00 2:58p Carl
71     Removed unnecessary VECNormalize in MTXLookAt.
72 
73     14    7/12/00 4:41p John
74     Substitutes MTXConcat and MTXMultVecArray with their paired-singles
75     equivalent for Gekko non-debug builds.
76 
77     13    7/07/00 7:09p Dante
78     PC Compatibility
79 
80     12    5/10/00 1:50p Hirose
81     added radian-based rotation matrix functions
82     moved paired-single matrix stuff into an another source file
83 
84     11    4/13/00 11:26a Danm
85     Restore high performance version of ROMulVecArray.
86 
87     10    4/10/00 11:56a Danm
88     Added 2 matrix skinning support.
89 
90     9     3/27/00 6:38p Carl
91     Fixed other comments.
92 
93     8     3/27/00 6:20p Carl
94     Changed comment regarding z scale in MTXFrustum.
95 
96     7     3/22/00 2:01p John
97     Added VECSquareDistance and VECDistance.
98 
99     6     2/14/00 2:04p Mikepc
100     de-tabbed code.
101 
102     5     2/11/00 4:22p Tian
103     Cleaned up reordered paired single code
104 
105     4     1/27/00 5:21p Tian
106     Documented paired single ops.
107 
108     3     1/27/00 4:56p Tian
109     Implemented fast paired single matrix ops : PSMTXReorder, PSMTXConcat,
110     PSMTXROMultVecArray, PSMTXMultVecArray, PSMTXMultS16VecArray,
111     PSMTXROMultS16VecArray
112 
113     2     12/06/99 12:40p Alligator
114     modify proj mtx to scale/translate z to (0, w) range for hardware
115 
116     20    11/17/99 4:32p Ryan
117     update to fin win32 compiler warnings
118 
119     19    11/12/99 12:08p Mikepc
120     changed #include<dolphin/mtx/mtxAssert.h> to #include"mtxAssert.h" to
121     reflect new location of mtxAssert.h file.
122 
123     18    11/10/99 7:21p Hirose
124     added singular case check for VECHalfAngle
125 
126     17    11/10/99 4:40p Alligator
127     added MTXReflect
128 
129     16    10/06/99 12:28p Yasu
130     Appended MTXMultVecSR and MTXMultVecArraySR
131 
132     15    8/31/99 5:41p Shiki
133     Revised to use sqrtf() instead of sqrt().
134 
135     14      8/31/99 5:18p Shiki
136     Revised to use sinf()/cosf() instead of sin()/cos().
137 
138     13      7/28/99 3:38p Ryan
139 
140     12      7/28/99 11:30a Ryan
141     Added Texture Projection functions
142 
143     11      7/02/99 12:53p Mikepc
144     changed mtx row/col
145 
146   $NoKeywords: $
147  *---------------------------------------------------------------------------*/
148 #ifdef WIN32
149 #include <win32/win32.h>
150 #endif
151 
152 #include <math.h>
153 #include <revolution/mtx.h>
154 #include "mtxAssert.h"
155 
156 #if ( __MWERKS__ == 0x00004100 )
157 #pragma defer_codegen on    // This will be removed when compiler is fixed.
158 #endif
159 
160 /*---------------------------------------------------------------------*
161     Constants
162  *---------------------------------------------------------------------*/
163 static f32 Unit01[] = { 0.0f, 1.0f };
164 
165 
166 /*---------------------------------------------------------------------*
167 
168 
169 
170 
171                             GENERAL   SECTION
172 
173 
174 
175 
176 *---------------------------------------------------------------------*/
177 
178 
179 /*---------------------------------------------------------------------*
180 
181 Name:           MTXIdentity
182 
183 Description:    sets a matrix to identity
184 
185 Arguments:      m :  matrix to be set
186 
187 Return   :         none
188 
189 *---------------------------------------------------------------------*/
190 /*---------------------------------------------------------------------*
191     C version
192  *---------------------------------------------------------------------*/
C_MTXIdentity(Mtx m)193 void C_MTXIdentity ( Mtx m )
194 {
195 
196     ASSERTMSG( (m != 0), MTX_IDENTITY_1 );
197 
198 
199     m[0][0] = 1.0f;     m[0][1] = 0.0f;  m[0][2] = 0.0f;  m[0][3] = 0.0f;
200 
201     m[1][0] = 0.0f;     m[1][1] = 1.0f;  m[1][2] = 0.0f;  m[1][3] = 0.0f;
202 
203     m[2][0] = 0.0f;     m[2][1] = 0.0f;  m[2][2] = 1.0f;  m[2][3] = 0.0f;
204 
205 }
206 
207 /*---------------------------------------------------------------------*
208     Paired-Single assembler version
209  *---------------------------------------------------------------------*
210             Note that this performs NO error checking.
211             Actually there is not much performance advantage.
212  *---------------------------------------------------------------------*/
213 #ifdef GEKKO
PSMTXIdentity(register Mtx m)214 void PSMTXIdentity
215 (
216     register Mtx m    // r3
217 )
218 {
219     register f32 c_zero = 0.0F; // { 0.0F, 0.0F };
220     register f32 c_one  = 1.0F; // { 1.0.0F, 1.0.0F };
221     register f32 c_01;
222     register f32 c_10;
223 
224     asm
225     {
226         psq_st      c_zero, 8(m),   0, 0     // m[0][2], m[0][3]
227         ps_merge01  c_01, c_zero, c_one      // { 0.1F, 1.0F }
228         psq_st      c_zero, 24(m),  0, 0     // m[1][2], m[1][3]
229         ps_merge10  c_10, c_one, c_zero      // fp2 = { 1.0F, 0.0F }
230         psq_st      c_zero, 32(m),  0, 0     // m[2][0], m[2][1]
231         psq_st      c_01,   16(m),  0, 0     // m[1][0], m[1][1]
232         psq_st      c_10,   0(m),   0, 0     // m[0][0], m[0][1]
233         psq_st      c_10,   40(m),  0, 0     // m[2][2], m[2][3]
234     }
235 }
236 #endif // GEKKO
237 
238 /*---------------------------------------------------------------------*
239 
240 Name:           MTXCopy
241 
242 Description:    copies the contents of one matrix into another
243 
244 Arguments:      src        source matrix for copy
245                 dst        destination matrix for copy
246 
247 
248 Return   :         none
249 
250 *---------------------------------------------------------------------*/
251 /*---------------------------------------------------------------------*
252     C version
253  *---------------------------------------------------------------------*/
C_MTXCopy(const Mtx src,Mtx dst)254 void C_MTXCopy ( const Mtx src, Mtx dst )
255 {
256 
257     ASSERTMSG( (src != 0) , MTX_COPY_1 );
258     ASSERTMSG( (dst != 0) , MTX_COPY_2 );
259 
260     if( src == dst )
261     {
262         return;
263     }
264 
265 
266     dst[0][0] = src[0][0];    dst[0][1] = src[0][1];    dst[0][2] = src[0][2];    dst[0][3] = src[0][3];
267 
268     dst[1][0] = src[1][0];    dst[1][1] = src[1][1];    dst[1][2] = src[1][2];    dst[1][3] = src[1][3];
269 
270     dst[2][0] = src[2][0];    dst[2][1] = src[2][1];    dst[2][2] = src[2][2];    dst[2][3] = src[2][3];
271 
272 }
273 
274 /*---------------------------------------------------------------------*
275     Paired-Single assembler version
276  *---------------------------------------------------------------------*
277                 Note that this performs NO error checking.
278  *---------------------------------------------------------------------*/
279 #ifdef GEKKO
PSMTXCopy(const register Mtx src,register Mtx dst)280 asm void PSMTXCopy
281 (
282     const register Mtx src,   // r3
283     register Mtx dst    // r4
284 )
285 {
286     nofralloc
287 
288     psq_l       fp0, 0(src),   0, 0
289     psq_st      fp0, 0(dst),   0, 0
290     psq_l       fp1, 8(src),   0, 0
291     psq_st      fp1, 8(dst),   0, 0
292     psq_l       fp2, 16(src),  0, 0
293     psq_st      fp2, 16(dst),  0, 0
294     psq_l       fp3, 24(src),  0, 0
295     psq_st      fp3, 24(dst),  0, 0
296     psq_l       fp4, 32(src),  0, 0
297     psq_st      fp4, 32(dst),  0, 0
298     psq_l       fp5, 40(src),  0, 0
299     psq_st      fp5, 40(dst),  0, 0
300 
301     blr
302 }
303 #endif // GEKKO
304 
305 /*---------------------------------------------------------------------*
306 
307 Name:           MTXConcat
308 
309 Description:    concatenates two matrices.
310                 order of operation is A x B = AB.
311                 ok for any of ab == a == b.
312 
313                 saves a MTXCopy operation if ab != to a or b.
314 
315 Arguments:      a        first matrix for concat.
316                 b        second matrix for concat.
317                 ab       resultant matrix from concat.
318 
319 Return   :         none
320 
321  *---------------------------------------------------------------------*/
322 
323 /*---------------------------------------------------------------------*
324     C version
325  *---------------------------------------------------------------------*/
C_MTXConcat(const Mtx a,const Mtx b,Mtx ab)326 void C_MTXConcat ( const Mtx a, const Mtx b, Mtx ab )
327 {
328     Mtx mTmp;
329     MtxPtr m;
330 
331     ASSERTMSG( (a  != 0), MTX_CONCAT_1 );
332     ASSERTMSG( (b  != 0), MTX_CONCAT_2 );
333     ASSERTMSG( (ab != 0), MTX_CONCAT_3 );
334 
335     if( (ab == a) || (ab == b) )
336     {
337         m = mTmp;
338     }
339 
340     else
341     {
342         m = ab;
343     }
344 
345     // compute (a x b) -> m
346 
347     m[0][0] = a[0][0]*b[0][0] + a[0][1]*b[1][0] + a[0][2]*b[2][0];
348     m[0][1] = a[0][0]*b[0][1] + a[0][1]*b[1][1] + a[0][2]*b[2][1];
349     m[0][2] = a[0][0]*b[0][2] + a[0][1]*b[1][2] + a[0][2]*b[2][2];
350     m[0][3] = a[0][0]*b[0][3] + a[0][1]*b[1][3] + a[0][2]*b[2][3] + a[0][3];
351 
352     m[1][0] = a[1][0]*b[0][0] + a[1][1]*b[1][0] + a[1][2]*b[2][0];
353     m[1][1] = a[1][0]*b[0][1] + a[1][1]*b[1][1] + a[1][2]*b[2][1];
354     m[1][2] = a[1][0]*b[0][2] + a[1][1]*b[1][2] + a[1][2]*b[2][2];
355     m[1][3] = a[1][0]*b[0][3] + a[1][1]*b[1][3] + a[1][2]*b[2][3] + a[1][3];
356 
357     m[2][0] = a[2][0]*b[0][0] + a[2][1]*b[1][0] + a[2][2]*b[2][0];
358     m[2][1] = a[2][0]*b[0][1] + a[2][1]*b[1][1] + a[2][2]*b[2][1];
359     m[2][2] = a[2][0]*b[0][2] + a[2][1]*b[1][2] + a[2][2]*b[2][2];
360     m[2][3] = a[2][0]*b[0][3] + a[2][1]*b[1][3] + a[2][2]*b[2][3] + a[2][3];
361 
362 
363     // overwrite a or b if needed
364     if(m == mTmp)
365     {
366         C_MTXCopy( mTmp, ab );
367     }
368 }
369 
370 /*---------------------------------------------------------------------*
371     Paired-Single assembler version
372  *---------------------------------------------------------------------*
373                 Note that this performs NO error checking.
374  *---------------------------------------------------------------------*/
375 #ifdef GEKKO
PSMTXConcat(const register Mtx mA,const register Mtx mB,register Mtx mAB)376 asm void PSMTXConcat
377 (
378     const register Mtx mA,    // r3
379     const register Mtx mB,    // r4
380           register Mtx mAB    // r5
381 )
382 {
383     nofralloc
384 
385 #define A00_A01 fp0
386 #define A02_A03 fp1
387 #define A10_A11 fp2
388 #define A12_A13 fp3
389 #define A20_A21 fp4
390 #define A22_A23 fp5
391 
392 #define B00_B01 fp6
393 #define B02_B03 fp7
394 #define B10_B11 fp8
395 #define B12_B13 fp9
396 #define B20_B21 fp10
397 #define B22_B23 fp11
398 
399 #define D00_D01 fp12
400 #define D02_D03 fp13
401 #define D10_D11 fp14
402 #define D12_D13 fp15
403 #define D20_D21 fp2
404 #define D22_D23 fp0
405 
406 #define UNIT01  fp31
407 
408     // don't save LR since we don't make any function calls
409     //    mflr    r0
410     //    stw     r0, 4(r1)
411     stwu    r1, -64(r1)
412     psq_l   A00_A01, 0(mA), 0, 0
413     stfd    fp14, 8(r1)
414     psq_l   B00_B01, 0(mB), 0, 0
415     addis   r6, 0, Unit01@ha
416     psq_l   B02_B03, 8(mB), 0, 0
417     stfd    fp15, 16(r1)
418     addi    r6, r6, Unit01@l
419     stfd    fp31, 40(r1)
420     psq_l   B10_B11, 16(mB), 0, 0
421     // D00_D01 = b00a00 , b01a00
422     ps_muls0 D00_D01, B00_B01, A00_A01
423     psq_l   A10_A11, 16(mA), 0, 0
424     // D02_D03 = b02a00 , b03a00
425     ps_muls0 D02_D03, B02_B03, A00_A01
426     psq_l   UNIT01, 0(r6), 0, 0
427     // D10_D11 = a10b00 , a10b01
428     ps_muls0 D10_D11, B00_B01, A10_A11
429     psq_l   B12_B13, 24(mB), 0, 0
430     // D12_D13 = a10b02 , a10b03
431     ps_muls0 D12_D13, B02_B03, A10_A11
432     psq_l   A02_A03, 8(mA), 0, 0
433         // fp12 = b10a01 + b00a00 , b11a01 + b01a00
434         ps_madds1 D00_D01, B10_B11, A00_A01, D00_D01
435     psq_l   A12_A13, 24(mA), 0, 0
436         // D10_D11 = a10b00 + a11b10 , a10b01 + a11b11
437         ps_madds1 D10_D11, B10_B11, A10_A11, D10_D11
438     psq_l   B20_B21, 32(mB), 0, 0
439         // D02_D03 = b12a01 + b02a00 , b13a01 + b03a00
440         ps_madds1 D02_D03, B12_B13, A00_A01, D02_D03  // YYY LAST TIME FP0 IS USED
441     psq_l   B22_B23, 40(mB), 0, 0
442         // D12_D13 = a10b02 + a11b12, a10b03+a11b13
443         ps_madds1 D12_D13, B12_B13, A10_A11, D12_D13 // YYY LAST TIME FP2 IS USED
444     psq_l   A20_A21, 32(mA), 0, 0
445     psq_l   A22_A23, 40(mA), 0, 0
446             // D00_D01 = b20a02 + b10a01 + b00a00 , b21a02 + b11a01 + b01a00
447             ps_madds0 D00_D01, B20_B21, A02_A03, D00_D01 // m00, m01 computed
448             // D02_D03 = b12a01 + b02a00 + b22a02 , b13a01 + b03a00 + b23a02
449             ps_madds0 D02_D03, B22_B23, A02_A03, D02_D03
450             // D10_D11 = a10b00 + a11b10 +a12b20, a10b01 + a11b11 + a12b21
451             ps_madds0 D10_D11, B20_B21, A12_A13, D10_D11 // m10, m11 computed
452             // D12_D13 = a10b02 + a11b12 + a12b22, a10b03+a11b13 + a12b23 + a13
453             ps_madds0 D12_D13, B22_B23, A12_A13, D12_D13
454 
455 
456     // store m00m01
457     psq_st  D00_D01, 0(mAB), 0, 0 // YYY LAST TIME FP12 IS USED
458 
459     // D20_D21 = a20b00, a20b01
460     ps_muls0 D20_D21, B00_B01, A20_A21 // YYY LAST TIME FP6 IS USED
461                 // get a03 from fp1 and add to D02_D03
462                 ps_madds1 D02_D03, UNIT01, A02_A03, D02_D03 // m02, m03 computed
463                 // YYY LAST TIME FP1 IS USED
464     // D22_D23 = a20b02, a20b03
465     ps_muls0 D22_D23, B02_B03, A20_A21 // YYY LAST TIME FP7 IS USED
466     // store m10m11
467     psq_st  D10_D11, 16(mAB), 0, 0
468                 // get a13 from fp3 and add to D12_D13
469                 ps_madds1 D12_D13, UNIT01, A12_A13, D12_D13 // m12, m13 computed
470     // store m02m03
471     psq_st  D02_D03, 8(mAB), 0, 0 // YYY LAST TIME D02_D03 IS USED
472 
473         // D20_D21 = a20b00 + a21b10, a20b01 + a21b11
474         ps_madds1 D20_D21, B10_B11, A20_A21, D20_D21 // YYY LAST TIME FP8 IS USED
475         // D22_D23 = a20b02 + a21b12, a20b03 + a21b13
476         ps_madds1 D22_D23, B12_B13, A20_A21, D22_D23
477             // D20_D21 = a20b00 + a21b10 + a22b20, a20b01 + a21b11 + a22b21
478             ps_madds0 D20_D21, B20_B21, A22_A23, D20_D21
479     // Restore fp14
480     lfd    fp14, 8(r1)  // D10_D11
481     // store m12m13
482     psq_st  D12_D13, 24(mAB), 0, 0
483             // D22_D23 = a20b02 + a21b12 + a22b22, a20b03 + a21b13 + a22b23 + a23
484             ps_madds0 D22_D23, B22_B23, A22_A23, D22_D23
485     // store m20m21
486     psq_st  D20_D21, 32(mAB), 0, 0
487                 // get a23 from fp5 and add to fp17
488                 ps_madds1 D22_D23, UNIT01, A22_A23, D22_D23
489     // restore stack frame
490     lfd    fp15, 16(r1) // D12_D13
491     // store m22m23
492     psq_st  D22_D23, 40(mAB), 0, 0
493 
494     lfd    fp31, 40(r1)
495     addi   r1, r1, 64
496 
497 
498     blr
499 
500 
501 #undef A00_A01
502 #undef A02_A03
503 #undef A10_A11
504 #undef A12_A13
505 #undef A20_A21
506 #undef A22_A23
507 
508 #undef B00_B01
509 #undef B02_B03
510 #undef B10_B11
511 #undef B12_B13
512 #undef B20_B21
513 #undef B22_B23
514 
515 #undef D00_D01
516 #undef D02_D03
517 #undef D10_D11
518 #undef D12_D13
519 #undef D20_D21
520 #undef D22_D23
521 
522 #undef UNIT01
523 
524 
525 /*
526   / m[0][0] = a[0][0]*b[0][0] + a[0][1]*b[1][0] + a[0][2]*b[2][0];
527 fp12m[0][1] = a[0][0]*b[0][1] + a[0][1]*b[1][1] + a[0][2]*b[2][1];
528   / m[0][2] = a[0][0]*b[0][2] + a[0][1]*b[1][2] + a[0][2]*b[2][2];
529 fp13m[0][3] = a[0][0]*b[0][3] + a[0][1]*b[1][3] + a[0][2]*b[2][3] + a[0][3];
530 
531   / m[1][0] = a[1][0]*b[0][0] + a[1][1]*b[1][0] + a[1][2]*b[2][0];
532 fp14m[1][1] = a[1][0]*b[0][1] + a[1][1]*b[1][1] + a[1][2]*b[2][1];
533   / m[1][2] = a[1][0]*b[0][2] + a[1][1]*b[1][2] + a[1][2]*b[2][2];
534 fp15m[1][3] = a[1][0]*b[0][3] + a[1][1]*b[1][3] + a[1][2]*b[2][3] + a[1][3];
535 
536   / m[2][0] = a[2][0]*b[0][0] + a[2][1]*b[1][0] + a[2][2]*b[2][0];
537 fp16m[2][1] = a[2][0]*b[0][1] + a[2][1]*b[1][1] + a[2][2]*b[2][1];
538   / m[2][2] = a[2][0]*b[0][2] + a[2][1]*b[1][2] + a[2][2]*b[2][2];
539 fp17m[2][3] = a[2][0]*b[0][3] + a[2][1]*b[1][3] + a[2][2]*b[2][3] + a[2][3];
540 OPTIMIZATIONS :
541   fp0 instead of fp17
542   fp2 instead of fp16
543     psq_l   fp0, 0(mA), 0, 0    // a[0][0], a[0][1]
544     psq_l   fp1, 8(mA), 0, 0    // a[0][2], a[0][3]
545     psq_l   fp2, 16(mA), 0, 0   // a[1][0], a[1][1]
546     psq_l   fp3, 24(mA), 0, 0   // a[1][2], a[1][3]
547     psq_l   fp4, 32(mA), 0, 0   // a[2][0], a[2][1]
548     psq_l   fp5, 40(mA), 0, 0   // a[2][2], a[2][3]
549 
550     psq_l   fp6, 0(mB), 0, 0    // b[0][0], b[0][1]
551     psq_l   fp7, 8(mB), 0, 0    // b[0][2], b[0][3]
552     psq_l   fp8, 16(mB), 0, 0   // b[1][0], b[1][1]
553     psq_l   fp9, 24(mB), 0, 0   // b[1][2], b[1][3]
554     psq_l   fp10, 32(mB), 0, 0   // b[2][0], b[2][1]
555     psq_l   fp11, 40(mB), 0, 0   // b[2][2], b[2][3]
556 
557 */
558 }
559 #endif // GEKKO
560 
561 
562 /*---------------------------------------------------------------------*
563 
564 Name:           MTXConcatArray
565 
566 Description:    concatenates a matrix to an array of matrices.
567                 order of operation is A x B(array) = AB(array).
568 
569 Arguments:      a        first matrix for concat.
570                 srcBase  array base of second matrix for concat.
571                 dstBase  array base of resultant matrix from concat.
572                 count    number of matrices in srcBase, dstBase arrays.
573 
574                 note:      cannot check for array overflow
575 
576 Return   :         none
577 
578  *---------------------------------------------------------------------*/
579 
580 /*---------------------------------------------------------------------*
581     C version
582  *---------------------------------------------------------------------*/
C_MTXConcatArray(const Mtx a,const Mtx * srcBase,Mtx * dstBase,u32 count)583 void C_MTXConcatArray ( const Mtx a, const Mtx* srcBase, Mtx* dstBase, u32 count )
584 {
585     u32 i;
586 
587     ASSERTMSG( (a       != 0), "MTXConcatArray(): NULL MtxPtr 'a' " );
588     ASSERTMSG( (srcBase != 0), "MTXConcatArray(): NULL MtxPtr 'srcBase' " );
589     ASSERTMSG( (dstBase != 0), "MTXConcatArray(): NULL MtxPtr 'dstBase' " );
590     ASSERTMSG( (count > 1),    "MTXConcatArray(): count must be greater than 1." );
591 
592     for ( i = 0 ; i < count ; i++ )
593     {
594         C_MTXConcat(a, *srcBase, *dstBase);
595 
596         srcBase++;
597         dstBase++;
598     }
599 }
600 
601 /*---------------------------------------------------------------------*
602     Paired-Single assembler version
603  *---------------------------------------------------------------------*
604                 Note that this performs NO error checking.
605  *---------------------------------------------------------------------*/
606 #ifdef GEKKO
607 
608 // This code can be compiled only with register allocation optimization.
609 #if ( defined(__MWERKS__) && defined(_DEBUG) )
610 #pragma global_optimizer    on
611 #pragma optimization_level  1
612 #endif
613 
PSMTXConcatArray(const register Mtx a,const register Mtx * srcBase,register Mtx * dstBase,register u32 count)614 void PSMTXConcatArray (
615     const register Mtx  a,
616     const register Mtx* srcBase,
617           register Mtx* dstBase,
618           register u32  count )
619 {
620     register f32    va0, va1, va2, va3, va4, va5;
621     register f32    vb0, vb1, vb2, vb3, vb4, vb5;
622     register f32    vd0, vd1, vd2, vd3, vd4, vd5;
623     register f32    u01;
624     register f32*   u01Ptr = Unit01;
625 
626     asm
627     {
628         // [a00][a01]
629         psq_l       va0, 0(a), 0, 0
630         // [a02][a03]
631         psq_l       va1, 8(a), 0, 0
632         // [a10][a11]
633         psq_l       va2, 16(a), 0, 0
634         // [a12][a13]
635         psq_l       va3, 24(a), 0, 0
636         // count--
637         subi        count, count, 1
638         // [a20][a21]
639         psq_l       va4, 32(a), 0, 0
640         // [a22][a23]
641         psq_l       va5, 40(a), 0, 0
642         // Loop count
643         mtctr       count
644         // [0][1]
645         psq_l       u01, 0(u01Ptr), 0, 0
646 
647         //---------------------------------
648         // [b00][b01]
649         psq_l       vb0, 0(srcBase), 0, 0
650         // [b10][b11]
651         psq_l       vb2, 16(srcBase), 0, 0
652 
653         // [a00*b00][a00*b01]
654         ps_muls0    vd0, vb0, va0
655         // [a10*b00][a10*b01]
656         ps_muls0    vd2, vb0, va2
657         // [a20*b00][a20*b01]
658         ps_muls0    vd4, vb0, va4
659 
660         // [b20][b21]
661         psq_l       vb4, 32(srcBase), 0, 0
662 
663         // [a00*b00 + a01*b10][a00*b01 + a01*b11]
664         ps_madds1   vd0, vb2, va0, vd0
665         // [a10*b00 + a11*b10][a10*b01 + a11*b11]
666         ps_madds1   vd2, vb2, va2, vd2
667         // [a20*b00 + a21*b10][a20*b01 + a21*b11]
668         ps_madds1   vd4, vb2, va4, vd4
669 
670         // [b02][b03]
671         psq_l       vb1, 8(srcBase), 0, 0
672 
673         // [a00*b00 + a01*b10 + a02*b20][a00*b01 + a01*b11 + a02*b21]
674         ps_madds0   vd0, vb4, va1, vd0
675         // [a10*b00 + a11*b10 + a12*b20][a10*b01 + a11*b11 + a12*b21]
676         ps_madds0   vd2, vb4, va3, vd2
677         // [a20*b00 + a21*b10 + a22*b20][a20*b01 + a21*b11 + a22*b21]
678         ps_madds0   vd4, vb4, va5, vd4
679 
680         // [b12][b13]
681         psq_l       vb3, 24(srcBase), 0, 0
682         // [a00*b00 + a01*b10 + a02*b20][a00*b01 + a01*b11 + a02*b21]
683         psq_st      vd0, 0(dstBase), 0, 0
684 
685         // [a00*b02][a00*b03]
686         ps_muls0    vd1, vb1, va0
687         // [a10*b02][a10*b03]
688         ps_muls0    vd3, vb1, va2
689         // [a20*b02][a20*b03]
690         ps_muls0    vd5, vb1, va4
691 
692         // [b22][b23]
693         psq_l       vb5, 40(srcBase), 0, 0
694         // [a10*b00 + a11*b10 + a12*b20][a10*b01 + a11*b11 + a12*b21]
695         psq_st      vd2, 16(dstBase), 0, 0
696 
697         // [a00*b02 + a01*b12][a00*b03 + a01*b13]
698         ps_madds1   vd1, vb3, va0, vd1
699         // [a10*b02 + a11*b12][a10*b03 + a11*b13]
700         ps_madds1   vd3, vb3, va2, vd3
701         // [a20*b02 + a21*b12][a20*b03 + a21*b13]
702         ps_madds1   vd5, vb3, va4, vd5
703 
704     _loop:
705 
706         // ++srcBase
707         addi        srcBase, srcBase, sizeof(Mtx)
708 
709         // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23]
710         ps_madds0   vd1, vb5, va1, vd1
711         // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23]
712         ps_madds0   vd3, vb5, va3, vd3
713         // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23]
714         ps_madds0   vd5, vb5, va5, vd5
715 
716             // [b00][b01]
717             psq_l       vb0, 0(srcBase), 0, 0
718         // [a20*b00 + a21*b10 + a22*b20][a20*b01 + a21*b11 + a22*b21]
719         psq_st      vd4, 32(dstBase), 0, 0
720 
721         // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23 + a03]
722         ps_madd     vd1, u01, va1, vd1
723         // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23 + a13]
724         ps_madd     vd3, u01, va3, vd3
725         // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23 + a23]
726         ps_madd     vd5, u01, va5, vd5
727 
728             // [b10][b11]
729             psq_l       vb2, 16(srcBase), 0, 0
730         // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23 + a03]
731         psq_st      vd1, 8(dstBase), 0, 0
732 
733             // [a00*b00][a00*b01]
734             ps_muls0    vd0, vb0, va0
735             // [a10*b00][a10*b01]
736             ps_muls0    vd2, vb0, va2
737             // [a20*b00][a20*b01]
738             ps_muls0    vd4, vb0, va4
739 
740             // [b20][b21]
741             psq_l       vb4, 32(srcBase), 0, 0
742         // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23 + a13]
743         psq_st      vd3, 24(dstBase), 0, 0
744 
745             // [a00*b00 + a01*b10][a00*b01 + a01*b11]
746             ps_madds1   vd0, vb2, va0, vd0
747             // [a10*b00 + a11*b10][a10*b01 + a11*b11]
748             ps_madds1   vd2, vb2, va2, vd2
749             // [a20*b00 + a21*b10][a20*b01 + a21*b11]
750             ps_madds1   vd4, vb2, va4, vd4
751 
752             // [b02][b03]
753             psq_l       vb1, 8(srcBase), 0, 0
754         // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23 + a23]
755         psq_st      vd5, 40(dstBase), 0, 0
756         // ++dstBase
757         addi        dstBase, dstBase, sizeof(Mtx)
758 
759             // [a00*b00 + a01*b10 + a02*b20][a00*b01 + a01*b11 + a02*b21]
760             ps_madds0   vd0, vb4, va1, vd0
761             // [a10*b00 + a11*b10 + a12*b20][a10*b01 + a11*b11 + a12*b21]
762             ps_madds0   vd2, vb4, va3, vd2
763             // [a20*b00 + a21*b10 + a22*b20][a20*b01 + a21*b11 + a22*b21]
764             ps_madds0   vd4, vb4, va5, vd4
765 
766             // [b12][b13]
767             psq_l       vb3, 24(srcBase), 0, 0
768             // [a00*b00 + a01*b10 + a02*b20][a00*b01 + a01*b11 + a02*b21]
769             psq_st      vd0, 0(dstBase), 0, 0
770 
771             // [a00*b02][a00*b03]
772             ps_muls0    vd1, vb1, va0
773             // [a10*b02][a10*b03]
774             ps_muls0    vd3, vb1, va2
775             // [a20*b02][a20*b03]
776             ps_muls0    vd5, vb1, va4
777 
778             // [b22][b23]
779             psq_l       vb5, 40(srcBase), 0, 0
780             // [a10*b00 + a11*b10 + a12*b20][a10*b01 + a11*b11 + a12*b21]
781             psq_st      vd2, 16(dstBase), 0, 0
782 
783             // [a00*b02 + a01*b12][a00*b03 + a01*b13]
784             ps_madds1   vd1, vb3, va0, vd1
785             // [a10*b02 + a11*b12][a10*b03 + a11*b13]
786             ps_madds1   vd3, vb3, va2, vd3
787             // [a20*b02 + a21*b12][a20*b03 + a21*b13]
788             ps_madds1   vd5, vb3, va4, vd5
789 
790         // LOOP
791         bdnz        _loop
792 
793             // [a20*b00 + a21*b10 + a22*b20][a20*b01 + a21*b11 + a22*b21]
794             psq_st      vd4, 32(dstBase), 0, 0
795 
796             // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23]
797             ps_madds0   vd1, vb5, va1, vd1
798             // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23]
799             ps_madds0   vd3, vb5, va3, vd3
800             // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23]
801             ps_madds0   vd5, vb5, va5, vd5
802 
803             // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23 + a03]
804             ps_madd     vd1, u01, va1, vd1
805             // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23 + a13]
806             ps_madd     vd3, u01, va3, vd3
807             // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23 + a23]
808             ps_madd     vd5, u01, va5, vd5
809 
810             // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23 + a03]
811             psq_st      vd1, 8(dstBase), 0, 0
812             // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23 + a13]
813             psq_st      vd3, 24(dstBase), 0, 0
814             // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23 + a23]
815             psq_st      vd5, 40(dstBase), 0, 0
816             //---------------------------------
817     }
818 }
819 
820 #if ( defined(__MWERKS__) && defined(_DEBUG) )
821 #pragma optimization_level  0
822 #pragma global_optimizer    reset
823 #endif
824 
825 #endif
826 
827 /*---------------------------------------------------------------------*
828 
829 Name:           MTXTranspose
830 
831 Description:    computes the transpose of a matrix.
832                 As matrices are 3x4, fourth column (translation component) is
833                 lost and becomes (0,0,0).
834 
835                 This function is intended for use in computing an
836                 inverse-transpose matrix to transform normals for lighting.
837                 In this case, lost translation component doesn't matter.
838 
839 
840 Arguments:      src       source matrix.
841                 xPose     destination (transposed) matrix.
842                           ok if src == xPose.
843 
844 
845 
846 Return   :         none
847 
848 *---------------------------------------------------------------------*/
849 /*---------------------------------------------------------------------*
850     C version
851  *---------------------------------------------------------------------*/
C_MTXTranspose(const Mtx src,Mtx xPose)852 void C_MTXTranspose ( const Mtx src, Mtx xPose )
853 {
854     Mtx mTmp;
855     MtxPtr m;
856 
857 
858     ASSERTMSG( (src   != 0), MTX_TRANSPOSE_1  );
859     ASSERTMSG( (xPose != 0), MTX_TRANSPOSE_2  );
860 
861 
862     if(src == xPose)
863     {
864         m = mTmp;
865     }
866     else
867     {
868         m = xPose;
869     }
870 
871 
872     m[0][0] = src[0][0];   m[0][1] = src[1][0];      m[0][2] = src[2][0];     m[0][3] = 0.0f;
873     m[1][0] = src[0][1];   m[1][1] = src[1][1];      m[1][2] = src[2][1];     m[1][3] = 0.0f;
874     m[2][0] = src[0][2];   m[2][1] = src[1][2];      m[2][2] = src[2][2];     m[2][3] = 0.0f;
875 
876 
877     // copy back if needed
878     if( m == mTmp )
879     {
880         C_MTXCopy( mTmp, xPose );
881     }
882 }
883 
884 /*---------------------------------------------------------------------*
885     Paired-Single assembler version
886  *---------------------------------------------------------------------*
887                 Note that this performs NO error checking.
888  *---------------------------------------------------------------------*/
889 #ifdef GEKKO
PSMTXTranspose(const register Mtx src,register Mtx xPose)890 void PSMTXTranspose ( const register Mtx src, register Mtx xPose )
891 {
892     register f32 c_zero = 0.0F;
893     register f32 row0a, row1a, row0b, row1b;
894     register f32 trns0, trns1, trns2;
895 
896     asm
897     {
898         psq_l       row0a, 0(src),  0, 0    // [0][0], [0][1]
899         stfs        c_zero, 44(xPose)       // 0 -> [2][3]
900         psq_l       row1a, 16(src), 0, 0    // [1][0], [1][1]
901         ps_merge00  trns0, row0a, row1a     // [0][0], [1][0]
902         psq_l       row0b, 8(src),  1, 0    // [0][2], 1
903         ps_merge11  trns1, row0a, row1a     // [0][1], [1][1]
904         psq_l       row1b, 24(src), 1, 0    // [1][2], 1
905         psq_st      trns0, 0(xPose),  0, 0  // [0][0], [1][0] -> [0][0], [0][1]
906         psq_l       row0a, 32(src), 0, 0    // [2][0], [2][1]
907         ps_merge00  trns2, row0b, row1b     // [0][2], [1][2]
908         psq_st      trns1, 16(xPose), 0, 0  // [0][1], [1][1] -> [1][0], [1][1]
909         ps_merge00  trns0, row0a, c_zero    // [2][0], 0
910         psq_st      trns2, 32(xPose), 0, 0  // [0][2], [1][2] -> [2][0], [2][1]
911         ps_merge10  trns1, row0a, c_zero    // [2][1], 0
912         psq_st      trns0, 8(xPose),  0, 0  // [2][0], 0 -> [0][2], [0][3]
913         lfs         row0b, 40(src)          // [2][2]
914         psq_st      trns1, 24(xPose), 0, 0  // [2][1], 0 -> [1][2], [1][3]
915         stfs        row0b, 40(xPose)        // [2][2] -> [2][2]
916     }
917 }
918 #endif // GEKKO
919 
920 /*---------------------------------------------------------------------*
921 
922 Name:           MTXInverse
923 
924 Description:    computes a fast inverse of a matrix.
925                 this algorithm works for matrices with a fourth row of
926                 (0,0,0,1).
927 
928                 for a matrix
929                 M =  |     A      C      |  where A is the upper 3x3 submatrix,
930                      |     0      1      |        C is a 1x3 column vector
931 
932 
933                 INV(M)     =    |  inv(A)      (inv(A))*(-C)    |
934                                 |     0               1         |
935 
936 
937 
938 Arguments:      src       source matrix.
939                 inv       destination (inverse) matrix.
940                           ok if src == inv.
941 
942 
943 
944 Return   :         0 if src is not invertible.
945                 1 on success.
946 
947 *---------------------------------------------------------------------*/
948 /*---------------------------------------------------------------------*
949     C version
950  *---------------------------------------------------------------------*/
C_MTXInverse(const Mtx src,Mtx inv)951 u32 C_MTXInverse ( const Mtx src, Mtx inv )
952 {
953     Mtx mTmp;
954     MtxPtr m;
955     f32 det;
956 
957     ASSERTMSG( (src != 0), MTX_INVERSE_1 );
958     ASSERTMSG( (inv != 0), MTX_INVERSE_2 );
959 
960     if( src == inv )
961     {
962         m = mTmp;
963     }
964     else
965     {
966         m = inv;
967     }
968 
969 
970     // compute the determinant of the upper 3x3 submatrix
971     det =   src[0][0]*src[1][1]*src[2][2] + src[0][1]*src[1][2]*src[2][0] + src[0][2]*src[1][0]*src[2][1]
972           - src[2][0]*src[1][1]*src[0][2] - src[1][0]*src[0][1]*src[2][2] - src[0][0]*src[2][1]*src[1][2];
973 
974 
975     // check if matrix is singular
976     if( det == 0.0f )
977     {
978         return 0;
979     }
980 
981 
982     // compute the inverse of the upper submatrix:
983 
984     // find the transposed matrix of cofactors of the upper submatrix
985     // and multiply by (1/det)
986 
987     det = 1.0f / det;
988 
989 
990     m[0][0] =  (src[1][1]*src[2][2] - src[2][1]*src[1][2]) * det;
991     m[0][1] = -(src[0][1]*src[2][2] - src[2][1]*src[0][2]) * det;
992     m[0][2] =  (src[0][1]*src[1][2] - src[1][1]*src[0][2]) * det;
993 
994     m[1][0] = -(src[1][0]*src[2][2] - src[2][0]*src[1][2]) * det;
995     m[1][1] =  (src[0][0]*src[2][2] - src[2][0]*src[0][2]) * det;
996     m[1][2] = -(src[0][0]*src[1][2] - src[1][0]*src[0][2]) * det;
997 
998     m[2][0] =  (src[1][0]*src[2][1] - src[2][0]*src[1][1]) * det;
999     m[2][1] = -(src[0][0]*src[2][1] - src[2][0]*src[0][1]) * det;
1000     m[2][2] =  (src[0][0]*src[1][1] - src[1][0]*src[0][1]) * det;
1001 
1002 
1003     // compute (invA)*(-C)
1004     m[0][3] = -m[0][0]*src[0][3] - m[0][1]*src[1][3] - m[0][2]*src[2][3];
1005     m[1][3] = -m[1][0]*src[0][3] - m[1][1]*src[1][3] - m[1][2]*src[2][3];
1006     m[2][3] = -m[2][0]*src[0][3] - m[2][1]*src[1][3] - m[2][2]*src[2][3];
1007 
1008     // copy back if needed
1009     if( m == mTmp )
1010     {
1011         C_MTXCopy( mTmp,inv );
1012     }
1013 
1014     return 1;
1015 }
1016 
1017 /*---------------------------------------------------------------------*
1018     Paired-Single assembler version
1019  *---------------------------------------------------------------------*
1020             Note that this performs NO error checking.
1021             Results may be a little bit different from the C version
1022             because it doesn't perform exactly same calculation.
1023  *---------------------------------------------------------------------*/
1024 #ifdef GEKKO
PSMTXInverse(const register Mtx src,register Mtx inv)1025 asm u32 PSMTXInverse ( const register Mtx src, register Mtx inv )
1026 {
1027     nofralloc
1028 
1029     // fp0 [ 00 ][ 1.0F ] : Load
1030     psq_l       fp0, 0(src), 1, 0
1031     // fp1 [ 01 ][ 02 ]   : Load
1032     psq_l       fp1, 4(src), 0, 0
1033     // fp2 [ 10 ][ 1.0F ] : Load
1034     psq_l       fp2, 16(src), 1, 0
1035         // fp6 [ 02 ][ 00 ]
1036         ps_merge10  fp6, fp1, fp0
1037     // fp3 [ 11 ][ 12 ]   : Load
1038     psq_l       fp3, 20(src), 0, 0
1039     // fp4 [ 20 ][ 1.0F ] : Load
1040     psq_l       fp4, 32(src), 1, 0
1041         // fp7 [ 12 ][ 10 ]
1042         ps_merge10  fp7, fp3, fp2
1043     // fp5 [ 21 ][ 22 ]   : Load
1044     psq_l       fp5, 36(src), 0, 0
1045 
1046     // fp11[ 11*02 ][ 00*12 ]
1047     ps_mul      fp11, fp3, fp6
1048     // fp13[ 21*12 ][ 10*22 ]
1049     ps_mul      fp13, fp5, fp7
1050         // fp8 [ 22 ][ 20 ]
1051         ps_merge10  fp8, fp5, fp4
1052     // fp11[ 01*12 - 11*02 ][ 10*02 - 00*12 ]  -> free(fp7[ 12 ][ 10 ])
1053     ps_msub     fp11, fp1, fp7, fp11
1054     // fp12[ 01*22 ][ 20*02 ]
1055     ps_mul      fp12, fp1, fp8
1056     // fp13[ 11*22 - 21*12 ][ 20*12 - 10*22 ]  -> free(fp8[ 22 ][ 20 ])
1057     ps_msub     fp13, fp3, fp8, fp13
1058         // fp10[ 20*11 ][ N/A ]
1059         ps_mul      fp10, fp3, fp4
1060     // fp12[ 21*02 - 01*22 ][ 00*22 - 20*02 ]  -> free(fp6[ 02 ][ 00 ])
1061     ps_msub     fp12, fp5, fp6, fp12
1062         // fp9 [ 00*21 ][ N/A ]
1063         ps_mul      fp9,  fp0, fp5
1064         // fp8 [ 10*01 ][ N/A ]
1065         ps_mul      fp8,  fp1, fp2
1066 
1067     // fp6 [ 0.0F ][ 0.0F ]
1068     ps_sub      fp6, fp6, fp6
1069         // fp10[ 10*21 - 20*11 ][ N/A ] -> free(fp5[ 21 ][ 22 ])
1070         ps_msub     fp10, fp2, fp5, fp10
1071     // fp7 [ 00*(11*22-21*12) ][ N/A ]
1072     ps_mul      fp7, fp0, fp13
1073         // fp9 [ 20*01 - 00*21 ][ N/A ]
1074         ps_msub     fp9,  fp1, fp4, fp9
1075     // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) ][ N/A ]
1076     ps_madd     fp7, fp2, fp12, fp7
1077         // fp8 [ 00*11 - 10*01 ][ N/A ]
1078         ps_msub     fp8,  fp0, fp3, fp8
1079     // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) + 20*(01*12-11*02) ][ N/A ] : det
1080     ps_madd     fp7, fp4, fp11, fp7
1081 
1082     // ( det == 0 ) ?
1083     ps_cmpo0    cr0, fp7, fp6
1084     bne         _regular
1085 
1086     // return value (singular)
1087     addi        r3, 0, 0
1088     blr
1089 
1090   _regular:
1091 
1092     // fp0 [ rdet = 1/det ][ N/A ]
1093     fres        fp0, fp7
1094 
1095     // Refinement : ( E = est. of 1/K ) -> ( E' = 2E - K*E*E )
1096     ps_add      fp6, fp0, fp0
1097     ps_mul      fp5, fp0, fp0
1098     ps_nmsub    fp0, fp7, fp5, fp6
1099 
1100 
1101     // fp1 [ 03 ][ 03 ] : Load
1102     lfs         fp1, 12(src)
1103         // fp13[ ( 11*22 - 21*12 ) * rdet ][ ( 20*12 - 10*22 ) * rdet ] : i[0][0], i[1][0]
1104         ps_muls0    fp13, fp13, fp0
1105     // fp2 [ 13 ][ 13 ] : Load
1106     lfs         fp2, 28(src)
1107         // fp12[ ( 21*02 - 01*22 ) * rdet ][ ( 00*22 - 20*02 ) * rdet ] : i[0][1], i[1][1]
1108         ps_muls0    fp12, fp12, fp0
1109     // fp3 [ 23 ][ 23 ]   : Load
1110     lfs         fp3, 44(src)
1111         // fp11[ ( 01*12 - 11*02 ) * rdet ][ ( 10*02 - 00*12 ) * rdet ] : i[0][2], i[1][2]
1112         ps_muls0    fp11, fp11, fp0
1113     // fp5 [ i00 ][ i01 ]
1114     ps_merge00  fp5, fp13, fp12
1115         // fp10[ ( 10*21 - 20*11 ) * rdet ] : i[2][0]
1116         ps_muls0    fp10, fp10, fp0
1117     // fp4 [ i10 ][ i11 ]
1118     ps_merge11  fp4, fp13, fp12
1119         // fp9 [ ( 20*01 - 00*21 ) * rdet ] : i[2][1]
1120         ps_muls0    fp9,  fp9,  fp0
1121 
1122     // [ i00 ][ i01 ] : Store fp5   -> free(fp5[ i00 ][ i01 ])
1123     psq_st      fp5,  0(inv), 0, 0
1124         // fp6 [ i00*03 ][ i10*03 ]
1125         ps_mul      fp6, fp13, fp1
1126     // [ i10 ][ i11 ] : Store fp4   -> free(fp4[ i10 ][ i11 ])
1127     psq_st      fp4,  16(inv), 0, 0
1128         // fp8 [ ( 00*11 - 10*01 ) * rdet ] : i[2][2]
1129         ps_muls0    fp8,  fp8,  fp0
1130         // fp6 [ i00*03+i01*13 ][ i10*03+i11*13 ]
1131         ps_madd     fp6, fp12, fp2, fp6
1132     // [ i20 ] : Store fp10
1133     psq_st      fp10, 32(inv), 1, 0
1134         // fp6 [ -i00*03-i01*13-i02*23 ][ -i10*03-i11*13-i12*23 ] : i[0][3], i[1][3]
1135         ps_nmadd    fp6, fp11, fp3, fp6
1136     // [ i21 ] : Store fp9
1137     psq_st      fp9,  36(inv), 1, 0
1138         // fp7 [ i20*03 ][ N/A ]
1139         ps_mul      fp7, fp10, fp1
1140         // fp5 [ i02 ][ i03 ]
1141         ps_merge00  fp5, fp11, fp6
1142     // [ i22 ] : Store fp8
1143     psq_st      fp8,  40(inv), 1, 0
1144         // fp4 [ i12 ][ i13 ]
1145         ps_merge11  fp4, fp11, fp6
1146     // [ i02 ][ i03 ] : Store fp5
1147     psq_st      fp5,  8(inv), 0, 0
1148         // fp7 [ i20*03+i21*13 ][ N/A ]
1149         ps_madd     fp7, fp9,  fp2, fp7
1150     // [ i12 ][ i13 ] : Store fp4
1151     psq_st      fp4,  24(inv), 0, 0
1152         // fp7 [ -i20*03-i21*13-i22*23 ][ N/A ] : i[2][3]
1153         ps_nmadd    fp7, fp8,  fp3, fp7
1154             // return value (regular)
1155             addi        r3, 0, 1
1156     // [ i23 ] : Store fp7
1157     psq_st      fp7,  44(inv), 1, 0
1158 
1159     blr
1160 }
1161 #endif // GEKKO
1162 
1163 /*---------------------------------------------------------------------*
1164 
1165 Name:           MTXInvXpose
1166 
1167 Description:    computes a fast inverse-transpose of a matrix.
1168                 this algorithm works for matrices with a fourth row of
1169                 (0,0,0,1). Commonly used for calculating normal transform
1170                 matrices.
1171 
1172                 This function is equivalent to the combination of
1173                 two functions MTXInverse + MTXTranspose.
1174 
1175 Arguments:      src       source matrix.
1176                 invx      destination (inverse-transpose) matrix.
1177                           ok if src == invx.
1178 
1179 Return   :         0 if src is not invertible.
1180                 1 on success.
1181 
1182 *---------------------------------------------------------------------*/
1183 /*---------------------------------------------------------------------*
1184     C version
1185  *---------------------------------------------------------------------*/
C_MTXInvXpose(const Mtx src,Mtx invX)1186 u32 C_MTXInvXpose ( const Mtx src, Mtx invX )
1187 {
1188     Mtx mTmp;
1189     MtxPtr m;
1190     f32 det;
1191 
1192     ASSERTMSG( (src != 0), MTX_INVXPOSE_1 );
1193     ASSERTMSG( (invX != 0), MTX_INVXPOSE_2 );
1194 
1195     if( src == invX )
1196     {
1197         m = mTmp;
1198     }
1199     else
1200     {
1201         m = invX;
1202     }
1203 
1204     // compute the determinant of the upper 3x3 submatrix
1205     det =   src[0][0]*src[1][1]*src[2][2] + src[0][1]*src[1][2]*src[2][0] + src[0][2]*src[1][0]*src[2][1]
1206           - src[2][0]*src[1][1]*src[0][2] - src[1][0]*src[0][1]*src[2][2] - src[0][0]*src[2][1]*src[1][2];
1207 
1208     // check if matrix is singular
1209     if( det == 0.0f )
1210     {
1211         return 0;
1212     }
1213 
1214     // compute the inverse-transpose of the upper submatrix:
1215 
1216     // find the transposed matrix of cofactors of the upper submatrix
1217     // and multiply by (1/det)
1218 
1219     det = 1.0f / det;
1220 
1221     m[0][0] =  (src[1][1]*src[2][2] - src[2][1]*src[1][2]) * det;
1222     m[0][1] = -(src[1][0]*src[2][2] - src[2][0]*src[1][2]) * det;
1223     m[0][2] =  (src[1][0]*src[2][1] - src[2][0]*src[1][1]) * det;
1224 
1225     m[1][0] = -(src[0][1]*src[2][2] - src[2][1]*src[0][2]) * det;
1226     m[1][1] =  (src[0][0]*src[2][2] - src[2][0]*src[0][2]) * det;
1227     m[1][2] = -(src[0][0]*src[2][1] - src[2][0]*src[0][1]) * det;
1228 
1229     m[2][0] =  (src[0][1]*src[1][2] - src[1][1]*src[0][2]) * det;
1230     m[2][1] = -(src[0][0]*src[1][2] - src[1][0]*src[0][2]) * det;
1231     m[2][2] =  (src[0][0]*src[1][1] - src[1][0]*src[0][1]) * det;
1232 
1233 
1234     // the fourth columns should be all zero
1235     m[0][3] = 0.0F;
1236     m[1][3] = 0.0F;
1237     m[2][3] = 0.0F;
1238 
1239     // copy back if needed
1240     if( m == mTmp )
1241     {
1242         C_MTXCopy( mTmp,invX );
1243     }
1244 
1245     return 1;
1246 }
1247 
1248 /*---------------------------------------------------------------------*
1249     Paired-Single assembler version
1250  *---------------------------------------------------------------------*
1251             Note that this performs NO error checking.
1252             Results may be a little bit different from the C version
1253             because it doesn't perform exactly same calculation.
1254  *---------------------------------------------------------------------*/
1255 #ifdef GEKKO
PSMTXInvXpose(const register Mtx src,register Mtx invX)1256 asm u32 PSMTXInvXpose ( const register Mtx src, register Mtx invX )
1257 {
1258     nofralloc
1259 
1260     // fp0 [ 00 ][ 1.0F ] : Load
1261     psq_l       fp0, 0(src), 1, 0
1262     // fp1 [ 01 ][ 02 ]   : Load
1263     psq_l       fp1, 4(src), 0, 0
1264     // fp2 [ 10 ][ 1.0F ] : Load
1265     psq_l       fp2, 16(src), 1, 0
1266         // fp6 [ 02 ][ 00 ]
1267         ps_merge10  fp6, fp1, fp0
1268     // fp3 [ 11 ][ 12 ]   : Load
1269     psq_l       fp3, 20(src), 0, 0
1270     // fp4 [ 20 ][ 1.0F ] : Load
1271     psq_l       fp4, 32(src), 1, 0
1272         // fp7 [ 12 ][ 10 ]
1273         ps_merge10  fp7, fp3, fp2
1274     // fp5 [ 21 ][ 22 ]   : Load
1275     psq_l       fp5, 36(src), 0, 0
1276 
1277         // fp11[ 11*02 ][ 00*12 ]
1278         ps_mul      fp11, fp3, fp6
1279 
1280         // fp8 [ 22 ][ 20 ]
1281         ps_merge10  fp8, fp5, fp4
1282 
1283         // fp13[ 21*12 ][ 10*22 ]
1284         ps_mul      fp13, fp5, fp7
1285         // fp11[ 01*12 - 11*02 ][ 10*02 - 00*12 ]
1286         ps_msub     fp11, fp1, fp7, fp11
1287         // fp12[ 01*22 ][ 20*02 ]
1288         ps_mul      fp12, fp1, fp8
1289         // fp13[ 11*22 - 21*12 ][ 20*12 - 10*22 ]
1290         ps_msub     fp13, fp3, fp8, fp13
1291         // fp12[ 21*02 - 01*22 ][ 00*22 - 20*02 ]
1292         ps_msub     fp12, fp5, fp6, fp12
1293 
1294         // fp10[ 20*11 ][ N/A ]
1295         ps_mul      fp10, fp3, fp4
1296         // fp9 [ 00*21 ][ N/A ]
1297         ps_mul      fp9,  fp0, fp5
1298         // fp8 [ 10*01 ][ N/A ]
1299         ps_mul      fp8,  fp1, fp2
1300         // fp10[ 10*21 - 20*11 ][ N/A ]
1301         ps_msub     fp10, fp2, fp5, fp10
1302         // fp9 [ 20*01 - 00*21 ][ N/A ]
1303         ps_msub     fp9,  fp1, fp4, fp9
1304         // fp8 [ 00*11 - 10*01 ][ N/A ]
1305         ps_msub     fp8,  fp0, fp3, fp8
1306 
1307         // fp7 [ 00*(11*22-21*12) ][ N/A ]
1308         ps_mul      fp7, fp0, fp13
1309         // fp1 [ 0.0F ][ 0.0F ]
1310         ps_sub      fp1, fp1, fp1
1311         // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) ][ N/A ]
1312         ps_madd     fp7, fp2, fp12, fp7
1313         // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) + 20*(01*12-11*02) ][ N/A ] : det
1314         ps_madd     fp7, fp4, fp11, fp7
1315 
1316         // ( det == 0 ) ?
1317         ps_cmpo0    cr0, fp7, fp1
1318         bne         _regular
1319 
1320         // return value (singular)
1321         addi        r3, 0, 0
1322         blr
1323 
1324   _regular:
1325 
1326         // fp0 [ 1/det ][ N/A ]
1327         fres        fp0, fp7
1328 
1329     // [ ix03 ] : Store fp1
1330     psq_st      fp1,  12(invX), 1, 0
1331 
1332         // Refinement : ( E = est. of 1/N ) -> ( E' = 2E - N*E*E )
1333         ps_add      fp6, fp0, fp0
1334         ps_mul      fp5, fp0, fp0
1335 
1336     // [ ix13 ] : Store fp1
1337     psq_st      fp1,  28(invX), 1, 0
1338 
1339         ps_nmsub    fp0, fp7, fp5, fp6
1340 
1341     // [ ix23 ] : Store fp1
1342     psq_st      fp1,  44(invX), 1, 0
1343 
1344         // fp13[ ( 11*22 - 21*12 ) * rdet ][ ( 20*12 - 10*22 ) * rdet ] : ix[0][0], ix[0][1]
1345         ps_muls0    fp13, fp13, fp0
1346         // fp12[ ( 21*02 - 01*22 ) * rdet ][ ( 00*22 - 20*02 ) * rdet ] : ix[1][0], ix[1][1]
1347         ps_muls0    fp12, fp12, fp0
1348         // fp11[ ( 01*12 - 11*02 ) * rdet ][ ( 10*02 - 00*12 ) * rdet ] : ix[2][0], ix[2][1]
1349         ps_muls0    fp11, fp11, fp0
1350 
1351     // [ ix00 ][ ix01 ] : Store fp13
1352     psq_st      fp13,  0(invX), 0, 0
1353     // [ ix10 ][ ix11 ] : Store fp12
1354     psq_st      fp12,  16(invX), 0, 0
1355 
1356         // fp10[ ( 10*21 - 20*11 ) * rdet ] : i[0][2]
1357         ps_muls0    fp10, fp10, fp0
1358 
1359         // fp9 [ ( 20*01 - 00*21 ) * rdet ] : i[1][2]
1360         ps_muls0    fp9,  fp9,  fp0
1361 
1362     // [ ix20 ][ ix21 ] : Store fp11
1363     psq_st      fp11,  32(invX), 0, 0
1364     // [ ix02 ]         : Store fp10
1365     psq_st      fp10,  8(invX), 1, 0
1366 
1367         // fp8 [ ( 00*11 - 10*01 ) * rdet ] : i[2][2]
1368         ps_muls0    fp8,  fp8,  fp0
1369 
1370         // return value (regular)
1371         addi        r3, 0, 1
1372 
1373     // [ ix12 ]         : Store fp9
1374     psq_st      fp9,   24(invX), 1, 0
1375     // [ ix22 ]         : Store fp8
1376     psq_st      fp8,   40(invX), 1, 0
1377 
1378     blr
1379 }
1380 #endif
1381 
1382 /*---------------------------------------------------------------------*
1383 
1384 
1385 
1386 
1387 
1388                              MODEL  SECTION
1389 
1390 
1391 
1392 
1393 
1394 *---------------------------------------------------------------------*/
1395 
1396 
1397 
1398 
1399 
1400 /*---------------------------------------------------------------------*
1401 
1402 Name:           MTXRotDeg
1403 
1404 Description:    sets a rotation matrix about one of the X, Y or Z axes
1405 
1406 Arguments:      m       matrix to be set
1407 
1408                 axis    major axis about which to rotate.
1409                         axis is passed in as a character.
1410                         it must be one of 'X', 'x', 'Y', 'y', 'Z', 'z'
1411 
1412                 deg     rotation angle in degrees.
1413 
1414                         note:  counter-clockwise rotation is positive.
1415 
1416 Return   :         none
1417 
1418 *---------------------------------------------------------------------*/
1419 // The function is defined as a macro in mtx.h
1420 /*
1421 void MTXRotDeg ( Mtx m, char axis, f32 deg )
1422 {
1423     MTXRotRad( m, axis, MTXDegToRad( deg ) );
1424 }
1425 */
1426 
1427 /*---------------------------------------------------------------------*
1428 
1429 Name:           MTXRotRad
1430 
1431 Description:    sets a rotation matrix about one of the X, Y or Z axes
1432 
1433 Arguments:      m       matrix to be set
1434 
1435                 axis    major axis about which to rotate.
1436                         axis is passed in as a character.
1437                         it must be one of 'X', 'x', 'Y', 'y', 'Z', 'z'
1438 
1439                 deg     rotation angle in radians.
1440 
1441                         note:  counter-clockwise rotation is positive.
1442 
1443 Return   :         none
1444 
1445 *---------------------------------------------------------------------*/
1446 /*---------------------------------------------------------------------*
1447     C version
1448  *---------------------------------------------------------------------*/
C_MTXRotRad(Mtx m,char axis,f32 rad)1449 void C_MTXRotRad ( Mtx m, char axis, f32 rad )
1450 {
1451 
1452     f32 sinA, cosA;
1453 
1454     ASSERTMSG( (m != 0), MTX_ROTRAD_1 );
1455 
1456     // verification of "axis" will occur in MTXRotTrig
1457 
1458     sinA = sinf(rad);
1459     cosA = cosf(rad);
1460 
1461     C_MTXRotTrig( m, axis, sinA, cosA );
1462 }
1463 
1464 /*---------------------------------------------------------------------*
1465     Paired-Single assembler version
1466  *---------------------------------------------------------------------*
1467                 Note that this performs NO error checking.
1468  *---------------------------------------------------------------------*/
1469 #ifdef GEKKO
PSMTXRotRad(Mtx m,char axis,f32 rad)1470 void PSMTXRotRad ( Mtx m, char axis, f32 rad )
1471 {
1472     f32 sinA, cosA;
1473 
1474     sinA = sinf(rad);
1475     cosA = cosf(rad);
1476 
1477     PSMTXRotTrig( m, axis, sinA, cosA );
1478 }
1479 #endif // GEKKO
1480 
1481 /*---------------------------------------------------------------------*
1482 
1483 Name:           MTXRotTrig
1484 
1485 Description:    sets a rotation matrix about one of the X, Y or Z axes
1486                 from specified trig ratios
1487 
1488 Arguments:      m       matrix to be set
1489 
1490                 axis    major axis about which to rotate.
1491                         axis is passed in as a character.
1492                         It must be one of 'X', 'x', 'Y', 'y', 'Z', 'z'
1493 
1494                 sinA    sine of rotation angle.
1495 
1496                 cosA    cosine of rotation angle.
1497 
1498                         note:  counter-clockwise rotation is positive.
1499 
1500 Return   :         none
1501 
1502 *---------------------------------------------------------------------*/
1503 /*---------------------------------------------------------------------*
1504     C version
1505  *---------------------------------------------------------------------*/
C_MTXRotTrig(Mtx m,char axis,f32 sinA,f32 cosA)1506 void C_MTXRotTrig ( Mtx m, char axis, f32 sinA, f32 cosA )
1507 {
1508 
1509     ASSERTMSG( (m != 0), MTX_ROTTRIG_1 );
1510 
1511     switch(axis)
1512     {
1513 
1514     case 'x':
1515     case 'X':
1516         m[0][0] =  1.0f;  m[0][1] =  0.0f;    m[0][2] =  0.0f;  m[0][3] = 0.0f;
1517         m[1][0] =  0.0f;  m[1][1] =  cosA;    m[1][2] = -sinA;  m[1][3] = 0.0f;
1518         m[2][0] =  0.0f;  m[2][1] =  sinA;    m[2][2] =  cosA;  m[2][3] = 0.0f;
1519         break;
1520 
1521     case 'y':
1522     case 'Y':
1523         m[0][0] =  cosA;  m[0][1] =  0.0f;    m[0][2] =  sinA;  m[0][3] = 0.0f;
1524         m[1][0] =  0.0f;  m[1][1] =  1.0f;    m[1][2] =  0.0f;  m[1][3] = 0.0f;
1525         m[2][0] = -sinA;  m[2][1] =  0.0f;    m[2][2] =  cosA;  m[2][3] = 0.0f;
1526         break;
1527 
1528     case 'z':
1529     case 'Z':
1530         m[0][0] =  cosA;  m[0][1] = -sinA;    m[0][2] =  0.0f;  m[0][3] = 0.0f;
1531         m[1][0] =  sinA;  m[1][1] =  cosA;    m[1][2] =  0.0f;  m[1][3] = 0.0f;
1532         m[2][0] =  0.0f;  m[2][1] =  0.0f;    m[2][2] =  1.0f;  m[2][3] = 0.0f;
1533         break;
1534 
1535     default:
1536         ASSERTMSG( 0, MTX_ROTTRIG_2 );
1537         break;
1538 
1539     }
1540 
1541 }
1542 
1543 /*---------------------------------------------------------------------*
1544     Paired-Single assembler version
1545  *---------------------------------------------------------------------*
1546                 Note that this performs NO error checking.
1547  *---------------------------------------------------------------------*/
1548 #ifdef GEKKO
PSMTXRotTrig(register Mtx m,register char axis,register f32 sinA,register f32 cosA)1549 void PSMTXRotTrig (
1550     register Mtx    m,
1551     register char   axis,
1552     register f32    sinA,
1553     register f32    cosA )
1554 {
1555     register f32    fc0, fc1, nsinA;
1556     register f32    fw0, fw1, fw2, fw3;
1557 
1558     asm
1559     {
1560         frsp        sinA, sinA      // to make sure sinA = single precision
1561         frsp        cosA, cosA      // to make sure cosA = single precision
1562     }
1563 
1564     fc0 = 0.0F;
1565     fc1 = 1.0F;
1566 
1567     asm
1568     {
1569         // always lower case
1570         ori         axis, axis, 0x20
1571         ps_neg      nsinA, sinA
1572 
1573         // branches
1574         cmplwi      axis, 'x'
1575         beq         _case_x
1576         cmplwi      axis, 'y'
1577         beq         _case_y
1578         cmplwi      axis, 'z'
1579         beq         _case_z
1580         b           _end
1581 
1582     _case_x:
1583         psq_st      fc1,  0(m), 1, 0
1584         psq_st      fc0,  4(m), 0, 0
1585         ps_merge00  fw0, sinA, cosA
1586         psq_st      fc0, 12(m), 0, 0
1587         ps_merge00  fw1, cosA, nsinA
1588         psq_st      fc0, 28(m), 0, 0
1589         psq_st      fc0, 44(m), 1, 0
1590         psq_st      fw0, 36(m), 0, 0
1591         psq_st      fw1, 20(m), 0, 0
1592         b           _end;
1593 
1594     _case_y:
1595         ps_merge00  fw0, cosA, fc0
1596         ps_merge00  fw1, fc0, fc1
1597         psq_st      fc0, 24(m), 0, 0
1598         psq_st      fw0,  0(m), 0, 0
1599         ps_merge00  fw2, nsinA, fc0
1600         ps_merge00  fw3, sinA, fc0
1601         psq_st      fw0, 40(m), 0, 0;
1602         psq_st      fw1, 16(m), 0, 0;
1603         psq_st      fw3,  8(m), 0, 0;
1604         psq_st      fw2, 32(m), 0, 0;
1605         b           _end;
1606 
1607     _case_z:
1608         psq_st      fc0,  8(m), 0, 0
1609         ps_merge00  fw0, sinA, cosA
1610         ps_merge00  fw2, cosA, nsinA
1611         psq_st      fc0, 24(m), 0, 0
1612         psq_st      fc0, 32(m), 0, 0
1613         ps_merge00  fw1, fc1, fc0
1614         psq_st      fw0, 16(m), 0, 0
1615         psq_st      fw2,  0(m), 0, 0
1616         psq_st      fw1, 40(m), 0, 0
1617 
1618     _end:
1619 
1620     }
1621 }
1622 #endif // GEKKO
1623 
1624 /*---------------------------------------------------------------------*
1625 
1626 Name:           MTXRotAxisDeg
1627 
1628 Description:    Sets a rotation matrix about an arbitrary axis
1629 
1630 
1631 Arguments:      m       matrix to be set
1632 
1633                 axis    ptr to a vector containing the x,y,z axis
1634                         components.
1635                         axis does not have to be a unit vector.
1636 
1637                 deg     rotation angle in degrees.
1638 
1639                         note:  counter-clockwise rotation is positive.
1640 
1641 Return   :         none
1642 
1643 *---------------------------------------------------------------------*/
1644 // The function is defined as a macro in mtx.h
1645 /*
1646 void MTXRotAxisDeg ( Mtx m, Vec *axis, f32 deg )
1647 {
1648     MTXRotAxisRad( m, axis, MTXDegToRad( deg ) );
1649 }
1650 */
1651 
1652 /*---------------------------------------------------------------------*
1653 
1654 Name:           MTXRotAxisRad
1655 
1656 Description:    Sets a rotation matrix about an arbitrary axis
1657 
1658 
1659 Arguments:      m       matrix to be set
1660 
1661                 axis    ptr to a vector containing the x,y,z axis
1662                         components.
1663                         axis does not have to be a unit vector.
1664 
1665                 deg     rotation angle in radians.
1666 
1667                         note:  counter-clockwise rotation is positive.
1668 
1669 Return   :         none
1670 
1671 *---------------------------------------------------------------------*/
1672 /*---------------------------------------------------------------------*
1673     C version
1674  *---------------------------------------------------------------------*/
C_MTXRotAxisRad(Mtx m,const Vec * axis,f32 rad)1675 void C_MTXRotAxisRad( Mtx m, const Vec *axis, f32 rad )
1676 {
1677     Vec vN;
1678     f32 s, c;             // sinTheta, cosTheta
1679     f32 t;                // ( 1 - cosTheta )
1680     f32 x, y, z;          // x, y, z components of normalized axis
1681     f32 xSq, ySq, zSq;    // x, y, z squared
1682 
1683 
1684     ASSERTMSG( (m    != 0), MTX_ROTAXIS_1  );
1685     ASSERTMSG( (axis != 0), MTX_ROTAXIS_2  );
1686 
1687 
1688     s = sinf(rad);
1689     c = cosf(rad);
1690     t = 1.0f - c;
1691 
1692     C_VECNormalize( axis, &vN );
1693 
1694     x = vN.x;
1695     y = vN.y;
1696     z = vN.z;
1697 
1698     xSq = x * x;
1699     ySq = y * y;
1700     zSq = z * z;
1701 
1702     m[0][0] = ( t * xSq )   + ( c );
1703     m[0][1] = ( t * x * y ) - ( s * z );
1704     m[0][2] = ( t * x * z ) + ( s * y );
1705     m[0][3] =    0.0f;
1706 
1707     m[1][0] = ( t * x * y ) + ( s * z );
1708     m[1][1] = ( t * ySq )   + ( c );
1709     m[1][2] = ( t * y * z ) - ( s * x );
1710     m[1][3] =    0.0f;
1711 
1712     m[2][0] = ( t * x * z ) - ( s * y );
1713     m[2][1] = ( t * y * z ) + ( s * x );
1714     m[2][2] = ( t * zSq )   + ( c );
1715     m[2][3] =    0.0f;
1716 
1717 }
1718 
1719 /*---------------------------------------------------------------------*
1720     Paired-Single assembler version
1721  *---------------------------------------------------------------------*
1722                 Note that this performs NO error checking.
1723  *---------------------------------------------------------------------*/
1724 #ifdef GEKKO
1725 
__PSMTXRotAxisRadInternal(register Mtx m,const register Vec * axis,register f32 sT,register f32 cT)1726 static void __PSMTXRotAxisRadInternal(
1727     register Mtx    m,
1728     const register Vec *axis,
1729     register f32    sT,
1730     register f32    cT )
1731 {
1732     register f32    tT, fc0;
1733     register f32    tmp0, tmp1, tmp2, tmp3, tmp4;
1734     register f32    tmp5, tmp6, tmp7, tmp8, tmp9;
1735 
1736     tmp9 = 0.5F;
1737     tmp8 = 3.0F;
1738 
1739     asm
1740     {
1741         // to make sure cT = (single precision float value)
1742         frsp        cT, cT
1743         // tmp0 = [x][y] : LOAD
1744         psq_l       tmp0, 0(axis), 0, 0
1745         // to make sure sT = (single precision float value)
1746         frsp        sT, sT
1747         // tmp1 = [z][z] : LOAD
1748         lfs         tmp1, 8(axis)
1749 
1750         // tmp2 = [x*x][y*y]
1751         ps_mul      tmp2, tmp0, tmp0
1752         // tmp7 = [1.0F]
1753         fadds       tmp7, tmp9, tmp9
1754         // tmp3 = [x*x+z*z][y*y+z*z]
1755         ps_madd     tmp3, tmp1, tmp1, tmp2
1756         // fc0 = [0.0F]
1757         fsubs       fc0, tmp9, tmp9
1758         // tmp4 = [S = x*x+y*y+z*z][z]
1759         ps_sum0     tmp4, tmp3, tmp1, tmp2
1760 
1761         // tT = 1.0F - cT
1762         fsubs       tT, tmp7, cT
1763 
1764         // tmp5 = [1.0/sqrt(S)] :estimation[E]
1765         frsqrte     tmp5, tmp4
1766         // Newton-Rapson refinement step
1767         // E' = E/2(3.0 - E*E*S)
1768         fmuls       tmp2, tmp5, tmp5            // E*E
1769         fmuls       tmp3, tmp5, tmp9            // E/2
1770         fnmsubs     tmp2, tmp2, tmp4, tmp8      // (3-E*E*S)
1771         fmuls       tmp5, tmp2, tmp3            // (E/2)(3-E*E*S)
1772 
1773         // cT = [c][c]
1774         ps_merge00  cT, cT, cT
1775 
1776         // tmp0 = [nx = x/sqrt(S)][ny = y/sqrt(S)]
1777         ps_muls0    tmp0, tmp0, tmp5
1778         // tmp1 = [nz = z/sqrt(S)][nz = z/sqrt(S)]
1779         ps_muls0    tmp1, tmp1, tmp5
1780 
1781         // tmp4 = [t*nx][t*ny]
1782         ps_muls0    tmp4, tmp0, tT
1783         // tmp9 = [s*nx][s*ny]
1784         ps_muls0    tmp9, tmp0, sT
1785         // tmp5 = [t*nz][t*nz]
1786         ps_muls0    tmp5, tmp1, tT
1787 
1788         // tmp3 = [t*nx*ny][t*ny*ny]
1789         ps_muls1    tmp3, tmp4, tmp0
1790         // tmp2 = [t*nx*nx][t*ny*nx]
1791         ps_muls0    tmp2, tmp4, tmp0
1792         // tmp4 = [t*nx*nz][t*ny*nz]
1793         ps_muls0    tmp4, tmp4, tmp1
1794 
1795         // tmp6 = [t*nx*ny-s*nz][t*nx*ny-s*nz]
1796         fnmsubs     tmp6, tmp1, sT, tmp3
1797         // tmp7 = [t*nx*ny+s*nz][t*ny*ny+s*nz]
1798         fmadds      tmp7, tmp1, sT, tmp3
1799 
1800         // tmp0 = [-s*nx][-s*ny]
1801         ps_neg      tmp0, tmp9
1802         // tmp8 = [t*nx*nz+s*ny][0] == [m02][m03]
1803         ps_sum0     tmp8, tmp4, fc0, tmp9
1804         // tmp2 = [t*nx*nx+c][t*nx*ny-s*nz] == [m00][m01]
1805         ps_sum0     tmp2, tmp2, tmp6, cT
1806         // tmp3 = [t*nx*ny+s*nz][t*ny*ny+c] == [m10][m11]
1807         ps_sum1     tmp3, cT, tmp7, tmp3
1808         // tmp6 = [t*ny*nz-s*nx][0] == [m12][m13]
1809         ps_sum0     tmp6, tmp0, fc0 ,tmp4
1810 
1811             // tmp8 [m02][m03] : STORE
1812             psq_st      tmp8, 8(m), 0, 0
1813         // tmp0 = [t*nx*nz-s*ny][t*ny*nz]
1814         ps_sum0     tmp0, tmp4, tmp4, tmp0
1815             // tmp2 [m00][m01] : STORE
1816             psq_st      tmp2, 0(m), 0, 0
1817         // tmp5 = [t*nz*nz][t*nz*nz]
1818         ps_muls0    tmp5, tmp5, tmp1
1819             // tmp3 [m10][m11] : STORE
1820             psq_st      tmp3, 16(m), 0, 0
1821         // tmp4 = [t*nx*nz-s*ny][t*ny*nz+s*nx] == [m20][m21]
1822         ps_sum1     tmp4, tmp9, tmp0, tmp4
1823             // tmp6 [m12][m13] : STORE
1824             psq_st      tmp6, 24(m), 0, 0
1825         // tmp5 = [t*nz*nz+c][0]   == [m22][m23]
1826         ps_sum0     tmp5, tmp5, fc0, cT
1827             // tmp4 [m20][m21] : STORE
1828             psq_st      tmp4, 32(m), 0, 0
1829             // tmp5 [m22][m23] : STORE
1830             psq_st      tmp5, 40(m), 0, 0
1831     }
1832 }
1833 
PSMTXRotAxisRad(Mtx m,const Vec * axis,f32 rad)1834 void PSMTXRotAxisRad(
1835     Mtx             m,
1836     const Vec      *axis,
1837     f32             rad )
1838 {
1839     f32     sinT, cosT;
1840 
1841     sinT = sinf(rad);
1842     cosT = cosf(rad);
1843 
1844     __PSMTXRotAxisRadInternal(m, axis, sinT, cosT);
1845 }
1846 
1847 #endif // GEKKO
1848 
1849 /*---------------------------------------------------------------------*
1850 
1851 Name:           MTXTrans
1852 
1853 Description:    sets a translation matrix.
1854 
1855 
1856 Arguments:       m       matrix to be set
1857 
1858                 xT        x component of translation.
1859 
1860                 yT        y component of translation.
1861 
1862                 zT        z component of translation.
1863 
1864 Return   :         none
1865 
1866 *---------------------------------------------------------------------*/
1867 /*---------------------------------------------------------------------*
1868     C version
1869  *---------------------------------------------------------------------*/
C_MTXTrans(Mtx m,f32 xT,f32 yT,f32 zT)1870 void C_MTXTrans ( Mtx m, f32 xT, f32 yT, f32 zT )
1871 {
1872 
1873     ASSERTMSG( (m != 0), MTX_TRANS_1 );
1874 
1875 
1876     m[0][0] = 1.0f;  m[0][1] = 0.0f;  m[0][2] = 0.0f;  m[0][3] =  xT;
1877     m[1][0] = 0.0f;  m[1][1] = 1.0f;  m[1][2] = 0.0f;  m[1][3] =  yT;
1878     m[2][0] = 0.0f;  m[2][1] = 0.0f;  m[2][2] = 1.0f;  m[2][3] =  zT;
1879 
1880 }
1881 
1882 /*---------------------------------------------------------------------*
1883     Paired-Single assembler version
1884  *---------------------------------------------------------------------*
1885                 Note that this performs NO error checking.
1886  *---------------------------------------------------------------------*/
1887 #ifdef  GEKKO
PSMTXTrans(register Mtx m,register f32 xT,register f32 yT,register f32 zT)1888 void PSMTXTrans(
1889     register Mtx m,
1890     register f32 xT,
1891     register f32 yT,
1892     register f32 zT
1893 )
1894 {
1895     register f32 c0 = 0.0F;
1896     register f32 c1 = 1.0F;
1897 
1898     asm
1899     {
1900         stfs        xT,     12(m)
1901         stfs        yT,     28(m)
1902         psq_st      c0,      4(m), 0, 0
1903         psq_st      c0,     32(m), 0, 0
1904         stfs        c0,     16(m)
1905         stfs        c1,     20(m)
1906         stfs        c0,     24(m)
1907         stfs        c1,     40(m)
1908         stfs        zT,     44(m)
1909         stfs        c1,      0(m)
1910     }
1911 }
1912 #endif  // GEKKO
1913 
1914 /*---------------------------------------------------------------------*
1915 
1916 Name:           MTXTransApply
1917 
1918 Description:    This function performs the operation equivalent to
1919                 MTXTrans + MTXConcat.
1920 
1921 
1922 Arguments:      src       matrix to be operated.
1923 
1924                 dst       resultant matrix from concat.
1925 
1926                 xT        x component of translation.
1927 
1928                 yT        y component of translation.
1929 
1930                 zT        z component of translation.
1931 
1932 Return   :         none
1933 
1934 *---------------------------------------------------------------------*/
1935 /*---------------------------------------------------------------------*
1936     C version
1937  *---------------------------------------------------------------------*/
C_MTXTransApply(const Mtx src,Mtx dst,f32 xT,f32 yT,f32 zT)1938 void C_MTXTransApply ( const Mtx src, Mtx dst, f32 xT, f32 yT, f32 zT )
1939 {
1940     ASSERTMSG( (src != 0), MTX_TRANSAPPLY_1 );
1941     ASSERTMSG( (dst != 0), MTX_TRANSAPPLY_1 );
1942 
1943     if ( src != dst )
1944     {
1945         dst[0][0] = src[0][0];    dst[0][1] = src[0][1];    dst[0][2] = src[0][2];
1946         dst[1][0] = src[1][0];    dst[1][1] = src[1][1];    dst[1][2] = src[1][2];
1947         dst[2][0] = src[2][0];    dst[2][1] = src[2][1];    dst[2][2] = src[2][2];
1948     }
1949 
1950     dst[0][3] = src[0][3] + xT;
1951     dst[1][3] = src[1][3] + yT;
1952     dst[2][3] = src[2][3] + zT;
1953 }
1954 
1955 /*---------------------------------------------------------------------*
1956     Paired-Single assembler version
1957  *---------------------------------------------------------------------*
1958                 Note that this performs NO error checking.
1959  *---------------------------------------------------------------------*/
1960 #ifdef  GEKKO
PSMTXTransApply(const register Mtx src,register Mtx dst,register f32 xT,register f32 yT,register f32 zT)1961 asm void PSMTXTransApply(
1962     const register Mtx src,
1963     register Mtx dst,
1964     register f32 xT,
1965     register f32 yT,
1966     register f32 zT )
1967 {
1968     nofralloc;
1969     psq_l       fp4, 0(src),        0, 0;
1970     frsp        xT, xT;                     // to make sure xT = single precision
1971     psq_l       fp5, 8(src),        0, 0;
1972     frsp        yT, yT;                     // to make sure yT = single precision
1973     psq_l       fp7, 24(src),       0, 0;
1974     frsp        zT, zT;                     // to make sure zT = single precision
1975     psq_l       fp8, 40(src),       0, 0;
1976     psq_st      fp4, 0(dst),        0, 0;
1977     ps_sum1     fp5, xT, fp5, fp5;
1978     psq_l       fp6, 16(src),       0, 0;
1979     psq_st      fp5, 8(dst),        0, 0;
1980     ps_sum1     fp7, yT, fp7, fp7;
1981     psq_l       fp9, 32(src),       0, 0;
1982     psq_st      fp6, 16(dst),       0, 0;
1983     ps_sum1     fp8, zT, fp8, fp8;
1984     psq_st      fp7, 24(dst),       0, 0;
1985     psq_st      fp9, 32(dst),       0, 0;
1986     psq_st      fp8, 40(dst),       0, 0;
1987     blr;
1988 }
1989 #endif  // GEKKO
1990 
1991 
1992 /*---------------------------------------------------------------------*
1993 
1994 Name:            MTXScale
1995 
1996 Description:     sets a scaling matrix.
1997 
1998 
1999 Arguments:       m       matrix to be set
2000 
2001                 xS        x scale factor.
2002 
2003                 yS        y scale factor.
2004 
2005                 zS        z scale factor.
2006 
2007 Return   :         none
2008 
2009  *---------------------------------------------------------------------*/
2010 /*---------------------------------------------------------------------*
2011     C version
2012  *---------------------------------------------------------------------*/
C_MTXScale(Mtx m,f32 xS,f32 yS,f32 zS)2013 void C_MTXScale ( Mtx m, f32 xS, f32 yS, f32 zS )
2014 {
2015     ASSERTMSG( (m != 0), MTX_SCALE_1 );
2016 
2017 
2018     m[0][0] = xS;    m[0][1] = 0.0f;  m[0][2] = 0.0f;  m[0][3] = 0.0f;
2019     m[1][0] = 0.0f;  m[1][1] = yS;    m[1][2] = 0.0f;  m[1][3] = 0.0f;
2020     m[2][0] = 0.0f;  m[2][1] = 0.0f;  m[2][2] = zS;    m[2][3] = 0.0f;
2021 }
2022 
2023 /*---------------------------------------------------------------------*
2024     Paired-Single assembler version
2025  *---------------------------------------------------------------------*
2026                 Note that this performs NO error checking.
2027  *---------------------------------------------------------------------*/
2028 #ifdef  GEKKO
PSMTXScale(register Mtx m,register f32 xS,register f32 yS,register f32 zS)2029 void PSMTXScale(
2030     register Mtx m,
2031     register f32 xS,
2032     register f32 yS,
2033     register f32 zS
2034 )
2035 {
2036     register f32 c0 = 0.0F;
2037 
2038     asm
2039     {
2040         stfs        xS,      0(m)
2041         psq_st      c0,      4(m), 0, 0
2042         psq_st      c0,     12(m), 0, 0
2043         stfs        yS,     20(m)
2044         psq_st      c0,     24(m), 0, 0
2045         psq_st      c0,     32(m), 0, 0
2046         stfs        zS,     40(m)
2047         stfs        c0,     44(m)
2048     }
2049 }
2050 #endif  // GEKKO
2051 
2052 /*---------------------------------------------------------------------*
2053 
2054 Name:           MTXScaleApply
2055 
2056 Description:    This function performs the operation equivalent to
2057                 MTXScale + MTXConcat
2058 
2059 Arguments:      src       matrix to be operated.
2060 
2061                 dst       resultant matrix from concat.
2062 
2063                 xS        x scale factor.
2064 
2065                 yS        y scale factor.
2066 
2067                 zS        z scale factor.
2068 
2069 Return   :         none
2070 
2071  *---------------------------------------------------------------------*/
2072 /*---------------------------------------------------------------------*
2073     C version
2074  *---------------------------------------------------------------------*/
C_MTXScaleApply(const Mtx src,Mtx dst,f32 xS,f32 yS,f32 zS)2075 void C_MTXScaleApply ( const Mtx src, Mtx dst, f32 xS, f32 yS, f32 zS )
2076 {
2077     ASSERTMSG( (src != 0), MTX_SCALEAPPLY_1 );
2078     ASSERTMSG( (dst != 0), MTX_SCALEAPPLY_2 );
2079 
2080     dst[0][0] = src[0][0] * xS;     dst[0][1] = src[0][1] * xS;
2081     dst[0][2] = src[0][2] * xS;     dst[0][3] = src[0][3] * xS;
2082 
2083     dst[1][0] = src[1][0] * yS;     dst[1][1] = src[1][1] * yS;
2084     dst[1][2] = src[1][2] * yS;     dst[1][3] = src[1][3] * yS;
2085 
2086     dst[2][0] = src[2][0] * zS;     dst[2][1] = src[2][1] * zS;
2087     dst[2][2] = src[2][2] * zS;     dst[2][3] = src[2][3] * zS;
2088 }
2089 
2090 /*---------------------------------------------------------------------*
2091     Paired-Single assembler version
2092  *---------------------------------------------------------------------*
2093                 Note that this performs NO error checking.
2094  *---------------------------------------------------------------------*/
2095 #ifdef  GEKKO
PSMTXScaleApply(const register Mtx src,register Mtx dst,register f32 xS,register f32 yS,register f32 zS)2096 asm void PSMTXScaleApply (
2097     const register Mtx src,
2098     register Mtx dst,
2099     register f32 xS,
2100     register f32 yS,
2101     register f32 zS )
2102 {
2103     nofralloc;
2104     frsp        xS, xS;                     // to make sure xS = single precision
2105     psq_l       fp4, 0(src),        0, 0;
2106     frsp        yS, yS;                     // to make sure yS = single precision
2107     psq_l       fp5, 8(src),        0, 0;
2108     frsp        zS, zS;                     // to make sure zS = single precision
2109     ps_muls0    fp4, fp4, xS;
2110     psq_l       fp6, 16(src),       0, 0;
2111     ps_muls0    fp5, fp5, xS;
2112     psq_l       fp7, 24(src),       0, 0;
2113     ps_muls0    fp6, fp6, yS;
2114     psq_l       fp8, 32(src),       0, 0;
2115     psq_st      fp4, 0(dst),        0, 0;
2116     ps_muls0    fp7, fp7, yS;
2117     psq_l       fp2, 40(src),       0, 0;
2118     psq_st      fp5, 8(dst),        0, 0;
2119     ps_muls0    fp8, fp8, zS;
2120     psq_st      fp6, 16(dst),       0, 0;
2121     ps_muls0    fp2, fp2, zS;
2122     psq_st      fp7, 24(dst),       0, 0;
2123     psq_st      fp8, 32(dst),       0, 0;
2124     psq_st      fp2, 40(dst),       0, 0;
2125     blr;
2126 }
2127 #endif  // GEKKO
2128 
2129 /*---------------------------------------------------------------------*
2130 
2131 Name:            MTXQuat
2132 
2133 Description:     sets a rotation matrix from a quaternion.
2134 
2135 
2136 Arguments:       m       matrix to be set
2137 
2138                  q        ptr to quaternion.
2139 
2140 Return   :          none
2141 
2142  *---------------------------------------------------------------------*/
2143 /*---------------------------------------------------------------------*
2144     C version
2145  *---------------------------------------------------------------------*/
C_MTXQuat(Mtx m,const Quaternion * q)2146 void C_MTXQuat ( Mtx m, const Quaternion *q )
2147 {
2148 
2149     f32 s,xs,ys,zs,wx,wy,wz,xx,xy,xz,yy,yz,zz;
2150 
2151 
2152     ASSERTMSG( (m != 0),                         MTX_QUAT_1     );
2153     ASSERTMSG( (q != 0),                         MTX_QUAT_2     );
2154     ASSERTMSG( ( q->x || q->y || q->z || q->w ), MTX_QUAT_3     );
2155 
2156 
2157     s = 2.0f / ( (q->x * q->x) + (q->y * q->y) + (q->z * q->z) + (q->w * q->w) );
2158 
2159     xs = q->x *  s;     ys = q->y *  s;  zs = q->z *  s;
2160     wx = q->w * xs;     wy = q->w * ys;  wz = q->w * zs;
2161     xx = q->x * xs;     xy = q->x * ys;  xz = q->x * zs;
2162     yy = q->y * ys;     yz = q->y * zs;  zz = q->z * zs;
2163 
2164 
2165     m[0][0] = 1.0f - (yy + zz);
2166     m[0][1] = xy   - wz;
2167     m[0][2] = xz   + wy;
2168     m[0][3] = 0.0f;
2169 
2170     m[1][0] = xy   + wz;
2171     m[1][1] = 1.0f - (xx + zz);
2172     m[1][2] = yz   - wx;
2173     m[1][3] = 0.0f;
2174 
2175     m[2][0] = xz   - wy;
2176     m[2][1] = yz   + wx;
2177     m[2][2] = 1.0f - (xx + yy);
2178     m[2][3] = 0.0f;
2179 
2180 }
2181 
2182 /*---------------------------------------------------------------------*
2183     Paired-Single assembler version
2184  *---------------------------------------------------------------------*
2185                 Note that this performs NO error checking.
2186  *---------------------------------------------------------------------*/
2187 #ifdef GEKKO
PSMTXQuat(register Mtx m,const register Quaternion * q)2188 void PSMTXQuat ( register Mtx m, const register Quaternion *q )
2189 {
2190     register f32    c_zero, c_one, c_two, scale;
2191     register f32    tmp0, tmp1, tmp2, tmp3, tmp4;
2192     register f32    tmp5, tmp6, tmp7, tmp8, tmp9;
2193 
2194     c_one = 1.0F;
2195 
2196     asm
2197     {
2198         // tmp0 = [qx][qy] : LOAD
2199         psq_l       tmp0, 0(q), 0, 0
2200         // tmp1 = [qz][qw] : LOAD
2201         psq_l       tmp1, 8(q), 0, 0
2202         // c_zero = [0.0F][0.0F]
2203         fsubs       c_zero, c_one, c_one
2204         // c_two  = [2.0F][2.0F]
2205         fadds       c_two, c_one, c_one
2206         // tmp2 = [qx*qx][qy*qy]
2207         ps_mul      tmp2, tmp0, tmp0
2208         // tmp5 = [qy][qx]
2209         ps_merge10  tmp5, tmp0, tmp0
2210         // tmp4 = [qx*qx+qz*qz][qy*qy+qw*qw]
2211         ps_madd     tmp4, tmp1, tmp1, tmp2
2212         // tmp3 = [qz*qz][qw*qw]
2213         ps_mul      tmp3, tmp1, tmp1
2214             // scale = [qx*qx+qy*qy+qz*qz+qw*qw][?]
2215             ps_sum0     scale, tmp4, tmp4, tmp4
2216         // tmp7 = [qy*qw][qx*qw]
2217         ps_muls1    tmp7, tmp5, tmp1
2218             // Newton-Rapson refinement (1/X) : E' = 2E-X*E*E
2219             // tmp9 = [E = Est.(1/X)]
2220             fres        tmp9, scale
2221         // tmp4 = [qx*qx+qz*qz][qy*qy+qz*qz]
2222         ps_sum1     tmp4, tmp3, tmp4, tmp2
2223             // scale = [2-X*E]
2224             ps_nmsub    scale, scale, tmp9, c_two
2225         // tmp6 = [qz*qw][?]
2226         ps_muls1    tmp6, tmp1, tmp1
2227             // scale = [E(2-scale*E) = E']
2228             ps_mul      scale, tmp9, scale
2229         // tmp2 = [qx*qx+qy*qy]
2230         ps_sum0     tmp2, tmp2, tmp2, tmp2
2231             // scale = [s = 2E' = 2.0F/(qx*qx+qy*qy+qz*qz+qw*qw)]
2232             fmuls       scale, scale, c_two
2233         // tmp8 = [qx*qy+qz*qw][?]
2234         ps_madd     tmp8, tmp0, tmp5, tmp6
2235         // tmp6 = [qx*qy-qz*qw][?]
2236         ps_msub     tmp6, tmp0, tmp5, tmp6
2237             // c_zero [m03] : STORE
2238             psq_st      c_zero, 12(m), 1, 0
2239         // tmp2 = [1-s(qx*qx+qy*qy)]   : [m22]
2240         ps_nmsub    tmp2, tmp2, scale, c_one
2241         // tmp4 = [1-s(qx*qx+qz*qz)][1-s(qy*qy+qz*qz)] : [m11][m00]
2242         ps_nmsub    tmp4, tmp4, scale, c_one
2243             // c_zero [m23] : STORE
2244             psq_st      c_zero, 44(m), 1, 0
2245         // tmp8 = [s(qx*qy+qz*qw)][?]  : [m10]
2246         ps_mul      tmp8, tmp8, scale
2247         // tmp6 = [s(qx*qy-qz*qw)][?]  : [m01]
2248         ps_mul      tmp6, tmp6, scale
2249             // tmp2 [m22] : STORE
2250             psq_st      tmp2, 40(m), 1, 0
2251         // tmp5 = [qx*qz+qy*qw][qy*qz+qx*qw]
2252         ps_madds0   tmp5, tmp0, tmp1, tmp7
2253         // tmp1 = [m10][m11]
2254         ps_merge00  tmp1, tmp8, tmp4
2255         // tmp7 = [qx*qz-qy*qw][qy*qz-qx*qw]
2256         ps_nmsub    tmp7, tmp7, c_two, tmp5
2257         // tmp0 = [m00][m01]
2258         ps_merge10  tmp0, tmp4, tmp6
2259             // tmp1 [m10][m11] : STORE
2260             psq_st      tmp1, 16(m), 0, 0
2261         // tmp5 = [s(qx*qz+qy*qw)][s(qy*qz+qx*qw)] : [m02][m21]
2262         ps_mul      tmp5, tmp5, scale
2263         // tmp7 = [s(qx*qz-qy*qw)][s(qy*qz-qx*qw)] : [m20][m12]
2264         ps_mul      tmp7, tmp7, scale
2265             // tmp0 [m00][m01] : STORE
2266             psq_st      tmp0,  0(m), 0, 0
2267             // tmp5 [m02] : STORE
2268             psq_st      tmp5,  8(m), 1, 0
2269         // tmp3 = [m12][m13]
2270         ps_merge10  tmp3, tmp7, c_zero
2271         // tmp9 = [m20][m21]
2272         ps_merge01  tmp9, tmp7, tmp5
2273             // tmp3 [m12][m13] : STORE
2274             psq_st      tmp3, 24(m), 0, 0
2275             // tmp9 [m20][m21] : STORE
2276             psq_st      tmp9, 32(m), 0, 0
2277 
2278     }
2279 }
2280 #endif // GEKKO
2281 
2282 /*---------------------------------------------------------------------*
2283 
2284 Name:           MTXReflect
2285 
2286 Description:    reflect a rotation matrix with respect to a plane.
2287 
2288 
2289 Arguments:      m       matrix to be set
2290 
2291                 p        point on the planar reflector.
2292 
2293                 n       normal of the planar reflector.
2294 
2295 Return   :         none
2296 
2297  *---------------------------------------------------------------------*/
2298 /*---------------------------------------------------------------------*
2299     C version
2300  *---------------------------------------------------------------------*/
C_MTXReflect(Mtx m,const Vec * p,const Vec * n)2301 void C_MTXReflect ( Mtx m, const Vec *p, const Vec *n )
2302 {
2303     f32 vxy, vxz, vyz, pdotn;
2304 
2305     vxy   = -2.0f * n->x * n->y;
2306     vxz   = -2.0f * n->x * n->z;
2307     vyz   = -2.0f * n->y * n->z;
2308     pdotn = 2.0f * C_VECDotProduct(p, n);
2309 
2310     m[0][0] = 1.0f - 2.0f * n->x * n->x;
2311     m[0][1] = vxy;
2312     m[0][2] = vxz;
2313     m[0][3] = pdotn * n->x;
2314 
2315     m[1][0] = vxy;
2316     m[1][1] = 1.0f - 2.0f * n->y * n->y;
2317     m[1][2] = vyz;
2318     m[1][3] = pdotn * n->y;
2319 
2320     m[2][0] = vxz;
2321     m[2][1] = vyz;
2322     m[2][2] = 1.0f - 2.0f * n->z * n->z;
2323     m[2][3] = pdotn * n->z;
2324 }
2325 
2326 /*---------------------------------------------------------------------*
2327     Paired-Single assembler version
2328  *---------------------------------------------------------------------*/
2329 #ifdef GEKKO
PSMTXReflect(register Mtx m,const register Vec * p,const register Vec * n)2330 void PSMTXReflect( register Mtx m, const register Vec *p, const register Vec *n )
2331 {
2332     register f32    c_one = 1.0F;
2333     register f32    vn_xy, vn_z1, n2vn_xy, n2vn_z1, pdotn;
2334     register f32    tmp0, tmp1, tmp2, tmp3;
2335     register f32    tmp4, tmp5, tmp6, tmp7;
2336 
2337     asm
2338     {
2339             // vn_z1 = [nz][1.0F] : LOAD
2340             psq_l       vn_z1, 8(n), 1, 0
2341             // vn_xy = [nx][ny]   : LOAD
2342             psq_l       vn_xy, 0(n), 0, 0
2343 
2344             // tmp0 = [px][py]   : LOAD
2345             psq_l       tmp0,  0(p), 0, 0
2346         // n2vn_z1 = [-2nz][-2.0F]
2347         ps_nmadd    n2vn_z1, vn_z1, c_one, vn_z1
2348             // tmp1 = [pz][1.0F] : LOAD
2349             psq_l       tmp1,  8(p), 1, 0
2350         // n2vn_xy = [-2nx][-2ny]
2351         ps_nmadd    n2vn_xy, vn_xy, c_one, vn_xy
2352 
2353         // tmp4 = [-2nx*nz][-2ny*nz]   : [m20][m21]
2354         ps_muls0    tmp4, vn_xy, n2vn_z1
2355         // pdotn = [-2(px*nx)][-2(py*ny)]
2356         ps_mul      pdotn, n2vn_xy, tmp0
2357         // tmp2 = [-2nx*nx][-2nx*ny]
2358         ps_muls0    tmp2, vn_xy, n2vn_xy
2359         // pdotn = [-2(px*nx+py*ny)][?]
2360         ps_sum0     pdotn, pdotn, pdotn, pdotn
2361         // tmp3 = [-2nx*ny][-2ny*ny]
2362         ps_muls1    tmp3, vn_xy, n2vn_xy
2363             // tmp4 = [m20][m21] : STORE
2364             psq_st      tmp4, 32(m), 0, 0
2365         // tmp2 = [1-2nx*nx][-2nx*ny]  : [m00][m01]
2366         ps_sum0     tmp2, tmp2, tmp2, c_one
2367         // pdotn = [2(px*nx+py*ny+pz*nz)][?]
2368         ps_nmadd    pdotn, n2vn_z1, tmp1, pdotn
2369         // tmp3 = [-2nx*ny][1-2ny*ny]  : [m10][m11]
2370         ps_sum1     tmp3, c_one, tmp3, tmp3
2371             // tmp2 = [m00][m01] : STORE
2372             psq_st      tmp2,  0(m), 0, 0
2373         // tmp5 = [pdotn*nx][pdotn*ny]
2374         ps_muls0    tmp5, vn_xy, pdotn
2375         // tmp6 = [-2nz][pdotn]
2376         ps_merge00  tmp6, n2vn_z1, pdotn
2377             // tmp3 = [m10][m11] : STORE
2378             psq_st      tmp3, 16(m), 0, 0
2379 
2380         // tmp7 = [-2nx*nz][pdotn*nx]  : [m02][m03]
2381         ps_merge00  tmp7, tmp4, tmp5
2382         // tmp6 = [-2nz*nz][pdotn*nz]
2383         ps_muls0    tmp6, tmp6, vn_z1
2384         // tmp5 = [-2ny*nz][pdotn*ny]  : [m12][m13]
2385         ps_merge11  tmp5, tmp4, tmp5
2386             // tmp7 = [m02][m03] : STORE
2387             psq_st      tmp7,  8(m), 0, 0
2388         // tmp6 = [1-2nz*nz][pdotn*nz] : [m22][m23]
2389         ps_sum0     tmp6, tmp6, tmp6, c_one
2390             // tmp5 = [m12][m13] : STORE
2391             psq_st      tmp5, 24(m), 0, 0
2392             // tmp6 = [m22][m23] : STORE
2393             psq_st      tmp6, 40(m), 0, 0
2394     }
2395 }
2396 #endif // GEKKO
2397 
2398 /*---------------------------------------------------------------------*
2399 
2400 
2401 
2402 
2403 
2404                              VIEW   SECTION
2405 
2406 
2407 
2408 
2409 
2410 *---------------------------------------------------------------------*/
2411 
2412 
2413 
2414 
2415 
2416 
2417 /*---------------------------------------------------------------------*
2418 
2419 Name:           MTXLookAt
2420 
2421 Description:    compute a matrix to transform points to camera coordinates.
2422 
2423 
2424 Arguments:      m       matrix to be set
2425 
2426                 camPos   camera position.
2427 
2428                 camUp    camera 'up' direction.
2429 
2430                 target   camera aim point.
2431 
2432 
2433 Return   :         none
2434 
2435 *---------------------------------------------------------------------*/
2436 /*---------------------------------------------------------------------*
2437     C version
2438  *---------------------------------------------------------------------*/
C_MTXLookAt(Mtx m,const Point3d * camPos,const Vec * camUp,const Point3d * target)2439 void C_MTXLookAt ( Mtx m, const Point3d *camPos, const Vec *camUp, const Point3d *target )
2440 {
2441 
2442     Vec vLook,vRight,vUp;
2443 
2444 
2445     ASSERTMSG( (m != 0),      MTX_LOOKAT_1    );
2446     ASSERTMSG( (camPos != 0), MTX_LOOKAT_2    );
2447     ASSERTMSG( (camUp  != 0), MTX_LOOKAT_3    );
2448     ASSERTMSG( (target != 0), MTX_LOOKAT_4    );
2449 
2450 
2451     // compute unit target vector
2452     // use negative value to look down (-Z) axis
2453     vLook.x = camPos->x - target->x;
2454     vLook.y = camPos->y - target->y;
2455     vLook.z = camPos->z - target->z;
2456     VECNormalize( &vLook,&vLook );
2457 
2458 
2459     // vRight = camUp x vLook
2460     VECCrossProduct    ( camUp, &vLook, &vRight );
2461     VECNormalize( &vRight,&vRight );
2462 
2463 
2464     // vUp = vLook x vRight
2465     VECCrossProduct( &vLook, &vRight, &vUp );
2466     // Don't need to normalize vUp since it should already be unit length
2467     // VECNormalize( &vUp, &vUp );
2468 
2469 
2470     m[0][0] = vRight.x;
2471     m[0][1] = vRight.y;
2472     m[0][2] = vRight.z;
2473     m[0][3] = -( camPos->x * vRight.x + camPos->y * vRight.y + camPos->z * vRight.z );
2474 
2475     m[1][0] = vUp.x;
2476     m[1][1] = vUp.y;
2477     m[1][2] = vUp.z;
2478     m[1][3] = -( camPos->x * vUp.x + camPos->y * vUp.y + camPos->z * vUp.z );
2479 
2480     m[2][0] = vLook.x;
2481     m[2][1] = vLook.y;
2482     m[2][2] = vLook.z;
2483     m[2][3] = -( camPos->x * vLook.x + camPos->y * vLook.y + camPos->z * vLook.z );
2484 
2485 
2486 }
2487 
2488 /*---------------------------------------------------------------------*
2489 
2490 
2491 
2492 
2493 
2494                        TEXTURE PROJECTION SECTION
2495 
2496 
2497 
2498 
2499 
2500 *---------------------------------------------------------------------*/
2501 
2502 
2503 /*---------------------------------------------------------------------*
2504 
2505 Name:           MTXLightFrustum
2506 
2507 Description:    Compute a 3x4 projection matrix for texture projection
2508 
2509 
2510 Arguments:      m        3x4 matrix to be set
2511 
2512                 t        top coord. of view volume at the near clipping plane
2513 
2514                 b        bottom coord of view volume at the near clipping plane
2515 
2516                 l        left coord. of view volume at near clipping plane
2517 
2518                 r        right coord. of view volume at near clipping plane
2519 
2520                 n            positive distance from camera to near clipping plane
2521 
2522                 scaleS   scale in the S direction for projected coordinates
2523                          (usually 0.5)
2524 
2525                 scaleT   scale in the T direction for projected coordinates
2526                          (usually 0.5)
2527 
2528                 transS   translate in the S direction for projected coordinates
2529                          (usually 0.5)
2530 
2531                 transT   translate in the T direction for projected coordinates
2532                          (usually 0.5)
2533 
2534 
2535 Return   :         none.
2536 
2537  *---------------------------------------------------------------------*/
2538 /*---------------------------------------------------------------------*
2539     C version
2540  *---------------------------------------------------------------------*/
C_MTXLightFrustum(Mtx m,float t,float b,float l,float r,float n,float scaleS,float scaleT,float transS,float transT)2541 void C_MTXLightFrustum  ( Mtx m, float t, float b, float l, float r, float n,
2542                           float scaleS, float scaleT, float transS,
2543                           float transT )
2544 {
2545     f32 tmp;
2546 
2547 
2548     ASSERTMSG( (m != 0),  MTX_LIGHT_FRUSTUM_1  );
2549     ASSERTMSG( (t != b),  MTX_LIGHT_FRUSTUM_2  );
2550     ASSERTMSG( (l != r),  MTX_LIGHT_FRUSTUM_3  );
2551 
2552 
2553     // NOTE: Be careful about "l" vs. "1" below!!!
2554 
2555     tmp     =  1.0f / (r - l);
2556     m[0][0] =  ((2*n) * tmp) * scaleS;
2557     m[0][1] =  0.0f;
2558     m[0][2] =  (((r + l) * tmp) * scaleS) - transS;
2559     m[0][3] =  0.0f;
2560 
2561     tmp     =  1.0f / (t - b);
2562     m[1][0] =  0.0f;
2563     m[1][1] =  ((2*n) * tmp) * scaleT;
2564     m[1][2] =  (((t + b) * tmp) * scaleT) - transT;
2565     m[1][3] =  0.0f;
2566 
2567     m[2][0] =  0.0f;
2568     m[2][1] =  0.0f;
2569     m[2][2] = -1.0f;
2570     m[2][3] =  0.0f;
2571 }
2572 
2573 /*---------------------------------------------------------------------*
2574 
2575 Name:           MTXLightPerspective
2576 
2577 Description:    compute a 3x4 perspective projection matrix from
2578                 field of view and aspect ratio for texture projection.
2579 
2580 
2581 Arguments:      m        3x4 matrix to be set
2582 
2583                 fovy     total field of view in in degrees in the YZ plane
2584 
2585                 aspect       ratio of view window width:height (X / Y)
2586 
2587                 scaleS   scale in the S direction for projected coordinates
2588                          (usually 0.5)
2589 
2590                 scaleT   scale in the T direction for projected coordinates
2591                          (usually 0.5)
2592 
2593                 transS   translate in the S direction for projected coordinates
2594                          (usually 0.5)
2595 
2596                 transT   translate in the T direction for projected coordinates
2597                          (usually 0.5)
2598 
2599 
2600 Return   :         none
2601 
2602  *---------------------------------------------------------------------*/
2603 /*---------------------------------------------------------------------*
2604     C version
2605  *---------------------------------------------------------------------*/
C_MTXLightPerspective(Mtx m,f32 fovY,f32 aspect,float scaleS,float scaleT,float transS,float transT)2606 void C_MTXLightPerspective  ( Mtx m, f32 fovY, f32 aspect, float scaleS,
2607                               float scaleT, float transS, float transT )
2608 {
2609     f32 angle;
2610     f32 cot;
2611 
2612     ASSERTMSG( (m != 0),                            MTX_LIGHT_PERSPECTIVE_1  );
2613     ASSERTMSG( ( (fovY > 0.0) && ( fovY < 180.0) ), MTX_LIGHT_PERSPECTIVE_2  );
2614     ASSERTMSG( (aspect != 0),                       MTX_LIGHT_PERSPECTIVE_3  );
2615 
2616     // find the cotangent of half the (YZ) field of view
2617     angle = fovY * 0.5f;
2618     angle = MTXDegToRad( angle );
2619 
2620     cot = 1.0f / tanf(angle);
2621 
2622     m[0][0] =    (cot / aspect) * scaleS;
2623     m[0][1] =    0.0f;
2624     m[0][2] =    -transS;
2625     m[0][3] =    0.0f;
2626 
2627     m[1][0] =    0.0f;
2628     m[1][1] =    cot * scaleT;
2629     m[1][2] =    -transT;
2630     m[1][3] =    0.0f;
2631 
2632     m[2][0] =    0.0f;
2633     m[2][1] =    0.0f;
2634     m[2][2] =   -1.0f;
2635     m[2][3] =    0.0f;
2636 }
2637 
2638 /*---------------------------------------------------------------------*
2639 
2640 Name:           MTXLightOrtho
2641 
2642 Description:    compute a 3x4 orthographic projection matrix.
2643 
2644 
2645 Arguments:      m       matrix to be set
2646 
2647                 t        top coord. of parallel view volume
2648 
2649                 b        bottom coord of parallel view volume
2650 
2651                 l        left coord. of parallel view volume
2652 
2653                 r        right coord. of parallel view volume
2654 
2655                 scaleS   scale in the S direction for projected coordinates
2656                          (usually 0.5)
2657 
2658                 scaleT   scale in the T direction for projected coordinates
2659                          (usually 0.5)
2660 
2661                 transS   translate in the S direction for projected coordinates
2662                          (usually 0.5)
2663 
2664                 transT   translate in the T direction for projected coordinates
2665                          (usually 0.5)
2666 
2667 
2668 Return   :         none
2669 
2670  *---------------------------------------------------------------------*/
2671 /*---------------------------------------------------------------------*
2672     C version
2673  *---------------------------------------------------------------------*/
C_MTXLightOrtho(Mtx m,f32 t,f32 b,f32 l,f32 r,float scaleS,float scaleT,float transS,float transT)2674 void C_MTXLightOrtho ( Mtx m, f32 t, f32 b, f32 l, f32 r, float scaleS,
2675                               float scaleT, float transS, float transT )
2676 {
2677     f32 tmp;
2678 
2679 
2680     ASSERTMSG( (m != 0),  MTX_LIGHT_ORTHO_1     );
2681     ASSERTMSG( (t != b),  MTX_LIGHT_ORTHO_2     );
2682     ASSERTMSG( (l != r),  MTX_LIGHT_ORTHO_3     );
2683 
2684 
2685     // NOTE: Be careful about "l" vs. "1" below!!!
2686 
2687     tmp     =  1.0f / (r - l);
2688     m[0][0] =  (2.0f * tmp * scaleS);
2689     m[0][1] =  0.0f;
2690     m[0][2] =  0.0f;
2691     m[0][3] =  ((-(r + l) * tmp) * scaleS) + transS;
2692 
2693     tmp     =  1.0f / (t - b);
2694     m[1][0] =  0.0f;
2695     m[1][1] =  (2.0f * tmp) * scaleT;
2696     m[1][2] =  0.0f;
2697     m[1][3] =  ((-(t + b) * tmp)* scaleT) + transT;
2698 
2699     m[2][0] =  0.0f;
2700     m[2][1] =  0.0f;
2701     m[2][2] =  0.0f;
2702     m[2][3] =  1.0f;
2703 }
2704 
2705 /*===========================================================================*/
2706 
2707 #if ( __MWERKS__ == 0x00004100 )
2708 #pragma defer_codegen reset
2709 #endif
2710 
2711