1 /*---------------------------------------------------------------------------*
2   Project: matrix vector Library
3   File:    mtx.c
4 
5   Copyright 1998 - 2006 Nintendo.  All rights reserved.
6 
7   These coded instructions, statements, and computer programs contain
8   proprietary information of Nintendo of America Inc. and/or Nintendo
9   Company Ltd., and are protected by Federal copyright law.     They may
10   not be disclosed to third parties or copied or duplicated in any form,
11   in whole or in part, without the prior written consent of Nintendo.
12 
13 
14   $Log: mtx.c,v $
15   Revision 1.6  2008/05/23 08:21:52  nakano_yoshinobu
16   Improved the performance of PSMTXInverse and PSInvXpose
17 
18   Revision 1.5  2008/05/23 01:50:36  nakano_yoshinobu
19   Fixed overflow in PSMTXInverse.
20   Changed ( E' = 2E - K*E*E ) to ( E' = ( 2 - K * E ) * E ).
21 
22   Revision 1.4  2007/01/11 00:45:26  aka
23   Removed win32.h.
24 
25   Revision 1.3  2006/02/20 04:25:42  mitu
26   changed include path from dolphin/ to revolution/.
27 
28   Revision 1.2  2005/12/30 06:01:51  hirose
29   Temporary workaround for CW3.0 Alpha3 problem.
30 
31 
32     30    02/06/26 13:49 Hirose
33     Fixed a register allocation problem with DEBUG build.
34 
35     29    02/06/20 11:21 Hirose
36     Added MTXConcatArray.
37 
38     28    02/04/11 13:10 Hirose
39     const type specifier support. (worked by Hiratsu@IRD)
40 
41     27    02/02/18 9:27 Hirose
42     Workaround for floating-point precision mismatch issue.
43 
44     26    9/13/01 4:28p Hirose
45     Removed unnecessary possibilities of zero division exception.
46 
47     25    8/28/01 3:48p Hirose
48     Removed one refinement process in the calculation of square root.
49 
50     24    7/30/01 10:21p Hirose
51     Changes for function definitions.
52 
53     23    7/23/01 8:45p Hirose
54     Now all model matrix functions and matrix-vector functions have PS
55     version.
56 
57     22    7/07/01 7:37p Hirose
58     added PS versions of MTXTrans* / MTXScale* functions.
59 
60     21    3/29/01 8:23p Hirose
61     fixed assertion messages
62 
63     20    3/29/01 3:06p Hirose
64     added MTXInvXpose
65 
66     19    3/16/01 11:43p Hirose
67     added PSMTXInverse
68 
69     18    2/22/01 11:58p Hirose
70     Moved some sections into other files.
71     Added some new Paired-Single functions. Etc.
72 
73     17    7/31/00 2:07p Carl
74     Removed redundant verify in MTXRotRad.
75     Minor potential optimization in VECNormalize.
76 
77     16    7/21/00 4:19p Carl
78     Removed as many divides as possible.
79 
80     15    7/21/00 2:58p Carl
81     Removed unnecessary VECNormalize in MTXLookAt.
82 
83     14    7/12/00 4:41p John
84     Substitues MTXConcat and MTXMultVecArray with their paired-singles
85     equivalent for Gekko nondebug builds.
86 
87     13    7/07/00 7:09p Dante
88     PC Compatibility
89 
90     12    5/10/00 1:50p Hirose
91     added radian-based rotation matrix functions
92     moved paired-single matrix stuff into an another source file
93 
94     11    4/13/00 11:26a Danm
95     Restore hgh performance version of ROMulVecArray.
96 
97     10    4/10/00 11:56a Danm
98     Added 2 matrix skinning support.
99 
100     9     3/27/00 6:38p Carl
101     Fixed other comments.
102 
103     8     3/27/00 6:20p Carl
104     Changed comment regarding z scale in MTXFrustum.
105 
106     7     3/22/00 2:01p John
107     Added VECSquareDistance and VECDistance.
108 
109     6     2/14/00 2:04p Mikepc
110     detabbed code.
111 
112     5     2/11/00 4:22p Tian
113     Cleaned up reordered paired single code
114 
115     4     1/27/00 5:21p Tian
116     Documented paired single ops.
117 
118     3     1/27/00 4:56p Tian
119     Implemented fast paired single matrix ops : PSMTXReorder, PSMTXConcat,
120     PSMTXROMultVecArray, PSMTXMultVecArray, PSMTXMultS16VecArray,
121     PSMTXROMultS16VecArray
122 
123     2     12/06/99 12:40p Alligator
124     modify proj mtx to scale/translate z to (0, w) range for hardware
125 
126     20    11/17/99 4:32p Ryan
127     update to fin win32 compiler warnings
128 
129     19    11/12/99 12:08p Mikepc
130     changed #include<dolphin/mtx/mtxAssert.h> to #include"mtxAssert.h" to
131     reflect new location of mtxAssert.h file.
132 
133     18    11/10/99 7:21p Hirose
134     added singular case check for VECHalfAngle
135 
136     17    11/10/99 4:40p Alligator
137     added MTXReflect
138 
139     16    10/06/99 12:28p Yasu
140     Appended MTXMultVecSR and MTXMultVecArraySR
141 
142     15    8/31/99 5:41p Shiki
143     Revised to use sqrtf() instead of sqrt().
144 
145     14      8/31/99 5:18p Shiki
146     Revised to use sinf()/cosf() instead of sin()/cos().
147 
148     13      7/28/99 3:38p Ryan
149 
150     12      7/28/99 11:30a Ryan
151     Added Texture Projection functions
152 
153     11      7/02/99 12:53p Mikepc
154     changed mtx row/col
155 
156   $NoKeywords: $
157  *---------------------------------------------------------------------------*/
158 
159 #include <math.h>
160 #include <revolution/mtx.h>
161 #include "mtxAssert.h"
162 
163 #if ( __MWERKS__ == 0x00004100 )
164 #pragma defer_codegen on    // This will be removed when compiler is fixed.
165 #endif
166 
167 /*---------------------------------------------------------------------*
168     Constants
169  *---------------------------------------------------------------------*/
170 static f32 Unit01[] = { 0.0f, 1.0f };
171 
172 
173 /*---------------------------------------------------------------------*
174 
175 
176 
177 
178                             GENERAL SECTION
179 
180 
181 
182 
183 *---------------------------------------------------------------------*/
184 
185 
186 /*---------------------------------------------------------------------*
187 
188 Name:           MTXIdentity
189 
190 Description:    sets a matrix to identity
191 
192 Arguments:      m :  matrix to be set
193 
194 Return:         none
195 
196 *---------------------------------------------------------------------*/
197 /*---------------------------------------------------------------------*
198     C version
199  *---------------------------------------------------------------------*/
C_MTXIdentity(Mtx m)200 void C_MTXIdentity ( Mtx m )
201 {
202 
203     ASSERTMSG( (m != 0), MTX_IDENTITY_1 );
204 
205 
206     m[0][0] = 1.0f;     m[0][1] = 0.0f;  m[0][2] = 0.0f;  m[0][3] = 0.0f;
207 
208     m[1][0] = 0.0f;     m[1][1] = 1.0f;  m[1][2] = 0.0f;  m[1][3] = 0.0f;
209 
210     m[2][0] = 0.0f;     m[2][1] = 0.0f;  m[2][2] = 1.0f;  m[2][3] = 0.0f;
211 
212 }
213 
214 /*---------------------------------------------------------------------*
215     Paired-Single assembler version
216  *---------------------------------------------------------------------*
217             Note that this performs NO error checking.
218             Actually there is not much performance advantage.
219  *---------------------------------------------------------------------*/
220 #ifdef GEKKO
PSMTXIdentity(register Mtx m)221 void PSMTXIdentity
222 (
223     register Mtx m    // r3
224 )
225 {
226     register f32 c_zero = 0.0F; // { 0.0F, 0.0F };
227     register f32 c_one  = 1.0F; // { 1.0F, 1.0F };
228     register f32 c_01;
229     register f32 c_10;
230 
231     asm
232     {
233         psq_st      c_zero, 8(m),   0, 0     // m[0][2], m[0][3]
234         ps_merge01  c_01, c_zero, c_one      // { 0.1F, 1.0F }
235         psq_st      c_zero, 24(m),  0, 0     // m[1][2], m[1][3]
236         ps_merge10  c_10, c_one, c_zero      // fp2 = { 1.0F, 0.0F }
237         psq_st      c_zero, 32(m),  0, 0     // m[2][0], m[2][1]
238         psq_st      c_01,   16(m),  0, 0     // m[1][0], m[1][1]
239         psq_st      c_10,   0(m),   0, 0     // m[0][0], m[0][1]
240         psq_st      c_10,   40(m),  0, 0     // m[2][2], m[2][3]
241     }
242 }
243 #endif // GEKKO
244 
245 /*---------------------------------------------------------------------*
246 
247 Name:           MTXCopy
248 
249 Description:    copies the contents of one matrix into another
250 
251 Arguments:      src        source matrix for copy
252                 dst        destination matrix for copy
253 
254 
255 Return:         none
256 
257 *---------------------------------------------------------------------*/
258 /*---------------------------------------------------------------------*
259     C version
260  *---------------------------------------------------------------------*/
C_MTXCopy(const Mtx src,Mtx dst)261 void C_MTXCopy ( const Mtx src, Mtx dst )
262 {
263 
264     ASSERTMSG( (src != 0) , MTX_COPY_1 );
265     ASSERTMSG( (dst != 0) , MTX_COPY_2 );
266 
267     if( src == dst )
268     {
269         return;
270     }
271 
272 
273     dst[0][0] = src[0][0];    dst[0][1] = src[0][1];    dst[0][2] = src[0][2];    dst[0][3] = src[0][3];
274 
275     dst[1][0] = src[1][0];    dst[1][1] = src[1][1];    dst[1][2] = src[1][2];    dst[1][3] = src[1][3];
276 
277     dst[2][0] = src[2][0];    dst[2][1] = src[2][1];    dst[2][2] = src[2][2];    dst[2][3] = src[2][3];
278 
279 }
280 
281 /*---------------------------------------------------------------------*
282     Paired-Single assembler version
283  *---------------------------------------------------------------------*
284                 Note that this performs NO error checking.
285  *---------------------------------------------------------------------*/
286 #ifdef GEKKO
PSMTXCopy(const register Mtx src,register Mtx dst)287 asm void PSMTXCopy
288 (
289     const register Mtx src,   // r3
290     register Mtx dst    // r4
291 )
292 {
293     nofralloc
294 
295     psq_l       fp0, 0(src),   0, 0
296     psq_st      fp0, 0(dst),   0, 0
297     psq_l       fp1, 8(src),   0, 0
298     psq_st      fp1, 8(dst),   0, 0
299     psq_l       fp2, 16(src),  0, 0
300     psq_st      fp2, 16(dst),  0, 0
301     psq_l       fp3, 24(src),  0, 0
302     psq_st      fp3, 24(dst),  0, 0
303     psq_l       fp4, 32(src),  0, 0
304     psq_st      fp4, 32(dst),  0, 0
305     psq_l       fp5, 40(src),  0, 0
306     psq_st      fp5, 40(dst),  0, 0
307 
308     blr
309 }
310 #endif // GEKKO
311 
312 /*---------------------------------------------------------------------*
313 
314 Name:           MTXConcat
315 
316 Description:    concatenates two matrices.
317                 order of operation is A x B = AB.
318                 ok for any of ab == a == b.
319 
320                 saves a MTXCopy operation if ab != to a or b.
321 
322 Arguments:      a        first matrix for concat.
323                 b        second matrix for concat.
324                 ab       resultant matrix from concat.
325 
326 Return:         none
327 
328  *---------------------------------------------------------------------*/
329 
330 /*---------------------------------------------------------------------*
331     C version
332  *---------------------------------------------------------------------*/
C_MTXConcat(const Mtx a,const Mtx b,Mtx ab)333 void C_MTXConcat ( const Mtx a, const Mtx b, Mtx ab )
334 {
335     Mtx mTmp;
336     MtxPtr m;
337 
338     ASSERTMSG( (a  != 0), MTX_CONCAT_1 );
339     ASSERTMSG( (b  != 0), MTX_CONCAT_2 );
340     ASSERTMSG( (ab != 0), MTX_CONCAT_3 );
341 
342     if( (ab == a) || (ab == b) )
343     {
344         m = mTmp;
345     }
346 
347     else
348     {
349         m = ab;
350     }
351 
352     // compute (a x b) -> m
353 
354     m[0][0] = a[0][0]*b[0][0] + a[0][1]*b[1][0] + a[0][2]*b[2][0];
355     m[0][1] = a[0][0]*b[0][1] + a[0][1]*b[1][1] + a[0][2]*b[2][1];
356     m[0][2] = a[0][0]*b[0][2] + a[0][1]*b[1][2] + a[0][2]*b[2][2];
357     m[0][3] = a[0][0]*b[0][3] + a[0][1]*b[1][3] + a[0][2]*b[2][3] + a[0][3];
358 
359     m[1][0] = a[1][0]*b[0][0] + a[1][1]*b[1][0] + a[1][2]*b[2][0];
360     m[1][1] = a[1][0]*b[0][1] + a[1][1]*b[1][1] + a[1][2]*b[2][1];
361     m[1][2] = a[1][0]*b[0][2] + a[1][1]*b[1][2] + a[1][2]*b[2][2];
362     m[1][3] = a[1][0]*b[0][3] + a[1][1]*b[1][3] + a[1][2]*b[2][3] + a[1][3];
363 
364     m[2][0] = a[2][0]*b[0][0] + a[2][1]*b[1][0] + a[2][2]*b[2][0];
365     m[2][1] = a[2][0]*b[0][1] + a[2][1]*b[1][1] + a[2][2]*b[2][1];
366     m[2][2] = a[2][0]*b[0][2] + a[2][1]*b[1][2] + a[2][2]*b[2][2];
367     m[2][3] = a[2][0]*b[0][3] + a[2][1]*b[1][3] + a[2][2]*b[2][3] + a[2][3];
368 
369 
370     // overwrite a or b if needed
371     if(m == mTmp)
372     {
373         C_MTXCopy( mTmp, ab );
374     }
375 }
376 
377 /*---------------------------------------------------------------------*
378     Paired-Single assembler version
379  *---------------------------------------------------------------------*
380                 Note that this performs NO error checking.
381  *---------------------------------------------------------------------*/
382 #ifdef GEKKO
PSMTXConcat(const register Mtx mA,const register Mtx mB,register Mtx mAB)383 asm void PSMTXConcat
384 (
385     const register Mtx mA,    // r3
386     const register Mtx mB,    // r4
387           register Mtx mAB    // r5
388 )
389 {
390     nofralloc
391 
392 #define A00_A01 fp0
393 #define A02_A03 fp1
394 #define A10_A11 fp2
395 #define A12_A13 fp3
396 #define A20_A21 fp4
397 #define A22_A23 fp5
398 
399 #define B00_B01 fp6
400 #define B02_B03 fp7
401 #define B10_B11 fp8
402 #define B12_B13 fp9
403 #define B20_B21 fp10
404 #define B22_B23 fp11
405 
406 #define D00_D01 fp12
407 #define D02_D03 fp13
408 #define D10_D11 fp14
409 #define D12_D13 fp15
410 #define D20_D21 fp2
411 #define D22_D23 fp0
412 
413 #define UNIT01  fp31
414 
415     // don't save LR since we don't make any function calls
416     //    mflr    r0
417     //    stw     r0, 4(r1)
418     stwu    r1, -64(r1)
419     psq_l   A00_A01, 0(mA), 0, 0
420     stfd    fp14, 8(r1)
421     psq_l   B00_B01, 0(mB), 0, 0
422     addis   r6, 0, Unit01@ha
423     psq_l   B02_B03, 8(mB), 0, 0
424     stfd    fp15, 16(r1)
425     addi    r6, r6, Unit01@l
426     stfd    fp31, 40(r1)
427     psq_l   B10_B11, 16(mB), 0, 0
428     // D00_D01 = b00a00 , b01a00
429     ps_muls0 D00_D01, B00_B01, A00_A01
430     psq_l   A10_A11, 16(mA), 0, 0
431     // D02_D03 = b02a00 , b03a00
432     ps_muls0 D02_D03, B02_B03, A00_A01
433     psq_l   UNIT01, 0(r6), 0, 0
434     // D10_D11 = a10b00 , a10b01
435     ps_muls0 D10_D11, B00_B01, A10_A11
436     psq_l   B12_B13, 24(mB), 0, 0
437     // D12_D13 = a10b02 , a10b03
438     ps_muls0 D12_D13, B02_B03, A10_A11
439     psq_l   A02_A03, 8(mA), 0, 0
440         // fp12 = b10a01 + b00a00 , b11a01 + b01a00
441         ps_madds1 D00_D01, B10_B11, A00_A01, D00_D01
442     psq_l   A12_A13, 24(mA), 0, 0
443         // D10_D11 = a10b00 + a11b10 , a10b01 + a11b11
444         ps_madds1 D10_D11, B10_B11, A10_A11, D10_D11
445     psq_l   B20_B21, 32(mB), 0, 0
446         // D02_D03 = b12a01 + b02a00 , b13a01 + b03a00
447         ps_madds1 D02_D03, B12_B13, A00_A01, D02_D03  // YYY LAST TIME FP0 IS USED
448     psq_l   B22_B23, 40(mB), 0, 0
449         // D12_D13 = a10b02 + a11b12, a10b03+a11b13
450         ps_madds1 D12_D13, B12_B13, A10_A11, D12_D13 // YYY LAST TIME FP2 IS USED
451     psq_l   A20_A21, 32(mA), 0, 0
452     psq_l   A22_A23, 40(mA), 0, 0
453             // D00_D01 = b20a02 + b10a01 + b00a00 , b21a02 + b11a01 + b01a00
454             ps_madds0 D00_D01, B20_B21, A02_A03, D00_D01 // m00, m01 computed
455             // D02_D03 = b12a01 + b02a00 + b22a02 , b13a01 + b03a00 + b23a02
456             ps_madds0 D02_D03, B22_B23, A02_A03, D02_D03
457             // D10_D11 = a10b00 + a11b10 +a12b20, a10b01 + a11b11 + a12b21
458             ps_madds0 D10_D11, B20_B21, A12_A13, D10_D11 // m10, m11 computed
459             // D12_D13 = a10b02 + a11b12 + a12b22, a10b03+a11b13 + a12b23 + a13
460             ps_madds0 D12_D13, B22_B23, A12_A13, D12_D13
461 
462 
463     // store m00m01
464     psq_st  D00_D01, 0(mAB), 0, 0 // YYY LAST TIME FP12 IS USED
465 
466     // D20_D21 = a20b00, a20b01
467     ps_muls0 D20_D21, B00_B01, A20_A21 // YYY LAST TIME FP6 IS USED
468                 // get a03 from fp1 and add to D02_D03
469                 ps_madds1 D02_D03, UNIT01, A02_A03, D02_D03 // m02, m03 computed
470                 // YYY LAST TIME FP1 IS USED
471     // D22_D23 = a20b02, a20b03
472     ps_muls0 D22_D23, B02_B03, A20_A21 // YYY LAST TIME FP7 IS USED
473     // store m10m11
474     psq_st  D10_D11, 16(mAB), 0, 0
475                 // get a13 from fp3 and add to D12_D13
476                 ps_madds1 D12_D13, UNIT01, A12_A13, D12_D13 // m12, m13 computed
477     // store m02m03
478     psq_st  D02_D03, 8(mAB), 0, 0 // YYY LAST TIME D02_D03 IS USED
479 
480         // D20_D21 = a20b00 + a21b10, a20b01 + a21b11
481         ps_madds1 D20_D21, B10_B11, A20_A21, D20_D21 // YYY LAST TIME FP8 IS USED
482         // D22_D23 = a20b02 + a21b12, a20b03 + a21b13
483         ps_madds1 D22_D23, B12_B13, A20_A21, D22_D23
484             // D20_D21 = a20b00 + a21b10 + a22b20, a20b01 + a21b11 + a22b21
485             ps_madds0 D20_D21, B20_B21, A22_A23, D20_D21
486     // Restore fp14
487     lfd    fp14, 8(r1)  // D10_D11
488     // store m12m13
489     psq_st  D12_D13, 24(mAB), 0, 0
490             // D22_D23 = a20b02 + a21b12 + a22b22, a20b03 + a21b13 + a22b23 + a23
491             ps_madds0 D22_D23, B22_B23, A22_A23, D22_D23
492     // store m20m21
493     psq_st  D20_D21, 32(mAB), 0, 0
494                 // get a23 from fp5 and add to fp17
495                 ps_madds1 D22_D23, UNIT01, A22_A23, D22_D23
496     // restore stack frame
497     lfd    fp15, 16(r1) // D12_D13
498     // store m22m23
499     psq_st  D22_D23, 40(mAB), 0, 0
500 
501     lfd    fp31, 40(r1)
502     addi   r1, r1, 64
503 
504 
505     blr
506 
507 
508 #undef A00_A01
509 #undef A02_A03
510 #undef A10_A11
511 #undef A12_A13
512 #undef A20_A21
513 #undef A22_A23
514 
515 #undef B00_B01
516 #undef B02_B03
517 #undef B10_B11
518 #undef B12_B13
519 #undef B20_B21
520 #undef B22_B23
521 
522 #undef D00_D01
523 #undef D02_D03
524 #undef D10_D11
525 #undef D12_D13
526 #undef D20_D21
527 #undef D22_D23
528 
529 #undef UNIT01
530 
531 
532 /*
533   / m[0][0] = a[0][0]*b[0][0] + a[0][1]*b[1][0] + a[0][2]*b[2][0];
534 fp12m[0][1] = a[0][0]*b[0][1] + a[0][1]*b[1][1] + a[0][2]*b[2][1];
535   / m[0][2] = a[0][0]*b[0][2] + a[0][1]*b[1][2] + a[0][2]*b[2][2];
536 fp13m[0][3] = a[0][0]*b[0][3] + a[0][1]*b[1][3] + a[0][2]*b[2][3] + a[0][3];
537 
538   / m[1][0] = a[1][0]*b[0][0] + a[1][1]*b[1][0] + a[1][2]*b[2][0];
539 fp14m[1][1] = a[1][0]*b[0][1] + a[1][1]*b[1][1] + a[1][2]*b[2][1];
540   / m[1][2] = a[1][0]*b[0][2] + a[1][1]*b[1][2] + a[1][2]*b[2][2];
541 fp15m[1][3] = a[1][0]*b[0][3] + a[1][1]*b[1][3] + a[1][2]*b[2][3] + a[1][3];
542 
543   / m[2][0] = a[2][0]*b[0][0] + a[2][1]*b[1][0] + a[2][2]*b[2][0];
544 fp16m[2][1] = a[2][0]*b[0][1] + a[2][1]*b[1][1] + a[2][2]*b[2][1];
545   / m[2][2] = a[2][0]*b[0][2] + a[2][1]*b[1][2] + a[2][2]*b[2][2];
546 fp17m[2][3] = a[2][0]*b[0][3] + a[2][1]*b[1][3] + a[2][2]*b[2][3] + a[2][3];
547 OPTIMIZATIONS :
548   fp0 instead of fp17
549   fp2 instead of fp16
550     psq_l   fp0, 0(mA), 0, 0    // a[0][0], a[0][1]
551     psq_l   fp1, 8(mA), 0, 0    // a[0][2], a[0][3]
552     psq_l   fp2, 16(mA), 0, 0   // a[1][0], a[1][1]
553     psq_l   fp3, 24(mA), 0, 0   // a[1][2], a[1][3]
554     psq_l   fp4, 32(mA), 0, 0   // a[2][0], a[2][1]
555     psq_l   fp5, 40(mA), 0, 0   // a[2][2], a[2][3]
556 
557     psq_l   fp6, 0(mB), 0, 0    // b[0][0], b[0][1]
558     psq_l   fp7, 8(mB), 0, 0    // b[0][2], b[0][3]
559     psq_l   fp8, 16(mB), 0, 0   // b[1][0], b[1][1]
560     psq_l   fp9, 24(mB), 0, 0   // b[1][2], b[1][3]
561     psq_l   fp10, 32(mB), 0, 0   // b[2][0], b[2][1]
562     psq_l   fp11, 40(mB), 0, 0   // b[2][2], b[2][3]
563 
564 */
565 }
566 #endif // GEKKO
567 
568 
569 /*---------------------------------------------------------------------*
570 
571 Name:           MTXConcatArray
572 
573 Description:    concatenates a matrix to an array of matrices.
574                 order of operation is A x B(array) = AB(array).
575 
576 Arguments:      a        first matrix for concat.
577                 srcBase  array base of second matrix for concat.
578                 dstBase  array base of resultant matrix from concat.
579                 count    number of matrices in srcBase, dstBase arrays.
580 
581                 note:      cannot check for array overflow
582 
583 Return:         none
584 
585  *---------------------------------------------------------------------*/
586 
587 /*---------------------------------------------------------------------*
588     C version
589  *---------------------------------------------------------------------*/
C_MTXConcatArray(const Mtx a,const Mtx * srcBase,Mtx * dstBase,u32 count)590 void C_MTXConcatArray ( const Mtx a, const Mtx* srcBase, Mtx* dstBase, u32 count )
591 {
592     u32 i;
593 
594     ASSERTMSG( (a       != 0), "MTXConcatArray(): NULL MtxPtr 'a' " );
595     ASSERTMSG( (srcBase != 0), "MTXConcatArray(): NULL MtxPtr 'srcBase' " );
596     ASSERTMSG( (dstBase != 0), "MTXConcatArray(): NULL MtxPtr 'dstBase' " );
597     ASSERTMSG( (count > 1),    "MTXConcatArray(): count must be greater than 1." );
598 
599     for ( i = 0 ; i < count ; i++ )
600     {
601         C_MTXConcat(a, *srcBase, *dstBase);
602 
603         srcBase++;
604         dstBase++;
605     }
606 }
607 
608 /*---------------------------------------------------------------------*
609     Paired-Single assembler version
610  *---------------------------------------------------------------------*
611                 Note that this performs NO error checking.
612  *---------------------------------------------------------------------*/
613 #ifdef GEKKO
614 
615 // This code can be compiled only with register allocation optimization.
616 #if ( defined(__MWERKS__) && defined(_DEBUG) )
617 #pragma global_optimizer    on
618 #pragma optimization_level  1
619 #endif
620 
PSMTXConcatArray(const register Mtx a,const register Mtx * srcBase,register Mtx * dstBase,register u32 count)621 void PSMTXConcatArray (
622     const register Mtx  a,
623     const register Mtx* srcBase,
624           register Mtx* dstBase,
625           register u32  count )
626 {
627     register f32    va0, va1, va2, va3, va4, va5;
628     register f32    vb0, vb1, vb2, vb3, vb4, vb5;
629     register f32    vd0, vd1, vd2, vd3, vd4, vd5;
630     register f32    u01;
631     register f32*   u01Ptr = Unit01;
632 
633     asm
634     {
635         // [a00][a01]
636         psq_l       va0, 0(a), 0, 0
637         // [a02][a03]
638         psq_l       va1, 8(a), 0, 0
639         // [a10][a11]
640         psq_l       va2, 16(a), 0, 0
641         // [a12][a13]
642         psq_l       va3, 24(a), 0, 0
643         // count--
644         subi        count, count, 1
645         // [a20][a21]
646         psq_l       va4, 32(a), 0, 0
647         // [a22][a23]
648         psq_l       va5, 40(a), 0, 0
649         // Loop count
650         mtctr       count
651         // [0][1]
652         psq_l       u01, 0(u01Ptr), 0, 0
653 
654         //---------------------------------
655         // [b00][b01]
656         psq_l       vb0, 0(srcBase), 0, 0
657         // [b10][b11]
658         psq_l       vb2, 16(srcBase), 0, 0
659 
660         // [a00*b00][a00*b01]
661         ps_muls0    vd0, vb0, va0
662         // [a10*b00][a10*b01]
663         ps_muls0    vd2, vb0, va2
664         // [a20*b00][a20*b01]
665         ps_muls0    vd4, vb0, va4
666 
667         // [b20][b21]
668         psq_l       vb4, 32(srcBase), 0, 0
669 
670         // [a00*b00 + a01*b10][a00*b01 + a01*b11]
671         ps_madds1   vd0, vb2, va0, vd0
672         // [a10*b00 + a11*b10][a10*b01 + a11*b11]
673         ps_madds1   vd2, vb2, va2, vd2
674         // [a20*b00 + a21*b10][a20*b01 + a21*b11]
675         ps_madds1   vd4, vb2, va4, vd4
676 
677         // [b02][b03]
678         psq_l       vb1, 8(srcBase), 0, 0
679 
680         // [a00*b00 + a01*b10 + a02*b20][a00*b01 + a01*b11 + a02*b21]
681         ps_madds0   vd0, vb4, va1, vd0
682         // [a10*b00 + a11*b10 + a12*b20][a10*b01 + a11*b11 + a12*b21]
683         ps_madds0   vd2, vb4, va3, vd2
684         // [a20*b00 + a21*b10 + a22*b20][a20*b01 + a21*b11 + a22*b21]
685         ps_madds0   vd4, vb4, va5, vd4
686 
687         // [b12][b13]
688         psq_l       vb3, 24(srcBase), 0, 0
689         // [a00*b00 + a01*b10 + a02*b20][a00*b01 + a01*b11 + a02*b21]
690         psq_st      vd0, 0(dstBase), 0, 0
691 
692         // [a00*b02][a00*b03]
693         ps_muls0    vd1, vb1, va0
694         // [a10*b02][a10*b03]
695         ps_muls0    vd3, vb1, va2
696         // [a20*b02][a20*b03]
697         ps_muls0    vd5, vb1, va4
698 
699         // [b22][b23]
700         psq_l       vb5, 40(srcBase), 0, 0
701         // [a10*b00 + a11*b10 + a12*b20][a10*b01 + a11*b11 + a12*b21]
702         psq_st      vd2, 16(dstBase), 0, 0
703 
704         // [a00*b02 + a01*b12][a00*b03 + a01*b13]
705         ps_madds1   vd1, vb3, va0, vd1
706         // [a10*b02 + a11*b12][a10*b03 + a11*b13]
707         ps_madds1   vd3, vb3, va2, vd3
708         // [a20*b02 + a21*b12][a20*b03 + a21*b13]
709         ps_madds1   vd5, vb3, va4, vd5
710 
711     _loop:
712 
713         // ++srcBase
714         addi        srcBase, srcBase, sizeof(Mtx)
715 
716         // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23]
717         ps_madds0   vd1, vb5, va1, vd1
718         // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23]
719         ps_madds0   vd3, vb5, va3, vd3
720         // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23]
721         ps_madds0   vd5, vb5, va5, vd5
722 
723             // [b00][b01]
724             psq_l       vb0, 0(srcBase), 0, 0
725         // [a20*b00 + a21*b10 + a22*b20][a20*b01 + a21*b11 + a22*b21]
726         psq_st      vd4, 32(dstBase), 0, 0
727 
728         // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23 + a03]
729         ps_madd     vd1, u01, va1, vd1
730         // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23 + a13]
731         ps_madd     vd3, u01, va3, vd3
732         // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23 + a23]
733         ps_madd     vd5, u01, va5, vd5
734 
735             // [b10][b11]
736             psq_l       vb2, 16(srcBase), 0, 0
737         // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23 + a03]
738         psq_st      vd1, 8(dstBase), 0, 0
739 
740             // [a00*b00][a00*b01]
741             ps_muls0    vd0, vb0, va0
742             // [a10*b00][a10*b01]
743             ps_muls0    vd2, vb0, va2
744             // [a20*b00][a20*b01]
745             ps_muls0    vd4, vb0, va4
746 
747             // [b20][b21]
748             psq_l       vb4, 32(srcBase), 0, 0
749         // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23 + a13]
750         psq_st      vd3, 24(dstBase), 0, 0
751 
752             // [a00*b00 + a01*b10][a00*b01 + a01*b11]
753             ps_madds1   vd0, vb2, va0, vd0
754             // [a10*b00 + a11*b10][a10*b01 + a11*b11]
755             ps_madds1   vd2, vb2, va2, vd2
756             // [a20*b00 + a21*b10][a20*b01 + a21*b11]
757             ps_madds1   vd4, vb2, va4, vd4
758 
759             // [b02][b03]
760             psq_l       vb1, 8(srcBase), 0, 0
761         // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23 + a23]
762         psq_st      vd5, 40(dstBase), 0, 0
763         // ++dstBase
764         addi        dstBase, dstBase, sizeof(Mtx)
765 
766             // [a00*b00 + a01*b10 + a02*b20][a00*b01 + a01*b11 + a02*b21]
767             ps_madds0   vd0, vb4, va1, vd0
768             // [a10*b00 + a11*b10 + a12*b20][a10*b01 + a11*b11 + a12*b21]
769             ps_madds0   vd2, vb4, va3, vd2
770             // [a20*b00 + a21*b10 + a22*b20][a20*b01 + a21*b11 + a22*b21]
771             ps_madds0   vd4, vb4, va5, vd4
772 
773             // [b12][b13]
774             psq_l       vb3, 24(srcBase), 0, 0
775             // [a00*b00 + a01*b10 + a02*b20][a00*b01 + a01*b11 + a02*b21]
776             psq_st      vd0, 0(dstBase), 0, 0
777 
778             // [a00*b02][a00*b03]
779             ps_muls0    vd1, vb1, va0
780             // [a10*b02][a10*b03]
781             ps_muls0    vd3, vb1, va2
782             // [a20*b02][a20*b03]
783             ps_muls0    vd5, vb1, va4
784 
785             // [b22][b23]
786             psq_l       vb5, 40(srcBase), 0, 0
787             // [a10*b00 + a11*b10 + a12*b20][a10*b01 + a11*b11 + a12*b21]
788             psq_st      vd2, 16(dstBase), 0, 0
789 
790             // [a00*b02 + a01*b12][a00*b03 + a01*b13]
791             ps_madds1   vd1, vb3, va0, vd1
792             // [a10*b02 + a11*b12][a10*b03 + a11*b13]
793             ps_madds1   vd3, vb3, va2, vd3
794             // [a20*b02 + a21*b12][a20*b03 + a21*b13]
795             ps_madds1   vd5, vb3, va4, vd5
796 
797         // LOOP
798         bdnz        _loop
799 
800             // [a20*b00 + a21*b10 + a22*b20][a20*b01 + a21*b11 + a22*b21]
801             psq_st      vd4, 32(dstBase), 0, 0
802 
803             // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23]
804             ps_madds0   vd1, vb5, va1, vd1
805             // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23]
806             ps_madds0   vd3, vb5, va3, vd3
807             // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23]
808             ps_madds0   vd5, vb5, va5, vd5
809 
810             // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23 + a03]
811             ps_madd     vd1, u01, va1, vd1
812             // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23 + a13]
813             ps_madd     vd3, u01, va3, vd3
814             // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23 + a23]
815             ps_madd     vd5, u01, va5, vd5
816 
817             // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23 + a03]
818             psq_st      vd1, 8(dstBase), 0, 0
819             // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23 + a13]
820             psq_st      vd3, 24(dstBase), 0, 0
821             // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23 + a23]
822             psq_st      vd5, 40(dstBase), 0, 0
823             //---------------------------------
824     }
825 }
826 
827 #if ( defined(__MWERKS__) && defined(_DEBUG) )
828 #pragma optimization_level  0
829 #pragma global_optimizer    reset
830 #endif
831 
832 #endif
833 
834 /*---------------------------------------------------------------------*
835 
836 Name:           MTXTranspose
837 
838 Description:    computes the transpose of a matrix.
839                 As matrices are 3x4, fourth column (translation component) is
840                 lost and becomes (0,0,0).
841 
842                 This function is intended for use in computing an
843                 inverse-transpose matrix to transform normals for lighting.
844                 In this case, lost translation component doesn't matter.
845 
846 
847 Arguments:      src       source matrix.
848                 xPose     destination (transposed) matrix.
849                           ok if src == xPose.
850 
851 
852 
853 Return:         none
854 
855 *---------------------------------------------------------------------*/
856 /*---------------------------------------------------------------------*
857     C version
858  *---------------------------------------------------------------------*/
C_MTXTranspose(const Mtx src,Mtx xPose)859 void C_MTXTranspose ( const Mtx src, Mtx xPose )
860 {
861     Mtx mTmp;
862     MtxPtr m;
863 
864 
865     ASSERTMSG( (src   != 0), MTX_TRANSPOSE_1  );
866     ASSERTMSG( (xPose != 0), MTX_TRANSPOSE_2  );
867 
868 
869     if(src == xPose)
870     {
871         m = mTmp;
872     }
873     else
874     {
875         m = xPose;
876     }
877 
878 
879     m[0][0] = src[0][0];   m[0][1] = src[1][0];      m[0][2] = src[2][0];     m[0][3] = 0.0f;
880     m[1][0] = src[0][1];   m[1][1] = src[1][1];      m[1][2] = src[2][1];     m[1][3] = 0.0f;
881     m[2][0] = src[0][2];   m[2][1] = src[1][2];      m[2][2] = src[2][2];     m[2][3] = 0.0f;
882 
883 
884     // copy back if needed
885     if( m == mTmp )
886     {
887         C_MTXCopy( mTmp, xPose );
888     }
889 }
890 
891 /*---------------------------------------------------------------------*
892     Paired-Single assembler version
893  *---------------------------------------------------------------------*
894                 Note that this performs NO error checking.
895  *---------------------------------------------------------------------*/
896 #ifdef GEKKO
PSMTXTranspose(const register Mtx src,register Mtx xPose)897 void PSMTXTranspose ( const register Mtx src, register Mtx xPose )
898 {
899     register f32 c_zero = 0.0F;
900     register f32 row0a, row1a, row0b, row1b;
901     register f32 trns0, trns1, trns2;
902 
903     asm
904     {
905         psq_l       row0a, 0(src),  0, 0    // [0][0], [0][1]
906         stfs        c_zero, 44(xPose)       // 0 -> [2][3]
907         psq_l       row1a, 16(src), 0, 0    // [1][0], [1][1]
908         ps_merge00  trns0, row0a, row1a     // [0][0], [1][0]
909         psq_l       row0b, 8(src),  1, 0    // [0][2], 1
910         ps_merge11  trns1, row0a, row1a     // [0][1], [1][1]
911         psq_l       row1b, 24(src), 1, 0    // [1][2], 1
912         psq_st      trns0, 0(xPose),  0, 0  // [0][0], [1][0] -> [0][0], [0][1]
913         psq_l       row0a, 32(src), 0, 0    // [2][0], [2][1]
914         ps_merge00  trns2, row0b, row1b     // [0][2], [1][2]
915         psq_st      trns1, 16(xPose), 0, 0  // [0][1], [1][1] -> [1][0], [1][1]
916         ps_merge00  trns0, row0a, c_zero    // [2][0], 0
917         psq_st      trns2, 32(xPose), 0, 0  // [0][2], [1][2] -> [2][0], [2][1]
918         ps_merge10  trns1, row0a, c_zero    // [2][1], 0
919         psq_st      trns0, 8(xPose),  0, 0  // [2][0], 0 -> [0][2], [0][3]
920         lfs         row0b, 40(src)          // [2][2]
921         psq_st      trns1, 24(xPose), 0, 0  // [2][1], 0 -> [1][2], [1][3]
922         stfs        row0b, 40(xPose)        // [2][2] -> [2][2]
923     }
924 }
925 #endif // GEKKO
926 
927 /*---------------------------------------------------------------------*
928 
929 Name:           MTXInverse
930 
931 Description:    computes a fast inverse of a matrix.
932                 this algorithm works for matrices with a fourth row of
933                 (0,0,0,1).
934 
935                 for a matrix
936                 M =  |     A      C      |  where A is the upper 3x3 submatrix,
937                      |     0      1      |        C is a 1x3 column vector
938 
939 
940                 INV(M)     =    |  inv(A)      (inv(A))*(-C)    |
941                                 |     0               1         |
942 
943 
944 
945 Arguments:      src       source matrix.
946                 inv       destination (inverse) matrix.
947                           ok if src == inv.
948 
949 
950 
951 Return:         0 if src is not invertible.
952                 1 on success.
953 
954 *---------------------------------------------------------------------*/
955 /*---------------------------------------------------------------------*
956     C version
957  *---------------------------------------------------------------------*/
C_MTXInverse(const Mtx src,Mtx inv)958 u32 C_MTXInverse ( const Mtx src, Mtx inv )
959 {
960     Mtx mTmp;
961     MtxPtr m;
962     f32 det;
963 
964     ASSERTMSG( (src != 0), MTX_INVERSE_1 );
965     ASSERTMSG( (inv != 0), MTX_INVERSE_2 );
966 
967     if( src == inv )
968     {
969         m = mTmp;
970     }
971     else
972     {
973         m = inv;
974     }
975 
976 
977     // compute the determinant of the upper 3x3 submatrix
978     det =   src[0][0]*src[1][1]*src[2][2] + src[0][1]*src[1][2]*src[2][0] + src[0][2]*src[1][0]*src[2][1]
979           - src[2][0]*src[1][1]*src[0][2] - src[1][0]*src[0][1]*src[2][2] - src[0][0]*src[2][1]*src[1][2];
980 
981 
982     // check if matrix is singular
983     if( det == 0.0f )
984     {
985         return 0;
986     }
987 
988 
989     // compute the inverse of the upper submatrix:
990 
991     // find the transposed matrix of cofactors of the upper submatrix
992     // and multiply by (1/det)
993 
994     det = 1.0f / det;
995 
996 
997     m[0][0] =  (src[1][1]*src[2][2] - src[2][1]*src[1][2]) * det;
998     m[0][1] = -(src[0][1]*src[2][2] - src[2][1]*src[0][2]) * det;
999     m[0][2] =  (src[0][1]*src[1][2] - src[1][1]*src[0][2]) * det;
1000 
1001     m[1][0] = -(src[1][0]*src[2][2] - src[2][0]*src[1][2]) * det;
1002     m[1][1] =  (src[0][0]*src[2][2] - src[2][0]*src[0][2]) * det;
1003     m[1][2] = -(src[0][0]*src[1][2] - src[1][0]*src[0][2]) * det;
1004 
1005     m[2][0] =  (src[1][0]*src[2][1] - src[2][0]*src[1][1]) * det;
1006     m[2][1] = -(src[0][0]*src[2][1] - src[2][0]*src[0][1]) * det;
1007     m[2][2] =  (src[0][0]*src[1][1] - src[1][0]*src[0][1]) * det;
1008 
1009 
1010     // compute (invA)*(-C)
1011     m[0][3] = -m[0][0]*src[0][3] - m[0][1]*src[1][3] - m[0][2]*src[2][3];
1012     m[1][3] = -m[1][0]*src[0][3] - m[1][1]*src[1][3] - m[1][2]*src[2][3];
1013     m[2][3] = -m[2][0]*src[0][3] - m[2][1]*src[1][3] - m[2][2]*src[2][3];
1014 
1015     // copy back if needed
1016     if( m == mTmp )
1017     {
1018         C_MTXCopy( mTmp,inv );
1019     }
1020 
1021     return 1;
1022 }
1023 
1024 /*---------------------------------------------------------------------*
1025     Paired-Single assembler version
1026  *---------------------------------------------------------------------*
1027             Note that this performs NO error checking.
1028             Results may be a little bit different from the C version
1029             because it doesn't perform exactly same calculation.
1030  *---------------------------------------------------------------------*/
1031 #ifdef GEKKO
PSMTXInverse(const register Mtx src,register Mtx inv)1032 asm u32 PSMTXInverse ( const register Mtx src, register Mtx inv )
1033 {
1034     nofralloc
1035 
1036     // fp0 [ 00 ][ 1.0F ] : Load
1037     psq_l fp0, 0( src ), 1, 0;
1038     // fp1 [ 01 ][ 02 ]   : Load
1039     psq_l   fp1, 4( src ), 0, 0;
1040     // fp2 [ 10 ][ 1.0F ] : Load
1041     psq_l   fp2, 16( src ), 1, 0;
1042     // fp6 [ 02 ][ 00 ]
1043     ps_merge10 fp6, fp1, fp0;
1044     // fp3 [ 11 ][ 12 ]   : Load
1045     psq_l   fp3, 20( src ), 0, 0;
1046     // fp4 [ 20 ][ 1.0F ] : Load
1047     psq_l   fp4, 32( src ), 1, 0;
1048     // fp7 [ 12 ][ 10 ]
1049     ps_merge10 fp7, fp3, fp2;
1050     // fp5 [ 21 ][ 22 ]   : Load
1051     psq_l   fp5, 36( src ), 0, 0;
1052     // fp11[ 11*02 ][ 00*12 ]
1053     ps_mul  fp11, fp3, fp6;
1054     // fp8 [ 22 ][ 20 ]
1055     ps_merge10 fp8, fp5, fp4;
1056     // fp13[ 21*12 ][ 10*22 ]
1057     ps_mul  fp13, fp5, fp7;
1058     // fp11[ 01*12 - 11*02 ][ 10*02 - 00*12 ]
1059     ps_msub fp11, fp1, fp7, fp11;
1060     // fp12[ 01*22 ][ 20*02 ]
1061     ps_mul  fp12, fp1, fp8;
1062     // fp13[ 11*22 - 21*12 ][ 20*12 - 10*22 ]
1063     ps_msub fp13, fp3, fp8, fp13;
1064     // fp10[ 20*11 ][ N/A ]
1065     ps_mul  fp10, fp3, fp4;
1066     // fp12[ 21*02 - 01*22 ][ 00*22 - 20*02 ]
1067     ps_msub fp12, fp5, fp6, fp12;
1068     // fp7 [ 00*(11*22-21*12) ][ N/A ]
1069     ps_mul  fp7, fp0, fp13;
1070     // fp9 [ 00*21 ][ N/A ]
1071     ps_mul  fp9, fp0, fp5;
1072     // fp8 [ 10*01 ][ N/A ]
1073     ps_mul  fp8, fp1, fp2;
1074     // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) ][ N/A ]
1075     ps_madd fp7, fp2, fp12, fp7;
1076     // fp6 [ 0.0F ][ 0.0F ]
1077     ps_sub  fp6, fp6, fp6;
1078     // fp10[ 10*21 - 20*11 ][ N/A ]
1079     ps_msub fp10, fp2, fp5, fp10;
1080     // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) + 20*(01*12-11*02) ][ N/A ] : det
1081     ps_madd fp7, fp4, fp11, fp7;
1082     // fp9 [ 20*01 - 00*21 ][ N/A ]
1083     ps_msub fp9, fp1, fp4, fp9;
1084     // fp8 [ 00*11 - 10*01 ][ N/A ]
1085     ps_msub fp8, fp0, fp3, fp8;
1086 
1087     // ( det == 0 ) ?
1088     ps_cmpo0 cr0, fp7, fp6;
1089     bne     _regular;
1090 
1091     // return value (singular)
1092     addi    r3, 0, 0;
1093 
1094     blr;
1095 
1096   _regular:
1097 
1098     // fp0 [ 1/det ][ N/A ]
1099     fres fp0, fp7;
1100 
1101     // Newton's approximation
1102     // Refinement : ( E = est. of 1/K ) -> ( E' = ( 2 - K * E ) * E )
1103     ps_add      fp6, fp0, fp0
1104     ps_mul      fp5, fp7, fp0
1105     ps_nmsub    fp0, fp0, fp5, fp6
1106 
1107     // fp1 [ 03 ][ 03 ] : Load
1108     lfs         fp1, 12(src);
1109     // fp13[ ( 11*22 - 21*12 ) * rdet ][ ( 20*12 - 10*22 ) * rdet ] : i[0][0], i[1][0]
1110     ps_muls0    fp13, fp13, fp0;
1111     // fp2 [ 13 ][ 13 ] : Load
1112     lfs         fp2, 28(src);
1113     // fp12[ ( 21*02 - 01*22 ) * rdet ][ ( 00*22 - 20*02 ) * rdet ] : i[0][1], i[1][1]
1114     ps_muls0    fp12, fp12, fp0;
1115     // fp3 [ 23 ][ 23 ] : Load
1116     lfs         fp3, 44(src);
1117     // fp11[ ( 01*12 - 11*02 ) * rdet ][ ( 10*02 - 00*12 ) * rdet ] : i[0][2], i[1][2]
1118     ps_muls0    fp11, fp11, fp0;
1119     // fp5 [ i00 ][ i01 ]
1120     ps_merge00  fp5, fp13, fp12;
1121     // fp4 [ i10 ][ i11 ]
1122     ps_merge11  fp4, fp13, fp12;
1123     // fp6 [ i00*03 ][ i10*03 ]
1124     ps_mul      fp6, fp13, fp1;
1125     // [ i00 ][ i01 ] : Store fp5   -> free(fp5[ i00 ][ i01 ])
1126     psq_st      fp5,  0(inv), 0, 0;
1127     // [ i10 ][ i11 ] : Store fp4   -> free(fp4[ i10 ][ i11 ])
1128     psq_st      fp4,  16(inv), 0, 0;
1129     // fp10[ ( 10*21 - 20*11 ) * rdet ] : i[2][0]
1130     ps_muls0    fp10, fp10, fp0;
1131     // fp9 [ ( 20*01 - 00*21 ) * rdet ] : i[2][1]
1132     ps_muls0    fp9,  fp9,  fp0;
1133     // fp6 [ i00*03+i01*13 ][ i10*03+i11*13 ]
1134     ps_madd     fp6, fp12, fp2, fp6;
1135     // [ i20 ] : Store fp10
1136     psq_st      fp10, 32(inv), 1, 0;
1137     // fp8 [ ( 00*11 - 10*01 ) * rdet ] : i[2][2]
1138     ps_muls0    fp8,  fp8,  fp0;
1139     // fp6 [ -i00*03-i01*13-i02*23 ][ -i10*03-i11*13-i12*23 ] : i[0][3], i[1][3]
1140     ps_nmadd    fp6, fp11, fp3, fp6;
1141     // [ i21 ] : Store fp9
1142     psq_st      fp9,  36(inv), 1, 0;
1143     // fp7 [ i20*03 ][ N/A ]
1144     ps_mul      fp7, fp10, fp1;
1145     // fp5 [ i02 ][ i03 ]
1146     ps_merge00  fp5, fp11, fp6;
1147     // [ i22 ] : Store fp8
1148     psq_st      fp8,  40(inv), 1, 0;
1149     // fp7 [ i20*03+i21*13 ][ N/A ]
1150     ps_madd     fp7, fp9,  fp2, fp7;
1151     // fp4 [ i12 ][ i13 ]
1152     ps_merge11  fp4, fp11, fp6;
1153     // [ i02 ][ i03 ] : Store fp5
1154     psq_st      fp5,  8(inv), 0, 0;
1155     // fp7 [ -i20*03-i21*13-i22*23 ][ N/A ] : i[2][3]
1156     ps_nmadd    fp7, fp8,  fp3, fp7;
1157     // [ i12 ][ i13 ] : Store fp4
1158     psq_st      fp4,  24(inv), 0, 0;
1159     // [ i23 ] : Store fp7
1160     psq_st      fp7,  44(inv), 1, 0;
1161 
1162     // return value (regular)
1163     addi    r3, 0, 1;
1164 
1165     blr;
1166 }
1167 #endif // GEKKO
1168 
1169 /*---------------------------------------------------------------------*
1170 
1171 Name:           MTXInvXpose
1172 
1173 Description:    computes a fast inverse-transpose of a matrix.
1174                 this algorithm works for matrices with a fourth row of
1175                 (0,0,0,1). Commonly used for calculating normal transform
1176                 matrices.
1177 
1178                 This function is equivalent to the combination of
1179                 two functions MTXInverse + MTXTranspose.
1180 
1181 Arguments:      src       source matrix.
1182                 invx      destination (inverse-transpose) matrix.
1183                           ok if src == invx.
1184 
1185 Return:         0 if src is not invertible.
1186                 1 on success.
1187 
1188 *---------------------------------------------------------------------*/
1189 /*---------------------------------------------------------------------*
1190     C version
1191  *---------------------------------------------------------------------*/
C_MTXInvXpose(const Mtx src,Mtx invX)1192 u32 C_MTXInvXpose ( const Mtx src, Mtx invX )
1193 {
1194     Mtx mTmp;
1195     MtxPtr m;
1196     f32 det;
1197 
1198     ASSERTMSG( (src != 0), MTX_INVXPOSE_1 );
1199     ASSERTMSG( (invX != 0), MTX_INVXPOSE_2 );
1200 
1201     if( src == invX )
1202     {
1203         m = mTmp;
1204     }
1205     else
1206     {
1207         m = invX;
1208     }
1209 
1210     // compute the determinant of the upper 3x3 submatrix
1211     det =   src[0][0]*src[1][1]*src[2][2] + src[0][1]*src[1][2]*src[2][0] + src[0][2]*src[1][0]*src[2][1]
1212           - src[2][0]*src[1][1]*src[0][2] - src[1][0]*src[0][1]*src[2][2] - src[0][0]*src[2][1]*src[1][2];
1213 
1214     // check if matrix is singular
1215     if( det == 0.0f )
1216     {
1217         return 0;
1218     }
1219 
1220     // compute the inverse-transpose of the upper submatrix:
1221 
1222     // find the transposed matrix of cofactors of the upper submatrix
1223     // and multiply by (1/det)
1224 
1225     det = 1.0f / det;
1226 
1227     m[0][0] =  (src[1][1]*src[2][2] - src[2][1]*src[1][2]) * det;
1228     m[0][1] = -(src[1][0]*src[2][2] - src[2][0]*src[1][2]) * det;
1229     m[0][2] =  (src[1][0]*src[2][1] - src[2][0]*src[1][1]) * det;
1230 
1231     m[1][0] = -(src[0][1]*src[2][2] - src[2][1]*src[0][2]) * det;
1232     m[1][1] =  (src[0][0]*src[2][2] - src[2][0]*src[0][2]) * det;
1233     m[1][2] = -(src[0][0]*src[2][1] - src[2][0]*src[0][1]) * det;
1234 
1235     m[2][0] =  (src[0][1]*src[1][2] - src[1][1]*src[0][2]) * det;
1236     m[2][1] = -(src[0][0]*src[1][2] - src[1][0]*src[0][2]) * det;
1237     m[2][2] =  (src[0][0]*src[1][1] - src[1][0]*src[0][1]) * det;
1238 
1239 
1240     // the fourth columns should be all zero
1241     m[0][3] = 0.0F;
1242     m[1][3] = 0.0F;
1243     m[2][3] = 0.0F;
1244 
1245     // copy back if needed
1246     if( m == mTmp )
1247     {
1248         C_MTXCopy( mTmp,invX );
1249     }
1250 
1251     return 1;
1252 }
1253 
1254 /*---------------------------------------------------------------------*
1255     Paired-Single assembler version
1256  *---------------------------------------------------------------------*
1257             Note that this performs NO error checking.
1258             Results may be a little bit different from the C version
1259             because it doesn't perform exactly same calculation.
1260  *---------------------------------------------------------------------*/
1261 #ifdef GEKKO
PSMTXInvXpose(const register Mtx src,register Mtx invX)1262 asm u32 PSMTXInvXpose ( const register Mtx src, register Mtx invX )
1263 {
1264     nofralloc
1265 
1266     // fp0 [ 00 ][ 1.0F ] : Load
1267     psq_l fp0, 0( src ), 1, 0;
1268     // fp1 [ 01 ][ 02 ]   : Load
1269     psq_l   fp1, 4( src ), 0, 0;
1270     // fp2 [ 10 ][ 1.0F ] : Load
1271     psq_l   fp2, 16( src ), 1, 0;
1272     // fp6 [ 02 ][ 00 ]
1273     ps_merge10 fp6, fp1, fp0;
1274     // fp3 [ 11 ][ 12 ]   : Load
1275     psq_l   fp3, 20( src ), 0, 0;
1276     // fp4 [ 20 ][ 1.0F ] : Load
1277     psq_l   fp4, 32( src ), 1, 0;
1278     // fp7 [ 12 ][ 10 ]
1279     ps_merge10 fp7, fp3, fp2;
1280     // fp5 [ 21 ][ 22 ]   : Load
1281     psq_l   fp5, 36( src ), 0, 0;
1282     // fp11[ 11*02 ][ 00*12 ]
1283     ps_mul  fp11, fp3, fp6;
1284     // fp8 [ 22 ][ 20 ]
1285     ps_merge10 fp8, fp5, fp4;
1286     // fp13[ 21*12 ][ 10*22 ]
1287     ps_mul  fp13, fp5, fp7;
1288     // fp11[ 01*12 - 11*02 ][ 10*02 - 00*12 ]
1289     ps_msub fp11, fp1, fp7, fp11;
1290     // fp12[ 01*22 ][ 20*02 ]
1291     ps_mul  fp12, fp1, fp8;
1292     // fp13[ 11*22 - 21*12 ][ 20*12 - 10*22 ]
1293     ps_msub fp13, fp3, fp8, fp13;
1294     // fp10[ 20*11 ][ N/A ]
1295     ps_mul  fp10, fp3, fp4;
1296     // fp12[ 21*02 - 01*22 ][ 00*22 - 20*02 ]
1297     ps_msub fp12, fp5, fp6, fp12;
1298     // fp7 [ 00*(11*22-21*12) ][ N/A ]
1299     ps_mul  fp7, fp0, fp13;
1300     // fp9 [ 00*21 ][ N/A ]
1301     ps_mul  fp9, fp0, fp5;
1302     // fp8 [ 10*01 ][ N/A ]
1303     ps_mul  fp8, fp1, fp2;
1304     // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) ][ N/A ]
1305     ps_madd fp7, fp2, fp12, fp7;
1306     // fp6 [ 0.0F ][ 0.0F ]
1307     ps_sub  fp6, fp6, fp6;
1308     // fp10[ 10*21 - 20*11 ][ N/A ]
1309     ps_msub fp10, fp2, fp5, fp10;
1310     // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) + 20*(01*12-11*02) ][ N/A ] : det
1311     ps_madd fp7, fp4, fp11, fp7;
1312     // fp9 [ 20*01 - 00*21 ][ N/A ]
1313     ps_msub fp9, fp1, fp4, fp9;
1314     // fp8 [ 00*11 - 10*01 ][ N/A ]
1315     ps_msub fp8, fp0, fp3, fp8;
1316 
1317     // ( det == 0 ) ?
1318     ps_cmpo0 cr0, fp7, fp6;
1319     //bne     _regular;
1320     bne     _regular;
1321 
1322     // return value (singular)
1323     addi    r3, 0, 0;
1324 
1325     blr;
1326 
1327   _regular:
1328 
1329     // fp0 [ 1/det ][ N/A ]
1330     fres fp0, fp7;
1331 
1332         psq_st   fp6, 12(invX),1, 0
1333 
1334     // Newton's approximation
1335     // Refinement : ( E = est. of 1/K ) -> ( E' = ( 2 - K * E ) * E )
1336     ps_add      fp4, fp0, fp0
1337     ps_mul      fp5, fp7, fp0
1338       psq_st   fp6, 28(invX),1, 0
1339     ps_nmsub    fp0, fp0, fp5, fp4
1340       psq_st   fp6, 44(invX),1, 0
1341 
1342     // fp13[ ( 11*22 - 21*12 ) * rdet ][ ( 20*12 - 10*22 ) * rdet ] : ix[0][0], ix[0][1]
1343     ps_muls0 fp13, fp13, fp0;
1344     // fp12[ ( 21*02 - 01*22 ) * rdet ][ ( 00*22 - 20*02 ) * rdet ] : ix[1][0], ix[1][1]
1345     ps_muls0 fp12, fp12, fp0;
1346     // [ ix00 ][ ix01 ] : Store fp13
1347     psq_st  fp13, 0( invX ), 0, 0;
1348     // fp11[ ( 01*12 - 11*02 ) * rdet ][ ( 10*02 - 00*12 ) * rdet ] : ix[2][0], ix[2][1]
1349     ps_muls0 fp11, fp11, fp0;
1350     // [ ix10 ][ ix11 ] : Store fp12
1351     psq_st  fp12, 16( invX ), 0, 0;
1352     // fp10[ ( 10*21 - 20*11 ) * rdet ] : i[0][2]
1353     ps_muls0 fp10, fp10, fp0;
1354     // [ ix20 ][ ix21 ] : Store fp11
1355     psq_st  fp11, 32( invX ), 0, 0;
1356     // fp9 [ ( 20*01 - 00*21 ) * rdet ] : i[1][2]
1357     ps_muls0 fp9, fp9, fp0;
1358     // [ ix02 ]         : Store fp10
1359     psq_st  fp10, 8( invX ), 1, 0;
1360     // fp8 [ ( 00*11 - 10*01 ) * rdet ] : i[2][2]
1361     ps_muls0 fp8, fp8, fp0;
1362     // [ ix12 ]         : Store fp9
1363     psq_st  fp9, 24( invX ), 1, 0;
1364     // [ ix22 ]         : Store fp8
1365     psq_st  fp8, 40( invX ), 1, 0;
1366 
1367     // return value (regular)
1368     addi    r3, 0, 1;
1369 
1370     blr;
1371 }
1372 #endif
1373 
1374 /*---------------------------------------------------------------------*
1375 
1376 
1377 
1378 
1379 
1380                              MODEL SECTION
1381 
1382 
1383 
1384 
1385 
1386 *---------------------------------------------------------------------*/
1387 
1388 
1389 
1390 
1391 
1392 /*---------------------------------------------------------------------*
1393 
1394 Name:           MTXRotDeg
1395 
1396 Description:    sets a rotation matrix about one of the X, Y or Z axes
1397 
1398 Arguments:      m       matrix to be set
1399 
1400                 axis    major axis about which to rotate.
1401                         axis is passed in as a character.
1402                         it must be one of 'X', 'x', 'Y', 'y', 'Z', 'z'
1403 
1404                 deg     rotation angle in degrees.
1405 
1406                         note:  counter-clockwise rotation is positive.
1407 
1408 Return:         none
1409 
1410 *---------------------------------------------------------------------*/
1411 // The function is defined as a macro in mtx.h
1412 /*
1413 void MTXRotDeg ( Mtx m, char axis, f32 deg )
1414 {
1415     MTXRotRad( m, axis, MTXDegToRad( deg ) );
1416 }
1417 */
1418 
1419 /*---------------------------------------------------------------------*
1420 
1421 Name:           MTXRotRad
1422 
1423 Description:    sets a rotation matrix about one of the X, Y or Z axes
1424 
1425 Arguments:      m       matrix to be set
1426 
1427                 axis    major axis about which to rotate.
1428                         axis is passed in as a character.
1429                         it must be one of 'X', 'x', 'Y', 'y', 'Z', 'z'
1430 
1431                 deg     rotation angle in radians.
1432 
1433                         note:  counter-clockwise rotation is positive.
1434 
1435 Return:         none
1436 
1437 *---------------------------------------------------------------------*/
1438 /*---------------------------------------------------------------------*
1439     C version
1440  *---------------------------------------------------------------------*/
C_MTXRotRad(Mtx m,char axis,f32 rad)1441 void C_MTXRotRad ( Mtx m, char axis, f32 rad )
1442 {
1443 
1444     f32 sinA, cosA;
1445 
1446     ASSERTMSG( (m != 0), MTX_ROTRAD_1 );
1447 
1448     // verification of "axis" will occur in MTXRotTrig
1449 
1450     sinA = sinf(rad);
1451     cosA = cosf(rad);
1452 
1453     C_MTXRotTrig( m, axis, sinA, cosA );
1454 }
1455 
1456 /*---------------------------------------------------------------------*
1457     Paired-Single assembler version
1458  *---------------------------------------------------------------------*
1459                 Note that this performs NO error checking.
1460  *---------------------------------------------------------------------*/
1461 #ifdef GEKKO
PSMTXRotRad(Mtx m,char axis,f32 rad)1462 void PSMTXRotRad ( Mtx m, char axis, f32 rad )
1463 {
1464     f32 sinA, cosA;
1465 
1466     sinA = sinf(rad);
1467     cosA = cosf(rad);
1468 
1469     PSMTXRotTrig( m, axis, sinA, cosA );
1470 }
1471 #endif // GEKKO
1472 
1473 /*---------------------------------------------------------------------*
1474 
1475 Name:           MTXRotTrig
1476 
1477 Description:    sets a rotation matrix about one of the X, Y or Z axes
1478                 from specified trig ratios
1479 
1480 Arguments:      m       matrix to be set
1481 
1482                 axis    major axis about which to rotate.
1483                         axis is passed in as a character.
1484                         It must be one of 'X', 'x', 'Y', 'y', 'Z', 'z'
1485 
1486                 sinA    sine of rotation angle.
1487 
1488                 cosA    cosine of rotation angle.
1489 
1490                         note:  counter-clockwise rotation is positive.
1491 
1492 Return:         none
1493 
1494 *---------------------------------------------------------------------*/
1495 /*---------------------------------------------------------------------*
1496     C version
1497  *---------------------------------------------------------------------*/
C_MTXRotTrig(Mtx m,char axis,f32 sinA,f32 cosA)1498 void C_MTXRotTrig ( Mtx m, char axis, f32 sinA, f32 cosA )
1499 {
1500 
1501     ASSERTMSG( (m != 0), MTX_ROTTRIG_1 );
1502 
1503     switch(axis)
1504     {
1505 
1506     case 'x':
1507     case 'X':
1508         m[0][0] =  1.0f;  m[0][1] =  0.0f;    m[0][2] =  0.0f;  m[0][3] = 0.0f;
1509         m[1][0] =  0.0f;  m[1][1] =  cosA;    m[1][2] = -sinA;  m[1][3] = 0.0f;
1510         m[2][0] =  0.0f;  m[2][1] =  sinA;    m[2][2] =  cosA;  m[2][3] = 0.0f;
1511         break;
1512 
1513     case 'y':
1514     case 'Y':
1515         m[0][0] =  cosA;  m[0][1] =  0.0f;    m[0][2] =  sinA;  m[0][3] = 0.0f;
1516         m[1][0] =  0.0f;  m[1][1] =  1.0f;    m[1][2] =  0.0f;  m[1][3] = 0.0f;
1517         m[2][0] = -sinA;  m[2][1] =  0.0f;    m[2][2] =  cosA;  m[2][3] = 0.0f;
1518         break;
1519 
1520     case 'z':
1521     case 'Z':
1522         m[0][0] =  cosA;  m[0][1] = -sinA;    m[0][2] =  0.0f;  m[0][3] = 0.0f;
1523         m[1][0] =  sinA;  m[1][1] =  cosA;    m[1][2] =  0.0f;  m[1][3] = 0.0f;
1524         m[2][0] =  0.0f;  m[2][1] =  0.0f;    m[2][2] =  1.0f;  m[2][3] = 0.0f;
1525         break;
1526 
1527     default:
1528         ASSERTMSG( 0, MTX_ROTTRIG_2 );
1529         break;
1530 
1531     }
1532 
1533 }
1534 
1535 /*---------------------------------------------------------------------*
1536     Paired-Single assembler version
1537  *---------------------------------------------------------------------*
1538                 Note that this performs NO error checking.
1539  *---------------------------------------------------------------------*/
1540 #ifdef GEKKO
PSMTXRotTrig(register Mtx m,register char axis,register f32 sinA,register f32 cosA)1541 void PSMTXRotTrig (
1542     register Mtx    m,
1543     register char   axis,
1544     register f32    sinA,
1545     register f32    cosA )
1546 {
1547     register f32    fc0, fc1, nsinA;
1548     register f32    fw0, fw1, fw2, fw3;
1549 
1550     asm
1551     {
1552         frsp        sinA, sinA      // to make sure sinA = single precision
1553         frsp        cosA, cosA      // to make sure cosA = single precision
1554     }
1555 
1556     fc0 = 0.0F;
1557     fc1 = 1.0F;
1558 
1559     asm
1560     {
1561         // always lower case
1562         ori         axis, axis, 0x20
1563         ps_neg      nsinA, sinA
1564 
1565         // branches
1566         cmplwi      axis, 'x'
1567         beq         _case_x
1568         cmplwi      axis, 'y'
1569         beq         _case_y
1570         cmplwi      axis, 'z'
1571         beq         _case_z
1572         b           _end
1573 
1574     _case_x:
1575         psq_st      fc1,  0(m), 1, 0
1576         psq_st      fc0,  4(m), 0, 0
1577         ps_merge00  fw0, sinA, cosA
1578         psq_st      fc0, 12(m), 0, 0
1579         ps_merge00  fw1, cosA, nsinA
1580         psq_st      fc0, 28(m), 0, 0
1581         psq_st      fc0, 44(m), 1, 0
1582         psq_st      fw0, 36(m), 0, 0
1583         psq_st      fw1, 20(m), 0, 0
1584         b           _end;
1585 
1586     _case_y:
1587         ps_merge00  fw0, cosA, fc0
1588         ps_merge00  fw1, fc0, fc1
1589         psq_st      fc0, 24(m), 0, 0
1590         psq_st      fw0,  0(m), 0, 0
1591         ps_merge00  fw2, nsinA, fc0
1592         ps_merge00  fw3, sinA, fc0
1593         psq_st      fw0, 40(m), 0, 0;
1594         psq_st      fw1, 16(m), 0, 0;
1595         psq_st      fw3,  8(m), 0, 0;
1596         psq_st      fw2, 32(m), 0, 0;
1597         b           _end;
1598 
1599     _case_z:
1600         psq_st      fc0,  8(m), 0, 0
1601         ps_merge00  fw0, sinA, cosA
1602         ps_merge00  fw2, cosA, nsinA
1603         psq_st      fc0, 24(m), 0, 0
1604         psq_st      fc0, 32(m), 0, 0
1605         ps_merge00  fw1, fc1, fc0
1606         psq_st      fw0, 16(m), 0, 0
1607         psq_st      fw2,  0(m), 0, 0
1608         psq_st      fw1, 40(m), 0, 0
1609 
1610     _end:
1611 
1612     }
1613 }
1614 #endif // GEKKO
1615 
1616 /*---------------------------------------------------------------------*
1617 
1618 Name:           MTXRotAxisDeg
1619 
1620 Description:    sets a rotation matrix about an arbitrary axis
1621 
1622 
1623 Arguments:      m       matrix to be set
1624 
1625                 axis    ptr to a vector containing the x,y,z axis
1626                         components.
1627                         axis does not have to be a unit vector.
1628 
1629                 deg     rotation angle in degrees.
1630 
1631                         note:  counter-clockwise rotation is positive.
1632 
1633 Return:         none
1634 
1635 *---------------------------------------------------------------------*/
1636 // The function is defined as a macro in mtx.h
1637 /*
1638 void MTXRotAxisDeg ( Mtx m, Vec *axis, f32 deg )
1639 {
1640     MTXRotAxisRad( m, axis, MTXDegToRad( deg ) );
1641 }
1642 */
1643 
1644 /*---------------------------------------------------------------------*
1645 
1646 Name:           MTXRotAxisRad
1647 
1648 Description:    sets a rotation matrix about an arbitrary axis
1649 
1650 
1651 Arguments:      m       matrix to be set
1652 
1653                 axis    ptr to a vector containing the x,y,z axis
1654                         components.
1655                         axis does not have to be a unit vector.
1656 
1657                 deg     rotation angle in radians.
1658 
1659                         note:  counter-clockwise rotation is positive.
1660 
1661 Return:         none
1662 
1663 *---------------------------------------------------------------------*/
1664 /*---------------------------------------------------------------------*
1665     C version
1666  *---------------------------------------------------------------------*/
C_MTXRotAxisRad(Mtx m,const Vec * axis,f32 rad)1667 void C_MTXRotAxisRad( Mtx m, const Vec *axis, f32 rad )
1668 {
1669     Vec vN;
1670     f32 s, c;             // sinTheta, cosTheta
1671     f32 t;                // ( 1 - cosTheta )
1672     f32 x, y, z;          // x, y, z components of normalized axis
1673     f32 xSq, ySq, zSq;    // x, y, z squared
1674 
1675 
1676     ASSERTMSG( (m    != 0), MTX_ROTAXIS_1  );
1677     ASSERTMSG( (axis != 0), MTX_ROTAXIS_2  );
1678 
1679 
1680     s = sinf(rad);
1681     c = cosf(rad);
1682     t = 1.0f - c;
1683 
1684     C_VECNormalize( axis, &vN );
1685 
1686     x = vN.x;
1687     y = vN.y;
1688     z = vN.z;
1689 
1690     xSq = x * x;
1691     ySq = y * y;
1692     zSq = z * z;
1693 
1694     m[0][0] = ( t * xSq )   + ( c );
1695     m[0][1] = ( t * x * y ) - ( s * z );
1696     m[0][2] = ( t * x * z ) + ( s * y );
1697     m[0][3] =    0.0f;
1698 
1699     m[1][0] = ( t * x * y ) + ( s * z );
1700     m[1][1] = ( t * ySq )   + ( c );
1701     m[1][2] = ( t * y * z ) - ( s * x );
1702     m[1][3] =    0.0f;
1703 
1704     m[2][0] = ( t * x * z ) - ( s * y );
1705     m[2][1] = ( t * y * z ) + ( s * x );
1706     m[2][2] = ( t * zSq )   + ( c );
1707     m[2][3] =    0.0f;
1708 
1709 }
1710 
1711 /*---------------------------------------------------------------------*
1712     Paired-Single assembler version
1713  *---------------------------------------------------------------------*
1714                 Note that this performs NO error checking.
1715  *---------------------------------------------------------------------*/
1716 #ifdef GEKKO
1717 
__PSMTXRotAxisRadInternal(register Mtx m,const register Vec * axis,register f32 sT,register f32 cT)1718 static void __PSMTXRotAxisRadInternal(
1719     register Mtx    m,
1720     const register Vec *axis,
1721     register f32    sT,
1722     register f32    cT )
1723 {
1724     register f32    tT, fc0;
1725     register f32    tmp0, tmp1, tmp2, tmp3, tmp4;
1726     register f32    tmp5, tmp6, tmp7, tmp8, tmp9;
1727 
1728     tmp9 = 0.5F;
1729     tmp8 = 3.0F;
1730 
1731     asm
1732     {
1733         // to make sure cT = (single precision float value)
1734         frsp        cT, cT
1735         // tmp0 = [x][y] : LOAD
1736         psq_l       tmp0, 0(axis), 0, 0
1737         // to make sure sT = (single precision float value)
1738         frsp        sT, sT
1739         // tmp1 = [z][z] : LOAD
1740         lfs         tmp1, 8(axis)
1741 
1742         // tmp2 = [x*x][y*y]
1743         ps_mul      tmp2, tmp0, tmp0
1744         // tmp7 = [1.0F]
1745         fadds       tmp7, tmp9, tmp9
1746         // tmp3 = [x*x+z*z][y*y+z*z]
1747         ps_madd     tmp3, tmp1, tmp1, tmp2
1748         // fc0 = [0.0F]
1749         fsubs       fc0, tmp9, tmp9
1750         // tmp4 = [S = x*x+y*y+z*z][z]
1751         ps_sum0     tmp4, tmp3, tmp1, tmp2
1752 
1753         // tT = 1.0F - cT
1754         fsubs       tT, tmp7, cT
1755 
1756         // tmp5 = [1.0/sqrt(S)] :estimation[E]
1757         frsqrte     tmp5, tmp4
1758         // Newton-Rapson refinement step
1759         // E' = E/2(3.0 - E*E*S)
1760         fmuls       tmp2, tmp5, tmp5            // E*E
1761         fmuls       tmp3, tmp5, tmp9            // E/2
1762         fnmsubs     tmp2, tmp2, tmp4, tmp8      // (3-E*E*S)
1763         fmuls       tmp5, tmp2, tmp3            // (E/2)(3-E*E*S)
1764 
1765         // cT = [c][c]
1766         ps_merge00  cT, cT, cT
1767 
1768         // tmp0 = [nx = x/sqrt(S)][ny = y/sqrt(S)]
1769         ps_muls0    tmp0, tmp0, tmp5
1770         // tmp1 = [nz = z/sqrt(S)][nz = z/sqrt(S)]
1771         ps_muls0    tmp1, tmp1, tmp5
1772 
1773         // tmp4 = [t*nx][t*ny]
1774         ps_muls0    tmp4, tmp0, tT
1775         // tmp9 = [s*nx][s*ny]
1776         ps_muls0    tmp9, tmp0, sT
1777         // tmp5 = [t*nz][t*nz]
1778         ps_muls0    tmp5, tmp1, tT
1779 
1780         // tmp3 = [t*nx*ny][t*ny*ny]
1781         ps_muls1    tmp3, tmp4, tmp0
1782         // tmp2 = [t*nx*nx][t*ny*nx]
1783         ps_muls0    tmp2, tmp4, tmp0
1784         // tmp4 = [t*nx*nz][t*ny*nz]
1785         ps_muls0    tmp4, tmp4, tmp1
1786 
1787         // tmp6 = [t*nx*ny-s*nz][t*nx*ny-s*nz]
1788         fnmsubs     tmp6, tmp1, sT, tmp3
1789         // tmp7 = [t*nx*ny+s*nz][t*ny*ny+s*nz]
1790         fmadds      tmp7, tmp1, sT, tmp3
1791 
1792         // tmp0 = [-s*nx][-s*ny]
1793         ps_neg      tmp0, tmp9
1794         // tmp8 = [t*nx*nz+s*ny][0] == [m02][m03]
1795         ps_sum0     tmp8, tmp4, fc0, tmp9
1796         // tmp2 = [t*nx*nx+c][t*nx*ny-s*nz] == [m00][m01]
1797         ps_sum0     tmp2, tmp2, tmp6, cT
1798         // tmp3 = [t*nx*ny+s*nz][t*ny*ny+c] == [m10][m11]
1799         ps_sum1     tmp3, cT, tmp7, tmp3
1800         // tmp6 = [t*ny*nz-s*nx][0] == [m12][m13]
1801         ps_sum0     tmp6, tmp0, fc0 ,tmp4
1802 
1803             // tmp8 [m02][m03] : STORE
1804             psq_st      tmp8, 8(m), 0, 0
1805         // tmp0 = [t*nx*nz-s*ny][t*ny*nz]
1806         ps_sum0     tmp0, tmp4, tmp4, tmp0
1807             // tmp2 [m00][m01] : STORE
1808             psq_st      tmp2, 0(m), 0, 0
1809         // tmp5 = [t*nz*nz][t*nz*nz]
1810         ps_muls0    tmp5, tmp5, tmp1
1811             // tmp3 [m10][m11] : STORE
1812             psq_st      tmp3, 16(m), 0, 0
1813         // tmp4 = [t*nx*nz-s*ny][t*ny*nz+s*nx] == [m20][m21]
1814         ps_sum1     tmp4, tmp9, tmp0, tmp4
1815             // tmp6 [m12][m13] : STORE
1816             psq_st      tmp6, 24(m), 0, 0
1817         // tmp5 = [t*nz*nz+c][0]   == [m22][m23]
1818         ps_sum0     tmp5, tmp5, fc0, cT
1819             // tmp4 [m20][m21] : STORE
1820             psq_st      tmp4, 32(m), 0, 0
1821             // tmp5 [m22][m23] : STORE
1822             psq_st      tmp5, 40(m), 0, 0
1823     }
1824 }
1825 
PSMTXRotAxisRad(Mtx m,const Vec * axis,f32 rad)1826 void PSMTXRotAxisRad(
1827     Mtx             m,
1828     const Vec      *axis,
1829     f32             rad )
1830 {
1831     f32     sinT, cosT;
1832 
1833     sinT = sinf(rad);
1834     cosT = cosf(rad);
1835 
1836     __PSMTXRotAxisRadInternal(m, axis, sinT, cosT);
1837 }
1838 
1839 #endif // GEKKO
1840 
1841 /*---------------------------------------------------------------------*
1842 
1843 Name:           MTXTrans
1844 
1845 Description:    sets a translation matrix.
1846 
1847 
1848 Arguments:       m        matrix to be set
1849 
1850                 xT        x component of translation.
1851 
1852                 yT        y component of translation.
1853 
1854                 zT        z component of translation.
1855 
1856 Return:         none
1857 
1858 *---------------------------------------------------------------------*/
1859 /*---------------------------------------------------------------------*
1860     C version
1861  *---------------------------------------------------------------------*/
C_MTXTrans(Mtx m,f32 xT,f32 yT,f32 zT)1862 void C_MTXTrans ( Mtx m, f32 xT, f32 yT, f32 zT )
1863 {
1864 
1865     ASSERTMSG( (m != 0), MTX_TRANS_1 );
1866 
1867 
1868     m[0][0] = 1.0f;  m[0][1] = 0.0f;  m[0][2] = 0.0f;  m[0][3] =  xT;
1869     m[1][0] = 0.0f;  m[1][1] = 1.0f;  m[1][2] = 0.0f;  m[1][3] =  yT;
1870     m[2][0] = 0.0f;  m[2][1] = 0.0f;  m[2][2] = 1.0f;  m[2][3] =  zT;
1871 
1872 }
1873 
1874 /*---------------------------------------------------------------------*
1875     Paired-Single assembler version
1876  *---------------------------------------------------------------------*
1877                 Note that this performs NO error checking.
1878  *---------------------------------------------------------------------*/
1879 #ifdef  GEKKO
PSMTXTrans(register Mtx m,register f32 xT,register f32 yT,register f32 zT)1880 void PSMTXTrans(
1881     register Mtx m,
1882     register f32 xT,
1883     register f32 yT,
1884     register f32 zT
1885 )
1886 {
1887     register f32 c0 = 0.0F;
1888     register f32 c1 = 1.0F;
1889 
1890     asm
1891     {
1892         stfs        xT,     12(m)
1893         stfs        yT,     28(m)
1894         psq_st      c0,      4(m), 0, 0
1895         psq_st      c0,     32(m), 0, 0
1896         stfs        c0,     16(m)
1897         stfs        c1,     20(m)
1898         stfs        c0,     24(m)
1899         stfs        c1,     40(m)
1900         stfs        zT,     44(m)
1901         stfs        c1,      0(m)
1902     }
1903 }
1904 #endif  // GEKKO
1905 
1906 /*---------------------------------------------------------------------*
1907 
1908 Name:           MTXTransApply
1909 
1910 Description:    This function performs the operation equivalent to
1911                 MTXTrans + MTXConcat.
1912 
1913 
1914 Arguments:      src       matrix to be operated.
1915 
1916                 dst       resultant matrix from concat.
1917 
1918                 xT        x component of translation.
1919 
1920                 yT        y component of translation.
1921 
1922                 zT        z component of translation.
1923 
1924 Return:         none
1925 
1926 *---------------------------------------------------------------------*/
1927 /*---------------------------------------------------------------------*
1928     C version
1929  *---------------------------------------------------------------------*/
C_MTXTransApply(const Mtx src,Mtx dst,f32 xT,f32 yT,f32 zT)1930 void C_MTXTransApply ( const Mtx src, Mtx dst, f32 xT, f32 yT, f32 zT )
1931 {
1932     ASSERTMSG( (src != 0), MTX_TRANSAPPLY_1 );
1933     ASSERTMSG( (dst != 0), MTX_TRANSAPPLY_1 );
1934 
1935     if ( src != dst )
1936     {
1937         dst[0][0] = src[0][0];    dst[0][1] = src[0][1];    dst[0][2] = src[0][2];
1938         dst[1][0] = src[1][0];    dst[1][1] = src[1][1];    dst[1][2] = src[1][2];
1939         dst[2][0] = src[2][0];    dst[2][1] = src[2][1];    dst[2][2] = src[2][2];
1940     }
1941 
1942     dst[0][3] = src[0][3] + xT;
1943     dst[1][3] = src[1][3] + yT;
1944     dst[2][3] = src[2][3] + zT;
1945 }
1946 
1947 /*---------------------------------------------------------------------*
1948     Paired-Single assembler version
1949  *---------------------------------------------------------------------*
1950                 Note that this performs NO error checking.
1951  *---------------------------------------------------------------------*/
1952 #ifdef  GEKKO
PSMTXTransApply(const register Mtx src,register Mtx dst,register f32 xT,register f32 yT,register f32 zT)1953 asm void PSMTXTransApply(
1954     const register Mtx src,
1955     register Mtx dst,
1956     register f32 xT,
1957     register f32 yT,
1958     register f32 zT )
1959 {
1960     nofralloc;
1961     psq_l       fp4, 0(src),        0, 0;
1962     frsp        xT, xT;                     // to make sure xT = single precision
1963     psq_l       fp5, 8(src),        0, 0;
1964     frsp        yT, yT;                     // to make sure yT = single precision
1965     psq_l       fp7, 24(src),       0, 0;
1966     frsp        zT, zT;                     // to make sure zT = single precision
1967     psq_l       fp8, 40(src),       0, 0;
1968     psq_st      fp4, 0(dst),        0, 0;
1969     ps_sum1     fp5, xT, fp5, fp5;
1970     psq_l       fp6, 16(src),       0, 0;
1971     psq_st      fp5, 8(dst),        0, 0;
1972     ps_sum1     fp7, yT, fp7, fp7;
1973     psq_l       fp9, 32(src),       0, 0;
1974     psq_st      fp6, 16(dst),       0, 0;
1975     ps_sum1     fp8, zT, fp8, fp8;
1976     psq_st      fp7, 24(dst),       0, 0;
1977     psq_st      fp9, 32(dst),       0, 0;
1978     psq_st      fp8, 40(dst),       0, 0;
1979     blr;
1980 }
1981 #endif  // GEKKO
1982 
1983 
1984 /*---------------------------------------------------------------------*
1985 
1986 Name:            MTXScale
1987 
1988 Description:     sets a scaling matrix.
1989 
1990 
1991 Arguments:       m        matrix to be set
1992 
1993                 xS        x scale factor.
1994 
1995                 yS        y scale factor.
1996 
1997                 zS        z scale factor.
1998 
1999 Return:         none
2000 
2001  *---------------------------------------------------------------------*/
2002 /*---------------------------------------------------------------------*
2003     C version
2004  *---------------------------------------------------------------------*/
C_MTXScale(Mtx m,f32 xS,f32 yS,f32 zS)2005 void C_MTXScale ( Mtx m, f32 xS, f32 yS, f32 zS )
2006 {
2007     ASSERTMSG( (m != 0), MTX_SCALE_1 );
2008 
2009 
2010     m[0][0] = xS;    m[0][1] = 0.0f;  m[0][2] = 0.0f;  m[0][3] = 0.0f;
2011     m[1][0] = 0.0f;  m[1][1] = yS;    m[1][2] = 0.0f;  m[1][3] = 0.0f;
2012     m[2][0] = 0.0f;  m[2][1] = 0.0f;  m[2][2] = zS;    m[2][3] = 0.0f;
2013 }
2014 
2015 /*---------------------------------------------------------------------*
2016     Paired-Single assembler version
2017  *---------------------------------------------------------------------*
2018                 Note that this performs NO error checking.
2019  *---------------------------------------------------------------------*/
2020 #ifdef  GEKKO
PSMTXScale(register Mtx m,register f32 xS,register f32 yS,register f32 zS)2021 void PSMTXScale(
2022     register Mtx m,
2023     register f32 xS,
2024     register f32 yS,
2025     register f32 zS
2026 )
2027 {
2028     register f32 c0 = 0.0F;
2029 
2030     asm
2031     {
2032         stfs        xS,      0(m)
2033         psq_st      c0,      4(m), 0, 0
2034         psq_st      c0,     12(m), 0, 0
2035         stfs        yS,     20(m)
2036         psq_st      c0,     24(m), 0, 0
2037         psq_st      c0,     32(m), 0, 0
2038         stfs        zS,     40(m)
2039         stfs        c0,     44(m)
2040     }
2041 }
2042 #endif  // GEKKO
2043 
2044 /*---------------------------------------------------------------------*
2045 
2046 Name:           MTXScaleApply
2047 
2048 Description:    This function performs the operation equivalent to
2049                 MTXScale + MTXConcat
2050 
2051 Arguments:      src       matrix to be operated.
2052 
2053                 dst       resultant matrix from concat.
2054 
2055                 xS        x scale factor.
2056 
2057                 yS        y scale factor.
2058 
2059                 zS        z scale factor.
2060 
2061 Return:         none
2062 
2063  *---------------------------------------------------------------------*/
2064 /*---------------------------------------------------------------------*
2065     C version
2066  *---------------------------------------------------------------------*/
C_MTXScaleApply(const Mtx src,Mtx dst,f32 xS,f32 yS,f32 zS)2067 void C_MTXScaleApply ( const Mtx src, Mtx dst, f32 xS, f32 yS, f32 zS )
2068 {
2069     ASSERTMSG( (src != 0), MTX_SCALEAPPLY_1 );
2070     ASSERTMSG( (dst != 0), MTX_SCALEAPPLY_2 );
2071 
2072     dst[0][0] = src[0][0] * xS;     dst[0][1] = src[0][1] * xS;
2073     dst[0][2] = src[0][2] * xS;     dst[0][3] = src[0][3] * xS;
2074 
2075     dst[1][0] = src[1][0] * yS;     dst[1][1] = src[1][1] * yS;
2076     dst[1][2] = src[1][2] * yS;     dst[1][3] = src[1][3] * yS;
2077 
2078     dst[2][0] = src[2][0] * zS;     dst[2][1] = src[2][1] * zS;
2079     dst[2][2] = src[2][2] * zS;     dst[2][3] = src[2][3] * zS;
2080 }
2081 
2082 /*---------------------------------------------------------------------*
2083     Paired-Single assembler version
2084  *---------------------------------------------------------------------*
2085                 Note that this performs NO error checking.
2086  *---------------------------------------------------------------------*/
2087 #ifdef  GEKKO
PSMTXScaleApply(const register Mtx src,register Mtx dst,register f32 xS,register f32 yS,register f32 zS)2088 asm void PSMTXScaleApply (
2089     const register Mtx src,
2090     register Mtx dst,
2091     register f32 xS,
2092     register f32 yS,
2093     register f32 zS )
2094 {
2095     nofralloc;
2096     frsp        xS, xS;                     // to make sure xS = single precision
2097     psq_l       fp4, 0(src),        0, 0;
2098     frsp        yS, yS;                     // to make sure yS = single precision
2099     psq_l       fp5, 8(src),        0, 0;
2100     frsp        zS, zS;                     // to make sure zS = single precision
2101     ps_muls0    fp4, fp4, xS;
2102     psq_l       fp6, 16(src),       0, 0;
2103     ps_muls0    fp5, fp5, xS;
2104     psq_l       fp7, 24(src),       0, 0;
2105     ps_muls0    fp6, fp6, yS;
2106     psq_l       fp8, 32(src),       0, 0;
2107     psq_st      fp4, 0(dst),        0, 0;
2108     ps_muls0    fp7, fp7, yS;
2109     psq_l       fp2, 40(src),       0, 0;
2110     psq_st      fp5, 8(dst),        0, 0;
2111     ps_muls0    fp8, fp8, zS;
2112     psq_st      fp6, 16(dst),       0, 0;
2113     ps_muls0    fp2, fp2, zS;
2114     psq_st      fp7, 24(dst),       0, 0;
2115     psq_st      fp8, 32(dst),       0, 0;
2116     psq_st      fp2, 40(dst),       0, 0;
2117     blr;
2118 }
2119 #endif  // GEKKO
2120 
2121 /*---------------------------------------------------------------------*
2122 
2123 Name:            MTXQuat
2124 
2125 Description:     sets a rotation matrix from a quaternion.
2126 
2127 
2128 Arguments:       m        matrix to be set
2129 
2130                  q        ptr to quaternion.
2131 
2132 Return:          none
2133 
2134  *---------------------------------------------------------------------*/
2135 /*---------------------------------------------------------------------*
2136     C version
2137  *---------------------------------------------------------------------*/
C_MTXQuat(Mtx m,const Quaternion * q)2138 void C_MTXQuat ( Mtx m, const Quaternion *q )
2139 {
2140 
2141     f32 s,xs,ys,zs,wx,wy,wz,xx,xy,xz,yy,yz,zz;
2142 
2143 
2144     ASSERTMSG( (m != 0),                         MTX_QUAT_1     );
2145     ASSERTMSG( (q != 0),                         MTX_QUAT_2     );
2146     ASSERTMSG( ( q->x || q->y || q->z || q->w ), MTX_QUAT_3     );
2147 
2148 
2149     s = 2.0f / ( (q->x * q->x) + (q->y * q->y) + (q->z * q->z) + (q->w * q->w) );
2150 
2151     xs = q->x *  s;     ys = q->y *  s;  zs = q->z *  s;
2152     wx = q->w * xs;     wy = q->w * ys;  wz = q->w * zs;
2153     xx = q->x * xs;     xy = q->x * ys;  xz = q->x * zs;
2154     yy = q->y * ys;     yz = q->y * zs;  zz = q->z * zs;
2155 
2156 
2157     m[0][0] = 1.0f - (yy + zz);
2158     m[0][1] = xy   - wz;
2159     m[0][2] = xz   + wy;
2160     m[0][3] = 0.0f;
2161 
2162     m[1][0] = xy   + wz;
2163     m[1][1] = 1.0f - (xx + zz);
2164     m[1][2] = yz   - wx;
2165     m[1][3] = 0.0f;
2166 
2167     m[2][0] = xz   - wy;
2168     m[2][1] = yz   + wx;
2169     m[2][2] = 1.0f - (xx + yy);
2170     m[2][3] = 0.0f;
2171 
2172 }
2173 
2174 /*---------------------------------------------------------------------*
2175     Paired-Single assembler version
2176  *---------------------------------------------------------------------*
2177                 Note that this performs NO error checking.
2178  *---------------------------------------------------------------------*/
2179 #ifdef GEKKO
PSMTXQuat(register Mtx m,const register Quaternion * q)2180 void PSMTXQuat ( register Mtx m, const register Quaternion *q )
2181 {
2182     register f32    c_zero, c_one, c_two, scale;
2183     register f32    tmp0, tmp1, tmp2, tmp3, tmp4;
2184     register f32    tmp5, tmp6, tmp7, tmp8, tmp9;
2185 
2186     c_one = 1.0F;
2187 
2188     asm
2189     {
2190         // tmp0 = [qx][qy] : LOAD
2191         psq_l       tmp0, 0(q), 0, 0
2192         // tmp1 = [qz][qw] : LOAD
2193         psq_l       tmp1, 8(q), 0, 0
2194         // c_zero = [0.0F][0.0F]
2195         fsubs       c_zero, c_one, c_one
2196         // c_two  = [2.0F][2.0F]
2197         fadds       c_two, c_one, c_one
2198         // tmp2 = [qx*qx][qy*qy]
2199         ps_mul      tmp2, tmp0, tmp0
2200         // tmp5 = [qy][qx]
2201         ps_merge10  tmp5, tmp0, tmp0
2202         // tmp4 = [qx*qx+qz*qz][qy*qy+qw*qw]
2203         ps_madd     tmp4, tmp1, tmp1, tmp2
2204         // tmp3 = [qz*qz][qw*qw]
2205         ps_mul      tmp3, tmp1, tmp1
2206             // scale = [qx*qx+qy*qy+qz*qz+qw*qw][?]
2207             ps_sum0     scale, tmp4, tmp4, tmp4
2208         // tmp7 = [qy*qw][qx*qw]
2209         ps_muls1    tmp7, tmp5, tmp1
2210             // Newton-Rapson refinment (1/X) : E' = 2E-X*E*E
2211             // tmp9 = [E = Est.(1/X)]
2212             fres        tmp9, scale
2213         // tmp4 = [qx*qx+qz*qz][qy*qy+qz*qz]
2214         ps_sum1     tmp4, tmp3, tmp4, tmp2
2215             // scale = [2-X*E]
2216             ps_nmsub    scale, scale, tmp9, c_two
2217         // tmp6 = [qz*qw][?]
2218         ps_muls1    tmp6, tmp1, tmp1
2219             // scale = [E(2-scale*E) = E']
2220             ps_mul      scale, tmp9, scale
2221         // tmp2 = [qx*qx+qy*qy]
2222         ps_sum0     tmp2, tmp2, tmp2, tmp2
2223             // scale = [s = 2E' = 2.0F/(qx*qx+qy*qy+qz*qz+qw*qw)]
2224             fmuls       scale, scale, c_two
2225         // tmp8 = [qx*qy+qz*qw][?]
2226         ps_madd     tmp8, tmp0, tmp5, tmp6
2227         // tmp6 = [qx*qy-qz*qw][?]
2228         ps_msub     tmp6, tmp0, tmp5, tmp6
2229             // c_zero [m03] : STORE
2230             psq_st      c_zero, 12(m), 1, 0
2231         // tmp2 = [1-s(qx*qx+qy*qy)]   : [m22]
2232         ps_nmsub    tmp2, tmp2, scale, c_one
2233         // tmp4 = [1-s(qx*qx+qz*qz)][1-s(qy*qy+qz*qz)] : [m11][m00]
2234         ps_nmsub    tmp4, tmp4, scale, c_one
2235             // c_zero [m23] : STORE
2236             psq_st      c_zero, 44(m), 1, 0
2237         // tmp8 = [s(qx*qy+qz*qw)][?]  : [m10]
2238         ps_mul      tmp8, tmp8, scale
2239         // tmp6 = [s(qx*qy-qz*qw)][?]  : [m01]
2240         ps_mul      tmp6, tmp6, scale
2241             // tmp2 [m22] : STORE
2242             psq_st      tmp2, 40(m), 1, 0
2243         // tmp5 = [qx*qz+qy*qw][qy*qz+qx*qw]
2244         ps_madds0   tmp5, tmp0, tmp1, tmp7
2245         // tmp1 = [m10][m11]
2246         ps_merge00  tmp1, tmp8, tmp4
2247         // tmp7 = [qx*qz-qy*qw][qy*qz-qx*qw]
2248         ps_nmsub    tmp7, tmp7, c_two, tmp5
2249         // tmp0 = [m00][m01]
2250         ps_merge10  tmp0, tmp4, tmp6
2251             // tmp1 [m10][m11] : STORE
2252             psq_st      tmp1, 16(m), 0, 0
2253         // tmp5 = [s(qx*qz+qy*qw)][s(qy*qz+qx*qw)] : [m02][m21]
2254         ps_mul      tmp5, tmp5, scale
2255         // tmp7 = [s(qx*qz-qy*qw)][s(qy*qz-qx*qw)] : [m20][m12]
2256         ps_mul      tmp7, tmp7, scale
2257             // tmp0 [m00][m01] : STORE
2258             psq_st      tmp0,  0(m), 0, 0
2259             // tmp5 [m02] : STORE
2260             psq_st      tmp5,  8(m), 1, 0
2261         // tmp3 = [m12][m13]
2262         ps_merge10  tmp3, tmp7, c_zero
2263         // tmp9 = [m20][m21]
2264         ps_merge01  tmp9, tmp7, tmp5
2265             // tmp3 [m12][m13] : STORE
2266             psq_st      tmp3, 24(m), 0, 0
2267             // tmp9 [m20][m21] : STORE
2268             psq_st      tmp9, 32(m), 0, 0
2269 
2270     }
2271 }
2272 #endif // GEKKO
2273 
2274 /*---------------------------------------------------------------------*
2275 
2276 Name:           MTXReflect
2277 
2278 Description:    reflect a rotation matrix with respect to a plane.
2279 
2280 
2281 Arguments:      m        matrix to be set
2282 
2283                 p        point on the planar reflector.
2284 
2285                 n       normal of the planar reflector.
2286 
2287 Return:         none
2288 
2289  *---------------------------------------------------------------------*/
2290 /*---------------------------------------------------------------------*
2291     C version
2292  *---------------------------------------------------------------------*/
C_MTXReflect(Mtx m,const Vec * p,const Vec * n)2293 void C_MTXReflect ( Mtx m, const Vec *p, const Vec *n )
2294 {
2295     f32 vxy, vxz, vyz, pdotn;
2296 
2297     vxy   = -2.0f * n->x * n->y;
2298     vxz   = -2.0f * n->x * n->z;
2299     vyz   = -2.0f * n->y * n->z;
2300     pdotn = 2.0f * C_VECDotProduct(p, n);
2301 
2302     m[0][0] = 1.0f - 2.0f * n->x * n->x;
2303     m[0][1] = vxy;
2304     m[0][2] = vxz;
2305     m[0][3] = pdotn * n->x;
2306 
2307     m[1][0] = vxy;
2308     m[1][1] = 1.0f - 2.0f * n->y * n->y;
2309     m[1][2] = vyz;
2310     m[1][3] = pdotn * n->y;
2311 
2312     m[2][0] = vxz;
2313     m[2][1] = vyz;
2314     m[2][2] = 1.0f - 2.0f * n->z * n->z;
2315     m[2][3] = pdotn * n->z;
2316 }
2317 
2318 /*---------------------------------------------------------------------*
2319     Paired-Single assembler version
2320  *---------------------------------------------------------------------*/
2321 #ifdef GEKKO
PSMTXReflect(register Mtx m,const register Vec * p,const register Vec * n)2322 void PSMTXReflect( register Mtx m, const register Vec *p, const register Vec *n )
2323 {
2324     register f32    c_one = 1.0F;
2325     register f32    vn_xy, vn_z1, n2vn_xy, n2vn_z1, pdotn;
2326     register f32    tmp0, tmp1, tmp2, tmp3;
2327     register f32    tmp4, tmp5, tmp6, tmp7;
2328 
2329     asm
2330     {
2331             // vn_z1 = [nz][1.0F] : LOAD
2332             psq_l       vn_z1, 8(n), 1, 0
2333             // vn_xy = [nx][ny]   : LOAD
2334             psq_l       vn_xy, 0(n), 0, 0
2335 
2336             // tmp0 = [px][py]   : LOAD
2337             psq_l       tmp0,  0(p), 0, 0
2338         // n2vn_z1 = [-2nz][-2.0F]
2339         ps_nmadd    n2vn_z1, vn_z1, c_one, vn_z1
2340             // tmp1 = [pz][1.0F] : LOAD
2341             psq_l       tmp1,  8(p), 1, 0
2342         // n2vn_xy = [-2nx][-2ny]
2343         ps_nmadd    n2vn_xy, vn_xy, c_one, vn_xy
2344 
2345         // tmp4 = [-2nx*nz][-2ny*nz]   : [m20][m21]
2346         ps_muls0    tmp4, vn_xy, n2vn_z1
2347         // pdotn = [-2(px*nx)][-2(py*ny)]
2348         ps_mul      pdotn, n2vn_xy, tmp0
2349         // tmp2 = [-2nx*nx][-2nx*ny]
2350         ps_muls0    tmp2, vn_xy, n2vn_xy
2351         // pdotn = [-2(px*nx+py*ny)][?]
2352         ps_sum0     pdotn, pdotn, pdotn, pdotn
2353         // tmp3 = [-2nx*ny][-2ny*ny]
2354         ps_muls1    tmp3, vn_xy, n2vn_xy
2355             // tmp4 = [m20][m21] : STORE
2356             psq_st      tmp4, 32(m), 0, 0
2357         // tmp2 = [1-2nx*nx][-2nx*ny]  : [m00][m01]
2358         ps_sum0     tmp2, tmp2, tmp2, c_one
2359         // pdotn = [2(px*nx+py*ny+pz*nz)][?]
2360         ps_nmadd    pdotn, n2vn_z1, tmp1, pdotn
2361         // tmp3 = [-2nx*ny][1-2ny*ny]  : [m10][m11]
2362         ps_sum1     tmp3, c_one, tmp3, tmp3
2363             // tmp2 = [m00][m01] : STORE
2364             psq_st      tmp2,  0(m), 0, 0
2365         // tmp5 = [pdotn*nx][pdotn*ny]
2366         ps_muls0    tmp5, vn_xy, pdotn
2367         // tmp6 = [-2nz][pdotn]
2368         ps_merge00  tmp6, n2vn_z1, pdotn
2369             // tmp3 = [m10][m11] : STORE
2370             psq_st      tmp3, 16(m), 0, 0
2371 
2372         // tmp7 = [-2nx*nz][pdotn*nx]  : [m02][m03]
2373         ps_merge00  tmp7, tmp4, tmp5
2374         // tmp6 = [-2nz*nz][pdotn*nz]
2375         ps_muls0    tmp6, tmp6, vn_z1
2376         // tmp5 = [-2ny*nz][pdotn*ny]  : [m12][m13]
2377         ps_merge11  tmp5, tmp4, tmp5
2378             // tmp7 = [m02][m03] : STORE
2379             psq_st      tmp7,  8(m), 0, 0
2380         // tmp6 = [1-2nz*nz][pdotn*nz] : [m22][m23]
2381         ps_sum0     tmp6, tmp6, tmp6, c_one
2382             // tmp5 = [m12][m13] : STORE
2383             psq_st      tmp5, 24(m), 0, 0
2384             // tmp6 = [m22][m23] : STORE
2385             psq_st      tmp6, 40(m), 0, 0
2386     }
2387 }
2388 #endif // GEKKO
2389 
2390 /*---------------------------------------------------------------------*
2391 
2392 
2393 
2394 
2395 
2396                              VIEW SECTION
2397 
2398 
2399 
2400 
2401 
2402 *---------------------------------------------------------------------*/
2403 
2404 
2405 
2406 
2407 
2408 
2409 /*---------------------------------------------------------------------*
2410 
2411 Name:           MTXLookAt
2412 
2413 Description:    compute a matrix to transform points to camera coordinates.
2414 
2415 
2416 Arguments:      m        matrix to be set
2417 
2418                 camPos   camera position.
2419 
2420                 camUp    camera 'up' direction.
2421 
2422                 target   camera aim point.
2423 
2424 
2425 Return:         none
2426 
2427 *---------------------------------------------------------------------*/
2428 /*---------------------------------------------------------------------*
2429     C version
2430  *---------------------------------------------------------------------*/
C_MTXLookAt(Mtx m,const Point3d * camPos,const Vec * camUp,const Point3d * target)2431 void C_MTXLookAt ( Mtx m, const Point3d *camPos, const Vec *camUp, const Point3d *target )
2432 {
2433 
2434     Vec vLook,vRight,vUp;
2435 
2436 
2437     ASSERTMSG( (m != 0),      MTX_LOOKAT_1    );
2438     ASSERTMSG( (camPos != 0), MTX_LOOKAT_2    );
2439     ASSERTMSG( (camUp  != 0), MTX_LOOKAT_3    );
2440     ASSERTMSG( (target != 0), MTX_LOOKAT_4    );
2441 
2442 
2443     // compute unit target vector
2444     // use negative value to look down (-Z) axis
2445     vLook.x = camPos->x - target->x;
2446     vLook.y = camPos->y - target->y;
2447     vLook.z = camPos->z - target->z;
2448     VECNormalize( &vLook,&vLook );
2449 
2450 
2451     // vRight = camUp x vLook
2452     VECCrossProduct    ( camUp, &vLook, &vRight );
2453     VECNormalize( &vRight,&vRight );
2454 
2455 
2456     // vUp = vLook x vRight
2457     VECCrossProduct( &vLook, &vRight, &vUp );
2458     // Don't need to normalize vUp since it should already be unit length
2459     // VECNormalize( &vUp, &vUp );
2460 
2461 
2462     m[0][0] = vRight.x;
2463     m[0][1] = vRight.y;
2464     m[0][2] = vRight.z;
2465     m[0][3] = -( camPos->x * vRight.x + camPos->y * vRight.y + camPos->z * vRight.z );
2466 
2467     m[1][0] = vUp.x;
2468     m[1][1] = vUp.y;
2469     m[1][2] = vUp.z;
2470     m[1][3] = -( camPos->x * vUp.x + camPos->y * vUp.y + camPos->z * vUp.z );
2471 
2472     m[2][0] = vLook.x;
2473     m[2][1] = vLook.y;
2474     m[2][2] = vLook.z;
2475     m[2][3] = -( camPos->x * vLook.x + camPos->y * vLook.y + camPos->z * vLook.z );
2476 
2477 
2478 }
2479 
2480 /*---------------------------------------------------------------------*
2481 
2482 
2483 
2484 
2485 
2486                        TEXTURE PROJECTION SECTION
2487 
2488 
2489 
2490 
2491 
2492 *---------------------------------------------------------------------*/
2493 
2494 
2495 /*---------------------------------------------------------------------*
2496 
2497 Name:           MTXLightFrustum
2498 
2499 Description:    Compute a 3x4 projection matrix for texture projection
2500 
2501 
2502 Arguments:      m        3x4 matrix to be set
2503 
2504                 t        top coord. of view volume at the near clipping plane
2505 
2506                 b        bottom coord of view volume at the near clipping plane
2507 
2508                 l        left coord. of view volume at near clipping plane
2509 
2510                 r        right coord. of view volume at near clipping plane
2511 
2512                 n        positive distance from camera to near clipping plane
2513 
2514                 scaleS   scale in the S direction for projected coordinates
2515                          (usually 0.5)
2516 
2517                 scaleT   scale in the T direction for projected coordinates
2518                          (usually 0.5)
2519 
2520                 transS   translate in the S direction for projected coordinates
2521                          (usually 0.5)
2522 
2523                 transT   translate in the T direction for projected coordinates
2524                          (usually 0.5)
2525 
2526 
2527 Return:         none.
2528 
2529  *---------------------------------------------------------------------*/
2530 /*---------------------------------------------------------------------*
2531     C version
2532  *---------------------------------------------------------------------*/
C_MTXLightFrustum(Mtx m,float t,float b,float l,float r,float n,float scaleS,float scaleT,float transS,float transT)2533 void C_MTXLightFrustum  ( Mtx m, float t, float b, float l, float r, float n,
2534                           float scaleS, float scaleT, float transS,
2535                           float transT )
2536 {
2537     f32 tmp;
2538 
2539 
2540     ASSERTMSG( (m != 0),  MTX_LIGHT_FRUSTUM_1  );
2541     ASSERTMSG( (t != b),  MTX_LIGHT_FRUSTUM_2  );
2542     ASSERTMSG( (l != r),  MTX_LIGHT_FRUSTUM_3  );
2543 
2544 
2545     // NOTE: Be careful about "l" vs. "1" below!!!
2546 
2547     tmp     =  1.0f / (r - l);
2548     m[0][0] =  ((2*n) * tmp) * scaleS;
2549     m[0][1] =  0.0f;
2550     m[0][2] =  (((r + l) * tmp) * scaleS) - transS;
2551     m[0][3] =  0.0f;
2552 
2553     tmp     =  1.0f / (t - b);
2554     m[1][0] =  0.0f;
2555     m[1][1] =  ((2*n) * tmp) * scaleT;
2556     m[1][2] =  (((t + b) * tmp) * scaleT) - transT;
2557     m[1][3] =  0.0f;
2558 
2559     m[2][0] =  0.0f;
2560     m[2][1] =  0.0f;
2561     m[2][2] = -1.0f;
2562     m[2][3] =  0.0f;
2563 }
2564 
2565 /*---------------------------------------------------------------------*
2566 
2567 Name:           MTXLightPerspective
2568 
2569 Description:    compute a 3x4 perspective projection matrix from
2570                 field of view and aspect ratio for texture projection.
2571 
2572 
2573 Arguments:      m        3x4 matrix to be set
2574 
2575                 fovy     total field of view in in degrees in the YZ plane
2576 
2577                 aspect   ratio of view window width:height (X / Y)
2578 
2579                 scaleS   scale in the S direction for projected coordinates
2580                          (usually 0.5)
2581 
2582                 scaleT   scale in the T direction for projected coordinates
2583                          (usually 0.5)
2584 
2585                 transS   translate in the S direction for projected coordinates
2586                          (usually 0.5)
2587 
2588                 transT   translate in the T direction for projected coordinates
2589                          (usually 0.5)
2590 
2591 
2592 Return:         none
2593 
2594  *---------------------------------------------------------------------*/
2595 /*---------------------------------------------------------------------*
2596     C version
2597  *---------------------------------------------------------------------*/
C_MTXLightPerspective(Mtx m,f32 fovY,f32 aspect,float scaleS,float scaleT,float transS,float transT)2598 void C_MTXLightPerspective  ( Mtx m, f32 fovY, f32 aspect, float scaleS,
2599                               float scaleT, float transS, float transT )
2600 {
2601     f32 angle;
2602     f32 cot;
2603 
2604     ASSERTMSG( (m != 0),                            MTX_LIGHT_PERSPECTIVE_1  );
2605     ASSERTMSG( ( (fovY > 0.0) && ( fovY < 180.0) ), MTX_LIGHT_PERSPECTIVE_2  );
2606     ASSERTMSG( (aspect != 0),                       MTX_LIGHT_PERSPECTIVE_3  );
2607 
2608     // find the cotangent of half the (YZ) field of view
2609     angle = fovY * 0.5f;
2610     angle = MTXDegToRad( angle );
2611 
2612     cot = 1.0f / tanf(angle);
2613 
2614     m[0][0] =    (cot / aspect) * scaleS;
2615     m[0][1] =    0.0f;
2616     m[0][2] =    -transS;
2617     m[0][3] =    0.0f;
2618 
2619     m[1][0] =    0.0f;
2620     m[1][1] =    cot * scaleT;
2621     m[1][2] =    -transT;
2622     m[1][3] =    0.0f;
2623 
2624     m[2][0] =    0.0f;
2625     m[2][1] =    0.0f;
2626     m[2][2] =   -1.0f;
2627     m[2][3] =    0.0f;
2628 }
2629 
2630 /*---------------------------------------------------------------------*
2631 
2632 Name:           MTXLightOrtho
2633 
2634 Description:    compute a 3x4 orthographic projection matrix.
2635 
2636 
2637 Arguments:      m        matrix to be set
2638 
2639                 t        top coord. of parallel view volume
2640 
2641                 b        bottom coord of parallel view volume
2642 
2643                 l        left coord. of parallel view volume
2644 
2645                 r        right coord. of parallel view volume
2646 
2647                 scaleS   scale in the S direction for projected coordinates
2648                          (usually 0.5)
2649 
2650                 scaleT   scale in the T direction for projected coordinates
2651                          (usually 0.5)
2652 
2653                 transS   translate in the S direction for projected coordinates
2654                          (usually 0.5)
2655 
2656                 transT   translate in the T direction for projected coordinates
2657                          (usually 0.5)
2658 
2659 
2660 Return:         none
2661 
2662  *---------------------------------------------------------------------*/
2663 /*---------------------------------------------------------------------*
2664     C version
2665  *---------------------------------------------------------------------*/
C_MTXLightOrtho(Mtx m,f32 t,f32 b,f32 l,f32 r,float scaleS,float scaleT,float transS,float transT)2666 void C_MTXLightOrtho ( Mtx m, f32 t, f32 b, f32 l, f32 r, float scaleS,
2667                               float scaleT, float transS, float transT )
2668 {
2669     f32 tmp;
2670 
2671 
2672     ASSERTMSG( (m != 0),  MTX_LIGHT_ORTHO_1     );
2673     ASSERTMSG( (t != b),  MTX_LIGHT_ORTHO_2     );
2674     ASSERTMSG( (l != r),  MTX_LIGHT_ORTHO_3     );
2675 
2676 
2677     // NOTE: Be careful about "l" vs. "1" below!!!
2678 
2679     tmp     =  1.0f / (r - l);
2680     m[0][0] =  (2.0f * tmp * scaleS);
2681     m[0][1] =  0.0f;
2682     m[0][2] =  0.0f;
2683     m[0][3] =  ((-(r + l) * tmp) * scaleS) + transS;
2684 
2685     tmp     =  1.0f / (t - b);
2686     m[1][0] =  0.0f;
2687     m[1][1] =  (2.0f * tmp) * scaleT;
2688     m[1][2] =  0.0f;
2689     m[1][3] =  ((-(t + b) * tmp)* scaleT) + transT;
2690 
2691     m[2][0] =  0.0f;
2692     m[2][1] =  0.0f;
2693     m[2][2] =  0.0f;
2694     m[2][3] =  1.0f;
2695 }
2696 
2697 /*===========================================================================*/
2698 
2699 #if ( __MWERKS__ == 0x00004100 )
2700 #pragma defer_codegen reset
2701 #endif
2702 
2703