1 /*---------------------------------------------------------------------------*
2   Project: matrix vector Library
3   File:    mtx44.c
4 
5   Copyright 1998-2006 Nintendo.  All rights reserved.
6 
7   These coded instructions, statements, and computer programs contain
8   proprietary information of Nintendo of America Inc. and/or Nintendo
9   Company Ltd., and are protected by Federal copyright law.  They may
10   not be disclosed to third parties or copied or duplicated in any form,
11   in whole or in part, without the prior written consent of Nintendo.
12 
13 
14   $Log: mtx44.c,v $
15   Revision 1.4  02/20/2006 04:25:42  mitu
16   changed include path from dolphin/ to revolution/.
17 
18   Revision 1.3  01/11/2006 12:17:58  hirose
19   Removed obsolete flags.
20 
21   Revision 1.2  12/30/2005 06:01:44  hirose
22   Temporary workaround for CW3.0 Alpha3 problem.
23 
24 
25     9     02/04/11 13:10 Hirose
26     const type specifier support. (worked by Hiratsu@IRD)
27 
28     8     02/02/18 9:27 Hirose
29     Workaround for floating-point precision mismatch issue.
30 
31     7     9/18/01 1:38p Hirose
32     Fixed PSMTX44RotRad definition. Corrected wrong argument types.
33 
34     6     8/30/01 10:36p Hirose
35     Added C_MTX44Inverse.
36 
37     5     7/30/01 11:00p Hirose
38     Fixed MAC build errors regarding missing "#ifdef GEKKO".
39 
40     4     7/30/01 10:22p Hirose
41     Added 4x4 model matrix section.
42 
43     3     7/24/01 6:03p Hirose
44 
45     2     7/09/01 11:18p Hirose
46     added general 4x4 matrix functions. (original made by Ohki-san@NTSC.)
47 
48     1     2/22/01 11:54p Hirose
49     This section is moved from mtx.c
50 
51   $NoKeywords: $
52  *---------------------------------------------------------------------------*/
53 
54 #include <math.h>
55 #include <revolution/mtx.h>
56 #include <revolution/mtx/mtx44ext.h>
57 #include "mtxAssert.h"
58 #include "mtx44extAssert.h"
59 
60 static f32 mtxUnit[] = {0.0f, 1.0f, 0.5f, 3.0f};
61 
62 #if ( __MWERKS__ == 0x00004100 )
63 #pragma defer_codegen on    // This will be removed when compiler is fixed.
64 #endif
65 
66 /*---------------------------------------------------------------------*
67 
68 
69 
70 
71 
72                              PROJECTION SECTION
73 
74 
75 
76 
77 
78  *---------------------------------------------------------------------*/
79 
80 
81 /*---------------------------------------------------------------------*
82 
83 Name:           MTXFrustum
84 
85 Description:    compute a 4x4 perspective projection matrix from a
86                 specified view volume.
87 
88 
89 Arguments:      m:        	4x4 matrix to be set
90 
91                 t:        	top coord. of view volume at the near clipping plane
92 
93                 b:        	bottom coord of view volume at the near clipping plane
94 
95                 l:        	left coord. of view volume at near clipping plane
96 
97                 r:        	right coord. of view volume at near clipping plane
98 
99                 n:        	positive distance from camera to near clipping plane
100 
101                 f:        	positive distance from camera to far clipping plane
102 
103 
104 Return:         none
105 
106  *---------------------------------------------------------------------*/
107 /*---------------------------------------------------------------------*
108     C version
109  *---------------------------------------------------------------------*/
C_MTXFrustum(Mtx44 m,f32 t,f32 b,f32 l,f32 r,f32 n,f32 f)110 void C_MTXFrustum ( Mtx44 m, f32 t, f32 b, f32 l, f32 r, f32 n, f32 f )
111 {
112     f32 tmp;
113 
114 
115     ASSERTMSG( (m != 0),  MTX_FRUSTUM_1     );
116     ASSERTMSG( (t != b),  MTX_FRUSTUM_2     );
117     ASSERTMSG( (l != r),  MTX_FRUSTUM_3     );
118     ASSERTMSG( (n != f),  MTX_FRUSTUM_4     );
119 
120 
121     // NOTE: Be careful about "l" vs. "1" below!!!
122 
123     tmp     =  1.0f / (r - l);
124     m[0][0] =  (2*n) * tmp;
125     m[0][1] =  0.0f;
126     m[0][2] =  (r + l) * tmp;
127     m[0][3] =  0.0f;
128 
129     tmp     =  1.0f / (t - b);
130     m[1][0] =  0.0f;
131     m[1][1] =  (2*n) * tmp;
132     m[1][2] =  (t + b) * tmp;
133     m[1][3] =  0.0f;
134 
135     m[2][0] =  0.0f;
136     m[2][1] =  0.0f;
137 
138     tmp     =  1.0f / (f - n);
139 
140     // scale z to (-w, 0) range
141     m[2][2] = -(n) * tmp;
142     m[2][3] = -(f*n) * tmp;
143 
144     m[3][0] =  0.0f;
145     m[3][1] =  0.0f;
146     m[3][2] = -1.0f;
147     m[3][3] =  0.0f;
148 
149 
150 }
151 
152 /*---------------------------------------------------------------------*
153 
154 Name:           MTXPerspective
155 
156 Description:    compute a 4x4 perspective projection matrix from
157                 field of view and aspect ratio.
158 
159 
160 Arguments:      m:       	4x4 matrix to be set
161 
162                 fovy:    	total field of view in in degrees in the YZ plane
163 
164                 aspect:  	ratio of view window width:height (X / Y)
165 
166                 n:       	positive distance from camera to near clipping plane
167 
168                 f:       	positive distance from camera to far clipping plane
169 
170 
171 Return:         none
172 
173  *---------------------------------------------------------------------*/
174 /*---------------------------------------------------------------------*
175     C version
176  *---------------------------------------------------------------------*/
C_MTXPerspective(Mtx44 m,f32 fovY,f32 aspect,f32 n,f32 f)177 void C_MTXPerspective ( Mtx44 m, f32 fovY, f32 aspect, f32 n, f32 f )
178 {
179     f32 angle;
180     f32 cot;
181     f32 tmp;
182 
183 
184     ASSERTMSG( (m != 0),                             MTX_PERSPECTIVE_1    );
185     ASSERTMSG( ( (fovY > 0.0) && ( fovY < 180.0) ),  MTX_PERSPECTIVE_2    );
186     ASSERTMSG( (aspect != 0),                        MTX_PERSPECTIVE_3    );
187 
188 
189     // find the cotangent of half the (YZ) field of view
190     angle = fovY * 0.5f;
191     angle = MTXDegToRad( angle );
192 
193     cot = 1.0f / tanf(angle);
194 
195 
196     m[0][0] =  cot / aspect;
197     m[0][1] =  0.0f;
198     m[0][2] =  0.0f;
199     m[0][3] =  0.0f;
200 
201     m[1][0] =  0.0f;
202     m[1][1] =   cot;
203     m[1][2] =  0.0f;
204     m[1][3] =  0.0f;
205 
206     m[2][0] =  0.0f;
207     m[2][1] =  0.0f;
208 
209     tmp     = 1.0f / (f - n);
210 
211     // scale z to (-w, 0) range
212     m[2][2] = -(n) * tmp;
213     m[2][3] = -(f*n) * tmp;
214 
215     m[3][0] =  0.0f;
216     m[3][1] =  0.0f;
217     m[3][2] = -1.0f;
218     m[3][3] =  0.0f;
219 }
220 
221 /*---------------------------------------------------------------------*
222 
223 Name:           MTXOrtho
224 
225 Description:    compute a 4x4 orthographic projection matrix.
226 
227 
228 Arguments:      m:        	4x4 matrix to be set
229 
230                 t:        	top coord. of parallel view volume
231 
232                 b:        	bottom coord of parallel view volume
233 
234                 l:        	left coord. of parallel view volume
235 
236                 r:        	right coord. of parallel view volume
237 
238                 n:        	positive distance from camera to near clipping plane
239 
240                 f:        	positive distance from camera to far clipping plane
241 
242 
243 Return:         none
244 
245  *---------------------------------------------------------------------*/
246 /*---------------------------------------------------------------------*
247     C version
248  *---------------------------------------------------------------------*/
C_MTXOrtho(Mtx44 m,f32 t,f32 b,f32 l,f32 r,f32 n,f32 f)249 void C_MTXOrtho ( Mtx44 m, f32 t, f32 b, f32 l, f32 r, f32 n, f32 f )
250 {
251     f32 tmp;
252 
253 
254     ASSERTMSG( (m != 0),  MTX_ORTHO_1  );
255     ASSERTMSG( (t != b),  MTX_ORTHO_2  );
256     ASSERTMSG( (l != r),  MTX_ORTHO_3  );
257     ASSERTMSG( (n != f),  MTX_ORTHO_4  );
258 
259 
260     // NOTE: Be careful about "l" vs. "1" below!!!
261 
262     tmp     =  1.0f / (r - l);
263     m[0][0] =  2.0f * tmp;
264     m[0][1] =  0.0f;
265     m[0][2] =  0.0f;
266     m[0][3] = -(r + l) * tmp;
267 
268     tmp     =  1.0f / (t - b);
269     m[1][0] =  0.0f;
270     m[1][1] =  2.0f * tmp;
271     m[1][2] =  0.0f;
272     m[1][3] = -(t + b) * tmp;
273 
274     m[2][0] =  0.0f;
275     m[2][1] =  0.0f;
276 
277     tmp     =  1.0f / (f - n);
278 
279     // scale z to (-1, 0) range
280     m[2][2] = -(1.0f) * tmp;
281     m[2][3] = -(f) * tmp;
282 
283     m[3][0] =  0.0f;
284     m[3][1] =  0.0f;
285     m[3][2] =  0.0f;
286     m[3][3] =  1.0f;
287 }
288 
289 /*---------------------------------------------------------------------*
290 
291 
292 
293 
294 
295                              GENERAL SECTION
296 
297 
298 
299 
300  *---------------------------------------------------------------------*/
301 /* NOTE: Prototypes for these functions are defined in "mtx44ext.h".   */
302 
303 
304 /*---------------------------------------------------------------------*
305 Name:           MTX44Identity
306 
307 Description:    sets a matrix to identity
308 
309 Arguments:      m :  matrix to be set
310 
311 Return:         none
312 
313  *---------------------------------------------------------------------*/
314 /*---------------------------------------------------------------------*
315     C version
316  *---------------------------------------------------------------------*/
C_MTX44Identity(Mtx44 m)317 void C_MTX44Identity( Mtx44 m )
318 {
319 
320     ASSERTMSG( (m != 0), MTX44_IDENTITY_1 );
321 
322 
323     m[0][0] = 1.0f; m[0][1] = 0.0f; m[0][2] = 0.0f; m[0][3] = 0.0f;
324 
325     m[1][0] = 0.0f; m[1][1] = 1.0f; m[1][2] = 0.0f; m[1][3] = 0.0f;
326 
327     m[2][0] = 0.0f; m[2][1] = 0.0f; m[2][2] = 1.0f; m[2][3] = 0.0f;
328 
329     m[3][0] = 0.0f; m[3][1] = 0.0f; m[3][2] = 0.0f; m[3][3] = 1.0f;
330 
331 }
332 
333 /*---------------------------------------------------------------------*
334     Paired-Single assembler version
335  *---------------------------------------------------------------------*
336             Note that this performs NO error checking.
337             Actually there is not much performance advantage.
338  *---------------------------------------------------------------------*/
339 #ifdef  GEKKO
PSMTX44Identity(register Mtx44 m)340 void PSMTX44Identity( register Mtx44 m )
341 {
342     register f32 c1 = 1.0F;
343     register f32 c0 = 0.0F;
344 
345     asm
346     {
347         stfs        c1,  0(m);
348         psq_st      c0,  4(m), 0, 0;
349         psq_st      c0, 12(m), 0, 0;
350         stfs        c1, 20(m);
351         psq_st      c0, 24(m), 0, 0;
352         psq_st      c0, 32(m), 0, 0;
353         stfs        c1, 40(m);
354         psq_st      c0, 44(m), 0, 0;
355         psq_st      c0, 52(m), 0, 0;
356         stfs        c1, 60(m);
357     }
358 }
359 #endif // GEKKO
360 
361 /*---------------------------------------------------------------------*
362 Name:           MTX44Copy
363 
364 Description:    copies the contents of one matrix into another
365 
366 Arguments:      src:        	source matrix for copy
367                 dst:        	destination matrix for copy
368 
369 
370 Return:         none
371  *---------------------------------------------------------------------*/
372 /*---------------------------------------------------------------------*
373     C version
374  *---------------------------------------------------------------------*/
C_MTX44Copy(const Mtx44 src,Mtx44 dst)375 void C_MTX44Copy( const Mtx44 src, Mtx44 dst )
376 {
377 
378     ASSERTMSG( (src != 0) , MTX44_COPY_1 );
379     ASSERTMSG( (dst != 0) , MTX44_COPY_2 );
380 
381     if( src == dst )
382     {
383         return;
384     }
385 
386 
387     dst[0][0] = src[0][0]; dst[0][1] = src[0][1]; dst[0][2] = src[0][2]; dst[0][3] = src[0][3];
388 
389     dst[1][0] = src[1][0]; dst[1][1] = src[1][1]; dst[1][2] = src[1][2]; dst[1][3] = src[1][3];
390 
391     dst[2][0] = src[2][0]; dst[2][1] = src[2][1]; dst[2][2] = src[2][2]; dst[2][3] = src[2][3];
392 
393     dst[3][0] = src[3][0]; dst[3][1] = src[3][1]; dst[3][2] = src[3][2]; dst[3][3] = src[3][3];
394 
395 }
396 
397 /*---------------------------------------------------------------------*
398     Paired-Single assembler version
399  *---------------------------------------------------------------------*
400                 Note that this performs NO error checking.
401  *---------------------------------------------------------------------*/
402 #ifdef  GEKKO
PSMTX44Copy(const register Mtx44 src,register Mtx44 dst)403 asm void PSMTX44Copy( const register Mtx44 src, register Mtx44 dst )
404 {
405     nofralloc;
406     psq_l       fp1,  0(src), 0, 0;
407     psq_st      fp1,  0(dst), 0, 0;
408     psq_l       fp1,  8(src), 0, 0;
409     psq_st      fp1,  8(dst), 0, 0;
410     psq_l       fp1, 16(src), 0, 0;
411     psq_st      fp1, 16(dst), 0, 0;
412     psq_l       fp1, 24(src), 0, 0;
413     psq_st      fp1, 24(dst), 0, 0;
414     psq_l       fp1, 32(src), 0, 0;
415     psq_st      fp1, 32(dst), 0, 0;
416     psq_l       fp1, 40(src), 0, 0;
417     psq_st      fp1, 40(dst), 0, 0;
418     psq_l       fp1, 48(src), 0, 0;
419     psq_st      fp1, 48(dst), 0, 0;
420     psq_l       fp1, 56(src), 0, 0;
421     psq_st      fp1, 56(dst), 0, 0;
422     blr;
423 }
424 #endif  // GEKKO
425 
426 
427 /*---------------------------------------------------------------------*
428 Name:           MTX44Concat
429 
430 Description:    concatenates two matrices.
431                 order of operation is A x B = AB.
432                 ok for any of ab == a == b.
433 
434                 saves a MTXCopy operation if ab != to a or b.
435 
436 Arguments:      a:        	first matrix for concat.
437                 b:        	second matrix for concat.
438                 ab:       	resultant matrix from concat.
439 
440 Return:         none
441  *---------------------------------------------------------------------*/
442 /*---------------------------------------------------------------------*
443     C version
444  *---------------------------------------------------------------------*/
C_MTX44Concat(const Mtx44 a,const Mtx44 b,Mtx44 ab)445 void C_MTX44Concat( const Mtx44 a, const Mtx44 b, Mtx44 ab )
446 {
447     Mtx44       mTmp;
448     Mtx44Ptr    m;
449 
450     ASSERTMSG( (a  != 0), MTX44_CONCAT_1 );
451     ASSERTMSG( (b  != 0), MTX44_CONCAT_2 );
452     ASSERTMSG( (ab != 0), MTX44_CONCAT_3 );
453 
454     if( (ab == a) || (ab == b) )
455     {
456         m = mTmp;
457     }
458     else
459     {
460         m = ab;
461     }
462 
463     // compute (a x b) -> m
464 
465     m[0][0] = a[0][0]*b[0][0] + a[0][1]*b[1][0] + a[0][2]*b[2][0] + a[0][3]*b[3][0];
466     m[0][1] = a[0][0]*b[0][1] + a[0][1]*b[1][1] + a[0][2]*b[2][1] + a[0][3]*b[3][1];
467     m[0][2] = a[0][0]*b[0][2] + a[0][1]*b[1][2] + a[0][2]*b[2][2] + a[0][3]*b[3][2];
468     m[0][3] = a[0][0]*b[0][3] + a[0][1]*b[1][3] + a[0][2]*b[2][3] + a[0][3]*b[3][3];
469 
470     m[1][0] = a[1][0]*b[0][0] + a[1][1]*b[1][0] + a[1][2]*b[2][0] + a[1][3]*b[3][0];
471     m[1][1] = a[1][0]*b[0][1] + a[1][1]*b[1][1] + a[1][2]*b[2][1] + a[1][3]*b[3][1];
472     m[1][2] = a[1][0]*b[0][2] + a[1][1]*b[1][2] + a[1][2]*b[2][2] + a[1][3]*b[3][2];
473     m[1][3] = a[1][0]*b[0][3] + a[1][1]*b[1][3] + a[1][2]*b[2][3] + a[1][3]*b[3][3];
474 
475     m[2][0] = a[2][0]*b[0][0] + a[2][1]*b[1][0] + a[2][2]*b[2][0] + a[2][3]*b[3][0];
476     m[2][1] = a[2][0]*b[0][1] + a[2][1]*b[1][1] + a[2][2]*b[2][1] + a[2][3]*b[3][1];
477     m[2][2] = a[2][0]*b[0][2] + a[2][1]*b[1][2] + a[2][2]*b[2][2] + a[2][3]*b[3][2];
478     m[2][3] = a[2][0]*b[0][3] + a[2][1]*b[1][3] + a[2][2]*b[2][3] + a[2][3]*b[3][3];
479 
480     m[3][0] = a[3][0]*b[0][0] + a[3][1]*b[1][0] + a[3][2]*b[2][0] + a[3][3]*b[3][0];
481     m[3][1] = a[3][0]*b[0][1] + a[3][1]*b[1][1] + a[3][2]*b[2][1] + a[3][3]*b[3][1];
482     m[3][2] = a[3][0]*b[0][2] + a[3][1]*b[1][2] + a[3][2]*b[2][2] + a[3][3]*b[3][2];
483     m[3][3] = a[3][0]*b[0][3] + a[3][1]*b[1][3] + a[3][2]*b[2][3] + a[3][3]*b[3][3];
484 
485     // overwrite a or b if needed
486     if(m == mTmp)
487     {
488         C_MTX44Copy( mTmp, ab );
489     }
490 
491 }
492 
493 
494 /*---------------------------------------------------------------------*
495     Paired-Single assembler version
496  *---------------------------------------------------------------------*
497                 Note that this performs NO error checking.
498  *---------------------------------------------------------------------*/
499 #ifdef  GEKKO
PSMTX44Concat(const register Mtx44 a,const register Mtx44 b,register Mtx44 ab)500 asm void PSMTX44Concat(
501     const register Mtx44 a,
502     const register Mtx44 b,
503           register Mtx44 ab
504 )
505 {
506     nofralloc;
507     psq_l       fp0 ,  0(a), 0, 0;          // a00,a01
508     psq_l       fp2 ,  0(b), 0, 0;          // b00,b01
509     ps_muls0    fp6 ,   fp2,  fp0;          // b00a00,b01a00
510     psq_l       fp3 , 16(b), 0, 0;          // b10,b11
511     psq_l       fp4 , 32(b), 0, 0;          // b20,b21
512     ps_madds1   fp6 ,   fp3,  fp0,  fp6;    // b00a00+b10a01,b01a00+b11a01
513     psq_l       fp1 ,  8(a), 0, 0;          // a02,a03
514     psq_l       fp5 , 48(b), 0, 0;          // b30,b31
515 
516     // b00a00+b10a01+b20a02,b01a00+b11a01+b21a02
517     ps_madds0   fp6 ,   fp4,  fp1,  fp6;
518     psq_l       fp0 , 16(a), 0, 0;          // a10,a11
519 
520     // b00a00+b10a01+b20a02+b30a03,b01a00+b11a01+b21a02+b31a03
521     ps_madds1   fp6 ,   fp5,  fp1,  fp6;
522     psq_l       fp1 , 24(a), 0, 0;          // a12,a13
523     ps_muls0    fp8 ,   fp2,  fp0;          // b00a10,b01a10
524     ps_madds1   fp8 ,   fp3,  fp0,  fp8;    // b00a10+b10a11,b01a11+b11a11
525     psq_l       fp0 , 32(a), 0, 0;          // a20,a21
526 
527     // b00a10+b10a11+b20a12,b01a11+b11a11+b21a12
528     ps_madds0   fp8 ,   fp4,  fp1,  fp8;
529 
530     // b00a10+b10a11+b20a12+b30a13,b01a10+b11a11+b21a12+b31a13
531     ps_madds1   fp8 ,   fp5,  fp1,  fp8;
532     psq_l       fp1 , 40(a), 0, 0;          // a22,a23
533     ps_muls0    fp10,   fp2,  fp0;          // b00a20,b01a20
534     ps_madds1   fp10,   fp3,  fp0, fp10;    // b00a20+b10a21,b01a20+b11a21
535     psq_l       fp0 , 48(a), 0, 0;          // a30,a31
536 
537     // b00a20+b10a21+b20a22,b01a20+b11a21+b21a22
538     ps_madds0   fp10,   fp4,  fp1, fp10;
539 
540     // b00a20+b10a21+b20a22+b30a23,b01a20+b11a21+b21a22+b31a23
541     ps_madds1   fp10,   fp5,  fp1, fp10;
542     psq_l       fp1 , 56(a), 0, 0;          // a32,a33
543 
544     ps_muls0    fp12,   fp2,  fp0;          // b00a30,b01a30
545     psq_l       fp2 ,  8(b), 0, 0;          // b02,b03
546     ps_madds1   fp12,   fp3,  fp0, fp12;    // b00a30+b10a31,b01a30+b11a31
547     psq_l       fp0 ,  0(a), 0, 0;          // a00,a01
548 
549     // b00a30+b10a31+b20a32,b01a30+b11a31+b21a32
550     ps_madds0   fp12,   fp4,  fp1, fp12;
551     psq_l       fp3 , 24(b), 0, 0;          // b12,b13
552 
553     // b00a30+b10a31+b20a32+b30a33,b01a30+b11a31+b21a32+b31a33
554     ps_madds1   fp12,   fp5,  fp1, fp12;
555     psq_l       fp1 ,  8(a), 0, 0;          // a02,a03
556 
557     ps_muls0    fp7 ,   fp2,  fp0;          // b02a00,b03a00
558     psq_l       fp4 , 40(b), 0, 0;          // b22,b23
559     ps_madds1   fp7 ,   fp3,  fp0, fp7;     // b02a00+b12a01,b03a00+b13a01
560     psq_l       fp5 , 56(b), 0, 0;          // b32,b33
561 
562     // b02a00+b12a01+b22a02,b03a00+b13a01+b23a02
563     ps_madds0   fp7 ,   fp4,  fp1, fp7;
564 
565     psq_l       fp0 , 16(a), 0, 0;          // a10,a11
566 
567     // b02a00+b12a01+b22a02+b32a03,b03a00+b13a01+b23a02+b33a03
568     ps_madds1   fp7 ,   fp5,  fp1, fp7;
569     psq_l       fp1 , 24(a), 0, 0;          // a12,a13
570 
571     ps_muls0    fp9 ,   fp2,  fp0;          // b02a10,b03a10
572     psq_st      fp6 , 0(ab), 0, 0;          // ab00,ab01
573     ps_madds1   fp9 ,   fp3,  fp0, fp9;     // b02a10+b12a11,b03a10+b13a11
574     psq_l       fp0 , 32(a), 0, 0;          // a20,a21
575 
576     // b02a10+b12a11+b22a12,b03a10+b13a11+b23a12
577     ps_madds0   fp9,    fp4,  fp1, fp9;
578     psq_st      fp8 ,16(ab), 0, 0;          // ab10,ab11
579 
580     // b02a10+b12a11+b22a12+b32a13,b03a10+b13a11+b23a12+b33a13
581     ps_madds1   fp9 ,   fp5,  fp1, fp9;
582     psq_l       fp1 , 40(a), 0, 0;          // a22,a23
583     ps_muls0    fp11,   fp2,  fp0;          // b02a20,b03a20
584     psq_st      fp10,32(ab), 0, 0;          // ab20,ab21
585     ps_madds1   fp11,   fp3,  fp0, fp11;    // b02a20+b12a21,b03a20+b13a21
586     psq_l       fp0 , 48(a), 0, 0;          // a30,a31
587 
588     // b02a20+b12a21+b22a22,b03a20+b13a21+b23a22
589     ps_madds0   fp11,   fp4,  fp1, fp11;
590     psq_st      fp12,48(ab), 0, 0;          // ab30,ab31
591 
592     // b02a20+b12a21+b22a22+b32a23,b03a20+b13a21+b23a22+b33a23
593     ps_madds1   fp11,   fp5,  fp1, fp11;
594 
595     psq_l       fp1,  56(a), 0, 0;          // a32,a33
596     ps_muls0    fp13,   fp2,  fp0;          // b02a30,b03a30
597     psq_st      fp7 , 8(ab), 0, 0;          // ab02,ab03
598     ps_madds1   fp13,   fp3,  fp0, fp13;    // b02a30+b12a31,b03a30+b13a31
599     psq_st      fp9 ,24(ab), 0, 0;          // ab12,ab13
600 
601     // b02a30+b12a31+b22a32,b03a30+b13a31+b23a32
602     ps_madds0   fp13,   fp4,  fp1, fp13;
603     psq_st      fp11,40(ab), 0, 0;          // ab22,ab23
604 
605     // b02a30+b12a31+b22a32+b32a33,b03a30+b13a31+b23a32+b33a33
606     ps_madds1   fp13,   fp5,  fp1, fp13;
607 
608     psq_st      fp13,56(ab), 0, 0;          // ab32,ab33
609     blr
610 
611 }
612 #endif  // GEKKO
613 
614 /*---------------------------------------------------------------------*
615 Name:           MTX44Transpose
616 
617 Description:    computes the transpose of a matrix.
618 
619 Arguments:      src:       	source matrix.
620                 xPose:     	destination (transposed) matrix.
621                           ok if src == xPose.
622 
623 Return:         none
624  *---------------------------------------------------------------------*/
625 /*---------------------------------------------------------------------*
626     C version
627  *---------------------------------------------------------------------*/
C_MTX44Transpose(const Mtx44 src,Mtx44 xPose)628 void C_MTX44Transpose ( const Mtx44 src, Mtx44 xPose )
629 {
630     Mtx44       mTmp;
631     Mtx44Ptr    m;
632 
633     ASSERTMSG( (src   != 0), MTX44_TRANSPOSE_1  );
634     ASSERTMSG( (xPose != 0), MTX44_TRANSPOSE_2  );
635 
636     if(src == xPose)
637     {
638         m = mTmp;
639     }
640     else
641     {
642         m = xPose;
643     }
644 
645 
646     m[0][0] = src[0][0];    m[0][1] = src[1][0];    m[0][2] = src[2][0];    m[0][3] = src[3][0];
647     m[1][0] = src[0][1];    m[1][1] = src[1][1];    m[1][2] = src[2][1];    m[1][3] = src[3][1];
648     m[2][0] = src[0][2];    m[2][1] = src[1][2];    m[2][2] = src[2][2];    m[2][3] = src[3][2];
649     m[3][0] = src[0][3];    m[3][1] = src[1][3];    m[3][2] = src[2][3];    m[3][3] = src[3][3];
650 
651     // copy back if needed
652     if( m == mTmp )
653     {
654         MTX44Copy( mTmp, xPose );
655     }
656 }
657 /*---------------------------------------------------------------------*
658     Paired-Single assembler version
659  *---------------------------------------------------------------------*
660                 Note that this performs NO error checking.
661  *---------------------------------------------------------------------*/
662 #ifdef  GEKKO
PSMTX44Transpose(const register Mtx44 src,register Mtx44 xPose)663 asm void PSMTX44Transpose ( const register Mtx44 src, register Mtx44 xPose )
664 {
665     nofralloc;
666     psq_l       fp0,  0(src), 0, 0;     // fp0 <= s00,s01
667     psq_l       fp1, 16(src), 0, 0;     // fp1 <= s10,s11
668 
669     ps_merge00  fp4, fp0, fp1;              // fp4 <= t00,t10
670     psq_l       fp2,  8(src), 0, 0;     // fp2 <= s02,s03
671     psq_st      fp4,  0(xPose), 0, 0;
672 
673     ps_merge11  fp5, fp0, fp1;              // fp5 <= t01,t11
674     psq_l       fp3, 24(src), 0, 0;     // fp3 <= s12,s13
675     psq_st      fp5, 16(xPose), 0, 0;
676 
677     ps_merge00  fp4, fp2, fp3;              // fp4 <= t02,t12
678     psq_l       fp0, 32(src), 0, 0;     // fp0 <= s20,s21
679     psq_st      fp4, 32(xPose), 0, 0;
680 
681     ps_merge11  fp5, fp2, fp3;              // fp5 <= t03,t13
682     psq_l       fp1, 48(src), 0, 0;     // fp1 <= s30,s31
683     psq_st      fp5, 48(xPose), 0, 0;
684 
685     ps_merge00  fp4, fp0, fp1;              // fp4 <= t20,t30
686     psq_l       fp2, 40(src), 0, 0;     // fp2 <= s22,s23
687     psq_st      fp4,  8(xPose), 0, 0;
688 
689     ps_merge11  fp5, fp0, fp1;              // fp5 <= t21,t31
690     psq_l       fp3, 56(src), 0, 0;     // fp2 <= s32,s33
691     psq_st      fp5, 24(xPose), 0, 0;
692 
693     ps_merge00  fp4, fp2, fp3;              // fp4 <= s22,s32
694     psq_st      fp4, 40(xPose), 0, 0;
695 
696     ps_merge11  fp5, fp2, fp3;              // fp5 <= s23,s33
697     psq_st      fp5, 56(xPose), 0, 0;
698 
699     blr;
700 
701 }
702 #endif  // GEKKO
703 
704 
705 /*---------------------------------------------------------------------*
706 Name:           MTX44Inverse
707 
708 Description:    computes a fast inverse of a matrix.
709                 uses Gauss-Jordan(with partial pivoting)
710 
711 Arguments:      src:       	source matrix.
712                 inv:       	destination (inverse) matrix.
713                           ok if src == inv.
714 
715 Return:         0 if src is not invertible.
716                 1 on success.
717  *---------------------------------------------------------------------*/
718 /*---------------------------------------------------------------------*
719     C version only
720  *---------------------------------------------------------------------*/
721 #define NUM         4
722 #define SWAPF(a,b)  { f32 tmp; tmp = (a); (a) = (b); (b)=tmp; }
723 
C_MTX44Inverse(const Mtx44 src,Mtx44 inv)724 u32 C_MTX44Inverse( const Mtx44 src, Mtx44 inv )
725 {
726     Mtx44       gjm;
727     s32         i, j, k;
728     f32         w;
729 
730     ASSERTMSG( (src != 0), MTX44_INVERSE_1 );
731     ASSERTMSG( (inv != 0), MTX44_INVERSE_2 );
732 
733     MTX44Copy(src, gjm);
734     MTX44Identity(inv);
735 
736     for ( i = 0 ; i < NUM ; ++i )
737     {
738         f32 max = 0.0f;
739         s32 swp = i;
740 
741         // ---- partial pivoting -----
742         for( k = i ; k < NUM ; k++ )
743         {
744             f32 ftmp;
745             ftmp = fabsf(gjm[k][i]);
746             if ( ftmp > max )
747             {
748                 max = ftmp;
749                 swp = k;
750             }
751         }
752 
753         // check singular matrix
754         //(or can't solve inverse matrix with this algorithm)
755         if ( max == 0.0f )
756         {
757             return 0;
758         }
759 
760         // swap row
761         if( swp != i )
762         {
763             for ( k = 0 ; k < NUM ; k++ )
764             {
765                 SWAPF(gjm[i][k], gjm[swp][k]);
766                 SWAPF(inv[i][k], inv[swp][k]);
767             }
768         }
769 
770         // ---- pivoting end ----
771 
772         w = 1.0F / gjm[i][i];
773         for ( j = 0 ; j < NUM ; ++j )
774         {
775             gjm[i][j] *= w;
776             inv[i][j] *= w;
777         }
778 
779         for ( k = 0 ; k < NUM ; ++k )
780         {
781             if ( k == i )
782                 continue;
783 
784             w = gjm[k][i];
785             for ( j = 0 ; j < NUM ; ++j )
786             {
787                 gjm[k][j] -= gjm[i][j] * w;
788                 inv[k][j] -= inv[i][j] * w;
789             }
790         }
791 
792 
793     }
794 
795     return 1;
796 }
797 
798 #undef SWAPF
799 #undef NUM
800 
801 /*---------------------------------------------------------------------*
802 
803 
804 
805 
806                              MODEL SECTION
807 
808 
809 
810 
811  *---------------------------------------------------------------------*/
812 /* NOTE: Prototypes for these functions are defined in "mtx44ext.h".   */
813 
814 /*---------------------------------------------------------------------*
815 Name:           MTX44Trans
816 
817 Description:    sets a translation matrix.
818 
819 Arguments:       m:        	matrix to be set
820                 xT:        	x component of translation.
821                 yT:        	y component of translation.
822                 zT:        	z component of translation.
823 
824 Return:         none
825  *---------------------------------------------------------------------*/
826 /*---------------------------------------------------------------------*
827     C version
828  *---------------------------------------------------------------------*/
C_MTX44Trans(Mtx44 m,f32 xT,f32 yT,f32 zT)829 void C_MTX44Trans ( Mtx44 m, f32 xT, f32 yT, f32 zT )
830 {
831     ASSERTMSG( (m != 0), MTX44_TRANS_1 );
832 
833 
834     m[0][0] = 1.0f;     m[0][1] = 0.0f;  m[0][2] = 0.0f;  m[0][3] =  xT;
835     m[1][0] = 0.0f;     m[1][1] = 1.0f;  m[1][2] = 0.0f;  m[1][3] =  yT;
836     m[2][0] = 0.0f;     m[2][1] = 0.0f;  m[2][2] = 1.0f;  m[2][3] =  zT;
837     m[3][0] = 0.0f;     m[3][1] = 0.0f;  m[3][2] = 0.0f;  m[3][3] =  1.0f;
838 
839 }
840 
841 /*---------------------------------------------------------------------*
842     Paired-Single assembler version
843  *---------------------------------------------------------------------*
844                 Note that this performs NO error checking.
845  *---------------------------------------------------------------------*/
846 #ifdef  GEKKO
PSMTX44Trans(register Mtx44 m,register f32 xT,register f32 yT,register f32 zT)847 void PSMTX44Trans (
848     register Mtx44 m,
849     register f32 xT,
850     register f32 yT,
851     register f32 zT
852 )
853 {
854     register f32 c_zero = 0.0F;
855     register f32 c_one  = 1.0F;
856     register f32 c_01;
857 
858     asm
859     {
860         stfs        xT,     12(m);              // m03
861         stfs        yT,     28(m);              // m13
862         ps_merge00  c_01,   c_zero, c_one;      // c_01 <- 0.0, 1.0
863         stfs        zT,     44(m);              // m23
864         psq_st      c_one,   0(m), 1, 0;        // m00
865         psq_st      c_zero,  4(m), 0, 0;        // m01,m02
866         psq_st      c_01,   16(m), 0, 0;        // m10,m11
867         psq_st      c_zero, 24(m), 1, 0;        // m12
868         psq_st      c_zero, 32(m), 0, 0;        // m20,m21
869         psq_st      c_one,  40(m), 1, 0;        // m22
870         psq_st      c_zero, 48(m), 0, 0;        // m30,m31
871         psq_st      c_01,   56(m), 0, 0;        // m32,m33
872     }
873 }
874 #endif  // GEKKO
875 
876 /*---------------------------------------------------------------------*
877 Name:           MTX44TransApply
878 
879 Description:    This function performs the operation equivalent to
880                 MTXTrans + MTXConcat.
881 
882 Arguments:      src:       	matrix to be operated.
883                 dst:       	resultant matrix from concat.
884                 xT:        	x component of translation.
885                 yT:        	y component of translation.
886                 zT:        	z component of translation.
887 
888 Return:         none
889  *---------------------------------------------------------------------*/
890 /*---------------------------------------------------------------------*
891     C version
892  *---------------------------------------------------------------------*/
C_MTX44TransApply(const Mtx44 src,Mtx44 dst,f32 xT,f32 yT,f32 zT)893 void C_MTX44TransApply ( const Mtx44 src, Mtx44 dst, f32 xT, f32 yT, f32 zT )
894 {
895     ASSERTMSG( (src != 0), MTX44_TRANSAPPLY_1 );
896     ASSERTMSG( (dst != 0), MTX44_TRANSAPPLY_1 );
897 
898     if ( src != dst )
899     {
900         dst[0][0] = src[0][0];    dst[0][1] = src[0][1];    dst[0][2] = src[0][2];
901         dst[1][0] = src[1][0];    dst[1][1] = src[1][1];    dst[1][2] = src[1][2];
902         dst[2][0] = src[2][0];    dst[2][1] = src[2][1];    dst[2][2] = src[2][2];
903         dst[3][0] = src[3][0];    dst[3][1] = src[3][1];    dst[3][2] = src[3][2];
904         dst[3][3] = src[3][3];
905     }
906 
907     dst[0][3] = src[0][3] + xT;
908     dst[1][3] = src[1][3] + yT;
909     dst[2][3] = src[2][3] + zT;
910 
911 }
912 
913 /*---------------------------------------------------------------------*
914     Paired-Single assembler version
915  *---------------------------------------------------------------------*
916                 Note that this performs NO error checking.
917  *---------------------------------------------------------------------*/
918 #ifdef  GEKKO
PSMTX44TransApply(const register Mtx44 src,register Mtx44 dst,register f32 xT,register f32 yT,register f32 zT)919 asm void PSMTX44TransApply (
920     const register Mtx44 src,
921           register Mtx44 dst,
922           register f32 xT,
923           register f32 yT,
924           register f32 zT
925 )
926 {
927     nofralloc;
928     psq_l       fp4, 0(src),     0, 0;
929     frsp        xT, xT;                         // to make sure xS = single precision
930     psq_l       fp5, 8(src),     0, 0;
931     frsp        yT, yT;                         // to make sure yS = single precision
932     psq_l       fp6, 16(src),    0, 0;
933     frsp        zT, zT;                         // to make sure zS = single precision
934     psq_l       fp7, 24(src),    0, 0;
935     psq_st      fp4, 0(dst),     0, 0;
936     ps_sum1     fp5, xT, fp5, fp5;
937     psq_l       fp4, 40(src),    0, 0;
938     psq_st      fp6, 16(dst),    0, 0;
939     ps_sum1     fp7, yT, fp7, fp7;
940     psq_l       fp8, 32(src),    0, 0;
941     psq_st      fp5, 8(dst),     0, 0;
942     ps_sum1     fp4, zT, fp4, fp4;
943     psq_st      fp7, 24(dst),    0, 0;
944     psq_st      fp8, 32(dst),    0, 0;
945     psq_l       fp5, 48(src),    0, 0;
946     psq_l       fp6, 56(src),    0, 0;
947     psq_st      fp4, 40(dst),    0, 0;
948     psq_st      fp5, 48(dst),    0, 0;
949     psq_st      fp6, 56(dst),    0, 0;
950     blr;
951 
952 }
953 #endif  // GEKKO
954 
955 /*---------------------------------------------------------------------*
956 Name:            MTX44Scale
957 
958 Description:     sets a scaling matrix.
959 
960 Arguments:       m:        	matrix to be set
961                 xS:        	x scale factor.
962                 yS:        	y scale factor.
963                 zS:        	z scale factor.
964 
965 Return:         none
966  *---------------------------------------------------------------------*/
967 /*---------------------------------------------------------------------*
968     C version
969  *---------------------------------------------------------------------*/
C_MTX44Scale(Mtx44 m,f32 xS,f32 yS,f32 zS)970 void C_MTX44Scale ( Mtx44 m, f32 xS, f32 yS, f32 zS )
971 {
972     ASSERTMSG( (m != 0), MTX44_SCALE_1 );
973 
974 
975     m[0][0] = xS;      m[0][1] = 0.0f;  m[0][2] = 0.0f;  m[0][3] = 0.0f;
976     m[1][0] = 0.0f;    m[1][1] = yS;    m[1][2] = 0.0f;  m[1][3] = 0.0f;
977     m[2][0] = 0.0f;    m[2][1] = 0.0f;  m[2][2] = zS;    m[2][3] = 0.0f;
978     m[3][0] = 0.0f;    m[3][1] = 0.0f;  m[3][2] = 0.0f;  m[3][3] = 1.0f;
979 }
980 
981 /*---------------------------------------------------------------------*
982     Paired-Single assembler version
983  *---------------------------------------------------------------------*
984                 Note that this performs NO error checking.
985  *---------------------------------------------------------------------*/
986 #ifdef  GEKKO
PSMTX44Scale(register Mtx44 m,register f32 xS,register f32 yS,register f32 zS)987 void PSMTX44Scale (
988     register Mtx44 m,
989     register f32 xS,
990     register f32 yS,
991     register f32 zS
992 )
993 {
994     register f32 c_zero = 0.0F;
995     register f32 c_one  = 1.0F;
996 
997     asm
998     {
999         stfs        xS,      0(m);
1000         psq_st      c_zero,  4(m), 0, 0;        // m01,m02
1001         psq_st      c_zero, 12(m), 0, 0;        // m03,m10
1002         stfs        yS,     20(m);              // m11
1003         psq_st      c_zero, 24(m), 0, 0;        // m12,m13
1004         psq_st      c_zero, 32(m), 0, 0;        // m20,m21
1005         stfs        zS,     40(m);              // m22
1006         psq_st      c_zero, 44(m), 0, 0;        // m23,m30
1007         psq_st      c_zero, 52(m), 0, 0;        // m31,m32
1008         stfs        c_one,  60(m);              // m33
1009     }
1010 }
1011 #endif  // GEKKO
1012 
1013 /*---------------------------------------------------------------------*
1014 Name:           MTX44ScaleApply
1015 
1016 Description:    This function performs the operation equivalent to
1017                 MTXScale + MTXConcat
1018 
1019 Arguments:      src:       	matrix to be operated.
1020                 dst:       	resultant matrix from concat.
1021                 xS:        	x scale factor.
1022                 yS:        	y scale factor.
1023                 zS:        	z scale factor.
1024 
1025 Return:         none
1026 *---------------------------------------------------------------------*/
1027 /*---------------------------------------------------------------------*
1028     C version
1029  *---------------------------------------------------------------------*/
C_MTX44ScaleApply(const Mtx44 src,Mtx44 dst,f32 xS,f32 yS,f32 zS)1030 void C_MTX44ScaleApply ( const Mtx44 src, Mtx44 dst, f32 xS, f32 yS, f32 zS )
1031 {
1032     ASSERTMSG( (src != 0), MTX44_SCALEAPPLY_1 );
1033     ASSERTMSG( (dst != 0), MTX44_SCALEAPPLY_2 );
1034 
1035     dst[0][0] = src[0][0] * xS;     dst[0][1] = src[0][1] * xS;
1036     dst[0][2] = src[0][2] * xS;     dst[0][3] = src[0][3] * xS;
1037 
1038     dst[1][0] = src[1][0] * yS;     dst[1][1] = src[1][1] * yS;
1039     dst[1][2] = src[1][2] * yS;     dst[1][3] = src[1][3] * yS;
1040 
1041     dst[2][0] = src[2][0] * zS;     dst[2][1] = src[2][1] * zS;
1042     dst[2][2] = src[2][2] * zS;     dst[2][3] = src[2][3] * zS;
1043 
1044     dst[3][0] = src[3][0] ; dst[3][1] = src[3][1];
1045     dst[3][2] = src[3][2] ; dst[3][3] = src[3][3];
1046 }
1047 
1048 /*---------------------------------------------------------------------*
1049     Paired-Single assembler version
1050  *---------------------------------------------------------------------*
1051                 Note that this performs NO error checking.
1052  *---------------------------------------------------------------------*/
1053 #ifdef  GEKKO
PSMTX44ScaleApply(const register Mtx44 src,register Mtx44 dst,register f32 xS,register f32 yS,register f32 zS)1054 asm void  PSMTX44ScaleApply (
1055     const register Mtx44 src,
1056           register Mtx44 dst,
1057           register f32 xS,
1058           register f32 yS,
1059           register f32 zS
1060 )
1061 {
1062     nofralloc;
1063     psq_l       fp4,     0(src), 0, 0;          // fp4 <- src00,src01
1064     frsp        xS, xS;                         // to make sure xS = single precision
1065     psq_l       fp5,     8(src), 0, 0;          // fp5 <- src02,src03
1066     frsp        yS, yS;                         // to make sure yS = single precision
1067     psq_l       fp6,    16(src), 0, 0;          // fp6 <- src10,src11
1068     ps_muls0    fp4,    fp4, xS;                // fp4 <- src00*xS,src01*xS
1069     psq_l       fp7,    24(src), 0, 0;          // fp7 <- src12,src13
1070     ps_muls0    fp5,    fp5, xS;                // fp5 <- src02*xS,src03*xS
1071     psq_l       fp8,    32(src), 0, 0;          // fp8 <- src20,src21
1072     frsp        zS, zS;                         // to make sure zS = single precision
1073     psq_st      fp4,     0(dst), 0, 0;          // dst00,dst01
1074     ps_muls0    fp6,    fp6, yS;                // fp6 <- src10*yS,src11*yS
1075     psq_l       fp9,    40(src), 0, 0;          // fp9 <- src22,src23
1076     psq_st      fp5,     8(dst), 0, 0;          // dst02,dst03
1077     ps_muls0    fp7,    fp7, yS;                // fp7 <- src12*yS,src13*yS
1078     psq_l       fp10,   48(src), 0, 0;          // fp10 <- src30src31
1079     psq_st      fp6,    16(dst), 0, 0;          // dst10,dst11
1080     ps_muls0    fp8,    fp8, zS;                // fp8 <- src20*zS,src21*zS
1081     psq_l       fp11,   56(src), 0, 0;          // fp11 <- src32,src33
1082     psq_st      fp7,    24(dst), 0, 0;          // dst12,dst13
1083     ps_muls0    fp9,    fp9, zS;                // fp9 <- src22*zS,src23*zS
1084     psq_st      fp8,    32(dst), 0, 0;          // dst20,dst21
1085     psq_st      fp9,    40(dst), 0, 0;          // dst22,dst23
1086     psq_st      fp10,   48(dst), 0, 0;          // dst30,dst31
1087     psq_st      fp11,   56(dst), 0, 0;          // dst32,dst33
1088     blr;
1089 }
1090 #endif  // GEKKO
1091 
1092 /*---------------------------------------------------------------------*
1093 Name:           MTX44RotRad
1094 
1095 Description:    sets a rotation matrix about one of the X, Y or Z axes
1096 
1097 Arguments:      m:       	matrix to be set
1098                 axis:    	major axis about which to rotate.
1099                         axis is passed in as a character.
1100                         it must be one of 'X', 'x', 'Y', 'y', 'Z', 'z'
1101                 deg:     	rotation angle in radians.
1102                         note:  counter-clockwise rotation is positive.
1103 
1104 Return:         none
1105  *---------------------------------------------------------------------*/
1106 /*---------------------------------------------------------------------*
1107     C version
1108  *---------------------------------------------------------------------*/
C_MTX44RotRad(Mtx44 m,char axis,f32 rad)1109 void C_MTX44RotRad ( Mtx44 m, char axis, f32 rad )
1110 {
1111 
1112     f32 sinA, cosA;
1113 
1114     ASSERTMSG( (m != 0), MTX44_ROTRAD_1 );
1115 
1116     // verification of "axis" will occur in MTXRotTrig
1117 
1118     sinA = sinf(rad);
1119     cosA = cosf(rad);
1120 
1121     C_MTX44RotTrig( m, axis, sinA, cosA );
1122 }
1123 
1124 /*---------------------------------------------------------------------*
1125     Paired-Single assembler version
1126  *---------------------------------------------------------------------*
1127                 Note that this performs NO error checking.
1128  *---------------------------------------------------------------------*/
1129 #ifdef GEKKO
PSMTX44RotRad(Mtx44 m,char axis,f32 rad)1130 void PSMTX44RotRad ( Mtx44 m, char axis, f32 rad )
1131 {
1132     f32 sinA, cosA;
1133 
1134     sinA = sinf(rad);
1135     cosA = cosf(rad);
1136 
1137     PSMTX44RotTrig( m, axis, sinA, cosA );
1138 }
1139 #endif // GEKKO
1140 
1141 /*---------------------------------------------------------------------*
1142 Name:           MTX44RotTrig
1143 
1144 Arguments:      m:       	matrix to be set
1145                 axis:    	major axis about which to rotate.
1146                         axis is passed in as a character.
1147                         It must be one of 'X', 'x', 'Y', 'y', 'Z', 'z'
1148                 sinA:    	sine of rotation angle.
1149                 cosA:    	cosine of rotation angle.
1150                         note:  counter-clockwise rotation is positive.
1151 
1152 Return:         none
1153  *---------------------------------------------------------------------*/
1154 /*---------------------------------------------------------------------*
1155     C version
1156  *---------------------------------------------------------------------*/
C_MTX44RotTrig(Mtx44 m,char axis,f32 sinA,f32 cosA)1157 void C_MTX44RotTrig ( Mtx44 m, char axis, f32 sinA, f32 cosA )
1158 {
1159     ASSERTMSG( (m != 0), MTX44_ROTTRIG_1 );
1160 
1161     axis |= 0x20;
1162     switch(axis)
1163     {
1164 
1165     case 'x':
1166         m[0][0] =  1.0f;  m[0][1] =  0.0f;    m[0][2] =  0.0f;  m[0][3] = 0.0f;
1167         m[1][0] =  0.0f;  m[1][1] =  cosA;    m[1][2] = -sinA;  m[1][3] = 0.0f;
1168         m[2][0] =  0.0f;  m[2][1] =  sinA;    m[2][2] =  cosA;  m[2][3] = 0.0f;
1169         m[3][0] =  0.0f;  m[3][1] =  0.0f;    m[3][2] =  0.0f;  m[3][3] = 1.0f;
1170         break;
1171 
1172     case 'y':
1173         m[0][0] =  cosA;  m[0][1] =  0.0f;    m[0][2] =  sinA;  m[0][3] = 0.0f;
1174         m[1][0] =  0.0f;  m[1][1] =  1.0f;    m[1][2] =  0.0f;  m[1][3] = 0.0f;
1175         m[2][0] = -sinA;  m[2][1] =  0.0f;    m[2][2] =  cosA;  m[2][3] = 0.0f;
1176         m[3][0] =  0.0f;  m[3][1] =  0.0f;    m[3][2] =  0.0f;  m[3][3] = 1.0f;
1177         break;
1178 
1179     case 'z':
1180         m[0][0] =  cosA;  m[0][1] = -sinA;    m[0][2] =  0.0f;  m[0][3] = 0.0f;
1181         m[1][0] =  sinA;  m[1][1] =  cosA;    m[1][2] =  0.0f;  m[1][3] = 0.0f;
1182         m[2][0] =  0.0f;  m[2][1] =  0.0f;    m[2][2] =  1.0f;  m[2][3] = 0.0f;
1183         m[3][0] =  0.0f;  m[3][1] =  0.0f;    m[3][2] =  0.0f;  m[3][3] = 1.0f;
1184         break;
1185 
1186     default:
1187         ASSERTMSG( 0, MTX44_ROTTRIG_2 );
1188         break;
1189     }
1190 }
1191 
1192 /*---------------------------------------------------------------------*
1193     Paired-Single assembler version
1194  *---------------------------------------------------------------------*
1195                 Note that this performs NO error checking.
1196  *---------------------------------------------------------------------*/
1197 #ifdef  GEKKO
PSMTX44RotTrig(register Mtx44 m,register char axis,register f32 sinA,register f32 cosA)1198 void PSMTX44RotTrig(
1199     register Mtx44  m,
1200     register char   axis,
1201     register f32    sinA,
1202     register f32    cosA
1203 )
1204 {
1205     register f32 ftmp0, ftmp1, ftmp2, ftmp3, ftmp4;
1206     register f32 c_zero, c_one;
1207 
1208     c_zero = 0.0F;
1209     c_one  = 1.0F;
1210 
1211     asm
1212     {
1213         frsp        sinA, sinA      // to make sure sinA = single precision
1214 
1215         // always lower case
1216         ori         axis, axis, 0x20
1217 
1218         frsp        cosA, cosA      // to make sure cosA = single precision
1219 
1220         // branches
1221         cmplwi      axis, 'x';                  // if 'x'
1222         beq         _case_x;
1223         cmplwi      axis, 'y';                  // if 'y'
1224         beq         _case_y;
1225         cmplwi      axis, 'z';                  // if 'z'
1226         beq         _case_z;
1227         b           _end;
1228 
1229     _case_x:
1230         psq_st      c_one,   0(m), 1, 0;        // m00 <= 1.0
1231         psq_st      c_zero,  4(m), 0, 0;        // m01,m02 <= 0.0,0.0
1232         ps_neg      ftmp0, sinA;                // ftmp0 <= -sinA
1233         psq_st      c_zero, 12(m), 0, 0;        // m03,m10 <= 0.0,0.0
1234         ps_merge00  ftmp1, sinA, cosA;          // ftmp1 <= sinA,cosA
1235         psq_st      c_zero, 28(m), 0, 0;        // m13,m20 <= 0.0,0.0
1236         ps_merge00  ftmp0, cosA, ftmp0;         // ftmp0 <= cosA,-sinA
1237         psq_st      c_zero, 44(m), 0, 0;        // m23,m30 <= 0.0,0.0
1238         psq_st      c_zero, 52(m), 0, 0;        // m23,m30 <= 0.0,0.0
1239         psq_st      ftmp1,  36(m), 0, 0;        // m21,m22 <= sinA,cosA
1240         psq_st      ftmp0,  20(m), 0, 0;        // m11,m12 <= cosA,-sinA
1241         psq_st      c_one,  60(m), 1, 0;        // m33 <= 0.0
1242         b           _end;
1243 
1244     _case_y:
1245         ps_merge00  ftmp1, cosA, c_zero;        // ftmp1 <= cosA,0.0
1246         psq_st      c_zero, 48(m), 0, 0;        // m30,m31 <= 0.0,0.0
1247         ps_neg      ftmp0, sinA;                // ftmp0 <= -sinA
1248         psq_st      c_zero, 24(m), 0, 0;        // m12,m13 <= 0.0,0.0
1249         ps_merge00  ftmp3, c_zero, c_one;       // ftmp3 <= 0.0,1.0
1250         psq_st      ftmp1,   0(m), 0, 0;        // m00,m01 <= cosA,0.0
1251         ps_merge00  ftmp4, ftmp0, c_zero;       // ftmp4 <= -sinA,0.0
1252         ps_merge00  ftmp2, sinA,  c_zero;       // ftmp2 <= sinA,0.0
1253         psq_st      ftmp3,  16(m), 0, 0;        // m10,m11 <= 0.0,1.0
1254         psq_st      ftmp2,   8(m), 0, 0;        // m02,m03 <= sinA,0.0
1255         psq_st      ftmp4,  32(m), 0, 0;        // m20,m21 <= -sinA,0.0
1256         psq_st      ftmp1,  40(m), 0, 0;        // m22,m23 <= cosA,0.0
1257         psq_st      ftmp3,  56(m), 0, 0;        // m32,m33 <= 0.0,1.0
1258         b           _end;
1259 
1260     _case_z:
1261         psq_st      c_zero,  8(m), 0, 0;        // m02,m03 <= 0.0,0.0
1262         ps_neg      ftmp0, sinA;                // ftmp0 <= -sinA
1263         psq_st      c_zero, 24(m), 0, 0;        // m12,m13 <= 0.0,0.0
1264         ps_merge00  ftmp1, sinA, cosA;          // ftmp1 <= sinA,cosA
1265         psq_st      c_zero, 32(m), 0, 0;        // m20,m21 <= 0.0,0.0
1266         ps_merge00  ftmp2, c_one, c_zero;       // ftmp2 <= 1.0,0.0
1267         psq_st      c_zero, 48(m), 0, 0;        // m30,m31 <= 0.0,0.0
1268         ps_merge00  ftmp3, c_zero, c_one;       // ftmp2 <= 0.0,1.0
1269         psq_st      ftmp1,  16(m), 0, 0;        // m10,m11 <= sinA,cosA
1270         ps_merge00  ftmp4, cosA, ftmp0;         // ftmp4 <= cosA, -sinA
1271         psq_st      ftmp2,  40(m), 0, 0;        // m22,m23 <= 1.0,0.0
1272         psq_st      ftmp3,  56(m), 0, 0;        // m32,m33 <= 0.0,1.0
1273         psq_st      ftmp4,   0(m), 0, 0;        // m00,m00 <= cosA,-sinA
1274 
1275     _end:
1276 
1277     }
1278 }
1279 #endif  // GEKKO
1280 
1281 /*---------------------------------------------------------------------*
1282 Name:           C_MTX44RotAxisRad
1283  *---------------------------------------------------------------------*/
1284 /*---------------------------------------------------------------------*
1285     C version
1286  *---------------------------------------------------------------------*/
C_MTX44RotAxisRad(Mtx44 m,const Vec * axis,f32 rad)1287 void C_MTX44RotAxisRad( Mtx44 m, const Vec *axis, f32 rad )
1288 {
1289     Vec vN;
1290     f32 s, c;             // sinTheta, cosTheta
1291     f32 t;                // ( 1 - cosTheta )
1292     f32 x, y, z;          // x, y, z components of normalized axis
1293     f32 xSq, ySq, zSq;    // x, y, z squared
1294 
1295 
1296     ASSERTMSG( (m    != 0), MTX44_ROTAXIS_1  );
1297     ASSERTMSG( (axis != 0), MTX44_ROTAXIS_2  );
1298 
1299     s = sinf(rad);
1300     c = cosf(rad);
1301     t = 1.0f - c;
1302 
1303     C_VECNormalize( axis, &vN );
1304 
1305     x = vN.x;
1306     y = vN.y;
1307     z = vN.z;
1308 
1309     xSq = x * x;
1310     ySq = y * y;
1311     zSq = z * z;
1312 
1313     m[0][0] = ( t * xSq )   + ( c );
1314     m[0][1] = ( t * x * y ) - ( s * z );
1315     m[0][2] = ( t * x * z ) + ( s * y );
1316     m[0][3] =    0.0f;
1317 
1318     m[1][0] = ( t * x * y ) + ( s * z );
1319     m[1][1] = ( t * ySq )   + ( c );
1320     m[1][2] = ( t * y * z ) - ( s * x );
1321     m[1][3] =    0.0f;
1322 
1323     m[2][0] = ( t * x * z ) - ( s * y );
1324     m[2][1] = ( t * y * z ) + ( s * x );
1325     m[2][2] = ( t * zSq )   + ( c );
1326     m[2][3] =    0.0f;
1327 
1328     m[3][0] = 0.0f;
1329     m[3][1] = 0.0f;
1330     m[3][2] = 0.0f;
1331     m[3][3] = 1.0f;
1332 
1333 }
1334 
1335 /*---------------------------------------------------------------------*
1336     Paired-Single assembler version
1337  *---------------------------------------------------------------------*
1338                 Note that this performs NO error checking.
1339  *---------------------------------------------------------------------*/
1340 #ifdef GEKKO
1341 
__PSMTX44RotAxisRadInternal(register Mtx44 m,const register Vec * axis,register f32 sT,register f32 cT)1342 static void __PSMTX44RotAxisRadInternal(
1343           register Mtx44  m,
1344     const register Vec   *axis,
1345           register f32    sT,
1346           register f32    cT )
1347 {
1348     register f32    tT, fc0;
1349     register f32    tmp0, tmp1, tmp2, tmp3, tmp4;
1350     register f32    tmp5, tmp6, tmp7, tmp8, tmp9;
1351 
1352     tmp9 = 0.5F;
1353     tmp8 = 3.0F;
1354 
1355     asm
1356     {
1357         // to make sure cT = (single precision float value)
1358         frsp        cT, cT
1359         // tmp0 = [x][y] : LOAD
1360         psq_l       tmp0, 0(axis), 0, 0
1361         // to make sure sT = (single precision float value)
1362         frsp        sT, sT
1363         // tmp1 = [z][z] : LOAD
1364         lfs         tmp1, 8(axis)
1365 
1366         // tmp2 = [x*x][y*y]
1367         ps_mul      tmp2, tmp0, tmp0
1368         // tmp7 = [1.0F]
1369         fadds       tmp7, tmp9, tmp9
1370         // tmp3 = [x*x+z*z][y*y+z*z]
1371         ps_madd     tmp3, tmp1, tmp1, tmp2
1372         // fc0 = [0.0F]
1373         fsubs       fc0, tmp9, tmp9
1374         // tmp4 = [S = x*x+y*y+z*z][z]
1375         ps_sum0     tmp4, tmp3, tmp1, tmp2
1376 
1377         // tT = 1.0F - cT
1378         fsubs       tT, tmp7, cT
1379 
1380         // tmp5 = [1.0/sqrt(S)] :estimation[E]
1381         frsqrte     tmp5, tmp4
1382         // tmp7 = [0][1]
1383         ps_merge00  tmp7, fc0, tmp7
1384         // Newton-Rapson refinement step
1385         // E' = E/2(3.0 - E*E*S)
1386         fmuls       tmp2, tmp5, tmp5            // E*E
1387         fmuls       tmp3, tmp5, tmp9            // E/2
1388             // fc0 [m30=0][m31=0] : STORE
1389             psq_st      fc0, 48(m), 0, 0
1390         fnmsubs     tmp2, tmp2, tmp4, tmp8      // (3-E*E*S)
1391         fmuls       tmp5, tmp2, tmp3            // (E/2)(3-E*E*S)
1392             // tmp7 [m32=0][m33=1] : STORE
1393             psq_st      tmp7, 56(m), 0, 0
1394         // cT = [c][c]
1395         ps_merge00  cT, cT, cT
1396 
1397         // tmp0 = [nx = x/sqrt(S)][ny = y/sqrt(S)]
1398         ps_muls0    tmp0, tmp0, tmp5
1399         // tmp1 = [nz = z/sqrt(S)][nz = z/sqrt(S)]
1400         ps_muls0    tmp1, tmp1, tmp5
1401         // tmp4 = [t*nx][t*ny]
1402         ps_muls0    tmp4, tmp0, tT
1403         // tmp9 = [s*nx][s*ny]
1404         ps_muls0    tmp9, tmp0, sT
1405         // tmp5 = [t*nz][t*nz]
1406         ps_muls0    tmp5, tmp1, tT
1407         // tmp3 = [t*nx*ny][t*ny*ny]
1408         ps_muls1    tmp3, tmp4, tmp0
1409         // tmp2 = [t*nx*nx][t*ny*nx]
1410         ps_muls0    tmp2, tmp4, tmp0
1411         // tmp4 = [t*nx*nz][t*ny*nz]
1412         ps_muls0    tmp4, tmp4, tmp1
1413 
1414         // tmp6 = [t*nx*ny-s*nz][t*nx*ny-s*nz]
1415         fnmsubs     tmp6, tmp1, sT, tmp3
1416         // tmp7 = [t*nx*ny+s*nz][t*ny*ny+s*nz]
1417         fmadds      tmp7, tmp1, sT, tmp3
1418 
1419         // tmp0 = [-s*nx][-s*ny]
1420         ps_neg      tmp0, tmp9
1421         // tmp8 = [t*nx*nz+s*ny][0] == [m02][m03]
1422         ps_sum0     tmp8, tmp4, fc0, tmp9
1423         // tmp2 = [t*nx*nx+c][t*nx*ny-s*nz] == [m00][m01]
1424         ps_sum0     tmp2, tmp2, tmp6, cT
1425         // tmp3 = [t*nx*ny+s*nz][t*ny*ny+c] == [m10][m11]
1426         ps_sum1     tmp3, cT, tmp7, tmp3
1427         // tmp6 = [t*ny*nz-s*nx][0] == [m12][m13]
1428         ps_sum0     tmp6, tmp0, fc0 ,tmp4
1429 
1430             // tmp8 [m02][m03] : STORE
1431             psq_st      tmp8, 8(m), 0, 0
1432         // tmp0 = [t*nx*nz-s*ny][t*ny*nz]
1433         ps_sum0     tmp0, tmp4, tmp4, tmp0
1434             // tmp2 [m00][m01] : STORE
1435             psq_st      tmp2, 0(m), 0, 0
1436         // tmp5 = [t*nz*nz][t*nz*nz]
1437         ps_muls0    tmp5, tmp5, tmp1
1438             // tmp3 [m10][m11] : STORE
1439             psq_st      tmp3, 16(m), 0, 0
1440         // tmp4 = [t*nx*nz-s*ny][t*ny*nz+s*nx] == [m20][m21]
1441         ps_sum1     tmp4, tmp9, tmp0, tmp4
1442             // tmp6 [m12][m13] : STORE
1443             psq_st      tmp6, 24(m), 0, 0
1444         // tmp5 = [t*nz*nz+c][0]   == [m22][m23]
1445         ps_sum0     tmp5, tmp5, fc0, cT
1446             // tmp4 [m20][m21] : STORE
1447             psq_st      tmp4, 32(m), 0, 0
1448             // tmp5 [m22][m23] : STORE
1449             psq_st      tmp5, 40(m), 0, 0
1450     }
1451 }
1452 
PSMTX44RotAxisRad(Mtx44 m,const Vec * axis,f32 rad)1453 void PSMTX44RotAxisRad(
1454     Mtx44           m,
1455     const Vec      *axis,
1456     f32             rad )
1457 {
1458     f32     sinT, cosT;
1459 
1460     sinT = sinf(rad);
1461     cosT = cosf(rad);
1462 
1463     __PSMTX44RotAxisRadInternal(m, axis, sinT, cosT);
1464 }
1465 
1466 #endif // GEKKO
1467 
1468 #if ( __MWERKS__ == 0x00004100 )
1469 #pragma defer_codegen reset
1470 #endif
1471 
1472 /*===========================================================================*/
1473