1 /*---------------------------------------------------------------------------*
2   Project: matrix vector Library
3   File:    mtx44.c
4 
5   Copyright 1998-2011 Nintendo.  All rights reserved.
6 
7   These coded instructions, statements, and computer programs contain
8   proprietary information of Nintendo of America Inc. and/or Nintendo
9   Company Ltd., and are protected by Federal copyright law.     They may
10   not be disclosed to third parties or copied or duplicated in any form,
11   in whole or in part, without the prior written consent of Nintendo.
12 
13  *---------------------------------------------------------------------------*/
14 
15 #include <math.h>
16 #include <stdio.h>
17 #include <cafe/mtx.h>
18 #include <cafe/mtx/mtx44.h>
19 #include "mtxAssert.h"
20 #include "mtx44Assert.h"
21 
22 /*---------------------------------------------------------------------*
23     Constants
24  *---------------------------------------------------------------------*/
25 static const f32x2 c00 = {0.0F, 0.0F};
26 static const f32x2 c01 = {0.0F, 1.0F};
27 static const f32x2 c10 = {1.0F, 0.0F};
28 static const f32x2 c11 = {1.0F, 1.0F};
29 //static const f32x2 c22 = {2.0F, 2.0F};
30 static const f32x2 c33 = {3.0F, 3.0F};
31 static const f32x2 c0505 = {0.5F, 0.5F};
32 
33 /*---------------------------------------------------------------------*
34 
35 
36                              PROJECTION SECTION
37 
38 
39  *---------------------------------------------------------------------*/
40 
41 /*---------------------------------------------------------------------*
42 
43 Name:           MTXFrustum
44 
45 Description:    compute a 4x4 perspective projection matrix from a
46                 specified view volume.
47 
48 
49 Arguments:      m        4x4 matrix to be set
50 
51                 t        top coord. of view volume at the near clipping plane
52 
53                 b        bottom coord of view volume at the near clipping plane
54 
55                 lf       left coord. of view volume at near clipping plane
56 
57                 r        right coord. of view volume at near clipping plane
58 
59                 n        positive distance from camera to near clipping plane
60 
61                 f        positive distance from camera to far clipping plane
62 
63 
64 Return:         none
65 
66  *---------------------------------------------------------------------*/
67 /*---------------------------------------------------------------------*
68     C version
69  *---------------------------------------------------------------------*/
void C_MTXFrustum ( Mtx44 m, f32 t, f32 b, f32 lf, f32 r, f32 n, f32 f )
{
    f32 invWidth;     // 1 / (r - lf)
    f32 invHeight;    // 1 / (t - b)
    f32 invDepth;     // 1 / (f - n)

    // debug-build argument checks: every reciprocal below needs a
    // non-degenerate extent
    ASSERTMSG( (m != 0),  MTX_FRUSTUM_1 );
    ASSERTMSG( (t != b),  MTX_FRUSTUM_2 );
    ASSERTMSG( (lf != r), MTX_FRUSTUM_3 );
    ASSERTMSG( (n != f),  MTX_FRUSTUM_4 );

    invWidth  = 1.0f / (r - lf);
    invHeight = 1.0f / (t - b);
    invDepth  = 1.0f / (f - n);

    // row 0: x scale and horizontal off-centre term
    m[0][0] = (2*n) * invWidth;
    m[0][1] = 0.0f;
    m[0][2] = (r + lf) * invWidth;
    m[0][3] = 0.0f;

    // row 1: y scale and vertical off-centre term
    m[1][0] = 0.0f;
    m[1][1] = (2*n) * invHeight;
    m[1][2] = (t + b) * invHeight;
    m[1][3] = 0.0f;

    // row 2: z is scaled to the (-w, w) range
    // (different than Wii's -w...0 range)
    m[2][0] = 0.0f;
    m[2][1] = 0.0f;
    m[2][2] = -(f + n) * invDepth;
    m[2][3] = -(2*f*n) * invDepth;

    // row 3: w = -z (perspective divide)
    m[3][0] = 0.0f;
    m[3][1] = 0.0f;
    m[3][2] = -1.0f;
    m[3][3] = 0.0f;
}
105 
106 /*---------------------------------------------------------------------*
107 
108 Name:           MTXPerspective
109 
110 Description:    compute a 4x4 perspective projection matrix from
111                 field of view and aspect ratio.
112 
113 
114 Arguments:      m       4x4 matrix to be set
115 
                fovY    total field of view in degrees in the YZ plane
117 
118                 aspect  ratio of view window width:height (X / Y)
119 
120                 n       positive distance from camera to near clipping plane
121 
122                 f       positive distance from camera to far clipping plane
123 
124 
125 Return:         none
126 
127  *---------------------------------------------------------------------*/
128 /*---------------------------------------------------------------------*
129     C version
130  *---------------------------------------------------------------------*/
void C_MTXPerspective ( Mtx44 m, f32 fovY, f32 aspect, f32 n, f32 f )
{
    f32 angle;    // half of fovY, first in degrees, then in radians
    f32 cot;      // cotangent of the half angle
    f32 tmp;      // 1 / (f - n)

    // debug-build argument checks
    ASSERTMSG( (m != 0),                             MTX_PERSPECTIVE_1    );
    ASSERTMSG( ( (fovY > 0.0) && ( fovY < 180.0) ),  MTX_PERSPECTIVE_2    );
    ASSERTMSG( (aspect != 0),                        MTX_PERSPECTIVE_3    );
    // n == f would make the 1 / (f - n) term below a division by zero;
    // C_MTXFrustum and C_MTXOrtho assert the same condition.
    ASSERTMSG( (n != f), "MTXPerspective():  'n' and 'f' clipping planes are equal " );

    // find the cotangent of half the (YZ) field of view
    angle = fovY * 0.5f;
    angle = MTXDegToRad( angle );

    cot = 1.0f / tanf(angle);

    // row 0: x scale (y scale divided by the aspect ratio)
    m[0][0] =  cot / aspect;
    m[0][1] =  0.0f;
    m[0][2] =  0.0f;
    m[0][3] =  0.0f;

    // row 1: y scale
    m[1][0] =  0.0f;
    m[1][1] =   cot;
    m[1][2] =  0.0f;
    m[1][3] =  0.0f;

    m[2][0] =  0.0f;
    m[2][1] =  0.0f;

    tmp     = 1.0f / (f - n);

    // scale z to (-w, +w) range (different than Wii's -w...0 range)
    m[2][2] = -(f + n) * tmp;
    m[2][3] = -(2*f*n) * tmp;

    // row 3: w = -z (perspective divide)
    m[3][0] =  0.0f;
    m[3][1] =  0.0f;
    m[3][2] = -1.0f;
    m[3][3] =  0.0f;
}
171 
172 /*---------------------------------------------------------------------*
173 
174 Name:           MTXOrtho
175 
176 Description:    compute a 4x4 orthographic projection matrix.
177 
178 
179 Arguments:      m        4x4 matrix to be set
180 
181                 t        top coord. of parallel view volume
182 
183                 b        bottom coord of parallel view volume
184 
185                 lf       left coord. of parallel view volume
186 
187                 r        right coord. of parallel view volume
188 
189                 n        positive distance from camera to near clipping plane
190 
191                 f        positive distance from camera to far clipping plane
192 
193 
194 Return:         none
195 
196  *---------------------------------------------------------------------*/
197 /*---------------------------------------------------------------------*
198     C version
199  *---------------------------------------------------------------------*/
void C_MTXOrtho ( Mtx44 m, f32 t, f32 b, f32 lf, f32 r, f32 n, f32 f )
{
    f32 invWidth;     // 1 / (r - lf)
    f32 invHeight;    // 1 / (t - b)
    f32 invDepth;     // 1 / (f - n)

    // debug-build argument checks: every reciprocal below needs a
    // non-degenerate extent
    ASSERTMSG( (m != 0),  MTX_ORTHO_1 );
    ASSERTMSG( (t != b),  MTX_ORTHO_2 );
    ASSERTMSG( (lf != r), MTX_ORTHO_3 );
    ASSERTMSG( (n != f),  MTX_ORTHO_4 );

    invWidth  = 1.0f / (r - lf);
    invHeight = 1.0f / (t - b);
    invDepth  = 1.0f / (f - n);

    // row 0: x scale and x translation
    m[0][0] = 2.0f * invWidth;
    m[0][1] = 0.0f;
    m[0][2] = 0.0f;
    m[0][3] = -(r + lf) * invWidth;

    // row 1: y scale and y translation
    m[1][0] = 0.0f;
    m[1][1] = 2.0f * invHeight;
    m[1][2] = 0.0f;
    m[1][3] = -(t + b) * invHeight;

    // row 2: z is scaled to the (-1, 1) range
    // (different than Wii's -1...0 range)
    m[2][0] = 0.0f;
    m[2][1] = 0.0f;
    m[2][2] = -2.0f * invDepth;
    m[2][3] = -(f + n) * invDepth;

    // row 3: w stays 1 (no perspective divide)
    m[3][0] = 0.0f;
    m[3][1] = 0.0f;
    m[3][2] = 0.0f;
    m[3][3] = 1.0f;
}
235 
236 /*---------------------------------------------------------------------*
237 
238 
239                              GENERAL SECTION
240 
241 
242  *---------------------------------------------------------------------*/
243 
244 /* NOTE: Prototypes for these functions are defined in "mtx44ext.h".   */
245 
246 /*---------------------------------------------------------------------*
247 Name:           MTX44Identity
248 
249 Description:    sets a matrix to identity
250 
251 Arguments:      m :  matrix to be set
252 
253 Return:         none
254 
255  *---------------------------------------------------------------------*/
256 /*---------------------------------------------------------------------*
257     C version
258  *---------------------------------------------------------------------*/
void C_MTX44Identity( Mtx44 m )
{
    int row;
    int col;

    ASSERTMSG( (m != 0), MTX44_IDENTITY_1 );

    // ones on the main diagonal, zeros everywhere else
    for ( row = 0 ; row < 4 ; ++row )
    {
        for ( col = 0 ; col < 4 ; ++col )
        {
            m[row][col] = ( row == col ) ? 1.0f : 0.0f;
        }
    }
}
268 
269 #if !defined(WIN32) && !defined(WIN64)
270 /*---------------------------------------------------------------------*
271     Paired-Single assembler version
272  *---------------------------------------------------------------------*
273                 Note that this performs NO error checking.
274  *---------------------------------------------------------------------*/
275 
PSMTX44Identity(register Mtx44 m)276 void PSMTX44Identity( register Mtx44 m )
277 {
278     __PSQ_ST(m, c10, 0, 0);
279     __PSQ_STX(m,  8, c00, 0, 0);
280     __PSQ_STX(m, 16, c01, 0, 0);
281     __PSQ_STX(m, 24, c00, 0, 0);
282     __PSQ_STX(m, 32, c00, 0, 0);
283     __PSQ_STX(m, 40, c10, 0, 0);
284     __PSQ_STX(m, 48, c00, 0, 0);
285     __PSQ_STX(m, 56, c01, 0, 0);
286 }
287 #endif
288 
289 /*---------------------------------------------------------------------*
290 Name:           MTX44Copy
291 
292 Description:    copies the contents of one matrix into another
293 
294 Arguments:      src        source matrix for copy
295                 dst        destination matrix for copy
296 
297 
298 Return:         none
299  *---------------------------------------------------------------------*/
300 /*---------------------------------------------------------------------*
301     C version
302  *---------------------------------------------------------------------*/
void C_MTX44Copy( MTX_CONST Mtx44 src, Mtx44 dst )
{
    int row;
    int col;

    ASSERTMSG( (src != 0) , MTX44_COPY_1 );
    ASSERTMSG( (dst != 0) , MTX44_COPY_2 );

    // element-wise copy; skipped entirely when both arguments are the
    // same matrix
    if( src != dst )
    {
        for ( row = 0 ; row < 4 ; ++row )
        {
            for ( col = 0 ; col < 4 ; ++col )
            {
                dst[row][col] = src[row][col];
            }
        }
    }
}
318 
319 #if !defined(WIN32) && !defined(WIN64)
320 /*---------------------------------------------------------------------*
321     Paired-Single assembler version
322  *---------------------------------------------------------------------*
323                 Note that this performs NO error checking.
324  *---------------------------------------------------------------------*/
void PSMTX44Copy( MTX_CONST Mtx44 src, Mtx44 dst )
{
    // one pair of floats in flight at a time: load 8 bytes from src,
    // store them at the same byte offset in dst
    f32x2 pair;

    // row 0
    pair = __PSQ_L(src, 0, 0);
    __PSQ_ST(dst, pair, 0, 0);
    pair = __PSQ_LX(src, 8, 0, 0);
    __PSQ_STX(dst, 8, pair, 0, 0);

    // row 1
    pair = __PSQ_LX(src, 16, 0, 0);
    __PSQ_STX(dst, 16, pair, 0, 0);
    pair = __PSQ_LX(src, 24, 0, 0);
    __PSQ_STX(dst, 24, pair, 0, 0);

    // row 2
    pair = __PSQ_LX(src, 32, 0, 0);
    __PSQ_STX(dst, 32, pair, 0, 0);
    pair = __PSQ_LX(src, 40, 0, 0);
    __PSQ_STX(dst, 40, pair, 0, 0);

    // row 3
    pair = __PSQ_LX(src, 48, 0, 0);
    __PSQ_STX(dst, 48, pair, 0, 0);
    pair = __PSQ_LX(src, 56, 0, 0);
    __PSQ_STX(dst, 56, pair, 0, 0);
}
377 #endif
378 
379 /*---------------------------------------------------------------------*
380 Name:           MTX44Concat
381 
382 Description:    concatenates two matrices.
383                 order of operation is A x B = AB.
                ab may alias a and/or b.

                when ab is neither a nor b the result is written directly
                into ab, which saves an MTX44Copy operation.
387 
388 Arguments:      a        first matrix for concat.
389                 b        second matrix for concat.
390                 ab       resultant matrix from concat.
391 
392 Return:         none
393  *---------------------------------------------------------------------*/
394 /*---------------------------------------------------------------------*
395     C version
396  *---------------------------------------------------------------------*/
void C_MTX44Concat( MTX_CONST Mtx44 a, MTX_CONST Mtx44 b, Mtx44 ab )
{
    Mtx44       scratch;    // holds the product when ab aliases an input
    Mtx44Ptr    out;        // where the product is actually written
    int         row;
    int         col;

    ASSERTMSG( (a  != 0), MTX44_CONCAT_1 );
    ASSERTMSG( (b  != 0), MTX44_CONCAT_2 );
    ASSERTMSG( (ab != 0), MTX44_CONCAT_3 );

    // writing straight into ab would clobber an operand that is still
    // being read, so go through the scratch matrix in that case
    out = ( (ab == a) || (ab == b) ) ? scratch : ab;

    // out = a x b : element (row, col) is row 'row' of a dotted with
    // column 'col' of b
    for ( row = 0 ; row < 4 ; ++row )
    {
        for ( col = 0 ; col < 4 ; ++col )
        {
            out[row][col] = a[row][0]*b[0][col] + a[row][1]*b[1][col] + a[row][2]*b[2][col] + a[row][3]*b[3][col];
        }
    }

    // move the product into place if it was built in the scratch matrix
    if( out == scratch )
    {
        C_MTX44Copy( *((MTX_CONST Mtx44 *)&scratch), ab );
    }
}
443 
444 
445 #if !defined(WIN32) && !defined(WIN64)
446 /*---------------------------------------------------------------------*
447     Paired-Single assembler version
448  *---------------------------------------------------------------------*
449                 Note that this performs NO error checking.
450  *---------------------------------------------------------------------*/
451 
void PSMTX44Concat( MTX_CONST Mtx44 a, MTX_CONST Mtx44 b, Mtx44 ab )
{
    // Paired-single 4x4 multiply, ab = a x b.
    //
    // The product is built two columns at a time: first columns 0-1 of
    // every row (rNLo), then columns 2-3 of every row (rNHi).
    //
    //   aLo / aHi : elements 0-1 / 2-3 of the row of 'a' being processed
    //   b0 .. b3  : the current column pair taken from rows 0..3 of 'b'
    //
    // Lane notes (x and y are pairs):
    //   __PS_MULS0 (x, y)    -> x * y[0]
    //   __PS_MADDS0(x, y, z) -> x * y[0] + z
    //   __PS_MADDS1(x, y, z) -> x * y[1] + z
    //
    // STATEMENT ORDER MATTERS: every load of 'b' happens before the first
    // store to 'ab', and each 8-byte pair of 'a' is loaded for the last
    // time before the store to the same offset of 'ab'.  Do not reorder
    // loads and stores.
    f32x2 aLo, aHi;
    f32x2 b0, b1, b2, b3;
    f32x2 r0Lo, r1Lo, r2Lo, r3Lo;
    f32x2 r0Hi, r1Hi, r2Hi, r3Hi;

    // ---- columns 0-1, row 0 ----
    aLo  = __PSQ_L(a, 0, 0);                // a00,a01
    b0   = __PSQ_L(b, 0, 0);                // b00,b01
    r0Lo = __PS_MULS0(b0, aLo);             // b00a00, b01a00
    b1   = __PSQ_LX(b, 16, 0, 0);           // b10,b11
    b2   = __PSQ_LX(b, 32, 0, 0);           // b20,b21
    r0Lo = __PS_MADDS1(b1, aLo, r0Lo);      // += b10a01, b11a01
    aHi  = __PSQ_LX(a, 8, 0, 0);            // a02,a03
    b3   = __PSQ_LX(b, 48, 0, 0);           // b30,b31
    r0Lo = __PS_MADDS0(b2, aHi, r0Lo);      // += b20a02, b21a02
    aLo  = __PSQ_LX(a, 16, 0, 0);           // a10,a11
    r0Lo = __PS_MADDS1(b3, aHi, r0Lo);      // += b30a03, b31a03

    // ---- columns 0-1, row 1 ----
    aHi  = __PSQ_LX(a, 24, 0, 0);           // a12,a13
    r1Lo = __PS_MULS0(b0, aLo);             // b00a10, b01a10
    r1Lo = __PS_MADDS1(b1, aLo, r1Lo);      // += b10a11, b11a11
    aLo  = __PSQ_LX(a, 32, 0, 0);           // a20,a21
    r1Lo = __PS_MADDS0(b2, aHi, r1Lo);      // += b20a12, b21a12
    r1Lo = __PS_MADDS1(b3, aHi, r1Lo);      // += b30a13, b31a13

    // ---- columns 0-1, row 2 ----
    aHi  = __PSQ_LX(a, 40, 0, 0);           // a22,a23
    r2Lo = __PS_MULS0(b0, aLo);             // b00a20, b01a20
    r2Lo = __PS_MADDS1(b1, aLo, r2Lo);      // += b10a21, b11a21
    aLo  = __PSQ_LX(a, 48, 0, 0);           // a30,a31
    r2Lo = __PS_MADDS0(b2, aHi, r2Lo);      // += b20a22, b21a22
    r2Lo = __PS_MADDS1(b3, aHi, r2Lo);      // += b30a23, b31a23

    // ---- columns 0-1, row 3 (column pair 2-3 of b is loaded as the
    //      registers holding pair 0-1 become free) ----
    aHi  = __PSQ_LX(a, 56, 0, 0);           // a32,a33
    r3Lo = __PS_MULS0(b0, aLo);             // b00a30, b01a30
    b0   = __PSQ_LX(b, 8, 0, 0);            // b02,b03
    r3Lo = __PS_MADDS1(b1, aLo, r3Lo);      // += b10a31, b11a31
    aLo  = __PSQ_LX(a, 0, 0, 0);            // a00,a01
    r3Lo = __PS_MADDS0(b2, aHi, r3Lo);      // += b20a32, b21a32
    b1   = __PSQ_LX(b, 24, 0, 0);           // b12,b13
    r3Lo = __PS_MADDS1(b3, aHi, r3Lo);      // += b30a33, b31a33

    // ---- columns 2-3, row 0 ----
    aHi  = __PSQ_LX(a, 8, 0, 0);            // a02,a03
    r0Hi = __PS_MULS0(b0, aLo);             // b02a00, b03a00
    b2   = __PSQ_LX(b, 40, 0, 0);           // b22,b23
    r0Hi = __PS_MADDS1(b1, aLo, r0Hi);      // += b12a01, b13a01
    b3   = __PSQ_LX(b, 56, 0, 0);           // b32,b33 (last read of b)
    r0Hi = __PS_MADDS0(b2, aHi, r0Hi);      // += b22a02, b23a02
    aLo  = __PSQ_LX(a, 16, 0, 0);           // a10,a11
    r0Hi = __PS_MADDS1(b3, aHi, r0Hi);      // += b32a03, b33a03

    // ---- columns 2-3, row 1 (finished 0-1 pairs are stored as we go) ----
    aHi  = __PSQ_LX(a, 24, 0, 0);           // a12,a13
    r1Hi = __PS_MULS0(b0, aLo);             // b02a10, b03a10
    __PSQ_STX(ab, 0, r0Lo, 0, 0);           // ab00,ab01
    r1Hi = __PS_MADDS1(b1, aLo, r1Hi);      // += b12a11, b13a11
    aLo  = __PSQ_LX(a, 32, 0, 0);           // a20,a21
    r1Hi = __PS_MADDS0(b2, aHi, r1Hi);      // += b22a12, b23a12
    __PSQ_STX(ab, 16, r1Lo, 0, 0);          // ab10,ab11
    r1Hi = __PS_MADDS1(b3, aHi, r1Hi);      // += b32a13, b33a13

    // ---- columns 2-3, row 2 ----
    aHi  = __PSQ_LX(a, 40, 0, 0);           // a22,a23
    r2Hi = __PS_MULS0(b0, aLo);             // b02a20, b03a20
    __PSQ_STX(ab, 32, r2Lo, 0, 0);          // ab20,ab21
    r2Hi = __PS_MADDS1(b1, aLo, r2Hi);      // += b12a21, b13a21
    aLo  = __PSQ_LX(a, 48, 0, 0);           // a30,a31
    r2Hi = __PS_MADDS0(b2, aHi, r2Hi);      // += b22a22, b23a22
    __PSQ_STX(ab, 48, r3Lo, 0, 0);          // ab30,ab31
    r2Hi = __PS_MADDS1(b3, aHi, r2Hi);      // += b32a23, b33a23

    // ---- columns 2-3, row 3 (remaining pairs are stored) ----
    aHi  = __PSQ_LX(a, 56, 0, 0);           // a32,a33 (last read of a)
    r3Hi = __PS_MULS0(b0, aLo);             // b02a30, b03a30
    __PSQ_STX(ab, 8, r0Hi, 0, 0);           // ab02,ab03
    r3Hi = __PS_MADDS1(b1, aLo, r3Hi);      // += b12a31, b13a31
    __PSQ_STX(ab, 24, r1Hi, 0, 0);          // ab12,ab13
    r3Hi = __PS_MADDS0(b2, aHi, r3Hi);      // += b22a32, b23a32
    __PSQ_STX(ab, 40, r2Hi, 0, 0);          // ab22,ab23
    r3Hi = __PS_MADDS1(b3, aHi, r3Hi);      // += b32a33, b33a33
    __PSQ_STX(ab, 56, r3Hi, 0, 0);          // ab32,ab33
}
728 #endif
729 
730 
731 /*---------------------------------------------------------------------*
732 Name:           MTX44Transpose
733 
734 Description:    computes the transpose of a matrix.
735 
736 Arguments:      src       source matrix.
737                 xPose     destination (transposed) matrix.
738                           ok if src == xPose.
739 
740 Return:         none
741  *---------------------------------------------------------------------*/
742 /*---------------------------------------------------------------------*
743     C version
744  *---------------------------------------------------------------------*/
void C_MTX44Transpose ( MTX_CONST Mtx44 src, Mtx44 xPose )
{
    Mtx44       scratch;    // holds the result for an in-place transpose
    Mtx44Ptr    out;        // where the transposed elements are written
    int         row;
    int         col;

    ASSERTMSG( (src   != 0), MTX44_TRANSPOSE_1  );
    ASSERTMSG( (xPose != 0), MTX44_TRANSPOSE_2  );

    // an in-place transpose would overwrite elements that are still to
    // be read, so build the result in the scratch matrix in that case
    out = ( src == xPose ) ? scratch : xPose;

    // out(row, col) = src(col, row)
    for ( row = 0 ; row < 4 ; ++row )
    {
        for ( col = 0 ; col < 4 ; ++col )
        {
            out[row][col] = src[col][row];
        }
    }

    // copy back if needed
    if( out == scratch )
    {
        C_MTX44Copy( *((MTX_CONST Mtx44 *)&scratch), xPose );
    }
}
773 
774 #if !defined(WIN32) && !defined(WIN64)
775 /*---------------------------------------------------------------------*
776     Paired-Single assembler version
777  *---------------------------------------------------------------------*
778                 Note that this performs NO error checking.
779  *---------------------------------------------------------------------*/
void PSMTX44Transpose ( MTX_CONST Mtx44 src, Mtx44 xPose )
{
    // The matrix is handled as four 2x2 blocks.  For each block the two
    // source pairs (one from an even row, one from the odd row below it)
    // are merged so that their first elements form one output pair and
    // their second elements form the next one.
    //
    //   evenLo / oddLo : elements 0-1 of source row 0 or 2 / row 1 or 3
    //   evenHi / oddHi : elements 2-3 of source row 0 or 2 / row 1 or 3
    //   firsts         : __PS_MERGE00 result (first element of each input)
    //   seconds        : __PS_MERGE11 result (second element of each input)
    //
    // STATEMENT ORDER MATTERS: each 8-byte pair of src is loaded before
    // the store to the same offset of xPose, which is what allows
    // src == xPose.  Do not reorder loads and stores.
    f32x2 evenLo, oddLo, evenHi, oddHi;
    f32x2 firsts, seconds;

    evenLo  = __PSQ_L(src, 0, 0);               // s00,s01
    oddLo   = __PSQ_LX(src, 16, 0, 0);          // s10,s11
    firsts  = __PS_MERGE00(evenLo, oddLo);      // s00,s10
    evenHi  = __PSQ_LX(src, 8, 0, 0);           // s02,s03
    __PSQ_ST(xPose, firsts, 0, 0);              // t00,t01
    seconds = __PS_MERGE11(evenLo, oddLo);      // s01,s11
    oddHi   = __PSQ_LX(src, 24, 0, 0);          // s12,s13
    __PSQ_STX(xPose, 16, seconds, 0, 0);        // t10,t11

    firsts  = __PS_MERGE00(evenHi, oddHi);      // s02,s12
    evenLo  = __PSQ_LX(src, 32, 0, 0);          // s20,s21
    __PSQ_STX(xPose, 32, firsts, 0, 0);         // t20,t21
    seconds = __PS_MERGE11(evenHi, oddHi);      // s03,s13
    oddLo   = __PSQ_LX(src, 48, 0, 0);          // s30,s31
    __PSQ_STX(xPose, 48, seconds, 0, 0);        // t30,t31

    firsts  = __PS_MERGE00(evenLo, oddLo);      // s20,s30
    evenHi  = __PSQ_LX(src, 40, 0, 0);          // s22,s23
    __PSQ_STX(xPose, 8, firsts, 0, 0);          // t02,t03
    seconds = __PS_MERGE11(evenLo, oddLo);      // s21,s31
    oddHi   = __PSQ_LX(src, 56, 0, 0);          // s32,s33
    __PSQ_STX(xPose, 24, seconds, 0, 0);        // t12,t13

    firsts  = __PS_MERGE00(evenHi, oddHi);      // s22,s32
    __PSQ_STX(xPose, 40, firsts, 0, 0);         // t22,t23
    seconds = __PS_MERGE11(evenHi, oddHi);      // s23,s33
    __PSQ_STX(xPose, 56, seconds, 0, 0);        // t32,t33
}
856 #endif
857 
858 /*---------------------------------------------------------------------*
859 Name:           MTX44Inverse
860 
861 Description:    computes a fast inverse of a matrix.
862                 uses Gauss-Jordan(with partial pivoting)
863 
864 Arguments:      src       source matrix.
865                 inv       destination (inverse) matrix.
866                           ok if src == inv.
867 
868 Return:         0 if src is not invertible.
869                 1 on success.
870  *---------------------------------------------------------------------*/
871 /*---------------------------------------------------------------------*
872     C version only
873  *---------------------------------------------------------------------*/
874 #define NUM         4
875 #define SWAPF(a,b)  { f32 tmp; tmp = (a); (a) = (b); (b)=tmp; }
876 
C_MTX44Inverse(MTX_CONST Mtx44 src,Mtx44 inv)877 u32 C_MTX44Inverse( MTX_CONST Mtx44 src, Mtx44 inv )
878 {
879     Mtx44       gjm;
880     s32         i, j, k;
881     f32         w;
882 
883     ASSERTMSG( (src != 0), MTX44_INVERSE_1 );
884     ASSERTMSG( (inv != 0), MTX44_INVERSE_2 );
885 
886     C_MTX44Copy(src, gjm);
887     C_MTX44Identity(inv);
888 
889     for ( i = 0 ; i < NUM ; ++i )
890     {
891         f32 max = 0.0f;
892         s32 swp = i;
893 
894         // ---- partial pivoting -----
895         for( k = i ; k < NUM ; k++ )
896         {
897             f32 ftmp;
898             ftmp = fabsf(gjm[k][i]);
899             if ( ftmp > max )
900             {
901                 max = ftmp;
902                 swp = k;
903             }
904         }
905 
906         // check singular matrix
907         //(or can't solve inverse matrix with this algorithm)
908         if ( max == 0.0f )
909         {
910             return 0;
911         }
912 
913         // swap row
914         if( swp != i )
915         {
916             for ( k = 0 ; k < NUM ; k++ )
917             {
918                 SWAPF(gjm[i][k], gjm[swp][k]);
919                 SWAPF(inv[i][k], inv[swp][k]);
920             }
921         }
922 
923         // ---- pivoting end ----
924 
925         w = 1.0F / gjm[i][i];
926         for ( j = 0 ; j < NUM ; ++j )
927         {
928             gjm[i][j] *= w;
929             inv[i][j] *= w;
930         }
931 
932         for ( k = 0 ; k < NUM ; ++k )
933         {
934             if ( k == i )
935                 continue;
936 
937             w = gjm[k][i];
938             for ( j = 0 ; j < NUM ; ++j )
939             {
940                 gjm[k][j] -= gjm[i][j] * w;
941                 inv[k][j] -= inv[i][j] * w;
942             }
943         }
944     }
945 
946     return 1;
947 }
948 
949 #undef SWAPF
950 #undef NUM
951 
952 /*---------------------------------------------------------------------*
953 
954 
955                              MODEL SECTION
956 
957 
958  *---------------------------------------------------------------------*/
959 
960 /* NOTE: Prototypes for these functions are defined in "mtx44ext.h".   */
961 
962 /*---------------------------------------------------------------------*
963 Name:           MTX44Trans
964 
965 Description:    sets a translation matrix.
966 
967 Arguments:       m        matrix to be set
968                 xT        x component of translation.
969                 yT        y component of translation.
970                 zT        z component of translation.
971 
972 Return:         none
973  *---------------------------------------------------------------------*/
974 /*---------------------------------------------------------------------*
975     C version
976  *---------------------------------------------------------------------*/
/*
 * Fills 'm' with a translation matrix: identity in the first three
 * columns, (xT, yT, zT, 1) in the fourth.
 */
void C_MTX44Trans ( Mtx44 m, f32 xT, f32 yT, f32 zT )
{
    int r, c;

    ASSERTMSG( (m != 0), MTX44_TRANS_1 );

    // Columns 0..2 are those of the identity matrix.
    for ( r = 0 ; r < 4 ; ++r )
    {
        for ( c = 0 ; c < 3 ; ++c )
        {
            m[r][c] = ( r == c ) ? 1.0f : 0.0f;
        }
    }

    // Column 3 carries the translation.
    m[0][3] = xT;
    m[1][3] = yT;
    m[2][3] = zT;
    m[3][3] = 1.0f;
}
986 
987 #if !defined(WIN32) && !defined(WIN64)
988 /*---------------------------------------------------------------------*
989     Paired-Single assembler version
990  *---------------------------------------------------------------------*
991                 Note that this performs NO error checking.
992  *---------------------------------------------------------------------*/
/*
 * Paired-single build of the translation matrix.  The matrix is written
 * as eight two-float stores, one per half row, at byte offsets 0..56.
 */
void PSMTX44Trans( Mtx44 m, f32 xT, f32 yT, f32 zT )
{
    // Right-hand halves of rows 0..2; lane 1 is the translation column.
    f32x2 row0Hi = {0.0F, xT};      // [m02 m03]
    f32x2 row1Hi = {0.0F, yT};      // [m12 m13]
    f32x2 row2Hi = {1.0F, zT};      // [m22 m23]

    // row 0
    __PSQ_ST (m,     c10,    0, 0);     // m00,m01 <= 1.0,0.0
    __PSQ_STX(m,  8, row0Hi, 0, 0);     // m02,m03 <= 0.0,xT

    // row 1
    __PSQ_STX(m, 16, c01,    0, 0);     // m10,m11 <= 0.0,1.0
    __PSQ_STX(m, 24, row1Hi, 0, 0);     // m12,m13 <= 0.0,yT

    // row 2
    __PSQ_STX(m, 32, c00,    0, 0);     // m20,m21 <= 0.0,0.0
    __PSQ_STX(m, 40, row2Hi, 0, 0);     // m22,m23 <= 1.0,zT

    // row 3
    __PSQ_STX(m, 48, c00,    0, 0);     // m30,m31 <= 0.0,0.0
    __PSQ_STX(m, 56, c01,    0, 0);     // m32,m33 <= 0.0,1.0
}
1007 #endif
1008 
1009 /*---------------------------------------------------------------------*
1010 Name:           MTX44TransApply
1011 
1012 Description:    This function performs the operation equivalent to
1013                 MTXTrans + MTXConcat.
1014 
1015 Arguments:      src       matrix to be operated.
1016                 dst       resultant matrix from concat.
1017                 xT        x component of translation.
1018                 yT        y component of translation.
1019                 zT        z component of translation.
1020 
1021 Return:         none
1022  *---------------------------------------------------------------------*/
1023 /*---------------------------------------------------------------------*
1024     C version
1025  *---------------------------------------------------------------------*/
/*
 * Copies 'src' to 'dst' and adds (xT, yT, zT) to the fourth-column
 * entries of rows 0..2.  Row 3 is copied unchanged.  'src' and 'dst' may
 * be the same matrix, in which case only the three translation entries
 * are written.
 */
void C_MTX44TransApply ( MTX_CONST Mtx44 src, Mtx44 dst, f32 xT, f32 yT, f32 zT )
{
    ASSERTMSG( (src != 0), MTX44_TRANSAPPLY_1 );
    // NOTE(review): the dst check reuses message id _1; confirm whether a
    // dedicated id was intended.
    ASSERTMSG( (dst != 0), MTX44_TRANSAPPLY_1 );

    if ( src != dst )
    {
        int r, c;

        // Everything except the translation entries is a straight copy.
        for ( r = 0 ; r < 4 ; ++r )
        {
            for ( c = 0 ; c < 3 ; ++c )
            {
                dst[r][c] = src[r][c];
            }
        }
        dst[3][3] = src[3][3];
    }

    // Offset the translation column of rows 0..2.
    dst[0][3] = src[0][3] + xT;
    dst[1][3] = src[1][3] + yT;
    dst[2][3] = src[2][3] + zT;
}
1044 
1045 #if !defined(WIN32) && !defined(WIN64)
1046 /*---------------------------------------------------------------------*
1047     Paired-Single assembler version
1048  *---------------------------------------------------------------------*
1049                 Note that this performs NO error checking.
1050  *---------------------------------------------------------------------*/
/*
 * Paired-single version of the translate-apply operation.
 *
 * The matrix is handled as eight half rows of two floats.  Seven of them
 * are copied as they are; in the right-hand halves of rows 0..2 the
 * second lane (m03, m13, m23) receives the translation component.
 *
 * Every half row is loaded from 'src' before the same half row of 'dst'
 * is stored, so calling this with src == dst is safe.
 */
void PSMTX44TransApply ( MTX_CONST Mtx44 src, Mtx44 dst, f32 xT, f32 yT, f32 zT )
{
    f32x2 row0Lo, row0Hi;
    f32x2 row1Lo, row1Hi;
    f32x2 row2Lo, row2Hi;
    f32x2 row3Lo, row3Hi;
    // Translation components sit in lane 0, which is the lane sum1 adds.
    f32x2 addX = {xT, 0.0F};
    f32x2 addY = {yT, 0.0F};
    f32x2 addZ = {zT, 0.0F};

    // Rows 0 and 1 come in first.
    row0Lo = __PSQ_L (src,     0, 0);
    row0Hi = __PSQ_LX(src,  8, 0, 0);
    row1Lo = __PSQ_LX(src, 16, 0, 0);
    row1Hi = __PSQ_LX(src, 24, 0, 0);

    __PSQ_ST(dst, row0Lo, 0, 0);                    // m00,m01 copied

    row0Hi = __PS_SUM1(addX, row0Hi, row0Hi);       // m02 kept, m03 + xT
    row2Hi = __PSQ_LX(src, 40, 0, 0);

    __PSQ_STX(dst, 16, row1Lo, 0, 0);               // m10,m11 copied

    row1Hi = __PS_SUM1(addY, row1Hi, row1Hi);       // m12 kept, m13 + yT
    row2Lo = __PSQ_LX(src, 32, 0, 0);

    __PSQ_STX(dst,  8, row0Hi, 0, 0);

    row2Hi = __PS_SUM1(addZ, row2Hi, row2Hi);       // m22 kept, m23 + zT

    __PSQ_STX(dst, 24, row1Hi, 0, 0);
    __PSQ_STX(dst, 32, row2Lo, 0, 0);               // m20,m21 copied

    // Row 3 is a plain copy.
    row3Lo = __PSQ_LX(src, 48, 0, 0);
    row3Hi = __PSQ_LX(src, 56, 0, 0);

    __PSQ_STX(dst, 40, row2Hi, 0, 0);
    __PSQ_STX(dst, 48, row3Lo, 0, 0);
    __PSQ_STX(dst, 56, row3Hi, 0, 0);
}
1118 #endif
1119 
1120 /*---------------------------------------------------------------------*
1121 Name:            MTX44Scale
1122 
1123 Description:     sets a scaling matrix.
1124 
1125 Arguments:       m        matrix to be set
1126                 xS        x scale factor.
1127                 yS        y scale factor.
1128                 zS        z scale factor.
1129 
1130 Return:         none
1131  *---------------------------------------------------------------------*/
1132 /*---------------------------------------------------------------------*
1133     C version
1134  *---------------------------------------------------------------------*/
/*
 * Fills 'm' with a scaling matrix: diagonal (xS, yS, zS, 1), zero
 * everywhere else.
 */
void C_MTX44Scale ( Mtx44 m, f32 xS, f32 yS, f32 zS )
{
    f32 diag[4];
    int r, c;

    ASSERTMSG( (m != 0), MTX44_SCALE_1 );

    diag[0] = xS;
    diag[1] = yS;
    diag[2] = zS;
    diag[3] = 1.0f;

    for ( r = 0 ; r < 4 ; ++r )
    {
        for ( c = 0 ; c < 4 ; ++c )
        {
            m[r][c] = ( r == c ) ? diag[r] : 0.0f;
        }
    }
}
1144 
1145 
1146 #if !defined(WIN32) && !defined(WIN64)
1147 /*---------------------------------------------------------------------*
1148     Paired-Single assembler version
1149  *---------------------------------------------------------------------*
1150                 Note that this performs NO error checking.
1151  *---------------------------------------------------------------------*/
/*
 * Paired-single build of the scaling matrix, written as eight two-float
 * stores (one per half row, byte offsets 0..56).
 */
void PSMTX44Scale( Mtx44 m, f32 xS, f32 yS, f32 zS )
{
    // The three half rows that carry a scale factor.
    f32x2 diag0 = {xS,   0.0F};     // [m00 m01]
    f32x2 diag1 = {0.0F, yS};       // [m10 m11]
    f32x2 diag2 = {zS,   0.0F};     // [m22 m23]

    // row 0
    __PSQ_ST (m,     diag0, 0, 0);      // m00,m01 <= xS,0.0
    __PSQ_STX(m,  8, c00,   0, 0);      // m02,m03 <= 0.0,0.0

    // row 1
    __PSQ_STX(m, 16, diag1, 0, 0);      // m10,m11 <= 0.0,yS
    __PSQ_STX(m, 24, c00,   0, 0);      // m12,m13 <= 0.0,0.0

    // row 2
    __PSQ_STX(m, 32, c00,   0, 0);      // m20,m21 <= 0.0,0.0
    __PSQ_STX(m, 40, diag2, 0, 0);      // m22,m23 <= zS,0.0

    // row 3
    __PSQ_STX(m, 48, c00,   0, 0);      // m30,m31 <= 0.0,0.0
    __PSQ_STX(m, 56, c01,   0, 0);      // m32,m33 <= 0.0,1.0
}
1167 #endif
1168 
1169 /*---------------------------------------------------------------------*
1170 Name:           MTX44ScaleApply
1171 
1172 Description:    This function performs the operation equivalent to
1173                 MTXScale + MTXConcat
1174 
1175 Arguments:      src       matrix to be operated.
1176                 dst       resultant matrix from concat.
1177                 xS        x scale factor.
1178                 yS        y scale factor.
1179                 zS        z scale factor.
1180 
1181 Return:         none
1182 *---------------------------------------------------------------------*/
1183 /*---------------------------------------------------------------------*
1184     C version
1185  *---------------------------------------------------------------------*/
/*
 * Writes to 'dst' a copy of 'src' whose rows 0, 1 and 2 are multiplied by
 * xS, yS and zS respectively.  Row 3 is copied unchanged.  Each element of
 * 'dst' depends only on the same element of 'src'.
 */
void C_MTX44ScaleApply ( MTX_CONST Mtx44 src, Mtx44 dst, f32 xS, f32 yS, f32 zS )
{
    f32 rowScale[3];
    int r, c;

    ASSERTMSG( (src != 0), MTX44_SCALEAPPLY_1 );
    ASSERTMSG( (dst != 0), MTX44_SCALEAPPLY_2 );

    rowScale[0] = xS;
    rowScale[1] = yS;
    rowScale[2] = zS;

    // Rows 0..2: one scale factor per row.
    for ( r = 0 ; r < 3 ; ++r )
    {
        for ( c = 0 ; c < 4 ; ++c )
        {
            dst[r][c] = src[r][c] * rowScale[r];
        }
    }

    // Row 3 passes through untouched.
    for ( c = 0 ; c < 4 ; ++c )
    {
        dst[3][c] = src[3][c];
    }
}
1203 
1204 #if !defined(WIN32) && !defined(WIN64)
1205 /*---------------------------------------------------------------------*
1206     Paired-Single assembler version
1207  *---------------------------------------------------------------------*
1208                 Note that this performs NO error checking.
1209  *---------------------------------------------------------------------*/
1210 
/*
 * Paired-single version of the scale-apply operation.
 *
 * The matrix is processed as eight half rows of two floats.  The halves
 * of rows 0, 1 and 2 are multiplied by xS, yS and zS; row 3 is copied.
 *
 * Every half row is loaded from 'src' before the same half row of 'dst'
 * is stored, so calling this with src == dst is safe.
 */
void PSMTX44ScaleApply ( MTX_CONST Mtx44 src, Mtx44 dst, f32 xS, f32 yS, f32 zS )
{
    f32x2 row0Lo, row0Hi;
    f32x2 row1Lo, row1Hi;
    f32x2 row2Lo, row2Hi;
    f32x2 row3Lo, row3Hi;
    // muls0 multiplies both lanes by lane 0 of its second operand.
    f32x2 scaleX = {xS, xS};
    f32x2 scaleY = {yS, yS};
    f32x2 scaleZ = {zS, zS};

    // Pull in rows 0 and 1 and the left half of row 2.
    row0Lo = __PSQ_L (src,     0, 0);               // src00,src01
    row0Hi = __PSQ_LX(src,  8, 0, 0);               // src02,src03
    row1Lo = __PSQ_LX(src, 16, 0, 0);               // src10,src11
    row1Hi = __PSQ_LX(src, 24, 0, 0);               // src12,src13
    row2Lo = __PSQ_LX(src, 32, 0, 0);               // src20,src21

    // Row 0 scaled by xS.
    row0Lo = __PS_MULS0(row0Lo, scaleX);
    __PSQ_ST(dst, row0Lo, 0, 0);                    // dst00,dst01

    row2Hi = __PSQ_LX(src, 40, 0, 0);               // src22,src23

    row0Hi = __PS_MULS0(row0Hi, scaleX);
    __PSQ_STX(dst,  8, row0Hi, 0, 0);               // dst02,dst03

    row3Lo = __PSQ_LX(src, 48, 0, 0);               // src30,src31

    // Row 1 scaled by yS.
    row1Lo = __PS_MULS0(row1Lo, scaleY);
    __PSQ_STX(dst, 16, row1Lo, 0, 0);               // dst10,dst11

    row3Hi = __PSQ_LX(src, 56, 0, 0);               // src32,src33

    row1Hi = __PS_MULS0(row1Hi, scaleY);
    __PSQ_STX(dst, 24, row1Hi, 0, 0);               // dst12,dst13

    // Row 2 scaled by zS.
    row2Lo = __PS_MULS0(row2Lo, scaleZ);
    __PSQ_STX(dst, 32, row2Lo, 0, 0);               // dst20,dst21

    row2Hi = __PS_MULS0(row2Hi, scaleZ);
    __PSQ_STX(dst, 40, row2Hi, 0, 0);               // dst22,dst23

    // Row 3 copied as loaded.
    __PSQ_STX(dst, 48, row3Lo, 0, 0);               // dst30,dst31
    __PSQ_STX(dst, 56, row3Hi, 0, 0);               // dst32,dst33
}
1319 #endif
1320 
1321 
1322 /*---------------------------------------------------------------------*
1323 Name:           MTX44RotRad
1324 
1325 Description:    sets a rotation matrix about one of the X, Y or Z axes
1326 
1327 Arguments:      m       matrix to be set
1328                 axis    major axis about which to rotate.
1329                         axis is passed in as a character.
1330                         it must be one of 'X', 'x', 'Y', 'y', 'Z', 'z'
                rad     rotation angle in radians.
1332                         note:  counter-clockwise rotation is positive.
1333 
1334 Return:         none
1335  *---------------------------------------------------------------------*/
1336 /*---------------------------------------------------------------------*
1337     C version
1338  *---------------------------------------------------------------------*/
/*
 * Sets 'm' to a rotation of 'rad' radians about the given major axis by
 * handing the sine and cosine of the angle to C_MTX44RotTrig.
 */
void C_MTX44RotRad ( Mtx44 m, char axis, f32 rad )
{
    ASSERTMSG( (m != 0), MTX44_ROTRAD_1 );

    // 'axis' is not checked here; C_MTX44RotTrig asserts on an
    // unrecognized axis character.
    C_MTX44RotTrig( m, axis, sinf(rad), cosf(rad) );
}
1352 
1353 #if !defined(WIN32) && !defined(WIN64)
1354 /*---------------------------------------------------------------------*
1355     Paired-Single assembler version
1356  *---------------------------------------------------------------------*
1357                 Note that this performs NO error checking.
1358  *---------------------------------------------------------------------*/
1359 
/*
 * Paired-single entry point: evaluates sin/cos of 'rad' and forwards them
 * to PSMTX44RotTrig.  'm' is not checked here.
 */
void PSMTX44RotRad ( Mtx44 m, char axis, f32 rad )
{
    PSMTX44RotTrig( m, axis, sinf(rad), cosf(rad) );
}
1369 #endif
1370 
1371 /*---------------------------------------------------------------------*
1372 Name:           MTX44RotTrig
1373 
1374 Arguments:      m       matrix to be set
1375                 axis    major axis about which to rotate.
1376                         axis is passed in as a character.
1377                         It must be one of 'X', 'x', 'Y', 'y', 'Z', 'z'
1378                 sinA    sine of rotation angle.
1379                 cosA    cosine of rotation angle.
1380                         note:  counter-clockwise rotation is positive.
1381 
1382 Return:         none
1383  *---------------------------------------------------------------------*/
1384 /*---------------------------------------------------------------------*
1385     C version
1386  *---------------------------------------------------------------------*/
/*
 * Sets 'm' to a rotation about the X, Y or Z axis given the sine and
 * cosine of the angle.  'axis' may be upper or lower case.  For any other
 * character the function asserts and leaves 'm' unmodified.
 */
void C_MTX44RotTrig ( Mtx44 m, char axis, f32 sinA, f32 cosA )
{
    int p = 0;      // first row/column of the 2x2 rotation sub-block
    int q = 0;      // second row/column of the 2x2 rotation sub-block
    int r, c;

    ASSERTMSG( (m != 0), MTX44_ROTTRIG_1 );

    // Setting bit 0x20 folds 'X','Y','Z' onto 'x','y','z'.
    switch ( axis | 0x20 )
    {
    case 'x':   p = 1;  q = 2;  break;
    case 'y':   p = 2;  q = 0;  break;
    case 'z':   p = 0;  q = 1;  break;

    default:
        ASSERTMSG( 0, MTX44_ROTTRIG_2 );
        return;
    }

    // Start from identity ...
    for ( r = 0 ; r < 4 ; ++r )
    {
        for ( c = 0 ; c < 4 ; ++c )
        {
            m[r][c] = ( r == c ) ? 1.0f : 0.0f;
        }
    }

    // ... then drop in the 2x2 rotation block for the chosen plane.
    m[p][p] =  cosA;    m[p][q] = -sinA;
    m[q][p] =  sinA;    m[q][q] =  cosA;
}
1421 
1422 #if !defined(WIN32) && !defined(WIN64)
1423 /*---------------------------------------------------------------------*
1424     Paired-Single assembler version
1425  *---------------------------------------------------------------------*
1426                 Note that this performs NO error checking.
1427  *---------------------------------------------------------------------*/
/*
 * Paired-single version of MTX44RotTrig.
 *
 * Sets 'm' to a rotation about the X, Y or Z axis ('axis' may be upper or
 * lower case) from the sine and cosine of the angle.  All sixteen elements
 * of 'm' are written for a valid axis.  For any other character the
 * function asserts and leaves 'm' unmodified.  'm' itself is not checked.
 *
 * The second argument of __PSQ_STX is a byte offset into 'm'; element
 * m[r][c] lives at offset 16*r + 4*c.
 */
void PSMTX44RotTrig( Mtx44  m, char axis, f32 sinA, f32 cosA )
{
    f32x2 ftmp0, ftmp1, ftmp4;
    f32x2 sinA10 = {sinA, 0.0F};
    f32x2 cosA10 = {cosA, 0.0F};

    switch(axis)
    {
    case 'x':
    case 'X':
        // The 'x' layout is written with stores that straddle rows; the
        // first and last element use single-float stores (W = 1).

        // m00 <= 1.0 (single float)
        __PSQ_ST(m, c11, 1, 0);

        // m01,m02 <= 0.0,0.0
        __PSQ_STX(m, 4, c00, 0, 0);

        // ftmp0 <= -sinA (lane 1 is discarded by the merge below)
        ftmp0 = __PS_NEG(sinA10);

        // m03,m10 <= 0.0,0.0
        __PSQ_STX(m, 12, c00, 0, 0);

        // ftmp1 <= sinA,cosA
        ftmp1 = __PS_MERGE00(sinA10, cosA10);

        // m13,m20 <= 0.0,0.0
        // This store must target offset 28.  It previously repeated
        // offset 12, which left m13 and m20 unwritten.
        __PSQ_STX(m, 28, c00, 0, 0);

        // ftmp0 <= cosA,-sinA
        ftmp0 = __PS_MERGE00(cosA10, ftmp0);

        // m23,m30 <= 0.0,0.0
        __PSQ_STX(m, 44, c00, 0, 0);

        // m31,m32 <= 0.0,0.0
        __PSQ_STX(m, 52, c00, 0, 0);

        // m21,m22 <= sinA,cosA
        __PSQ_STX(m, 36, ftmp1, 0, 0);

        // m11,m12 <= cosA,-sinA
        __PSQ_STX(m, 20, ftmp0, 0, 0);

        // m33 <= 1.0 (single float)
        __PSQ_STX(m, 60, c11, 1, 0);

        break;

    case 'y':
    case 'Y':

        // m30,m31 <= 0.0,0.0
        __PSQ_STX(m, 48, c00, 0, 0);

        // ftmp0 <= -sinA,0.0
        // Negating {sinA, 0.0} alone would leave -0.0 in lane 1 and thus
        // in m21; merging with c00 gives +0.0, matching the C version.
        ftmp0 = __PS_NEG(sinA10);
        ftmp0 = __PS_MERGE00(ftmp0, c00);

        // m12,m13 <= 0.0,0.0
        __PSQ_STX(m, 24, c00, 0, 0);

        // m00,m01 <= cosA,0.0
        __PSQ_ST(m, cosA10, 0, 0);

        // m10,m11 <= 0.0,1.0
        __PSQ_STX(m, 16, c01, 0, 0);

        // m02,m03 <= sinA,0.0
        __PSQ_STX(m, 8, sinA10, 0, 0);

        // m20,m21 <= -sinA,0.0
        __PSQ_STX(m, 32, ftmp0, 0, 0);

        // m22,m23 <= cosA,0.0
        __PSQ_STX(m, 40, cosA10, 0, 0);

        // m32,m33 <= 0.0,1.0
        __PSQ_STX(m, 56, c01, 0, 0);

        break;

    case 'z':
    case 'Z':
        // m02,m03 <= 0.0,0.0
        __PSQ_STX(m, 8, c00, 0, 0);

        // ftmp0 <= -sinA (lane 1 is discarded by the merge below)
        ftmp0 = __PS_NEG(sinA10);

        // m12,m13 <= 0.0,0.0
        __PSQ_STX(m, 24, c00, 0, 0);

        // ftmp1 <= sinA,cosA
        ftmp1 = __PS_MERGE00(sinA10, cosA10);

        // m20,m21 <= 0.0,0.0
        __PSQ_STX(m, 32, c00, 0, 0);

        // m30,m31 <= 0.0,0.0
        __PSQ_STX(m, 48, c00, 0, 0);

        // m10,m11 <= sinA,cosA
        __PSQ_STX(m, 16, ftmp1, 0, 0);

        // ftmp4 <= cosA,-sinA
        ftmp4 = __PS_MERGE00(cosA10, ftmp0);

        // m22,m23 <= 1.0,0.0
        __PSQ_STX(m, 40, c10, 0, 0);

        // m32,m33 <= 0.0,1.0
        __PSQ_STX(m, 56, c01, 0, 0);

        // m00,m01 <= cosA,-sinA
        __PSQ_ST(m, ftmp4, 0, 0);

        break;

    default:
        ASSERTMSG( 0, MTX44_ROTTRIG_2 );
        break;
    }
}
1550 #endif
1551 
1552 /*---------------------------------------------------------------------*
1553 Name:           C_MTX44RotAxisRad
1554  *---------------------------------------------------------------------*/
1555 /*---------------------------------------------------------------------*
1556     C version
1557  *---------------------------------------------------------------------*/
/*
 * Sets 'm' to a rotation of 'rad' radians about an arbitrary axis.
 *
 * The axis is normalized with C_VECNormalize before use, so the caller
 * does not need to pass a unit vector.  The upper-left 3x3 receives the
 * axis-angle rotation; the fourth row and column are those of identity.
 */
void C_MTX44RotAxisRad( Mtx44 m, const Vec *axis, f32 rad )
{
    Vec unit;                   // normalized copy of *axis
    f32 sinT, cosT;             // sine / cosine of the angle
    f32 omc;                    // 1 - cosine
    f32 nx, ny, nz;             // components of the unit axis
    f32 txy, txz, tyz;          // omc times pairwise axis products
    f32 sx, sy, sz;             // sine times axis components

    ASSERTMSG( (m    != 0), MTX44_ROTAXIS_1  );
    ASSERTMSG( (axis != 0), MTX44_ROTAXIS_2  );

    sinT = sinf(rad);
    cosT = cosf(rad);
    omc  = 1.0f - cosT;

    C_VECNormalize( axis, &unit );

    nx = unit.x;
    ny = unit.y;
    nz = unit.z;

    // Terms shared by the symmetric off-diagonal pairs.
    txy = omc * nx * ny;
    txz = omc * nx * nz;
    tyz = omc * ny * nz;

    sx = sinT * nx;
    sy = sinT * ny;
    sz = sinT * nz;

    // row 0
    m[0][0] = omc * ( nx * nx ) + cosT;
    m[0][1] = txy - sz;
    m[0][2] = txz + sy;
    m[0][3] = 0.0f;

    // row 1
    m[1][0] = txy + sz;
    m[1][1] = omc * ( ny * ny ) + cosT;
    m[1][2] = tyz - sx;
    m[1][3] = 0.0f;

    // row 2
    m[2][0] = txz - sy;
    m[2][1] = tyz + sx;
    m[2][2] = omc * ( nz * nz ) + cosT;
    m[2][3] = 0.0f;

    // row 3
    m[3][0] = 0.0f;
    m[3][1] = 0.0f;
    m[3][2] = 0.0f;
    m[3][3] = 1.0f;
}
1603 
1604 #if !defined(WIN32) && !defined(WIN64)
1605 /*---------------------------------------------------------------------*
1606     Paired-Single assembler version
1607  *---------------------------------------------------------------------*
1608                 Note that this performs NO error checking.
1609  *---------------------------------------------------------------------*/
1610 
/*
 * Paired-single core of the axis/angle rotation.
 *
 * Takes the sine (sT) and cosine (cT) of the angle and an axis that does
 * not have to be normalized: the axis is scaled by a reciprocal square
 * root estimate refined with one Newton-Raphson step.
 *
 * In the lane comments below, [a][b] means lane 0 = a, lane 1 = b;
 * n* are the normalized axis components, s = sT, c = cT, t = 1 - c.
 * Both reads of 'axis' happen before anything is written to 'm'.
 */
static void __PSMTX44RotAxisRadInternal(
    Mtx44  m,
    const Vec   *axis,
    f32    sT,
    f32    cT )
{
    f32x2    sinPair = {sT, sT};
    f32x2    cosPair = {cT, cT};
    f32x2    verPair;                               // [t][t]
    f32x2    axisXY, axisZZ;
    f32x2    sqXY, sqSum, lenSq;
    f32x2    est, estSq, halfEst, corr, invLen;
    f32x2    nXY, nZZ;
    f32x2    tXY, sXY, tZZ, negSXY;
    f32x2    tXYnx, tXYny, tXYnz, tZZnz;
    f32x2    lessSnz, plusSnz, mixed;
    f32x2    row0Lo, row0Hi, row1Lo, row1Hi;
    f32x2    row2Lo, row2Hi, row3Hi;

    // ---- load the axis ----
    axisXY = __PSQ_LX(axis,  0, 0, 0);              // [x][y]
    axisZZ[0] = axis->z;                            // [z][z]
    axisZZ[1] = axis->z;

    // ---- squared length S = x*x + y*y + z*z ----
    sqXY  = __PS_MUL(axisXY, axisXY);               // [x*x][y*y]
    sqSum = __PS_MADD(axisZZ, axisZZ, sqXY);        // [x*x+z*z][y*y+z*z]
    lenSq = __PS_SUM0(sqSum, axisZZ, sqXY);         // [S][z]

    verPair = __PS_SUB(c11, cosPair);               // [1-c][1-c]

    // ---- 1/sqrt(S): estimate E, then E' = (E/2) * (3 - E*E*S) ----
    est     = __PS_RSQRTE(lenSq);
    row3Hi  = __PS_MERGE00(c00, c11);               // [0][1] == [m32][m33]
    estSq   = __PS_MUL(est, est);                   // E*E
    halfEst = __PS_MUL(est, c0505);                 // E/2
    corr    = __PS_NMSUB(estSq, lenSq, c33);        // 3 - E*E*S
    invLen  = __PS_MUL(corr, halfEst);              // lane 0 is what is used

    // ---- normalized axis ----
    nXY = __PS_MULS0(axisXY, invLen);               // [nx][ny]
    nZZ = __PS_MULS0(axisZZ, invLen);               // [nz][nz]

    // ---- building blocks ----
    tXY = __PS_MULS0(nXY, verPair);                 // [t*nx][t*ny]
    sXY = __PS_MULS0(nXY, sinPair);                 // [s*nx][s*ny]
    tZZ = __PS_MULS0(nZZ, verPair);                 // [t*nz][t*nz]

    tXYny = __PS_MULS1(tXY, nXY);                   // [t*nx*ny][t*ny*ny]
    tXYnx = __PS_MULS0(tXY, nXY);                   // [t*nx*nx][t*ny*nx]
    tXYnz = __PS_MULS0(tXY, nZZ);                   // [t*nx*nz][t*ny*nz]

    lessSnz = __PS_NMSUB(nZZ, sinPair, tXYnx);      // [t*nx*nx-s*nz][t*nx*ny-s*nz]
    plusSnz = __PS_MADD(nZZ, sinPair, tXYny);       // [t*nx*ny+s*nz][t*ny*ny+s*nz]
    negSXY  = __PS_NEG(sXY);                        // [-s*nx][-s*ny]

    // ---- assemble the half rows ----
    row0Hi = __PS_SUM0(tXYnz, c00, sXY);            // [t*nx*nz+s*ny][0]           == [m02][m03]
    row0Lo = __PS_SUM0(tXYnx, lessSnz, cosPair);    // [t*nx*nx+c][t*nx*ny-s*nz]   == [m00][m01]
    row1Lo = __PS_SUM1(cosPair, plusSnz, tXYny);    // [t*nx*ny+s*nz][t*ny*ny+c]   == [m10][m11]
    row1Hi = __PS_SUM0(negSXY, c00, tXYnz);         // [t*ny*nz-s*nx][0]           == [m12][m13]
    mixed  = __PS_SUM0(tXYnz, tXYnz, negSXY);       // [t*nx*nz-s*ny][t*ny*nz]
    tZZnz  = __PS_MULS0(tZZ, nZZ);                  // [t*nz*nz][t*nz*nz]
    row2Lo = __PS_SUM1(sXY, mixed, tXYnz);          // [t*nx*nz-s*ny][t*ny*nz+s*nx] == [m20][m21]
    row2Hi = __PS_SUM0(tZZnz, c00, cosPair);        // [t*nz*nz+c][0]              == [m22][m23]

    // ---- write the matrix, one half row per store ----
    __PSQ_STX(m,  0, row0Lo, 0, 0);
    __PSQ_STX(m,  8, row0Hi, 0, 0);
    __PSQ_STX(m, 16, row1Lo, 0, 0);
    __PSQ_STX(m, 24, row1Hi, 0, 0);
    __PSQ_STX(m, 32, row2Lo, 0, 0);
    __PSQ_STX(m, 40, row2Hi, 0, 0);
    __PSQ_STX(m, 48, c00,    0, 0);                 // [m30][m31] <= 0,0
    __PSQ_STX(m, 56, row3Hi, 0, 0);                 // [m32][m33] <= 0,1

}
1806 
/*---------------------------------------------------------------------*
    Radian-angle front end for __PSMTX44RotAxisRadInternal.

    m       4x4 matrix, forwarded unchanged to the internal routine
    axis    axis vector, forwarded unchanged to the internal routine
    rad     angle in radians; only sinf(rad) and cosf(rad) are passed on
 *---------------------------------------------------------------------*/
void PSMTX44RotAxisRad( Mtx44 m, const Vec *axis, f32 rad )
{
    // Evaluate the trigonometric terms once, sine first, then cosine.
    const f32 sinOfAngle = sinf(rad);
    const f32 cosOfAngle = cosf(rad);

    __PSMTX44RotAxisRadInternal(m, axis, sinOfAngle, cosOfAngle);
}
1816 #endif
1817 
1818 
1819 /*---------------------------------------------------------------------------*
1820     MATRIX CONVERSION
1821  *---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------*
    Copy a 3x4 matrix into a 4x4 matrix.

    src     source matrix; rows 0..2, columns 0..3 are read
    dst     destination matrix; rows 0..2 receive the corresponding
            src elements and row 3 is set to (0, 0, 0, 1)
 *---------------------------------------------------------------------*/
void C_MTX34To44 ( MTX_CONST Mtx src, Mtx44 dst)
{
    int row;
    int col;

    // Element-by-element copy of the three source rows, in row-major order.
    for (row = 0; row < 3; row++)
    {
        for (col = 0; col < 4; col++)
        {
            dst[row][col] = src[row][col];
        }
    }

    // The source has no fourth row: write (0, 0, 0, 1) there.
    dst[3][0] = 0.0f;
    dst[3][1] = 0.0f;
    dst[3][2] = 0.0f;
    dst[3][3] = 1.0f;
}
1829 
1830 
1831 #if !defined(WIN32) && !defined(WIN64)
1832 /*---------------------------------------------------------------------*
1833     Paired-Single assembler version
1834  *---------------------------------------------------------------------*
1835                 Note that this performs NO error checking.
1836  *---------------------------------------------------------------------*/
/*
 * Paired-single copy of a 3x4 matrix into a 4x4 matrix.
 *
 * src   source matrix; read two floats at a time at byte offsets 0..40
 * dst   destination matrix; the same byte offsets receive the loaded
 *       pairs, then offsets 48 and 56 receive the constants c00 and c01
 *
 * Each pair is stored before the next one is loaded, in ascending
 * offset order.
 */
void PSMTX34To44( MTX_CONST Mtx src, Mtx44 dst )
{
    f32x2 pair;     // the two floats currently being moved from src to dst

    // Offsets 0 and 8: [0][0..1] and [0][2..3]
    pair = __PSQ_L(src, 0, 0);
    __PSQ_ST(dst, pair, 0, 0);
    pair = __PSQ_LX(src, 8, 0, 0);
    __PSQ_STX(dst, 8, pair, 0, 0);

    // Offsets 16 and 24: [1][0..1] and [1][2..3]
    pair = __PSQ_LX(src, 16, 0, 0);
    __PSQ_STX(dst, 16, pair, 0, 0);
    pair = __PSQ_LX(src, 24, 0, 0);
    __PSQ_STX(dst, 24, pair, 0, 0);

    // Offsets 32 and 40: [2][0..1] and [2][2..3]
    pair = __PSQ_LX(src, 32, 0, 0);
    __PSQ_STX(dst, 32, pair, 0, 0);
    pair = __PSQ_LX(src, 40, 0, 0);
    __PSQ_STX(dst, 40, pair, 0, 0);

    // Offsets 48 and 56 are not read from src: store {0, 0} then {0, 1}.
    __PSQ_STX(dst, 48, c00, 0, 0);
    __PSQ_STX(dst, 56, c01, 0, 0);
}
1883 
1884 /*===========================================================================*/
1885 
1886 
1887 extern void _ASM_MTX44RotAxisRadInternal(Mtx m, const Vec *axis, f32 sT, f32 cT);
1888 
ASM_MTX44RotAxisRad(Mtx44 m,const Vec * axis,f32 rad)1889 void ASM_MTX44RotAxisRad(Mtx44 m, const Vec *axis, f32 rad) {
1890     f32     sinT, cosT;
1891 
1892     sinT = sinf(rad);
1893     cosT = cosf(rad);
1894 
1895     _ASM_MTX44RotAxisRadInternal(m, axis, sinT, cosT);
1896 }
1897 
/*
 * Radian-angle front end for ASM_MTX44RotTrig.
 *
 * m     4x4 matrix, forwarded unchanged
 * axis  axis selector character, forwarded unchanged
 * rad   angle in radians; only sinf(rad) and cosf(rad) are passed on
 */
void ASM_MTX44RotRad ( Mtx44 m, char axis, f32 rad )
{
    // Sine is evaluated before cosine, then both are handed to the trig form.
    const f32 sinOfAngle = sinf(rad);
    const f32 cosOfAngle = cosf(rad);

    ASM_MTX44RotTrig( m, axis, sinOfAngle, cosOfAngle );
}
1907 #endif
1908