1 /*---------------------------------------------------------------------------*
2   Project: matrix vector Library
3   File:    mtx.c
4 
5   Copyright (C) Nintendo.  All rights reserved.
6 
7   These coded instructions, statements, and computer programs contain
8   proprietary information of Nintendo of America Inc. and/or Nintendo
9   Company Ltd., and are protected by Federal copyright law.     They may
10   not be disclosed to third parties or copied or duplicated in any form,
11   in whole or in part, without the prior written consent of Nintendo.
12 
13  *---------------------------------------------------------------------------*/
14 
15 #include <math.h>
16 #include <stdio.h>
17 #include <cafe/mtx.h>
18 #include "mtxAssert.h"
19 
20 /*---------------------------------------------------------------------*
21     Constants
22  *---------------------------------------------------------------------*/
23 static const f32x2 c00 = {0.0F, 0.0F};
24 static const f32x2 c01 = {0.0F, 1.0F};
25 static const f32x2 c10 = {1.0F, 0.0F};
26 static const f32x2 c11 = {1.0F, 1.0F};
27 //static const f32x2 c22 = {2.0F, 2.0F};
28 static const f32x2 c33 = {3.0F, 3.0F};
29 static const f32x2 c0505 = {0.5F, 0.5F};
30 
31 /*---------------------------------------------------------------------*
32 
33 
34                             GENERAL SECTION
35 
36 
37 *---------------------------------------------------------------------*/
38 
39 
40 /*---------------------------------------------------------------------*
41 
42 Name:           MTXIdentity
43 
44 Description:    sets a matrix to identity
45 
46 Arguments:      m :  matrix to be set
47 
48 Return:         none
49 
50 *---------------------------------------------------------------------*/
51 /*---------------------------------------------------------------------*
52     C version
53  *---------------------------------------------------------------------*/
void C_MTXIdentity ( Mtx m )
{
    int row;
    int col;

    ASSERTMSG( (m != 0), MTX_IDENTITY_1 );

    // Visit every element of the 3x4 matrix: 1 where the row and column
    // indices match, 0 everywhere else (which includes the fourth column).
    for ( row = 0 ; row < 3 ; row++ )
    {
        for ( col = 0 ; col < 4 ; col++ )
        {
            m[row][col] = ( row == col ) ? 1.0f : 0.0f;
        }
    }
}
62 
63 #if !defined(WIN32) && !defined(WIN64)
64 /*---------------------------------------------------------------------*
65     Paired-Single intrinsics version
66  *---------------------------------------------------------------------*
67                 Note that this performs NO error checking.
68  *---------------------------------------------------------------------*/
void PSMTXIdentity( Mtx m )
{
    // Six paired stores, each writing two adjacent floats.  The second
    // argument is the byte offset from the start of the matrix; one row
    // of four floats occupies 16 bytes.

    // row 0 :  1 0 | 0 0
    __PSQ_STX(m,  0, c10, 0, 0);    // m[0][0], m[0][1]
    __PSQ_STX(m,  8, c00, 0, 0);    // m[0][2], m[0][3]

    // row 1 :  0 1 | 0 0
    __PSQ_STX(m, 16, c01, 0, 0);    // m[1][0], m[1][1]
    __PSQ_STX(m, 24, c00, 0, 0);    // m[1][2], m[1][3]

    // row 2 :  0 0 | 1 0
    __PSQ_STX(m, 32, c00, 0, 0);    // m[2][0], m[2][1]
    __PSQ_STX(m, 40, c10, 0, 0);    // m[2][2], m[2][3]
}
90 #endif
91 
92 /*---------------------------------------------------------------------*
93 
94 Name:           MTXCopy
95 
96 Description:    copies the contents of one matrix into another
97 
98 Arguments:      src        source matrix for copy
99                 dst        destination matrix for copy
100 
101 Return:         none
102 
103 *---------------------------------------------------------------------*/
104 /*---------------------------------------------------------------------*
105     C version
106  *---------------------------------------------------------------------*/
void C_MTXCopy ( MTX_CONST Mtx src, Mtx dst )
{
    int row;
    int col;

    ASSERTMSG( (src != 0) , MTX_COPY_1 );
    ASSERTMSG( (dst != 0) , MTX_COPY_2 );

    // Copying a matrix onto itself needs no work, so the element copy is
    // only performed when the two pointers differ.
    if( src != dst )
    {
        // element-by-element copy of the 3x4 matrix, row by row
        for ( row = 0 ; row < 3 ; row++ )
        {
            for ( col = 0 ; col < 4 ; col++ )
            {
                dst[row][col] = src[row][col];
            }
        }
    }
}
121 
122 #if !defined(WIN32) && !defined(WIN64)
123 /*---------------------------------------------------------------------*
124     Paired-Single intrinsics version
125  *---------------------------------------------------------------------*
126                 Note that this performs NO error checking.
127  *---------------------------------------------------------------------*/
void PSMTXCopy(MTX_CONST Mtx src, Mtx dst )
{
    // One temporary is enough: each pair is written to dst before the
    // next pair is read from src.
    f32x2 pair;

    // The 3x4 matrix is moved as six pairs of floats (8 bytes per pair),
    // in ascending address order.

    pair = __PSQ_L(src, 0, 0);              // [0][0], [0][1]
    __PSQ_ST(dst, pair, 0, 0);

    pair = __PSQ_LX(src, 8, 0, 0);          // [0][2], [0][3]
    __PSQ_STX(dst, 8, pair, 0, 0);

    pair = __PSQ_LX(src, 16, 0, 0);         // [1][0], [1][1]
    __PSQ_STX(dst, 16, pair, 0, 0);

    pair = __PSQ_LX(src, 24, 0, 0);         // [1][2], [1][3]
    __PSQ_STX(dst, 24, pair, 0, 0);

    pair = __PSQ_LX(src, 32, 0, 0);         // [2][0], [2][1]
    __PSQ_STX(dst, 32, pair, 0, 0);

    pair = __PSQ_LX(src, 40, 0, 0);         // [2][2], [2][3]
    __PSQ_STX(dst, 40, pair, 0, 0);
}
169 #endif
170 
171 /*---------------------------------------------------------------------*
172 
173 Name:           MTXConcat
174 
175 Description:    concatenates two matrices.
176                 order of operation is A x B = AB.
177                 ok for any of ab == a == b.
178 
179                 saves a MTXCopy operation when ab is neither a nor b.
180 
181 Arguments:      a        first matrix for concat.
182                 b        second matrix for concat.
183                 ab       resultant matrix from concat.
184 
185 Return:         none
186 
187  *---------------------------------------------------------------------*/
188 /*---------------------------------------------------------------------*
189     C version
190  *---------------------------------------------------------------------*/
void C_MTXConcat ( MTX_CONST Mtx a, MTX_CONST Mtx b, Mtx ab )
{
    Mtx    scratch;
    MtxPtr out;
    int    row;

    ASSERTMSG( (a  != 0), MTX_CONCAT_1 );
    ASSERTMSG( (b  != 0), MTX_CONCAT_2 );
    ASSERTMSG( (ab != 0), MTX_CONCAT_3 );

    // If the destination is also one of the operands, build the product in
    // a local matrix so that no input element is overwritten while it is
    // still needed.  Otherwise write straight into the destination.
    out = ( (ab == a) || (ab == b) ) ? scratch : ab;

    // out = a x b, treating both 3x4 matrices as having an implicit fourth
    // row of (0,0,0,1): the fourth column additionally picks up a[row][3].
    for ( row = 0 ; row < 3 ; row++ )
    {
        out[row][0] = a[row][0]*b[0][0] + a[row][1]*b[1][0] + a[row][2]*b[2][0];
        out[row][1] = a[row][0]*b[0][1] + a[row][1]*b[1][1] + a[row][2]*b[2][1];
        out[row][2] = a[row][0]*b[0][2] + a[row][1]*b[1][2] + a[row][2]*b[2][2];
        out[row][3] = a[row][0]*b[0][3] + a[row][1]*b[1][3] + a[row][2]*b[2][3] + a[row][3];
    }

    // the product was built on the side: move it into the real destination
    if( out == scratch )
    {
        C_MTXCopy( *((MTX_CONST Mtx *)&scratch), ab );
    }
}
233 
234 #if !defined(WIN32) && !defined(WIN64)
235 /*---------------------------------------------------------------------*
236     Paired-Single intrinsics version
237  *---------------------------------------------------------------------*
238                 Note that this performs NO error checking.
239  *---------------------------------------------------------------------*/
void PSMTXConcat ( MTX_CONST Mtx a, MTX_CONST Mtx b, Mtx ab )
{
    // Naming: Xij_Xik is one paired-single value holding elements [i][j]
    // and [i][k] of matrix X.  A* and B* are inputs, R* accumulate the
    // result that is stored to 'ab'.
    //
    // Operations used, as applied below:
    //   __PS_MULS0 (x, y)    : both lanes of x times lane 0 of y
    //   __PS_MADDS0(x, y, z) : both lanes of x times lane 0 of y, plus z
    //   __PS_MADDS1(x, y, z) : both lanes of x times lane 1 of y, plus z
    //
    // Every load from 'a' and 'b' is issued before the first store to
    // 'ab', so 'ab' may be the same matrix as 'a' and/or 'b'.
    // NOTE(review): loads, arithmetic and stores are interleaved exactly as
    // in the original routine, presumably for scheduling - do not regroup
    // without measuring.
    f32x2 A00_A01, A02_A03;
    f32x2 A10_A11, A12_A13;
    f32x2 A20_A21, A22_A23;

    f32x2 B00_B01, B02_B03;
    f32x2 B10_B11, B12_B13;
    f32x2 B20_B21, B22_B23;

    f32x2 R00_R01, R02_R03;
    f32x2 R10_R11, R12_R13;
    f32x2 R20_R21, R22_R23;

    A00_A01 = __PSQ_L(a, 0, 0);
    B00_B01 = __PSQ_L(b, 0, 0);
    B02_B03 = __PSQ_LX(b,  8, 0, 0);
    B10_B11 = __PSQ_LX(b, 16, 0, 0);

    // r00,r01 = a00*b00 , a00*b01
    R00_R01 = __PS_MULS0( B00_B01, A00_A01);
    A10_A11 = __PSQ_LX(a, 16, 0, 0);

    // r02,r03 = a00*b02 , a00*b03
    R02_R03 = __PS_MULS0( B02_B03, A00_A01);

    // r10,r11 = a10*b00 , a10*b01
    R10_R11 = __PS_MULS0( B00_B01, A10_A11);
    B12_B13 = __PSQ_LX(b, 24, 0, 0);

    // r12,r13 = a10*b02 , a10*b03
    R12_R13 = __PS_MULS0( B02_B03, A10_A11);
    A02_A03 = __PSQ_LX(a,  8, 0, 0);

    // r00,r01 += a01*b10 , a01*b11
    R00_R01 = __PS_MADDS1( B10_B11, A00_A01, R00_R01);
    A12_A13 = __PSQ_LX(a, 24, 0, 0);

    // r10,r11 += a11*b10 , a11*b11
    R10_R11 = __PS_MADDS1( B10_B11, A10_A11, R10_R11);
    B20_B21 = __PSQ_LX(b, 32, 0, 0);

    // r02,r03 += a01*b12 , a01*b13
    R02_R03 = __PS_MADDS1( B12_B13, A00_A01, R02_R03);
    B22_B23 = __PSQ_LX(b, 40, 0, 0);

    // r12,r13 += a11*b12 , a11*b13
    R12_R13 = __PS_MADDS1( B12_B13, A10_A11, R12_R13);

    A20_A21 = __PSQ_LX(a, 32, 0, 0);
    A22_A23 = __PSQ_LX(a, 40, 0, 0);

    // r00,r01 += a02*b20 , a02*b21          -> ab[0][0], ab[0][1] complete
    R00_R01 = __PS_MADDS0( B20_B21, A02_A03, R00_R01);

    // r02,r03 += a02*b22 , a02*b23          (a03 still to be added)
    R02_R03 = __PS_MADDS0( B22_B23, A02_A03, R02_R03);

    // r10,r11 += a12*b20 , a12*b21          -> ab[1][0], ab[1][1] complete
    R10_R11 = __PS_MADDS0( B20_B21, A12_A13, R10_R11);

    // r12,r13 += a12*b22 , a12*b23          (a13 still to be added)
    R12_R13 = __PS_MADDS0( B22_B23, A12_A13, R12_R13);

    // ab[0][0], ab[0][1]
    __PSQ_ST(ab, R00_R01, 0, 0);

    // r20,r21 = a20*b00 , a20*b01
    R20_R21 = __PS_MULS0( B00_B01, A20_A21);

    // c01 = {0,1}: adds a03 to the second lane only -> ab[0][2], ab[0][3] complete
    R02_R03 = __PS_MADDS1( c01, A02_A03, R02_R03);

    // r22,r23 = a20*b02 , a20*b03
    R22_R23 = __PS_MULS0( B02_B03, A20_A21);

    // ab[1][0], ab[1][1]
    __PSQ_STX(ab, 16, R10_R11, 0, 0);

    // adds a13 to the second lane only -> ab[1][2], ab[1][3] complete
    R12_R13 = __PS_MADDS1( c01, A12_A13, R12_R13);

    // ab[0][2], ab[0][3]
    __PSQ_STX(ab, 8, R02_R03, 0, 0);

    // r20,r21 += a21*b10 , a21*b11
    R20_R21 = __PS_MADDS1( B10_B11, A20_A21, R20_R21);

    // r22,r23 += a21*b12 , a21*b13
    R22_R23 = __PS_MADDS1( B12_B13, A20_A21, R22_R23);

    // r20,r21 += a22*b20 , a22*b21          -> ab[2][0], ab[2][1] complete
    R20_R21 = __PS_MADDS0( B20_B21, A22_A23, R20_R21);

    // ab[1][2], ab[1][3]
    __PSQ_STX(ab, 24, R12_R13, 0, 0);

    // r22,r23 += a22*b22 , a22*b23          (a23 still to be added)
    R22_R23 = __PS_MADDS0( B22_B23, A22_A23, R22_R23);

    // ab[2][0], ab[2][1]
    __PSQ_STX(ab, 32, R20_R21, 0, 0);

    // adds a23 to the second lane only -> ab[2][2], ab[2][3] complete
    R22_R23 = __PS_MADDS1( c01, A22_A23, R22_R23);

    // ab[2][2], ab[2][3]
    __PSQ_STX(ab, 40, R22_R23, 0, 0);
}
354 #endif
355 
356 /*---------------------------------------------------------------------*
357 
358 Name:           MTXConcatArray
359 
360 Description:    concatenates a matrix to an array of matrices.
361                 order of operation is A x B(array) = AB(array).
362 
363 Arguments:      a        first matrix for concat.
364                 srcBase  array base of second matrix for concat.
365                 dstBase  array base of resultant matrix from concat.
366                 count    number of matrices in srcBase, dstBase arrays.
367 
368                 note:      cannot check for array overflow
369 
370 Return:         none
371 
372  *---------------------------------------------------------------------*/
373 /*---------------------------------------------------------------------*
374     C version
375  *---------------------------------------------------------------------*/
void C_MTXConcatArray ( MTX_CONST Mtx a, MTX_CONST Mtx* srcBase, Mtx* dstBase, u32 count )
{
    u32 idx;

    ASSERTMSG( (a       != 0), "MTXConcatArray(): NULL MtxPtr 'a' " );
    ASSERTMSG( (srcBase != 0), "MTXConcatArray(): NULL MtxPtr 'srcBase' " );
    ASSERTMSG( (dstBase != 0), "MTXConcatArray(): NULL MtxPtr 'dstBase' " );
    ASSERTMSG( (count > 1),    "MTXConcatArray(): count must be greater than 1." );

    // dstBase[k] = a x srcBase[k] for each of the 'count' matrices, in
    // ascending index order.  Array bounds cannot be checked here.
    for ( idx = 0 ; idx < count ; idx++ )
    {
        C_MTXConcat(a, srcBase[idx], dstBase[idx]);
    }
}
393 
394 #if !defined(WIN32) && !defined(WIN64)
395 /*---------------------------------------------------------------------*
396     Paired-Single intrinsics version
397  *---------------------------------------------------------------------*
398                 Note that this performs NO error checking.
399  *---------------------------------------------------------------------*/
void PSMTXConcatArray (
    MTX_CONST Mtx  a,
    MTX_CONST Mtx* srcBase,
    Mtx* dstBase,
    u32  count )
{
    // The index has the same unsigned type as 'count' (as in
    // C_MTXConcatArray), so the loop condition never compares a signed
    // value against an unsigned one and the index cannot overflow a
    // signed int for very large counts.
    u32 i;

    // dstBase[k] = a x srcBase[k] for each of the 'count' matrices.
    // No argument checking is performed; array bounds cannot be checked.
    for ( i = 0 ; i < count ; i++ )
    {
        PSMTXConcat(a, *srcBase, *dstBase);

        srcBase++;
        dstBase++;
    }
}
417 #endif
418 
419 /*---------------------------------------------------------------------*
420 
421 Name:           MTXTranspose
422 
423 Description:    computes the transpose of a matrix.
424                 As matrices are 3x4, fourth column (translation component) is
425                 lost and becomes (0,0,0).
426 
427                 This function is intended for use in computing an
428                 inverse-transpose matrix to transform normals for lighting.
429                 In this case, lost translation component doesn't matter.
430 
431 Arguments:      src       source matrix.
432                 xPose     destination (transposed) matrix.
433                           ok if src == xPose.
434 
435 Return:         none
436 
437 *---------------------------------------------------------------------*/
438 /*---------------------------------------------------------------------*
439     C version
440  *---------------------------------------------------------------------*/
void C_MTXTranspose ( MTX_CONST Mtx src, Mtx xPose )
{
    Mtx    scratch;
    MtxPtr out;
    int    row;

    ASSERTMSG( (src   != 0), MTX_TRANSPOSE_1  );
    ASSERTMSG( (xPose != 0), MTX_TRANSPOSE_2  );

    // An in-place transpose would overwrite elements that are still to be
    // read, so in that case the result is built in a local matrix first.
    out = ( src == xPose ) ? scratch : xPose;

    // Row 'row' of the result is column 'row' of the upper 3x3 of src;
    // the fourth column of the result is always zero.
    for ( row = 0 ; row < 3 ; row++ )
    {
        out[row][0] = src[0][row];
        out[row][1] = src[1][row];
        out[row][2] = src[2][row];
        out[row][3] = 0.0f;
    }

    // the result was built on the side: move it into the real destination
    if( out == scratch )
    {
        C_MTXCopy( *((MTX_CONST Mtx *)&scratch), xPose );
    }
}
468 
469 #if !defined(WIN32) && !defined(WIN64)
470 /*---------------------------------------------------------------------*
471     Paired-Single intrinsics version
472  *---------------------------------------------------------------------*
473                 Note that this performs NO error checking.
474  *---------------------------------------------------------------------*/
void PSMTXTranspose ( MTX_CONST Mtx src, Mtx xPose )
{
    // Transposes the upper 3x3 of 'src' into 'xPose' and sets the whole
    // fourth column of 'xPose' to zero, matching C_MTXTranspose.
    //
    // Every element of 'src' is loaded before the location holding it is
    // stored to, so src == xPose is allowed.
    //
    // Fix: the routine used to finish with a single-float store of [2][2]
    // and never wrote xPose[2][3], leaving whatever was there before
    // (for src == xPose, the old translation z).  The last store now
    // writes the pair { [2][2], 0 }.
    f32x2 row0a, row1a, row0b, row1b;
    f32x2 trns0, trns1, trns2;

    // src [0][0], [0][1]
    row0a = __PSQ_L(src, 0, 0);

    // src [1][0], [1][1]
    row1a = __PSQ_LX(src, 16, 0, 0);

    // [0][0], [1][0]
    trns0 = __PS_MERGE00(row0a, row1a);

    // src [0][2] in lane 0 (single-float load, lane 1 = 1)
    row0b = __PSQ_LX(src, 8, 1, 0);

    // [0][1], [1][1]
    trns1 = __PS_MERGE11(row0a, row1a);

    // src [1][2] in lane 0 (single-float load, lane 1 = 1)
    row1b = __PSQ_LX(src, 24, 1, 0);

    // [0][0], [1][0] -> xPose [0][0], [0][1]
    __PSQ_ST(xPose, trns0, 0, 0);

    // src [2][0], [2][1]
    row0a = __PSQ_LX(src, 32, 0, 0);

    // [0][2], [1][2]
    trns2 = __PS_MERGE00(row0b, row1b);

    // [0][1], [1][1] -> xPose [1][0], [1][1]
    __PSQ_STX(xPose, 16, trns1, 0, 0);

    // [2][0], 0
    trns0 = __PS_MERGE00(row0a, c00);

    // [0][2], [1][2] -> xPose [2][0], [2][1]
    __PSQ_STX(xPose, 32, trns2, 0, 0);

    // [2][1], 0
    trns1 = __PS_MERGE10(row0a, c00);

    // [2][0], 0 -> xPose [0][2], [0][3]
    __PSQ_STX(xPose, 8, trns0, 0, 0);

    // src [2][2] in lane 0 (single-float load, lane 1 = 1)
    row0b = __PSQ_LX(src, 40, 1, 0);

    // [2][1], 0 -> xPose [1][2], [1][3]
    __PSQ_STX(xPose, 24, trns1, 0, 0);

    // [2][2], 0
    trns2 = __PS_MERGE00(row0b, c00);

    // [2][2], 0 -> xPose [2][2], [2][3]
    __PSQ_STX(xPose, 40, trns2, 0, 0);
}
531 #endif
532 
533 /*---------------------------------------------------------------------*
534 
535 Name:           MTXInverse
536 
537 Description:    computes a fast inverse of a matrix.
538                 this algorithm works for matrices with a fourth row of
539                 (0,0,0,1).
540 
541                 for a matrix
542                 M =  |     A      C      |  where A is the upper 3x3 submatrix,
543                      |     0      1      |        C is a 3x1 column vector
544 
545                 INV(M)     =    |  inv(A)      (inv(A))*(-C)    |
546                                 |     0               1         |
547 
548 Arguments:      src       source matrix.
549                 inv       destination (inverse) matrix.
550                           ok if src == inv.
551 
552 Return:         0 if src is not invertible.
553                 1 on success.
554 
555 *---------------------------------------------------------------------*/
556 /*---------------------------------------------------------------------*
557     C version
558  *---------------------------------------------------------------------*/
C_MTXInverse(MTX_CONST Mtx src,Mtx inv)559 u32 C_MTXInverse ( MTX_CONST Mtx src, Mtx inv )
560 {
561     Mtx mTmp;
562     MtxPtr m;
563     f32 det;
564 
565     ASSERTMSG( (src != 0), MTX_INVERSE_1 );
566     ASSERTMSG( (inv != 0), MTX_INVERSE_2 );
567 
568     if( src == inv )
569     {
570         m = mTmp;
571     }
572     else
573     {
574         m = inv;
575     }
576 
577     // compute the determinant of the upper 3x3 submatrix
578     det =   src[0][0]*src[1][1]*src[2][2] + src[0][1]*src[1][2]*src[2][0] + src[0][2]*src[1][0]*src[2][1]
579           - src[2][0]*src[1][1]*src[0][2] - src[1][0]*src[0][1]*src[2][2] - src[0][0]*src[2][1]*src[1][2];
580 
581     // check if matrix is singular
582     if( det == 0.0f )
583     {
584         return 0;
585     }
586 
587     // compute the inverse of the upper submatrix:
588 
589     // find the transposed matrix of cofactors of the upper submatrix
590     // and multiply by (1/det)
591 
592     det = 1.0f / det;
593 
594     m[0][0] =  (src[1][1]*src[2][2] - src[2][1]*src[1][2]) * det;
595     m[0][1] = -(src[0][1]*src[2][2] - src[2][1]*src[0][2]) * det;
596     m[0][2] =  (src[0][1]*src[1][2] - src[1][1]*src[0][2]) * det;
597 
598     m[1][0] = -(src[1][0]*src[2][2] - src[2][0]*src[1][2]) * det;
599     m[1][1] =  (src[0][0]*src[2][2] - src[2][0]*src[0][2]) * det;
600     m[1][2] = -(src[0][0]*src[1][2] - src[1][0]*src[0][2]) * det;
601 
602     m[2][0] =  (src[1][0]*src[2][1] - src[2][0]*src[1][1]) * det;
603     m[2][1] = -(src[0][0]*src[2][1] - src[2][0]*src[0][1]) * det;
604     m[2][2] =  (src[0][0]*src[1][1] - src[1][0]*src[0][1]) * det;
605 
606     // compute (invA)*(-C)
607     m[0][3] = -m[0][0]*src[0][3] - m[0][1]*src[1][3] - m[0][2]*src[2][3];
608     m[1][3] = -m[1][0]*src[0][3] - m[1][1]*src[1][3] - m[1][2]*src[2][3];
609     m[2][3] = -m[2][0]*src[0][3] - m[2][1]*src[1][3] - m[2][2]*src[2][3];
610 
611     // copy back if needed
612     if( m == mTmp )
613     {
614         C_MTXCopy( *((MTX_CONST Mtx *)&mTmp),inv );
615     }
616 
617     return 1;
618 }
619 
620 #if !defined(WIN32) && !defined(WIN64)
621 /*---------------------------------------------------------------------*
622     Paired-Single intrinsics version
623  *---------------------------------------------------------------------*
624             Note that this performs NO error checking.
625             Results may be a little bit different from the C version
626             because it doesn't perform exactly same calculation.
627  *---------------------------------------------------------------------*/
PSMTXInverse(MTX_CONST Mtx src,Mtx inv)628 u32 PSMTXInverse ( MTX_CONST Mtx src, Mtx inv )
629 {
630 
631     f32x2 fp0;
632     f32x2 fp1;
633     f32x2 fp2;
634     f32x2 fp3;
635     f32x2 fp4;
636     f32x2 fp5;
637 
638     f32x2 fp6;
639     f32x2 fp7;
640     f32x2 fp8;
641     f32x2 fp9;
642     f32x2 fp10;
643     f32x2 fp11;
644     f32x2 fp12;
645     f32x2 fp13;
646 
647     // fp0 [ 00 ][ 1.0F ] : Load
648     fp0 = __PSQ_LX(src, 0, 1, 0);
649 
650     // fp1 [ 01 ][ 02 ]   : Load
651     fp1 = __PSQ_LX(src, 4, 0, 0);
652 
653     // fp2 [ 10 ][ 1.0F ] : Load
654     fp2 = __PSQ_LX(src, 16, 1, 0);
655 
656     // fp6 [ 02 ][ 00 ]
657     fp6 = __PS_MERGE10(fp1, fp0);
658 
659     // fp3 [ 11 ][ 12 ]   : Load
660     fp3 = __PSQ_LX(src, 20, 0, 0);
661 
662     // fp4 [ 20 ][ 1.0F ] : Load
663     fp4 = __PSQ_LX(src, 32, 1, 0);
664 
665     // fp7 [ 12 ][ 10 ]
666     fp7 = __PS_MERGE10(fp3, fp2);
667 
668     // fp5 [ 21 ][ 22 ]   : Load
669     fp5 = __PSQ_LX(src, 36, 0, 0);
670 
671     // fp11[ 11*02 ][ 00*12 ]
672     fp11 = __PS_MUL(fp3, fp6);
673 
674     // fp8 [ 22 ][ 20 ]
675     fp8 = __PS_MERGE10(fp5, fp4);
676 
677     // fp13[ 21*12 ][ 10*22 ]
678     fp13 = __PS_MUL(fp5, fp7);
679 
680     // fp11[ 01*12 - 11*02 ][ 10*02 - 00*12 ]
681     fp11 = __PS_MSUB(fp1, fp7, fp11);
682 
683     // fp12[ 01*22 ][ 20*02 ]
684     fp12 = __PS_MUL(fp1, fp8);
685 
686     // fp13[ 11*22 - 21*12 ][ 20*12 - 10*22 ]
687     fp13 = __PS_MSUB(fp3, fp8, fp13);
688 
689     // fp10[ 20*11 ][ N/A ]
690     fp10 = __PS_MUL(fp3, fp4);
691 
692     // fp12[ 21*02 - 01*22 ][ 00*22 - 20*02 ]
693     fp12 = __PS_MSUB(fp5, fp6, fp12);
694 
695     // fp7 [ 00*(11*22-21*12) ][ N/A ]
696     fp7  = __PS_MUL(fp0, fp13);
697 
698     // fp9 [ 00*21 ][ N/A ]
699     fp9  = __PS_MUL(fp0, fp5);
700 
701     // fp8 [ 10*01 ][ N/A ]
702     fp8  = __PS_MUL(fp1, fp2);
703 
704     // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) ][ N/A ]
705     fp7 = __PS_MADD(fp2, fp12, fp7);
706 
707     // fp6 [ 0.0F ][ 0.0F ]
708     fp6 = __PS_SUB(fp6, fp6);
709 
710     // fp10[ 10*21 - 20*11 ][ N/A ]
711     fp10 = __PS_MSUB(fp2, fp5, fp10);
712 
713     // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) + 20*(01*12-11*02) ][ N/A ] : det
714     fp7 = __PS_MADD(fp4, fp11, fp7);
715 
716     // fp9 [ 20*01 - 00*21 ][ N/A ]
717     fp9 = __PS_MSUB(fp1, fp4, fp9);
718 
719     // fp8 [ 00*11 - 10*01 ][ N/A ]
720     fp8 = __PS_MSUB(fp0, fp3, fp8);
721 
722     // check if matrix is singular
723     if( fp7[0] == 0.0f && fp7[1] == 0.0f)
724     {
725         return 0;
726     }
727 
728     // compute the inverse of the upper submatrix:
729 
730     // find the transposed matrix of cofactors of the upper submatrix
731     // and multiply by (1/det)
732 
733     // fp0 [ 1/det ][ N/A ]
734     fp0 = __PS_RES(fp7);
735 
736     // Newton's approximation
737     // Refinement : ( E = est. of 1/K ) -> ( E' = ( 2 - K * E ) * E )
738     fp6 = __PS_ADD(fp0, fp0);
739     fp5 = __PS_MUL(fp7, fp0);
740     fp0 = __PS_NMSUB(fp0, fp5, fp6);
741 
742     // fp1 [ 03 ][ 03 ] : Load
743     fp1[0] = src[0][3];
744     fp1[1] = src[0][3];
745 
746     // fp13[ ( 11*22 - 21*12 ) * rdet ][ ( 20*12 - 10*22 ) * rdet ] : i[0][0], i[1][0]
747     fp13 = __PS_MULS0(fp13, fp0);
748 
749     // fp2 [ 13 ][ 13 ] : Load
750     fp2[0] = src[1][3];
751     fp2[1] = src[1][3];
752 
753     // fp12[ ( 21*02 - 01*22 ) * rdet ][ ( 00*22 - 20*02 ) * rdet ] : i[0][1], i[1][1]
754     fp12 = __PS_MULS0(fp12, fp0);
755 
756     // fp3 [ 23 ][ 23 ] : Load
757     fp3[0] = src[2][3];
758     fp3[1] = src[2][3];
759 
760     // fp11[ ( 01*12 - 11*02 ) * rdet ][ ( 10*02 - 00*12 ) * rdet ] : i[0][2], i[1][2]
761     fp11 = __PS_MULS0(fp11, fp0);
762 
763     // fp5 [ i00 ][ i01 ]
764     fp5 = __PS_MERGE00(fp13, fp12);
765 
766     // fp4 [ i10 ][ i11 ]
767     fp4 = __PS_MERGE11(fp13, fp12);
768 
769     // fp6 [ i00*03 ][ i10*03 ]
770     fp6 = __PS_MUL(fp13, fp1);
771 
772     // [ i00 ][ i01 ] : Store fp5   -> free(fp5[ i00 ][ i01 ])
773     //inv[0][0] = fp5[0];
774     //inv[0][1] = fp5[1];
775     __PSQ_STX(inv, 0, fp5, 0, 0);
776 
777     // [ i10 ][ i11 ] : Store fp4   -> free(fp4[ i10 ][ i11 ])
778     //inv[1][0] = fp4[0];
779     //inv[1][1] = fp4[1];
780     __PSQ_STX(inv, 16, fp4, 0, 0);
781 
782     // fp10[ ( 10*21 - 20*11 ) * rdet ] : i[2][0]
783     fp10  = __PS_MULS0(fp10, fp0);
784 
785     // fp9 [ ( 20*01 - 00*21 ) * rdet ] : i[2][1]
786     fp9  = __PS_MULS0(fp9,  fp0);
787 
788     // fp6 [ i00*03+i01*13 ][ i10*03+i11*13 ]
789     fp6 = __PS_MADD(fp12, fp2, fp6);
790 
791     // [ i20 ] : Store fp10
792     //inv[2][0] = fp10[0];
793     __PSQ_STX(inv, 32, fp10, 1, 0);
794 
795     // fp8 [ ( 00*11 - 10*01 ) * rdet ] : i[2][2]
796     fp8 = __PS_MULS0(fp8,  fp0);
797 
798     // fp6 [ -i00*03-i01*13-i02*23 ][ -i10*03-i11*13-i12*23 ] : i[0][3], i[1][3]
799     fp6 = __PS_NMADD(fp11, fp3, fp6);
800 
801     // [ i21 ] : Store fp9
802     //inv[2][1] = fp9[0];
803     __PSQ_STX(inv, 36, fp9, 1, 0);
804 
805     // fp7 [ i20*03 ][ N/A ]
806     fp7 = __PS_MUL(fp10, fp1);
807 
808     // fp5 [ i02 ][ i03 ]
809     fp5 = __PS_MERGE00(fp11, fp6);
810 
811     // [ i22 ] : Store fp8
812     //inv[2][2] = fp8[0];
813     __PSQ_STX(inv, 40, fp8, 1, 0);
814 
815     // fp7 [ i20*03+i21*13 ][ N/A ]
816     fp7  = __PS_MADD(fp9,  fp2, fp7);
817 
818     // fp4 [ i12 ][ i13 ]
819     fp4  = __PS_MERGE11(fp11, fp6);
820 
821     // [ i02 ][ i03 ] : Store fp5
822     //inv[0][2] = fp5[0];
823     //inv[0][3] = fp5[1];
824     __PSQ_STX(inv, 8, fp5, 0, 0);
825 
826     // fp7 [ -i20*03-i21*13-i22*23 ][ N/A ] : i[2][3]
827     fp7 = __PS_NMADD(fp8,  fp3, fp7);
828 
829     // [ i12 ][ i13 ] : Store fp4
830     //inv[1][2] = fp4[0];
831     //inv[1][3] = fp4[1];
832     __PSQ_STX(inv, 24, fp4, 0, 0);
833 
834     // [ i23 ] : Store fp7
835     //inv[2][3] = fp7[0];
836     __PSQ_STX(inv, 44, fp7, 1, 0);
837 
838     return 1;
839 }
840 #endif
841 
842 /*---------------------------------------------------------------------*
843 
844 Name:           MTXInvXpose
845 
846 Description:    computes a fast inverse-transpose of a matrix.
847                 this algorithm works for matrices with a fourth row of
848                 (0,0,0,1). Commonly used for calculating normal transform
849                 matrices.
850 
851                 This function is equivalent to the combination of
852                 two functions MTXInverse + MTXTranspose.
853 
854 Arguments:      src       source matrix.
855                 invx      destination (inverse-transpose) matrix.
856                           ok if src == invx.
857 
858 Return:         0 if src is not invertible.
859                 1 on success.
860 
861 *---------------------------------------------------------------------*/
862 /*---------------------------------------------------------------------*
863     C version
864  *---------------------------------------------------------------------*/
C_MTXInvXpose(MTX_CONST Mtx src,Mtx invX)865 u32 C_MTXInvXpose ( MTX_CONST Mtx src, Mtx invX )
866 {
867     Mtx mTmp;
868     MtxPtr m;
869     f32 det;
870 
871     ASSERTMSG( (src != 0), MTX_INVXPOSE_1 );
872     ASSERTMSG( (invX != 0), MTX_INVXPOSE_2 );
873 
874     if( src == invX )
875     {
876         m = mTmp;
877     }
878     else
879     {
880         m = invX;
881     }
882 
883     // compute the determinant of the upper 3x3 submatrix
884     det =   src[0][0]*src[1][1]*src[2][2] + src[0][1]*src[1][2]*src[2][0] + src[0][2]*src[1][0]*src[2][1]
885           - src[2][0]*src[1][1]*src[0][2] - src[1][0]*src[0][1]*src[2][2] - src[0][0]*src[2][1]*src[1][2];
886 
887     // check if matrix is singular
888     if( det == 0.0f )
889     {
890         return 0;
891     }
892 
893     // compute the inverse-transpose of the upper submatrix:
894 
895     // find the transposed matrix of cofactors of the upper submatrix
896     // and multiply by (1/det)
897 
898     det = 1.0f / det;
899 
900     m[0][0] =  (src[1][1]*src[2][2] - src[2][1]*src[1][2]) * det;
901     m[0][1] = -(src[1][0]*src[2][2] - src[2][0]*src[1][2]) * det;
902     m[0][2] =  (src[1][0]*src[2][1] - src[2][0]*src[1][1]) * det;
903 
904     m[1][0] = -(src[0][1]*src[2][2] - src[2][1]*src[0][2]) * det;
905     m[1][1] =  (src[0][0]*src[2][2] - src[2][0]*src[0][2]) * det;
906     m[1][2] = -(src[0][0]*src[2][1] - src[2][0]*src[0][1]) * det;
907 
908     m[2][0] =  (src[0][1]*src[1][2] - src[1][1]*src[0][2]) * det;
909     m[2][1] = -(src[0][0]*src[1][2] - src[1][0]*src[0][2]) * det;
910     m[2][2] =  (src[0][0]*src[1][1] - src[1][0]*src[0][1]) * det;
911 
912     // the fourth columns should be all zero
913     m[0][3] = 0.0F;
914     m[1][3] = 0.0F;
915     m[2][3] = 0.0F;
916 
917     // copy back if needed
918     if( m == mTmp )
919     {
920         C_MTXCopy( *((MTX_CONST Mtx *)&mTmp),invX );
921     }
922 
923     return 1;
924 }
925 
926 #if !defined(WIN32) && !defined(WIN64)
927 /*---------------------------------------------------------------------*
928     Paired-Single intrinsics version
929  *---------------------------------------------------------------------*
930             Note that this performs NO error checking.
931             Results may be a little bit different from the C version
932             because it doesn't perform exactly same calculation.
933  *---------------------------------------------------------------------*/
PSMTXInvXpose(MTX_CONST Mtx src,Mtx invX)934 u32 PSMTXInvXpose ( MTX_CONST Mtx src, Mtx invX )
935 {
936     f32x2 fp0;
937     f32x2 fp1;
938     f32x2 fp2;
939     f32x2 fp3;
940     f32x2 fp4;
941     f32x2 fp5;
942 
943     f32x2 fp6;
944     f32x2 fp7;
945     f32x2 fp8;
946     f32x2 fp9;
947     f32x2 fp10;
948     f32x2 fp11;
949     f32x2 fp12;
950     f32x2 fp13;
951 
952     // fp0 [ 00 ][ 1.0F ] : Load
953     //fp0[0] = src[0][0];
954     //fp0[1] = 1.0F;
955     fp0 = __PSQ_LX(src, 0, 1, 0);
956 
957     // fp1 [ 01 ][ 02 ]   : Load
958     //fp1[0] = src[0][1];
959     //fp1[1] = src[0][2];
960     fp1 = __PSQ_LX(src, 4, 0, 0);
961 
962     // fp2 [ 10 ][ 1.0F ] : Load
963     //fp2[0] = src[1][0];
964     //fp2[1] = 1.0F;
965     fp2 = __PSQ_LX(src, 16, 1, 0);
966 
967     // fp6 [ 02 ][ 00 ]
968     fp6 = __PS_MERGE10(fp1, fp0);
969 
970     // fp3 [ 11 ][ 12 ]   : Load
971     //fp3[0] = src[1][1];
972     //fp3[1] = src[1][2];
973     fp3 = __PSQ_LX(src, 20, 0, 0);
974 
975     // fp4 [ 20 ][ 1.0F ] : Load
976     //fp4[0] = src[2][0];
977     //fp4[1] = 1.0F;
978     fp4 = __PSQ_LX(src, 32, 1, 0);
979 
980     // fp7 [ 12 ][ 10 ]
981     fp7 = __PS_MERGE10(fp3, fp2);
982 
983     // fp5 [ 21 ][ 22 ]   : Load
984     //fp5[0] = src[2][1];
985     //fp5[1] = src[2][2];
986     fp5 = __PSQ_LX(src, 36, 0, 0);
987 
988     // fp11[ 11*02 ][ 00*12 ]
989     fp11 = __PS_MUL(fp3, fp6);
990 
991     // fp8 [ 22 ][ 20 ]
992     fp8 = __PS_MERGE10(fp5, fp4);
993 
994     // fp13[ 21*12 ][ 10*22 ]
995     fp13 = __PS_MUL(fp5, fp7);
996 
997     // fp11[ 01*12 - 11*02 ][ 10*02 - 00*12 ]
998     fp11 = __PS_MSUB(fp1, fp7, fp11);
999 
1000     // fp12[ 01*22 ][ 20*02 ]
1001     fp12 = __PS_MUL(fp1, fp8);
1002 
1003     // fp13[ 11*22 - 21*12 ][ 20*12 - 10*22 ]
1004     fp13 = __PS_MSUB(fp3, fp8, fp13);
1005 
1006     // fp10[ 20*11 ][ N/A ]
1007     fp10 = __PS_MUL(fp3, fp4);
1008 
1009     // fp12[ 21*02 - 01*22 ][ 00*22 - 20*02 ]
1010     fp12 = __PS_MSUB(fp5, fp6, fp12);
1011 
1012     // fp7 [ 00*(11*22-21*12) ][ N/A ]
1013     fp7  = __PS_MUL(fp0, fp13);
1014 
1015     // fp9 [ 00*21 ][ N/A ]
1016     fp9  = __PS_MUL(fp0, fp5);
1017 
1018     // fp8 [ 10*01 ][ N/A ]
1019     fp8  = __PS_MUL(fp1, fp2);
1020 
1021     // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) ][ N/A ]
1022     fp7 = __PS_MADD(fp2, fp12, fp7);
1023 
1024     // fp6 [ 0.0F ][ 0.0F ]
1025     fp6 = __PS_SUB(fp6, fp6);
1026 
1027     // fp10[ 10*21 - 20*11 ][ N/A ]
1028     fp10 = __PS_MSUB(fp2, fp5, fp10);
1029 
1030     // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) + 20*(01*12-11*02) ][ N/A ] : det
1031     fp7 = __PS_MADD(fp4, fp11, fp7);
1032 
1033     // fp9 [ 20*01 - 00*21 ][ N/A ]
1034     fp9 = __PS_MSUB(fp1, fp4, fp9);
1035 
1036     // fp8 [ 00*11 - 10*01 ][ N/A ]
1037     fp8 = __PS_MSUB(fp0, fp3, fp8);
1038 
1039     // check if matrix is singular
1040     if( fp7[0] == 0.0f && fp7[1] == 0.0f)
1041     {
1042         return 0;
1043     }
1044 
1045     // compute the inverse-transpose of the upper submatrix:
1046 
1047     // find the transposed matrix of cofactors of the upper submatrix
1048     // and multiply by (1/det)
1049 
1050     // fp0 [ 1/det ][ N/A ]
1051     fp0 = __PS_RES(fp7);
1052 
1053     // [ ix03 ] : Store fp6
1054     invX[0][3] = fp6[0];
1055 
1056     // Newton's approximation
1057     // Refinement : ( E = est. of 1/K ) -> ( E' = ( 2 - K * E ) * E )
1058     fp4 = __PS_ADD(fp0, fp0);
1059     fp5 = __PS_MUL(fp7, fp0);
1060 
1061     // [ ix13 ] : Store fp6
1062     //invX[1][3] = fp6[0];
1063     __PSQ_STX(invX, 28, fp6, 1, 0);
1064 
1065     fp0 = __PS_NMSUB(fp0, fp5, fp4);
1066 
1067     // [ ix23 ] : Store fp6
1068     //invX[2][3] = fp6[0];
1069     __PSQ_STX(invX, 44, fp6, 1, 0);
1070 
1071     // fp13[ ( 11*22 - 21*12 ) * rdet ][ ( 20*12 - 10*22 ) * rdet ] : ix[0][0], ix[0][1]
1072     fp13 = __PS_MULS0(fp13, fp0);
1073 
1074     // fp12[ ( 21*02 - 01*22 ) * rdet ][ ( 00*22 - 20*02 ) * rdet ] : ix[1][0], ix[1][1]
1075     fp12 = __PS_MULS0(fp12, fp0);
1076 
1077     // [ ix00 ][ ix01 ] : Store fp13
1078     //invX[0][0] = fp13[0];
1079     //invX[0][1] = fp13[1];
1080     __PSQ_STX(invX, 0, fp13, 0, 0);
1081 
1082     // fp11[ ( 01*12 - 11*02 ) * rdet ][ ( 10*02 - 00*12 ) * rdet ] : ix[2][0], ix[2][1]
1083     fp11 = __PS_MULS0(fp11, fp0);
1084 
1085     // [ ix10 ][ ix11 ] : Store fp12
1086     //invX[1][0] = fp12[0];
1087     //invX[1][1] = fp12[1];
1088     __PSQ_STX(invX, 16, fp12, 0, 0);
1089 
1090     // fp10[ ( 10*21 - 20*11 ) * rdet ] : i[0][2]
1091     fp10 = __PS_MULS0(fp10, fp0);
1092 
1093     // [ ix20 ][ ix21 ] : Store fp11
1094     //invX[2][0] = fp11[0];
1095     //invX[2][1] = fp11[1];
1096     __PSQ_STX(invX, 32, fp11, 0, 0);
1097 
1098     // fp9 [ ( 20*01 - 00*21 ) * rdet ] : i[1][2]
1099     fp9 = __PS_MULS0(fp9, fp0);
1100 
1101     // [ ix02 ]         : Store fp10
1102     //invX[0][2] = fp10[0];
1103     __PSQ_STX(invX, 8, fp10, 1, 0);
1104 
1105     // fp8 [ ( 00*11 - 10*01 ) * rdet ] : i[2][2]
1106     fp8 = __PS_MULS0(fp8, fp0);
1107 
1108     // [ ix12 ]         : Store fp9
1109     //invX[1][2] = fp9[0];
1110     __PSQ_STX(invX, 24, fp9, 1, 0);
1111 
1112     // [ ix22 ]         : Store fp8
1113     //invX[2][2] = fp8[0];
1114     __PSQ_STX(invX, 40, fp8, 1, 0);
1115 
1116     return 1;
1117 }
1118 #endif
1119 
1120 /*---------------------------------------------------------------------*
1121 
1122 
1123                              MODEL SECTION
1124 
1125 
1126 *---------------------------------------------------------------------*/
1127 
1128 /*---------------------------------------------------------------------*
1129 
1130 Name:           MTXRotDeg
1131 
1132 Description:    sets a rotation matrix about one of the X, Y or Z axes
1133 
1134 Arguments:      m       matrix to be set
1135 
1136                 axis    major axis about which to rotate.
1137                         axis is passed in as a character.
1138                         it must be one of 'X', 'x', 'Y', 'y', 'Z', 'z'
1139 
1140                 deg     rotation angle in degrees.
1141 
1142                         note:  counter-clockwise rotation is positive.
1143 
1144 Return:         none
1145 
1146 *---------------------------------------------------------------------*/
1147 
1148 /*---------------------------------------------------------------------*
1149 
1150 Name:           MTXRotRad
1151 
1152 Description:    sets a rotation matrix about one of the X, Y or Z axes
1153 
1154 Arguments:      m       matrix to be set
1155 
1156                 axis    major axis about which to rotate.
1157                         axis is passed in as a character.
1158                         it must be one of 'X', 'x', 'Y', 'y', 'Z', 'z'
1159 
1160                 rad     rotation angle in radians.
1161 
1162                         note:  counter-clockwise rotation is positive.
1163 
1164 Return:         none
1165 
1166 *---------------------------------------------------------------------*/
1167 /*---------------------------------------------------------------------*
1168     C version
1169  *---------------------------------------------------------------------*/
C_MTXRotRad(Mtx m,char axis,f32 rad)1170 void C_MTXRotRad ( Mtx m, char axis, f32 rad )
1171 {
1172 
1173     f32 sinA, cosA;
1174 
1175     ASSERTMSG( (m != 0), MTX_ROTRAD_1 );
1176 
1177     // verification of "axis" will occur in MTXRotTrig
1178 
1179     sinA = sinf(rad);
1180     cosA = cosf(rad);
1181 
1182     C_MTXRotTrig( m, axis, sinA, cosA );
1183 }
1184 
1185 #if !defined(WIN32) && !defined(WIN64)
1186 /*---------------------------------------------------------------------*
1187     Paired-Single intrinsics version
1188  *---------------------------------------------------------------------*
1189                 Note that this performs NO error checking.
1190  *---------------------------------------------------------------------*/
PSMTXRotRad(Mtx m,char axis,f32 rad)1191 void PSMTXRotRad ( Mtx m, char axis, f32 rad )
1192 {
1193     f32 sinA, cosA;
1194 
1195     sinA = sinf(rad);
1196     cosA = cosf(rad);
1197 
1198     PSMTXRotTrig( m, axis, sinA, cosA );
1199 }
1200 #endif
1201 
1202 /*---------------------------------------------------------------------*
1203 
1204 Name:           MTXRotTrig
1205 
1206 Description:    sets a rotation matrix about one of the X, Y or Z axes
1207                 from specified trig ratios
1208 
1209 Arguments:      m       matrix to be set
1210 
1211                 axis    major axis about which to rotate.
1212                         axis is passed in as a character.
1213                         It must be one of 'X', 'x', 'Y', 'y', 'Z', 'z'
1214 
1215                 sinA    sine of rotation angle.
1216 
1217                 cosA    cosine of rotation angle.
1218 
1219                         note:  counter-clockwise rotation is positive.
1220 
1221 Return:         none
1222 
1223 *---------------------------------------------------------------------*/
1224 /*---------------------------------------------------------------------*
1225     C version
1226  *---------------------------------------------------------------------*/
C_MTXRotTrig(Mtx m,char axis,f32 sinA,f32 cosA)1227 void C_MTXRotTrig ( Mtx m, char axis, f32 sinA, f32 cosA )
1228 {
1229     ASSERTMSG( (m != 0), MTX_ROTTRIG_1 );
1230 
1231     switch(axis)
1232     {
1233 
1234     case 'x':
1235     case 'X':
1236         m[0][0] =  1.0f;  m[0][1] =  0.0f;    m[0][2] =  0.0f;  m[0][3] = 0.0f;
1237         m[1][0] =  0.0f;  m[1][1] =  cosA;    m[1][2] = -sinA;  m[1][3] = 0.0f;
1238         m[2][0] =  0.0f;  m[2][1] =  sinA;    m[2][2] =  cosA;  m[2][3] = 0.0f;
1239         break;
1240 
1241     case 'y':
1242     case 'Y':
1243         m[0][0] =  cosA;  m[0][1] =  0.0f;    m[0][2] =  sinA;  m[0][3] = 0.0f;
1244         m[1][0] =  0.0f;  m[1][1] =  1.0f;    m[1][2] =  0.0f;  m[1][3] = 0.0f;
1245         m[2][0] = -sinA;  m[2][1] =  0.0f;    m[2][2] =  cosA;  m[2][3] = 0.0f;
1246         break;
1247 
1248     case 'z':
1249     case 'Z':
1250         m[0][0] =  cosA;  m[0][1] = -sinA;    m[0][2] =  0.0f;  m[0][3] = 0.0f;
1251         m[1][0] =  sinA;  m[1][1] =  cosA;    m[1][2] =  0.0f;  m[1][3] = 0.0f;
1252         m[2][0] =  0.0f;  m[2][1] =  0.0f;    m[2][2] =  1.0f;  m[2][3] = 0.0f;
1253         break;
1254 
1255     default:
1256         ASSERTMSG( 0, MTX_ROTTRIG_2 );
1257         break;
1258 
1259     }
1260 }
1261 
1262 #if !defined(WIN32) && !defined(WIN64)
1263 /*---------------------------------------------------------------------*
1264     Paired-Single intrinsics version
1265  *---------------------------------------------------------------------*
1266                 Note that this performs NO error checking.
1267  *---------------------------------------------------------------------*/
PSMTXRotTrig(Mtx m,char axis,f32 sinA,f32 cosA)1268 void PSMTXRotTrig ( Mtx m, char axis, f32 sinA, f32 cosA )
1269 {
1270     f32x2 nsinA;
1271     f32x2 fw0, fw1, fw2, fw3;
1272     f32x2 sinA10 = {sinA, 0.0f};
1273     f32x2 cosA10 = {cosA, 0.0f};
1274 
1275     //ps_neg      nsinA, sinA
1276     nsinA = __PS_NEG(sinA10);
1277 
1278     switch(axis)
1279     {
1280     case 'x':
1281     case 'X':
1282         //psq_st      fc1,  0(m), 1, 0
1283         __PSQ_ST(m, c11, 1, 0);
1284 
1285         //psq_st      fc0,  4(m), 0, 0
1286         __PSQ_STX(m, 4, c00, 0, 0);
1287 
1288         //ps_merge00  fw0, sinA, cosA
1289         fw0 = __PS_MERGE00(sinA10, cosA10);
1290 
1291         //psq_st      fc0, 12(m), 0, 0
1292         __PSQ_STX(m, 12, c00, 0, 0);
1293 
1294         //ps_merge00  fw1, cosA, nsinA
1295         fw1 = __PS_MERGE00(cosA10, nsinA);
1296 
1297         //psq_st      fc0, 28(m), 0, 0
1298         __PSQ_STX(m, 28, c00, 0, 0);
1299 
1300         //psq_st      fc0, 44(m), 1, 0
1301         __PSQ_STX(m, 44, c00, 1, 0);
1302 
1303         //psq_st      fw0, 36(m), 0, 0
1304         __PSQ_STX(m, 36, fw0, 0, 0);
1305 
1306         //psq_st      fw1, 20(m), 0, 0
1307         __PSQ_STX(m, 20, fw1, 0, 0);
1308 
1309         break;
1310 
1311     case 'y':
1312     case 'Y':
1313         //ps_merge00  fw0, cosA, fc0
1314         fw0 = __PS_MERGE00(cosA10, c00);
1315 
1316         //ps_merge00  fw1, fc0, fc1
1317         fw1 = __PS_MERGE00(c00, c11);
1318 
1319         //psq_st      fc0, 24(m), 0, 0
1320         __PSQ_STX(m, 24, c00, 0, 0);
1321 
1322         //psq_st      fw0,  0(m), 0, 0
1323         __PSQ_ST(m, fw0, 0, 0);
1324 
1325         //ps_merge00  fw2, nsinA, fc0
1326         fw2 = __PS_MERGE00(nsinA, c00);
1327 
1328         //ps_merge00  fw3, sinA, fc0
1329         fw3 = __PS_MERGE00(sinA10, c00);
1330 
1331         //psq_st      fw0, 40(m), 0, 0;
1332         __PSQ_STX(m, 40, fw0, 0, 0);
1333 
1334         //psq_st      fw1, 16(m), 0, 0;
1335         __PSQ_STX(m, 16, fw1, 0, 0);
1336 
1337         //psq_st      fw3,  8(m), 0, 0;
1338         __PSQ_STX(m, 8, fw3, 0, 0);
1339 
1340         //psq_st      fw2, 32(m), 0, 0;
1341         __PSQ_STX(m, 32, fw2, 0, 0);
1342 
1343         break;
1344 
1345     case 'z':
1346     case 'Z':
1347 
1348         //psq_st      fc0,  8(m), 0, 0
1349         __PSQ_STX(m, 8, c00, 0, 0);
1350 
1351         //ps_merge00  fw0, sinA, cosA
1352         fw0 = __PS_MERGE00(sinA10, cosA10);
1353 
1354         //ps_merge00  fw2, cosA, nsinA
1355         fw2 = __PS_MERGE00(cosA10, nsinA);
1356 
1357         //psq_st      fc0, 24(m), 0, 0
1358         __PSQ_STX(m, 24, c00, 0, 0);
1359 
1360         //psq_st      fc0, 32(m), 0, 0
1361         __PSQ_STX(m, 32, c00, 0, 0);
1362 
1363         //ps_merge00  fw1, fc1, fc0
1364         fw1 = __PS_MERGE00(c11, c00);
1365 
1366         //psq_st      fw0, 16(m), 0, 0
1367         __PSQ_STX(m, 16, fw0, 0, 0);
1368 
1369         //psq_st      fw2,  0(m), 0, 0
1370         __PSQ_ST(m, fw2, 0, 0);
1371 
1372         //psq_st      fw1, 40(m), 0, 0
1373         __PSQ_STX(m, 40, fw1, 0, 0);
1374 
1375         break;
1376 
1377     default:
1378         ASSERTMSG( 0, MTX_ROTTRIG_2 );
1379         break;
1380     }
1381 }
1382 #endif
1383 
1384 /*---------------------------------------------------------------------*
1385 
1386 Name:           MTXRotAxisRad
1387 
1388 Description:    sets a rotation matrix about an arbitrary axis
1389 
1390 
1391 Arguments:      m       matrix to be set
1392 
1393                 axis    ptr to a vector containing the x,y,z axis
1394                         components.
1395                         axis does not have to be a unit vector.
1396 
1397                 rad     rotation angle in radians.
1398 
1399                         note:  counter-clockwise rotation is positive.
1400 
1401 Return:         none
1402 
1403 *---------------------------------------------------------------------*/
1404 /*---------------------------------------------------------------------*
1405     C version
1406  *---------------------------------------------------------------------*/
C_MTXRotAxisRad(Mtx m,const Vec * axis,f32 rad)1407 void C_MTXRotAxisRad( Mtx m, const Vec *axis, f32 rad )
1408 {
1409     Vec vN;
1410     f32 s, c;             // sinTheta, cosTheta
1411     f32 t;                // ( 1 - cosTheta )
1412     f32 x, y, z;          // x, y, z components of normalized axis
1413     f32 xSq, ySq, zSq;    // x, y, z squared
1414 
1415     ASSERTMSG( (m    != 0), MTX_ROTAXIS_1  );
1416     ASSERTMSG( (axis != 0), MTX_ROTAXIS_2  );
1417 
1418     s = sinf(rad);
1419     c = cosf(rad);
1420     t = 1.0f - c;
1421 
1422     C_VECNormalize( axis, &vN );
1423 
1424     x = vN.x;
1425     y = vN.y;
1426     z = vN.z;
1427 
1428     xSq = x * x;
1429     ySq = y * y;
1430     zSq = z * z;
1431 
1432     m[0][0] = ( t * xSq )   + ( c );
1433     m[0][1] = ( t * x * y ) - ( s * z );
1434     m[0][2] = ( t * x * z ) + ( s * y );
1435     m[0][3] =    0.0f;
1436 
1437     m[1][0] = ( t * x * y ) + ( s * z );
1438     m[1][1] = ( t * ySq )   + ( c );
1439     m[1][2] = ( t * y * z ) - ( s * x );
1440     m[1][3] =    0.0f;
1441 
1442     m[2][0] = ( t * x * z ) - ( s * y );
1443     m[2][1] = ( t * y * z ) + ( s * x );
1444     m[2][2] = ( t * zSq )   + ( c );
1445     m[2][3] =    0.0f;
1446 }
1447 
1448 #if !defined(WIN32) && !defined(WIN64)
1449 /*---------------------------------------------------------------------*
1450     Paired-Single intrinsics version
1451  *---------------------------------------------------------------------*
1452                 Note that this performs NO error checking.
1453  *---------------------------------------------------------------------*/
_PSMTXRotAxisRadInternal(Mtx m,const Vec * axis,f32 sT,f32 cT)1454 static void _PSMTXRotAxisRadInternal(
1455     Mtx    m,
1456     const Vec *axis,
1457     f32    sT,
1458     f32    cT )
1459 {
1460     f32x2    tT, sT2, cT2;
1461     f32x2    tmp0, tmp1, tmp2, tmp3, tmp4;
1462     f32x2    tmp5, tmp6, tmp7, tmp9, tmp8;
1463 
1464     // tmp0 = [x][y] : LOAD
1465     //psq_l       tmp0, 0(axis), 0, 0
1466     //tmp0[0] = axis->x;
1467     //tmp0[1] = axis->y;
1468     tmp0 = __PSQ_L(axis, 0, 0);
1469 
1470     // tmp1 = [z][z] : LOAD
1471     tmp1[0] = axis->z;
1472     tmp1[1] = axis->z;
1473 
1474     // tmp2 = [x*x][y*y]
1475     tmp2 = __PS_MUL(tmp0, tmp0);
1476 
1477     // tmp3 = [x*x+z*z][y*y+z*z]
1478     tmp3 = __PS_MADD(tmp1, tmp1, tmp2);
1479 
1480     // tmp4 = [S = x*x+y*y+z*z][z]
1481     tmp4 = __PS_SUM0(tmp3, tmp1, tmp2);
1482 
1483     // tT = 1.0F - cT
1484     tT[0] = tT[1] = 1.0f - cT;
1485 
1486     // tmp5 = [1.0/sqrt(S)] :estimation[E]
1487     tmp5[0] = tmp5[1] = __FRSQRTE(tmp4[0]);
1488 
1489     // Newton-Rapson refinement step
1490     // E' = E/2(3.0 - E*E*S)
1491     tmp2 = __PS_MUL(tmp5, tmp5);            // E*E
1492     tmp3 = __PS_MUL(tmp5, c0505);            // E/2
1493     tmp2 = __PS_NMSUB(tmp2, tmp4, c33);    // (3-E*E*S)
1494     tmp5 = __PS_MUL(tmp2, tmp3);            // (E/2)(3-E*E*S)
1495 
1496     // cT = [c][c]
1497     cT2[0] = cT2[1] = cT;
1498 
1499     // sT = [c][c]
1500     sT2[0] = sT2[1] = sT;
1501 
1502     // tmp0 = [nx = x/sqrt(S)][ny = y/sqrt(S)]
1503     tmp0 = __PS_MULS0(tmp0, tmp5);
1504 
1505     // tmp1 = [nz = z/sqrt(S)][nz = z/sqrt(S)]
1506     tmp1 = __PS_MULS0(tmp1, tmp5);
1507 
1508     // tmp4 = [t*nx][t*ny]
1509     tmp4 = __PS_MULS0(tmp0, tT);
1510 
1511     // tmp9 = [s*nx][s*ny]
1512     tmp9 = __PS_MULS0(tmp0, sT2);
1513 
1514     // tmp5 = [t*nz][t*nz]
1515     tmp5  = __PS_MULS0(tmp1, tT);
1516 
1517     // tmp3 = [t*nx*ny][t*ny*ny]
1518     tmp3  = __PS_MULS1(tmp4, tmp0);
1519 
1520     // tmp2 = [t*nx*nx][t*ny*nx]
1521     tmp2 = __PS_MULS0(tmp4, tmp0);
1522 
1523     // tmp4 = [t*nx*nz][t*ny*nz]
1524     tmp4 = __PS_MULS0(tmp4, tmp1);
1525 
1526     // tmp6 = [t*nx*nx-s*nz][t*ny*ny-s*nz]
1527     tmp6 = __PS_NMSUB(tmp1, sT2, tmp2);
1528 
1529     // tmp7 = [t*nx*ny+s*nz][t*ny*ny+s*nz]
1530     tmp7 = __PS_MADD(tmp1, sT2, tmp3);
1531 
1532     // tmp0 = [-s*nx][-s*ny]
1533     tmp0 = __PS_NEG(tmp9);
1534 
1535     // tmp8 = [t*nx*nz+s*ny][0] == [m02][m03]
1536     tmp8 = __PS_SUM0(tmp4, c00, tmp9);
1537 
1538     // tmp2 = [t*nx*nx+c][t*nx*ny-s*nz] == [m00][m01]
1539     tmp2  = __PS_SUM0(tmp2, tmp6, cT2);
1540 
1541     // tmp3 = [t*nx*ny+s*nz][t*ny*ny+c] == [m10][m11]
1542     tmp3 = __PS_SUM1(cT2, tmp7, tmp3);
1543 
1544     // tmp6 = [t*ny*nz-s*nx][0] == [m12][m13]
1545     tmp6 = __PS_SUM0(tmp0, c00 ,tmp4);
1546 
1547     // tmp8 [m02][m03] : STORE
1548     //psq_st      tmp8, 8(m), 0, 0
1549     //m[0][2] = tmp8[0];
1550     //m[0][3] = tmp8[1];
1551     __PSQ_STX(m, 8, tmp8, 0, 0);
1552 
1553     // tmp0 = [t*nx*nz-s*ny][t*ny*nz]
1554     tmp0 = __PS_SUM0(tmp4, tmp4, tmp0);
1555 
1556     // tmp2 [m00][m01] : STORE
1557     //psq_st      tmp2, 0(m), 0, 0
1558     //m[0][0] = tmp2[0];
1559     //m[0][1] = tmp2[1];
1560     __PSQ_STX(m, 0, tmp2, 0, 0);
1561 
1562     // tmp5 = [t*nz*nz][t*nz*nz]
1563     tmp5 = __PS_MULS0(tmp5, tmp1);
1564 
1565     // tmp3 [m10][m11] : STORE
1566     //psq_st      tmp3, 16(m), 0, 0
1567     //m[1][0] = tmp3[0];
1568     //m[1][1] = tmp3[1];
1569     __PSQ_STX(m, 16, tmp3, 0, 0);
1570 
1571     // tmp4 = [t*nx*nz-s*ny][t*ny*nz+s*nx] == [m20][m21]
1572     tmp4 = __PS_SUM1(tmp9, tmp0, tmp4);
1573 
1574     // tmp6 [m12][m13] : STORE
1575     //psq_st      tmp6, 24(m), 0, 0
1576     //m[1][2] = tmp6[0];
1577     //m[1][3] = tmp6[1];
1578     __PSQ_STX(m, 24, tmp6, 0, 0);
1579 
1580     // tmp5 = [t*nz*nz+c][0]   == [m22][m23]
1581     tmp5  = __PS_SUM0(tmp5, c00, cT2);
1582 
1583     // tmp4 [m20][m21] : STORE
1584     //psq_st      tmp4, 32(m), 0, 0
1585     //m[2][0] = tmp4[0];
1586     //m[2][1] = tmp4[1];
1587     __PSQ_STX(m, 32, tmp4, 0, 0);
1588 
1589     // tmp5 [m22][m23] : STORE
1590     //psq_st      tmp5, 40(m), 0, 0
1591     //m[2][2] = tmp5[0];
1592     //m[2][3] = tmp5[1];
1593     __PSQ_STX(m, 40, tmp5, 0, 0);
1594 }
1595 
1596 /*---------------------------------------------------------------------*
1597     Paired-Single intrinsics version
1598  *---------------------------------------------------------------------*
1599                 Note that this performs NO error checking.
1600  *---------------------------------------------------------------------*/
PSMTXRotAxisRad(Mtx m,const Vec * axis,f32 rad)1601 void PSMTXRotAxisRad(
1602     Mtx             m,
1603     const Vec      *axis,
1604     f32             rad )
1605 {
1606     f32     sinT, cosT;
1607 
1608     sinT = sinf(rad);
1609     cosT = cosf(rad);
1610 
1611     _PSMTXRotAxisRadInternal(m, axis, sinT, cosT);
1612 }
1613 #endif
1614 
1615 /*---------------------------------------------------------------------*
1616 
1617 Name:           MTXTrans
1618 
1619 Description:    sets a translation matrix.
1620 
1621 Arguments:       m        matrix to be set
1622 
1623                 xT        x component of translation.
1624 
1625                 yT        y component of translation.
1626 
1627                 zT        z component of translation.
1628 
1629 Return:         none
1630 
1631 *---------------------------------------------------------------------*/
1632 /*---------------------------------------------------------------------*
1633     C version
1634  *---------------------------------------------------------------------*/
C_MTXTrans(Mtx m,f32 xT,f32 yT,f32 zT)1635 void C_MTXTrans ( Mtx m, f32 xT, f32 yT, f32 zT )
1636 {
1637     ASSERTMSG( (m != 0), MTX_TRANS_1 );
1638 
1639     m[0][0] = 1.0f;  m[0][1] = 0.0f;  m[0][2] = 0.0f;  m[0][3] =  xT;
1640     m[1][0] = 0.0f;  m[1][1] = 1.0f;  m[1][2] = 0.0f;  m[1][3] =  yT;
1641     m[2][0] = 0.0f;  m[2][1] = 0.0f;  m[2][2] = 1.0f;  m[2][3] =  zT;
1642 }
1643 
1644 #if !defined(WIN32) && !defined(WIN64)
1645 /*---------------------------------------------------------------------*
1646     Paired-Single intrinsics version
1647  *---------------------------------------------------------------------*
1648                 Note that this performs NO error checking.
1649  *---------------------------------------------------------------------*/
PSMTXTrans(Mtx m,f32 xT,f32 yT,f32 zT)1650 void PSMTXTrans( Mtx m, f32 xT, f32 yT, f32 zT )
1651 {
1652     f32x2 xT2 = {0.0F, xT};
1653     f32x2 yT2 = {0.0F, yT};
1654     f32x2 zT2 = {1.0F, zT};
1655     __PSQ_ST(m, c10, 0, 0);
1656     __PSQ_STX(m,  8, xT2, 0, 0);
1657     __PSQ_STX(m, 16, c01, 0, 0);
1658     __PSQ_STX(m, 24, yT2, 0, 0);
1659     __PSQ_STX(m, 32, c00, 0, 0);
1660     __PSQ_STX(m, 40, zT2, 0, 0);
1661 }
1662 #endif
1663 
1664 /*---------------------------------------------------------------------*
1665 
1666 Name:           MTXTransApply
1667 
1668 Description:    This function performs the operation equivalent to
1669                 MTXTrans + MTXConcat.
1670 
1671 Arguments:      src       matrix to be operated.
1672 
1673                 dst       resultant matrix from concat.
1674 
1675                 xT        x component of translation.
1676 
1677                 yT        y component of translation.
1678 
1679                 zT        z component of translation.
1680 
1681 Return:         none
1682 
1683 *---------------------------------------------------------------------*/
1684 /*---------------------------------------------------------------------*
1685     C version
1686  *---------------------------------------------------------------------*/
C_MTXTransApply(MTX_CONST Mtx src,Mtx dst,f32 xT,f32 yT,f32 zT)1687 void C_MTXTransApply ( MTX_CONST Mtx src, Mtx dst, f32 xT, f32 yT, f32 zT )
1688 {
1689     ASSERTMSG( (src != 0), MTX_TRANSAPPLY_1 );
1690     ASSERTMSG( (dst != 0), MTX_TRANSAPPLY_1 );
1691 
1692     if ( src != dst )
1693     {
1694         dst[0][0] = src[0][0];    dst[0][1] = src[0][1];    dst[0][2] = src[0][2];
1695         dst[1][0] = src[1][0];    dst[1][1] = src[1][1];    dst[1][2] = src[1][2];
1696         dst[2][0] = src[2][0];    dst[2][1] = src[2][1];    dst[2][2] = src[2][2];
1697     }
1698 
1699     dst[0][3] = src[0][3] + xT;
1700     dst[1][3] = src[1][3] + yT;
1701     dst[2][3] = src[2][3] + zT;
1702 }
1703 
1704 #if !defined(WIN32) && !defined(WIN64)
1705 /*---------------------------------------------------------------------*
1706     Paired-Single intrinsics version
1707  *---------------------------------------------------------------------*
1708                 Note that this performs NO error checking.
1709  *---------------------------------------------------------------------*/
PSMTXTransApply(Mtx src,Mtx dst,f32 xT,f32 yT,f32 zT)1710 void PSMTXTransApply( Mtx src, Mtx dst, f32 xT, f32 yT, f32 zT )
1711 {
1712     f32x2 fp4, fp5, fp6, fp7, fp8, fp9;
1713     f32x2 xT10 = {xT, 0.0f};
1714     f32x2 yT10 = {yT, 0.0f};
1715     f32x2 zT10 = {zT, 0.0f};
1716 
1717     //psq_l       fp4, 0(src),        0, 0;
1718     fp4 = __PSQ_L(src, 0, 0);
1719 
1720     //frsp        xT, xT;                     // to make sure xT = single precision
1721     //psq_l       fp5, 8(src),        0, 0;
1722     fp5 = __PSQ_LX(src, 8, 0, 0);
1723 
1724     //frsp        yT, yT;                     // to make sure yT = single precision
1725     //psq_l       fp7, 24(src),       0, 0;
1726     fp7 = __PSQ_LX(src, 24, 0, 0);
1727 
1728     //frsp        zT, zT;                     // to make sure zT = single precision
1729     //psq_l       fp8, 40(src),       0, 0;
1730     fp8 = __PSQ_LX(src, 40, 0, 0);
1731 
1732     //psq_st      fp4, 0(dst),        0, 0;
1733     __PSQ_ST(dst, fp4, 0, 0);
1734 
1735     //ps_sum1     fp5, xT, fp5, fp5;
1736     fp5 = __PS_SUM1(xT10, fp5, fp5);
1737 
1738     //psq_l       fp6, 16(src),       0, 0;
1739     fp6 = __PSQ_LX(src, 16, 0, 0);
1740 
1741     //psq_st      fp5, 8(dst),        0, 0;
1742     __PSQ_STX(dst, 8, fp5, 0, 0);
1743 
1744     //ps_sum1     fp7, yT, fp7, fp7;
1745     fp7 = __PS_SUM1(yT10, fp7, fp7);
1746 
1747     //psq_l       fp9, 32(src),       0, 0;
1748     fp9 = __PSQ_LX(src, 32, 0, 0);
1749 
1750     //psq_st      fp6, 16(dst),       0, 0;
1751     __PSQ_STX(dst, 16, fp6, 0, 0);
1752 
1753     //ps_sum1     fp8, zT, fp8, fp8;
1754     fp8 = __PS_SUM1(zT10, fp8, fp8);
1755 
1756     //psq_st      fp7, 24(dst),       0, 0;
1757     __PSQ_STX(dst, 24, fp7, 0, 0);
1758 
1759     //psq_st      fp9, 32(dst),       0, 0;
1760     __PSQ_STX(dst, 32, fp9, 0, 0);
1761 
1762     //psq_st      fp8, 40(dst),       0, 0;
1763     __PSQ_STX(dst, 40, fp8, 0, 0);
1764 }
1765 #endif
1766 
1767 /*---------------------------------------------------------------------*
1768 
1769 Name:            MTXScale
1770 
1771 Description:     sets a scaling matrix.
1772 
1773 
1774 Arguments:       m        matrix to be set
1775 
1776                 xS        x scale factor.
1777 
1778                 yS        y scale factor.
1779 
1780                 zS        z scale factor.
1781 
1782 Return:         none
1783 
1784  *---------------------------------------------------------------------*/
1785 /*---------------------------------------------------------------------*
1786     C version
1787  *---------------------------------------------------------------------*/
C_MTXScale(Mtx m,f32 xS,f32 yS,f32 zS)1788 void C_MTXScale ( Mtx m, f32 xS, f32 yS, f32 zS )
1789 {
1790     ASSERTMSG( (m != 0), MTX_SCALE_1 );
1791 
1792 
1793     m[0][0] = xS;    m[0][1] = 0.0f;  m[0][2] = 0.0f;  m[0][3] = 0.0f;
1794     m[1][0] = 0.0f;  m[1][1] = yS;    m[1][2] = 0.0f;  m[1][3] = 0.0f;
1795     m[2][0] = 0.0f;  m[2][1] = 0.0f;  m[2][2] = zS;    m[2][3] = 0.0f;
1796 }
1797 
1798 #if !defined(WIN32) && !defined(WIN64)
1799 /*---------------------------------------------------------------------*
1800     Paired-Single intrinsics version
1801  *---------------------------------------------------------------------*
1802                 Note that this performs NO error checking.
1803  *---------------------------------------------------------------------*/
PSMTXScale(Mtx m,f32 xS,f32 yS,f32 zS)1804 void PSMTXScale( Mtx m, f32 xS, f32 yS, f32 zS )
1805 {
1806     f32x2 xS2 = {xS,   0.0F};
1807     f32x2 yS2 = {0.0F, yS};
1808     f32x2 zS2 = {zS, 0.0F};
1809 
1810     __PSQ_ST(m, xS2, 0, 0);
1811     __PSQ_STX(m,  8, c00, 0, 0);
1812     __PSQ_STX(m, 16, yS2, 0, 0);
1813     __PSQ_STX(m, 24, c00, 0, 0);
1814     __PSQ_STX(m, 32, c00, 0, 0);
1815     __PSQ_STX(m, 40, zS2, 0, 0);
1816 }
1817 #endif
1818 
1819 /*---------------------------------------------------------------------*
1820 
1821 Name:           MTXScaleApply
1822 
1823 Description:    This function performs the operation equivalent to
1824                 MTXScale + MTXConcat
1825 
1826 Arguments:      src       matrix to be operated.
1827 
1828                 dst       resultant matrix from concat.
1829 
1830                 xS        x scale factor.
1831 
1832                 yS        y scale factor.
1833 
1834                 zS        z scale factor.
1835 
1836 Return:         none
1837 
1838  *---------------------------------------------------------------------*/
1839 /*---------------------------------------------------------------------*
1840     C version
1841  *---------------------------------------------------------------------*/
C_MTXScaleApply(MTX_CONST Mtx src,Mtx dst,f32 xS,f32 yS,f32 zS)1842 void C_MTXScaleApply ( MTX_CONST Mtx src, Mtx dst, f32 xS, f32 yS, f32 zS )
1843 {
1844     ASSERTMSG( (src != 0), MTX_SCALEAPPLY_1 );
1845     ASSERTMSG( (dst != 0), MTX_SCALEAPPLY_2 );
1846 
1847     dst[0][0] = src[0][0] * xS;     dst[0][1] = src[0][1] * xS;
1848     dst[0][2] = src[0][2] * xS;     dst[0][3] = src[0][3] * xS;
1849 
1850     dst[1][0] = src[1][0] * yS;     dst[1][1] = src[1][1] * yS;
1851     dst[1][2] = src[1][2] * yS;     dst[1][3] = src[1][3] * yS;
1852 
1853     dst[2][0] = src[2][0] * zS;     dst[2][1] = src[2][1] * zS;
1854     dst[2][2] = src[2][2] * zS;     dst[2][3] = src[2][3] * zS;
1855 }
1856 
1857 #if !defined(WIN32) && !defined(WIN64)
1858 /*---------------------------------------------------------------------*
1859     Paired-Single intrinsics version
1860  *---------------------------------------------------------------------*
1861                 Note that this performs NO error checking.
1862  *---------------------------------------------------------------------*/
1863 
// Paired-single implementation: the matrix is processed as six pairs of
// floats at byte offsets 0, 8, ..., 40.  The two pairs of row 0 are scaled
// by xS, those of row 1 by yS and those of row 2 by zS.
void PSMTXScaleApply ( MTX_CONST Mtx src, Mtx dst, f32 xS, f32 yS, f32 zS )
{
    // Each scale factor is duplicated into both slots so that an ordinary
    // paired multiply scales both elements of a pair.
    f32x2 scaleX = {xS, xS};
    f32x2 scaleY = {yS, yS};
    f32x2 scaleZ = {zS, zS};

    // Working registers, named <row>_<columns held>.
    f32x2 row0_01;
    f32x2 row0_23;
    f32x2 row1_01;
    f32x2 row1_23;
    f32x2 row2_01;
    f32x2 row2_23;

    // Loads, multiplies and stores are interleaved; a pair is only stored
    // after the pair at the same offset has been loaded.

    // row 0, columns 0-1 : load
    row0_01 = __PSQ_LX(src, 0, 0, 0);

    // row 0, columns 2-3 : load
    row0_23 = __PSQ_LX(src, 8, 0, 0);

    // row 0, columns 0-1 : scale by xS
    row0_01 = __PS_MUL(row0_01, scaleX);

    // row 1, columns 0-1 : load
    row1_01 = __PSQ_LX(src, 16, 0, 0);

    // row 0, columns 2-3 : scale by xS
    row0_23 = __PS_MUL(row0_23, scaleX);

    // row 1, columns 2-3 : load
    row1_23 = __PSQ_LX(src, 24, 0, 0);

    // row 1, columns 0-1 : scale by yS
    row1_01 = __PS_MUL(row1_01, scaleY);

    // row 2, columns 0-1 : load
    row2_01 = __PSQ_LX(src, 32, 0, 0);

    // row 0, columns 0-1 : store
    __PSQ_STX(dst, 0, row0_01, 0, 0);

    // row 1, columns 2-3 : scale by yS
    row1_23 = __PS_MUL(row1_23, scaleY);

    // row 2, columns 2-3 : load
    row2_23 = __PSQ_LX(src, 40, 0, 0);

    // row 0, columns 2-3 : store
    __PSQ_STX(dst, 8, row0_23, 0, 0);

    // row 2, columns 0-1 : scale by zS
    row2_01 = __PS_MUL(row2_01, scaleZ);

    // row 1, columns 0-1 : store
    __PSQ_STX(dst, 16, row1_01, 0, 0);

    // row 2, columns 2-3 : scale by zS
    row2_23 = __PS_MUL(row2_23, scaleZ);

    // row 1, columns 2-3 : store
    __PSQ_STX(dst, 24, row1_23, 0, 0);

    // row 2, columns 0-1 : store
    __PSQ_STX(dst, 32, row2_01, 0, 0);

    // row 2, columns 2-3 : store
    __PSQ_STX(dst, 40, row2_23, 0, 0);
}
1939 #endif
1940 
1941 /*---------------------------------------------------------------------*
1942 
1943 Name:           MTXReflect
1944 
Description:    compute a matrix that reflects points across a plane.
1946 
1947 Arguments:      m        matrix to be set
1948 
1949                 p        point on the planar reflector.
1950 
1951                 n       normal of the planar reflector.
1952 
1953 Return:         none
1954 
1955  *---------------------------------------------------------------------*/
1956 /*---------------------------------------------------------------------*
1957     C version
1958  *---------------------------------------------------------------------*/
// Reference C implementation.  Fills m so that its 3x3 part is
// (identity - 2 * n * n^T) and its fourth column is 2 * (p . n) * n.
// NOTE(review): nothing here normalizes n; the result is presumably only a
// true reflection when n is unit length - confirm with callers.
void C_MTXReflect ( Mtx m, const Vec *p, const Vec *n )
{
    f32 offDiagXY;      // -2 * nx * ny, shared by m[0][1] and m[1][0]
    f32 offDiagXZ;      // -2 * nx * nz, shared by m[0][2] and m[2][0]
    f32 offDiagYZ;      // -2 * ny * nz, shared by m[1][2] and m[2][1]
    f32 twicePDotN;     // 2 * (p . n), factor of the translation column

    offDiagXY  = -2.0f * n->x * n->y;
    offDiagXZ  = -2.0f * n->x * n->z;
    offDiagYZ  = -2.0f * n->y * n->z;
    twicePDotN = 2.0f * C_VECDotProduct(p, n);

    // first row
    m[0][0] = 1.0f - 2.0f * n->x * n->x;
    m[0][1] = offDiagXY;
    m[0][2] = offDiagXZ;
    m[0][3] = twicePDotN * n->x;

    // second row
    m[1][0] = offDiagXY;
    m[1][1] = 1.0f - 2.0f * n->y * n->y;
    m[1][2] = offDiagYZ;
    m[1][3] = twicePDotN * n->y;

    // third row
    m[2][0] = offDiagXZ;
    m[2][1] = offDiagYZ;
    m[2][2] = 1.0f - 2.0f * n->z * n->z;
    m[2][3] = twicePDotN * n->z;
}
1983 
1984 #if !defined(WIN32) && !defined(WIN64)
1985 /*---------------------------------------------------------------------*
1986     Paired-Single intrinsics version
1987  *---------------------------------------------------------------------*/
// Paired-single implementation.  Register contents are written below as
// [slot 0][slot 1].  The file-scope constant c11 is {1.0F, 1.0F}.
void PSMTXReflect ( Mtx m, const Vec *p, const Vec *n )
{
    f32x2    nXY, nZ1;              // plane normal    : [nx][ny], [nz][1.0F]
    f32x2    pXY, pZ1;              // point on plane  : [px][py], [pz][1.0F]
    f32x2    neg2nXY, neg2nZ1;      // normal times -2 : [-2nx][-2ny], [-2nz][-2.0F]
    f32x2    dotAcc;                // accumulates the p.n term in slot 0
    f32x2    transXY;               // [pdotn*nx][pdotn*ny]
    f32x2    row0Lo, row0Hi;        // [m00][m01], [m02][m03]
    f32x2    row1Lo, row1Hi;        // [m10][m11], [m12][m13]
    f32x2    row2Lo, row2Hi;        // [m20][m21], [m22][m23]

    // nZ1 = [nz][1.0F] : LOAD
    nZ1 = __PSQ_LX(n, 8, 1, 0);

    // nXY = [nx][ny] : LOAD
    nXY = __PSQ_LX(n, 0, 0, 0);

    // pXY = [px][py] : LOAD
    pXY = __PSQ_LX(p, 0, 0, 0);

    // neg2nZ1 = [-2nz][-2.0F]
    neg2nZ1 = __PS_NMADD(nZ1, c11, nZ1);

    // pZ1 = [pz][1.0F] : LOAD
    pZ1 = __PSQ_LX(p, 8, 1, 0);

    // neg2nXY = [-2nx][-2ny]
    neg2nXY = __PS_NMADD(nXY, c11, nXY);

    // row2Lo = [-2nx*nz][-2ny*nz] : [m20][m21]
    row2Lo = __PS_MULS0(nXY, neg2nZ1);

    // dotAcc = [-2(px*nx)][-2(py*ny)]
    dotAcc = __PS_MUL(neg2nXY, pXY);

    // row0Lo = [-2nx*nx][-2nx*ny]
    row0Lo = __PS_MULS0(nXY, neg2nXY);

    // dotAcc = [-2(px*nx+py*ny)][?]
    dotAcc = __PS_SUM0(dotAcc, dotAcc, dotAcc);

    // row1Lo = [-2nx*ny][-2ny*ny]
    row1Lo = __PS_MULS1(nXY, neg2nXY);

    // [m20][m21] : STORE
    __PSQ_STX(m, 32, row2Lo, 0, 0);

    // row0Lo = [1-2nx*nx][-2nx*ny] : [m00][m01]
    row0Lo = __PS_SUM0(row0Lo, row0Lo, c11);

    // dotAcc = [2(px*nx+py*ny+pz*nz)][?]   (referred to as pdotn below)
    dotAcc = __PS_NMADD(neg2nZ1, pZ1, dotAcc);

    // row1Lo = [-2nx*ny][1-2ny*ny] : [m10][m11]
    row1Lo = __PS_SUM1(c11, row1Lo, row1Lo);

    // [m00][m01] : STORE
    __PSQ_STX(m, 0, row0Lo, 0, 0);

    // transXY = [pdotn*nx][pdotn*ny]
    transXY = __PS_MULS0(nXY, dotAcc);

    // row2Hi = [-2nz][pdotn]
    row2Hi = __PS_MERGE00(neg2nZ1, dotAcc);

    // [m10][m11] : STORE
    __PSQ_STX(m, 16, row1Lo, 0, 0);

    // row0Hi = [-2nx*nz][pdotn*nx] : [m02][m03]
    // (slot 0 of row2Lo is m20, which has the same value as m02)
    row0Hi = __PS_MERGE00(row2Lo, transXY);

    // row2Hi = [-2nz*nz][pdotn*nz]
    row2Hi = __PS_MULS0(row2Hi, nZ1);

    // row1Hi = [-2ny*nz][pdotn*ny] : [m12][m13]
    // (slot 1 of row2Lo is m21, which has the same value as m12)
    row1Hi = __PS_MERGE11(row2Lo, transXY);

    // [m02][m03] : STORE
    __PSQ_STX(m, 8, row0Hi, 0, 0);

    // row2Hi = [1-2nz*nz][pdotn*nz] : [m22][m23]
    row2Hi = __PS_SUM0(row2Hi, row2Hi, c11);

    // [m12][m13] : STORE
    __PSQ_STX(m, 24, row1Hi, 0, 0);

    // [m22][m23] : STORE
    __PSQ_STX(m, 40, row2Hi, 0, 0);
}
2093 #endif
2094 
2095 
2096 /*---------------------------------------------------------------------*
2097 
2098                              VIEW SECTION
2099 
2100 *---------------------------------------------------------------------*/
2101 
2102 /*---------------------------------------------------------------------*
2103 
2104 Name:           MTXLookAt
2105 
2106 Description:    compute a matrix to transform points to camera coordinates.
2107 
2108 Arguments:      m        matrix to be set
2109 
2110                 camPos   camera position.
2111 
2112                 camUp    camera 'up' direction.
2113 
2114                 target   camera aim point.
2115 
2116 Return:         none
2117 
2118 *---------------------------------------------------------------------*/
2119 /*---------------------------------------------------------------------*
2120     C version
2121  *---------------------------------------------------------------------*/
// Reference C implementation.  Builds three camera axes from the arguments
// and stores them as the rows of m; the fourth column of each row is the
// negated dot product of camPos with that row's axis.
void C_MTXLookAt ( Mtx m, const Point3d *camPos, const Vec *camUp, const Point3d *target )
{
    Vec axisZ;      // normalized (camPos - target)
    Vec axisX;      // normalized (camUp x axisZ)
    Vec axisY;      // axisZ x axisX

    ASSERTMSG( (m != 0),      MTX_LOOKAT_1    );
    ASSERTMSG( (camPos != 0), MTX_LOOKAT_2    );
    ASSERTMSG( (camUp  != 0), MTX_LOOKAT_3    );
    ASSERTMSG( (target != 0), MTX_LOOKAT_4    );

    // The Z axis points from the target back toward the camera, so the
    // camera ends up looking down the negative Z axis.
    axisZ.x = camPos->x - target->x;
    axisZ.y = camPos->y - target->y;
    axisZ.z = camPos->z - target->z;
    VECNormalize( &axisZ, &axisZ );

    // X axis: camUp x axisZ, normalized.
    VECCrossProduct( camUp, &axisZ, &axisX );
    VECNormalize( &axisX, &axisX );

    // Y axis: axisZ x axisX.  It is left un-normalized because both inputs
    // should already be unit length.
    VECCrossProduct( &axisZ, &axisX, &axisY );

    // row 0 : X axis
    m[0][0] = axisX.x;
    m[0][1] = axisX.y;
    m[0][2] = axisX.z;
    m[0][3] = -( camPos->x * axisX.x + camPos->y * axisX.y + camPos->z * axisX.z );

    // row 1 : Y axis
    m[1][0] = axisY.x;
    m[1][1] = axisY.y;
    m[1][2] = axisY.z;
    m[1][3] = -( camPos->x * axisY.x + camPos->y * axisY.y + camPos->z * axisY.z );

    // row 2 : Z axis
    m[2][0] = axisZ.x;
    m[2][1] = axisZ.y;
    m[2][2] = axisZ.z;
    m[2][3] = -( camPos->x * axisZ.x + camPos->y * axisZ.y + camPos->z * axisZ.z );
}
2162 
2163 /*---------------------------------------------------------------------*
2164 
2165 
2166                        TEXTURE PROJECTION SECTION
2167 
2168 
2169 *---------------------------------------------------------------------*/
2170 
2171 /*---------------------------------------------------------------------*
2172 
2173 Name:           MTXLightFrustum
2174 
2175 Description:    Compute a 3x4 projection matrix for texture projection
2176 
2177 Arguments:      m        3x4 matrix to be set
2178 
2179                 t        top coord. of view volume at the near clipping plane
2180 
2181                 b        bottom coord of view volume at the near clipping plane
2182 
2183                 lf       left coord. of view volume at near clipping plane
2184 
2185                 r        right coord. of view volume at near clipping plane
2186 
2187                 n        positive distance from camera to near clipping plane
2188 
2189                 scaleS   scale in the S direction for projected coordinates
2190                          (usually 0.5)
2191 
2192                 scaleT   scale in the T direction for projected coordinates
2193                          (usually 0.5)
2194 
2195                 transS   translate in the S direction for projected coordinates
2196                          (usually 0.5)
2197 
2198                 transT   translate in the T direction for projected coordinates
2199                          (usually 0.5)
2200 
2201 Return:         none.
2202 
2203  *---------------------------------------------------------------------*/
2204 /*---------------------------------------------------------------------*
2205     C version
2206  *---------------------------------------------------------------------*/
// Reference C implementation.  Row 0 produces the S coordinate, row 1 the T
// coordinate, and row 2 is fixed at (0, 0, -1, 0).
void C_MTXLightFrustum  ( Mtx m, float t, float b, float lf, float r, float n,
                          float scaleS, float scaleT, float transS,
                          float transT )
{
    f32 invWidth;       // 1 / (r - lf)
    f32 invHeight;      // 1 / (t - b)

    ASSERTMSG( (m != 0),  MTX_LIGHT_FRUSTUM_1  );
    ASSERTMSG( (t != b),  MTX_LIGHT_FRUSTUM_2  );
    ASSERTMSG( (lf != r), MTX_LIGHT_FRUSTUM_3  );

    invWidth  = 1.0f / (r - lf);
    invHeight = 1.0f / (t - b);

    // S row
    m[0][0] = ((2*n) * invWidth) * scaleS;
    m[0][1] = 0.0f;
    m[0][2] = (((r + lf) * invWidth) * scaleS) - transS;
    m[0][3] = 0.0f;

    // T row
    m[1][0] = 0.0f;
    m[1][1] = ((2*n) * invHeight) * scaleT;
    m[1][2] = (((t + b) * invHeight) * scaleT) - transT;
    m[1][3] = 0.0f;

    // third row
    m[2][0] = 0.0f;
    m[2][1] = 0.0f;
    m[2][2] = -1.0f;
    m[2][3] = 0.0f;
}
2234 
2235 /*---------------------------------------------------------------------*
2236 
2237 Name:           MTXLightPerspective
2238 
2239 Description:    compute a 3x4 perspective projection matrix from
2240                 field of view and aspect ratio for texture projection.
2241 
2242 Arguments:      m        3x4 matrix to be set
2243 
                fovY     total field of view in degrees in the YZ plane
2245 
2246                 aspect   ratio of view window width:height (X / Y)
2247 
2248                 scaleS   scale in the S direction for projected coordinates
2249                          (usually 0.5)
2250 
2251                 scaleT   scale in the T direction for projected coordinates
2252                          (usually 0.5)
2253 
2254                 transS   translate in the S direction for projected coordinates
2255                          (usually 0.5)
2256 
2257                 transT   translate in the T direction for projected coordinates
2258                          (usually 0.5)
2259 
2260 Return:         none
2261 
2262  *---------------------------------------------------------------------*/
2263 /*---------------------------------------------------------------------*
2264     C version
2265  *---------------------------------------------------------------------*/
// Reference C implementation.  Derives the cotangent of half the vertical
// field of view and uses it to fill the S row (row 0) and T row (row 1);
// row 2 is fixed at (0, 0, -1, 0).
void C_MTXLightPerspective  ( Mtx m, f32 fovY, f32 aspect, float scaleS,
                              float scaleT, float transS, float transT )
{
    f32 halfFov;        // half of fovY, first in degrees, then in radians
    f32 cotHalfFov;     // 1 / tan(halfFov)

    ASSERTMSG( (m != 0),                            MTX_LIGHT_PERSPECTIVE_1  );
    ASSERTMSG( ( (fovY > 0.0) && ( fovY < 180.0) ), MTX_LIGHT_PERSPECTIVE_2  );
    ASSERTMSG( (aspect != 0),                       MTX_LIGHT_PERSPECTIVE_3  );

    // Halve the angle while it is still in degrees, then convert.
    halfFov = fovY * 0.5f;
    halfFov = MTXDegToRad( halfFov );

    cotHalfFov = 1.0f / tanf(halfFov);

    // S row
    m[0][0] = (cotHalfFov / aspect) * scaleS;
    m[0][1] = 0.0f;
    m[0][2] = -transS;
    m[0][3] = 0.0f;

    // T row
    m[1][0] = 0.0f;
    m[1][1] = cotHalfFov * scaleT;
    m[1][2] = -transT;
    m[1][3] = 0.0f;

    // third row
    m[2][0] = 0.0f;
    m[2][1] = 0.0f;
    m[2][2] = -1.0f;
    m[2][3] = 0.0f;
}
2297 
2298 /*---------------------------------------------------------------------*
2299 
2300 Name:           MTXLightOrtho
2301 
Description:    compute a 3x4 orthographic projection matrix for texture projection.
2303 
2304 Arguments:      m        matrix to be set
2305 
2306                 t        top coord. of parallel view volume
2307 
2308                 b        bottom coord of parallel view volume
2309 
2310                 lf       left coord. of parallel view volume
2311 
2312                 r        right coord. of parallel view volume
2313 
2314                 scaleS   scale in the S direction for projected coordinates
2315                          (usually 0.5)
2316 
2317                 scaleT   scale in the T direction for projected coordinates
2318                          (usually 0.5)
2319 
2320                 transS   translate in the S direction for projected coordinates
2321                          (usually 0.5)
2322 
2323                 transT   translate in the T direction for projected coordinates
2324                          (usually 0.5)
2325 
2326 Return:         none
2327 
2328  *---------------------------------------------------------------------*/
2329 /*---------------------------------------------------------------------*
2330     C version
2331  *---------------------------------------------------------------------*/
// Reference C implementation.  Row 0 produces the S coordinate, row 1 the T
// coordinate, and row 2 is fixed at (0, 0, 0, 1).
void C_MTXLightOrtho ( Mtx m, f32 t, f32 b, f32 lf, f32 r, float scaleS,
                              float scaleT, float transS, float transT )
{
    f32 invWidth;       // 1 / (r - lf)
    f32 invHeight;      // 1 / (t - b)

    ASSERTMSG( (m != 0),  MTX_LIGHT_ORTHO_1     );
    ASSERTMSG( (t != b),  MTX_LIGHT_ORTHO_2     );
    ASSERTMSG( (lf != r), MTX_LIGHT_ORTHO_3     );

    invWidth  = 1.0f / (r - lf);
    invHeight = 1.0f / (t - b);

    // S row
    m[0][0] = (2.0f * invWidth * scaleS);
    m[0][1] = 0.0f;
    m[0][2] = 0.0f;
    m[0][3] = ((-(r + lf) * invWidth) * scaleS) + transS;

    // T row
    m[1][0] = 0.0f;
    m[1][1] = (2.0f * invHeight) * scaleT;
    m[1][2] = 0.0f;
    m[1][3] = ((-(t + b) * invHeight)* scaleT) + transT;

    // third row
    m[2][0] = 0.0f;
    m[2][1] = 0.0f;
    m[2][2] = 0.0f;
    m[2][3] = 1.0f;
}
2358 
2359 /*---------------------------------------------------------------------*
2360 
2361 Name:           MTXReorder
2362 
2363 Description:    Creates a reordered (column-major) matrix from a
2364                 row-major matrix, using paired single operations.
2365                 Reordered matrices are required for the MTXRO*
2366                 functions, which operate faster than their non-reordered
2367                 counterparts.
2368 
2369 Arguments:      src      source matrix.
2370                 dest     destination matrix, note type is ROMtx.
2371 
2372 Return:         none
2373 
2374 *---------------------------------------------------------------------*/
2375 /*---------------------------------------------------------------------*
2376     C version
2377  *---------------------------------------------------------------------*/
// Reference C implementation: dst[i][j] = src[j][i] for the four rows and
// three columns of dst, i.e. dst receives the transpose of src.
void C_MTXReorder(MTX_CONST Mtx src, ROMtx dst)
{
    int dstRow;
    int dstCol;

    // Destination elements are written in row-major order.
    for (dstRow = 0; dstRow < 4; dstRow++)
    {
        for (dstCol = 0; dstCol < 3; dstCol++)
        {
            dst[dstRow][dstCol] = src[dstCol][dstRow];
        }
    }
}
2385 
2386 #if !defined(WIN32) && !defined(WIN64)
2387 /*---------------------------------------------------------------------*
2388     Paired-Single intrinsics version
2389  *---------------------------------------------------------------------*/
// Paired-single implementation.  "sIJ" below stands for src[I][J]; register
// contents are written as [slot 0][slot 1].  Six pairs are loaded from src,
// recombined with merge operations and stored to dest at byte offsets
// 0, 8, ..., 40.
void PSMTXReorder(MTX_CONST Mtx src, register ROMtx dest)
{
    // Pairs as loaded from src.
    f32x2 in0Lo, in0Hi;     // [s00][s01], [s02][s03]
    f32x2 in1Lo, in1Hi;     // [s10][s11], [s12][s13]
    f32x2 in2Lo, in2Hi;     // [s20][s21], [s22][s23]

    // Pairs as stored to dest, named after the byte offset they go to.
    f32x2 out0;             // [s00][s10]
    f32x2 out8;             // [s20][s01]
    f32x2 out16;            // [s11][s21]
    f32x2 out24;            // [s02][s12]
    f32x2 out32;            // [s22][s03]
    f32x2 out40;            // [s13][s23]

    // [s00][s01] : LOAD
    in0Lo = __PSQ_L(src, 0, 0);

    // [s10][s11] : LOAD
    in1Lo = __PSQ_LX(src, 16, 0, 0);

    // [s20][s21] : LOAD
    in2Lo = __PSQ_LX(src, 32, 0, 0);

    // [s02][s03] : LOAD
    in0Hi = __PSQ_LX(src, 8, 0, 0);

    // out0 = [s00][s10]
    out0 = __PS_MERGE00(in0Lo, in1Lo);

    // [s12][s13] : LOAD
    in1Hi = __PSQ_LX(src, 24, 0, 0);

    // out8 = [s20][s01]
    out8 = __PS_MERGE01(in2Lo, in0Lo);

    // [s22][s23] : LOAD
    in2Hi = __PSQ_LX(src, 40, 0, 0);

    // out16 = [s11][s21]
    out16 = __PS_MERGE11(in1Lo, in2Lo);

    // [s00][s10] : STORE at offset 0
    __PSQ_ST(dest, out0, 0, 0);

    // out24 = [s02][s12]
    out24 = __PS_MERGE00(in0Hi, in1Hi);

    // [s20][s01] : STORE
    __PSQ_STX(dest, 8, out8, 0, 0);

    // out32 = [s22][s03]
    out32 = __PS_MERGE01(in2Hi, in0Hi);

    // [s11][s21] : STORE
    __PSQ_STX(dest, 16, out16, 0, 0);

    // out40 = [s13][s23]
    out40 = __PS_MERGE11(in1Hi, in2Hi);

    // [s02][s12] : STORE
    __PSQ_STX(dest, 24, out24, 0, 0);

    // [s22][s03] : STORE
    __PSQ_STX(dest, 32, out32, 0, 0);

    // [s13][s23] : STORE
    __PSQ_STX(dest, 40, out40, 0, 0);
}
2449 
2450 /*===========================================================================*/
2451 
2452 
2453 extern void _ASM_MTXRotAxisRadInternal(Mtx m, const Vec *axis, f32 sT, f32 cT);
2454 
ASM_MTXRotAxisRad(Mtx m,const Vec * axis,f32 rad)2455 void ASM_MTXRotAxisRad(Mtx        m,
2456                        const Vec *axis,
2457                        f32        rad ) {
2458     f32     sinT, cosT;
2459 
2460     sinT = sinf(rad);
2461     cosT = cosf(rad);
2462 
2463     _ASM_MTXRotAxisRadInternal(m, axis, sinT, cosT);
2464 }
2465 
// Evaluates sinf/cosf of rad and forwards them, together with m and axis,
// to ASM_MTXRotTrig.
void ASM_MTXRotRad ( Mtx m, char axis, f32 rad )
{
    const f32 sinOfAngle = sinf(rad);
    const f32 cosOfAngle = cosf(rad);

    ASM_MTXRotTrig( m, axis, sinOfAngle, cosOfAngle );
}
2475 
// Stores in r the product ASM_QUATMultiply(inverse of q, p).
void ASM_QUATDivide( const Quaternion *p, const Quaternion *q, Quaternion *r)
{
    Quaternion qInverse;

    // A local temporary holds the inverse, so q itself is never written.
    ASM_QUATInverse(q, &qInverse);

    // Operand order matters here: the inverse is the left operand.
    ASM_QUATMultiply(&qInverse, p, r);
}
2483 #endif
2484