1 /*---------------------------------------------------------------------------*
2 Project: matrix vector Library
3 File: mtx.c
4
5 Copyright 1998-2011 Nintendo. All rights reserved.
6
7 These coded instructions, statements, and computer programs contain
8 proprietary information of Nintendo of America Inc. and/or Nintendo
9 Company Ltd., and are protected by Federal copyright law. They may
10 not be disclosed to third parties or copied or duplicated in any form,
11 in whole or in part, without the prior written consent of Nintendo.
12
13 *---------------------------------------------------------------------------*/
14
15 #include <math.h>
16 #include <stdio.h>
17 #include <cafe/mtx.h>
18 #include "mtxAssert.h"
19
20 /*---------------------------------------------------------------------*
21 Constants
22 *---------------------------------------------------------------------*/
23 static const f32x2 c00 = {0.0F, 0.0F};
24 static const f32x2 c01 = {0.0F, 1.0F};
25 static const f32x2 c10 = {1.0F, 0.0F};
26 static const f32x2 c11 = {1.0F, 1.0F};
27 //static const f32x2 c22 = {2.0F, 2.0F};
28 static const f32x2 c33 = {3.0F, 3.0F};
29 static const f32x2 c0505 = {0.5F, 0.5F};
30
31 /*---------------------------------------------------------------------*
32
33
34 GENERAL SECTION
35
36
37 *---------------------------------------------------------------------*/
38
39
40 /*---------------------------------------------------------------------*
41
42 Name: MTXIdentity
43
44 Description: sets a matrix to identity
45
46 Arguments: m : matrix to be set
47
48 Return: none
49
50 *---------------------------------------------------------------------*/
51 /*---------------------------------------------------------------------*
52 C version
53 *---------------------------------------------------------------------*/
C_MTXIdentity(Mtx m)54 void C_MTXIdentity ( Mtx m )
55 {
56 ASSERTMSG( (m != 0), MTX_IDENTITY_1 );
57
58 m[0][0] = 1.0f; m[0][1] = 0.0f; m[0][2] = 0.0f; m[0][3] = 0.0f;
59 m[1][0] = 0.0f; m[1][1] = 1.0f; m[1][2] = 0.0f; m[1][3] = 0.0f;
60 m[2][0] = 0.0f; m[2][1] = 0.0f; m[2][2] = 1.0f; m[2][3] = 0.0f;
61 }
62
63 #if !defined(WIN32) && !defined(WIN64)
64 /*---------------------------------------------------------------------*
65 Paired-Single intrinsics version
66 *---------------------------------------------------------------------*
67 Note that this performs NO error checking.
68 *---------------------------------------------------------------------*/
PSMTXIdentity(Mtx m)69 void PSMTXIdentity( Mtx m )
70 {
71
72 //psq_st c00, 8(m), 0, 0 // m[0][2], m[0][3]
73 __PSQ_STX(m, 8, c00, 0, 0);
74
75 //psq_st c00, 24(m), 0, 0 // m[1][2], m[1][3]
76 __PSQ_STX(m, 24, c00, 0, 0);
77
78 //psq_st c00, 32(m), 0, 0 // m[2][0], m[2][1]
79 __PSQ_STX(m, 32, c00, 0, 0);
80
81 //psq_st c01, 16(m), 0, 0 // m[1][0], m[1][1]
82 __PSQ_STX(m, 16, c01, 0, 0);
83
84 //psq_st c10, 0(m), 0, 0 // m[0][0], m[0][1]
85 __PSQ_STX(m, 0, c10, 0, 0);
86
87 //psq_st c10, 40(m), 0, 0 // m[2][2], m[2][3]
88 __PSQ_STX(m, 40, c10, 0, 0);
89 }
90 #endif
91
92 /*---------------------------------------------------------------------*
93
94 Name: MTXCopy
95
96 Description: copies the contents of one matrix into another
97
98 Arguments: src source matrix for copy
99 dst destination matrix for copy
100
101 Return: none
102
103 *---------------------------------------------------------------------*/
104 /*---------------------------------------------------------------------*
105 C version
106 *---------------------------------------------------------------------*/
C_MTXCopy(MTX_CONST Mtx src,Mtx dst)107 void C_MTXCopy ( MTX_CONST Mtx src, Mtx dst )
108 {
109 ASSERTMSG( (src != 0) , MTX_COPY_1 );
110 ASSERTMSG( (dst != 0) , MTX_COPY_2 );
111
112 if( src == dst )
113 {
114 return;
115 }
116
117 dst[0][0] = src[0][0]; dst[0][1] = src[0][1]; dst[0][2] = src[0][2]; dst[0][3] = src[0][3];
118 dst[1][0] = src[1][0]; dst[1][1] = src[1][1]; dst[1][2] = src[1][2]; dst[1][3] = src[1][3];
119 dst[2][0] = src[2][0]; dst[2][1] = src[2][1]; dst[2][2] = src[2][2]; dst[2][3] = src[2][3];
120 }
121
122 #if !defined(WIN32) && !defined(WIN64)
123 /*---------------------------------------------------------------------*
124 Paired-Single intrinsics version
125 *---------------------------------------------------------------------*
126 Note that this performs NO error checking.
127 *---------------------------------------------------------------------*/
PSMTXCopy(MTX_CONST Mtx src,Mtx dst)128 void PSMTXCopy(MTX_CONST Mtx src, Mtx dst )
129 {
130 f32x2 fp0, fp1, fp2, fp3, fp4, fp5;
131
132 //psq_l fp0, 0(src), 0, 0
133 fp0 = __PSQ_L(src, 0, 0);
134
135 //psq_st fp0, 0(dst), 0, 0
136 __PSQ_ST(dst, fp0, 0, 0);
137
138 //psq_l fp1, 8(src), 0, 0
139 fp1 = __PSQ_LX(src, 8, 0, 0);
140
141 //psq_st fp1, 8(dst), 0, 0
142 __PSQ_STX(dst, 8, fp1, 0, 0);
143
144 //psq_l fp2, 16(src), 0, 0
145 fp2 = __PSQ_LX(src, 16, 0, 0);
146
147 //psq_st fp2, 16(dst), 0, 0
148 __PSQ_STX(dst, 16, fp2, 0, 0);
149
150 //psq_l fp3, 24(src), 0, 0
151 fp3 = __PSQ_LX(src, 24, 0, 0);
152
153 //psq_st fp3, 24(dst), 0, 0
154 __PSQ_STX(dst, 24, fp3, 0, 0);
155
156 //psq_l fp4, 32(src), 0, 0
157 fp4 = __PSQ_LX(src, 32, 0, 0);
158
159 //psq_st fp4, 32(dst), 0, 0
160 __PSQ_STX(dst, 32, fp4, 0, 0);
161
162 //psq_l fp5, 40(src), 0, 0
163 fp5 = __PSQ_LX(src, 40, 0, 0);
164
165 //psq_st fp5, 40(dst), 0, 0
166 __PSQ_STX(dst, 40, fp5, 0, 0);
167
168 }
169 #endif
170
171 /*---------------------------------------------------------------------*
172
173 Name: MTXConcat
174
175 Description: concatenates two matrices.
176 order of operation is A x B = AB.
177 ab may be the same matrix as a, as b, or as both.
178
179 when ab is neither a nor b the result is written directly, saving a MTXCopy.
180
181 Arguments: a first matrix for concat.
182 b second matrix for concat.
183 ab resultant matrix from concat.
184
185 Return: none
186
187 *---------------------------------------------------------------------*/
188 /*---------------------------------------------------------------------*
189 C version
190 *---------------------------------------------------------------------*/
C_MTXConcat(MTX_CONST Mtx a,MTX_CONST Mtx b,Mtx ab)191 void C_MTXConcat ( MTX_CONST Mtx a, MTX_CONST Mtx b, Mtx ab )
192 {
193 Mtx mTmp;
194 MtxPtr m;
195
196 ASSERTMSG( (a != 0), MTX_CONCAT_1 );
197 ASSERTMSG( (b != 0), MTX_CONCAT_2 );
198 ASSERTMSG( (ab != 0), MTX_CONCAT_3 );
199
200 if( (ab == a) || (ab == b) )
201 {
202 m = mTmp;
203 }
204
205 else
206 {
207 m = ab;
208 }
209
210 // compute (a x b) -> m
211
212 m[0][0] = a[0][0]*b[0][0] + a[0][1]*b[1][0] + a[0][2]*b[2][0];
213 m[0][1] = a[0][0]*b[0][1] + a[0][1]*b[1][1] + a[0][2]*b[2][1];
214 m[0][2] = a[0][0]*b[0][2] + a[0][1]*b[1][2] + a[0][2]*b[2][2];
215 m[0][3] = a[0][0]*b[0][3] + a[0][1]*b[1][3] + a[0][2]*b[2][3] + a[0][3];
216
217 m[1][0] = a[1][0]*b[0][0] + a[1][1]*b[1][0] + a[1][2]*b[2][0];
218 m[1][1] = a[1][0]*b[0][1] + a[1][1]*b[1][1] + a[1][2]*b[2][1];
219 m[1][2] = a[1][0]*b[0][2] + a[1][1]*b[1][2] + a[1][2]*b[2][2];
220 m[1][3] = a[1][0]*b[0][3] + a[1][1]*b[1][3] + a[1][2]*b[2][3] + a[1][3];
221
222 m[2][0] = a[2][0]*b[0][0] + a[2][1]*b[1][0] + a[2][2]*b[2][0];
223 m[2][1] = a[2][0]*b[0][1] + a[2][1]*b[1][1] + a[2][2]*b[2][1];
224 m[2][2] = a[2][0]*b[0][2] + a[2][1]*b[1][2] + a[2][2]*b[2][2];
225 m[2][3] = a[2][0]*b[0][3] + a[2][1]*b[1][3] + a[2][2]*b[2][3] + a[2][3];
226
227 // overwrite a or b if needed
228 if(m == mTmp)
229 {
230 C_MTXCopy( *((MTX_CONST Mtx *)&mTmp), ab );
231 }
232 }
233
234 #if !defined(WIN32) && !defined(WIN64)
235 /*---------------------------------------------------------------------*
236 Paired-Single intrinsics version
237 *---------------------------------------------------------------------*
238 Note that this performs NO error checking.
239 *---------------------------------------------------------------------*/
PSMTXConcat(MTX_CONST Mtx a,MTX_CONST Mtx b,Mtx ab)240 void PSMTXConcat ( MTX_CONST Mtx a, MTX_CONST Mtx b, Mtx ab )
241 {
242 f32x2 A00_A01 = __PSQ_L(a, 0, 0);
243 f32x2 A02_A03;
244 f32x2 A10_A11;
245 f32x2 A12_A13;
246 f32x2 A20_A21;
247 f32x2 A22_A23;
248 f32x2 B00_B01 = __PSQ_L(b, 0, 0);
249 f32x2 B02_B03 = __PSQ_LX(b, 8, 0, 0);
250 f32x2 B10_B11 = __PSQ_LX(b, 16, 0, 0);
251 f32x2 B12_B13;
252 f32x2 B20_B21;
253 f32x2 B22_B23;
254
255 f32x2 D00_D01;
256 f32x2 D02_D03;
257 f32x2 D10_D11;
258 f32x2 D12_D13;
259 f32x2 D20_D21;
260 f32x2 D22_D23;
261
262 // D00_D01 = b00a00 , b01a00
263 D00_D01 = __PS_MULS0( B00_B01, A00_A01);
264 A10_A11 = __PSQ_LX(a, 16, 0, 0);
265
266 // D02_D03 = b02a00 , b03a00
267 D02_D03 = __PS_MULS0( B02_B03, A00_A01);
268
269 // D10_D11 = a10b00 , a10b01
270 D10_D11 = __PS_MULS0( B00_B01, A10_A11);
271 B12_B13 = __PSQ_LX(b, 24, 0, 0);
272
273 // D12_D13 = a10b02 , a10b03
274 D12_D13 = __PS_MULS0( B02_B03, A10_A11);
275 A02_A03 = __PSQ_LX(a, 8, 0, 0);
276
277 // D00_D01 = b10a01 + b00a00 , b11a01 + b01a00
278 D00_D01 = __PS_MADDS1( B10_B11, A00_A01, D00_D01);
279 A12_A13 = __PSQ_LX(a, 24, 0, 0);
280
281 // D10_D11 = a10b00 + a11b10 , a10b01 + a11b11
282 D10_D11 = __PS_MADDS1( B10_B11, A10_A11, D10_D11);
283 B20_B21 = __PSQ_LX(b, 32, 0, 0);
284
285 // D02_D03 = b12a01 + b02a00 , b13a01 + b03a00
286 D02_D03 = __PS_MADDS1( B12_B13, A00_A01, D02_D03);
287 B22_B23 = __PSQ_LX(b, 40, 0, 0);
288
289 // D12_D13 = a10b02 + a11b12, a10b03+a11b13
290 D12_D13 = __PS_MADDS1( B12_B13, A10_A11, D12_D13);
291
292 A20_A21 = __PSQ_LX(a, 32, 0, 0);
293 A22_A23 = __PSQ_LX(a, 40, 0, 0);
294
295 // D00_D01 = b20a02 + b10a01 + b00a00 , b21a02 + b11a01 + b01a00
296 D00_D01 = __PS_MADDS0( B20_B21, A02_A03, D00_D01); // m00, m01 computed
297
298 // D02_D03 = b12a01 + b02a00 + b22a02 , b13a01 + b03a00 + b23a02
299 D02_D03 = __PS_MADDS0( B22_B23, A02_A03, D02_D03);
300
301 // D10_D11 = a10b00 + a11b10 +a12b20, a10b01 + a11b11 + a12b21
302 D10_D11 = __PS_MADDS0( B20_B21, A12_A13, D10_D11); // m10, m11 computed
303
304 // D12_D13 = a10b02 + a11b12 + a12b22, a10b03+a11b13 + a12b23 + a13
305 D12_D13 = __PS_MADDS0( B22_B23, A12_A13, D12_D13);
306
307 // store m00m01
308 __PSQ_ST(ab, D00_D01, 0, 0);
309
310 // D20_D21 = a20b00, a20b01
311 D20_D21 = __PS_MULS0( B00_B01, A20_A21);
312
313 // get a03 from fp1 and add to D02_D03
314 D02_D03 = __PS_MADDS1( c01, A02_A03, D02_D03); // m02, m03 computed
315
316 // D22_D23 = a20b02, a20b03
317 D22_D23 = __PS_MULS0( B02_B03, A20_A21);
318
319 // store m10m11
320 __PSQ_STX(ab, 16, D10_D11, 0, 0);
321
322 // get a13 from fp3 and add to D12_D13
323 D12_D13 = __PS_MADDS1( c01, A12_A13, D12_D13); // m12, m13 computed
324
325 // store m02m03
326 __PSQ_STX(ab, 8, D02_D03, 0, 0);
327
328 // D20_D21 = a20b00 + a21b10, a20b01 + a21b11
329 D20_D21 = __PS_MADDS1( B10_B11, A20_A21, D20_D21);
330
331 // D22_D23 = a20b02 + a21b12, a20b03 + a21b13
332 D22_D23 = __PS_MADDS1( B12_B13, A20_A21, D22_D23);
333
334 // D20_D21 = a20b00 + a21b10 + a22b20, a20b01 + a21b11 + a22b21
335 D20_D21 = __PS_MADDS0( B20_B21, A22_A23, D20_D21);
336
337 // store m12m13
338 __PSQ_STX(ab, 24, D12_D13, 0, 0);
339
340 // D22_D23 = a20b02 + a21b12 + a22b22, a20b03 + a21b13 + a22b23 + a23
341 D22_D23 = __PS_MADDS0( B22_B23, A22_A23, D22_D23);
342
343 // store m20m21
344
345 __PSQ_STX(ab, 32, D20_D21, 0, 0);
346
347 // get a23 from fp5 and add to fp17
348 D22_D23 = __PS_MADDS1( c01, A22_A23, D22_D23);
349
350 // store m22m23
351 __PSQ_STX(ab, 40, D22_D23, 0, 0);
352
353 }
354 #endif
355
356 /*---------------------------------------------------------------------*
357
358 Name: MTXConcatArray
359
360 Description: concatenates a matrix to an array of matrices.
361 order of operation is A x B(array) = AB(array).
362
363 Arguments: a first matrix for concat.
364 srcBase array base of second matrix for concat.
365 dstBase array base of resultant matrix from concat.
366 count number of matrices in srcBase, dstBase arrays.
367
368 note: cannot check for array overflow
369
370 Return: none
371
372 *---------------------------------------------------------------------*/
373 /*---------------------------------------------------------------------*
374 C version
375 *---------------------------------------------------------------------*/
C_MTXConcatArray(MTX_CONST Mtx a,MTX_CONST Mtx * srcBase,Mtx * dstBase,u32 count)376 void C_MTXConcatArray ( MTX_CONST Mtx a, MTX_CONST Mtx* srcBase, Mtx* dstBase, u32 count )
377 {
378 u32 i;
379
380 ASSERTMSG( (a != 0), "MTXConcatArray(): NULL MtxPtr 'a' " );
381 ASSERTMSG( (srcBase != 0), "MTXConcatArray(): NULL MtxPtr 'srcBase' " );
382 ASSERTMSG( (dstBase != 0), "MTXConcatArray(): NULL MtxPtr 'dstBase' " );
383 ASSERTMSG( (count > 1), "MTXConcatArray(): count must be greater than 1." );
384
385 for ( i = 0 ; i < count ; i++ )
386 {
387 C_MTXConcat(a, *srcBase, *dstBase);
388
389 srcBase++;
390 dstBase++;
391 }
392 }
393
394 #if !defined(WIN32) && !defined(WIN64)
395 /*---------------------------------------------------------------------*
396 Paired-Single intrinsics version
397 *---------------------------------------------------------------------*
398 Note that this performs NO error checking.
399 *---------------------------------------------------------------------*/
PSMTXConcatArray(MTX_CONST Mtx a,MTX_CONST Mtx * srcBase,Mtx * dstBase,u32 count)400 void PSMTXConcatArray (
401 MTX_CONST Mtx a,
402 MTX_CONST Mtx* srcBase,
403 Mtx* dstBase,
404 u32 count )
405 {
406
407 int i;
408
409 for ( i = 0 ; i < count ; i++ )
410 {
411 PSMTXConcat(a, *srcBase, *dstBase);
412
413 srcBase++;
414 dstBase++;
415 }
416 }
417 #endif
418
419 /*---------------------------------------------------------------------*
420
421 Name: MTXTranspose
422
423 Description: computes the transpose of a matrix.
424 As matrices are 3x4, fourth column (translation component) is
425 lost and becomes (0,0,0).
426
427 This function is intended for use in computing an
428 inverse-transpose matrix to transform normals for lighting.
429 In this case, lost translation component doesn't matter.
430
431 Arguments: src source matrix.
432 xPose destination (transposed) matrix.
433 ok if src == xPose.
434
435 Return: none
436
437 *---------------------------------------------------------------------*/
438 /*---------------------------------------------------------------------*
439 C version
440 *---------------------------------------------------------------------*/
C_MTXTranspose(MTX_CONST Mtx src,Mtx xPose)441 void C_MTXTranspose ( MTX_CONST Mtx src, Mtx xPose )
442 {
443 Mtx mTmp;
444 MtxPtr m;
445
446 ASSERTMSG( (src != 0), MTX_TRANSPOSE_1 );
447 ASSERTMSG( (xPose != 0), MTX_TRANSPOSE_2 );
448
449 if(src == xPose)
450 {
451 m = mTmp;
452 }
453 else
454 {
455 m = xPose;
456 }
457
458 m[0][0] = src[0][0]; m[0][1] = src[1][0]; m[0][2] = src[2][0]; m[0][3] = 0.0f;
459 m[1][0] = src[0][1]; m[1][1] = src[1][1]; m[1][2] = src[2][1]; m[1][3] = 0.0f;
460 m[2][0] = src[0][2]; m[2][1] = src[1][2]; m[2][2] = src[2][2]; m[2][3] = 0.0f;
461
462 // copy back if needed
463 if( m == mTmp )
464 {
465 C_MTXCopy( *((MTX_CONST Mtx *)&mTmp), xPose );
466 }
467 }
468
469 #if !defined(WIN32) && !defined(WIN64)
470 /*---------------------------------------------------------------------*
471 Paired-Single intrinsics version
472 *---------------------------------------------------------------------*
473 Note that this performs NO error checking.
474 *---------------------------------------------------------------------*/
PSMTXTranspose(MTX_CONST Mtx src,Mtx xPose)475 void PSMTXTranspose ( MTX_CONST Mtx src, Mtx xPose )
476 {
477 f32x2 row0a, row1a, row0b, row1b;
478 f32x2 trns0, trns1, trns2;
479
480 //psq_l row0a, 0(src), 0, 0 // [0][0], [0][1]
481 row0a = __PSQ_L(src, 0, 0);
482
483 //psq_l row1a, 16(src), 0, 0 // [1][0], [1][1]
484 row1a = __PSQ_LX(src, 16, 0, 0);
485
486 //ps_merge00 trns0, row0a, row1a // [0][0], [1][0]
487 trns0 = __PS_MERGE00(row0a, row1a);
488
489 //psq_l row0b, 8(src), 1, 0 // [0][2], 1
490 row0b = __PSQ_LX(src, 8, 1, 0);
491
492 //ps_merge11 trns1, row0a, row1a // [0][1], [1][1]
493 trns1 = __PS_MERGE11(row0a, row1a);
494
495 //psq_l row1b, 24(src), 1, 0 // [1][2], 1
496 row1b = __PSQ_LX(src, 24, 1, 0);
497
498 //psq_st trns0, 0(xPose), 0, 0 // [0][0], [1][0] -> [0][0], [0][1]
499 __PSQ_ST(xPose, trns0, 0, 0);
500
501 //psq_l row0a, 32(src), 0, 0 // [2][0], [2][1]
502 row0a = __PSQ_LX(src, 32, 0, 0);
503
504 //ps_merge00 trns2, row0b, row1b // [0][2], [1][2]
505 trns2 = __PS_MERGE00(row0b, row1b);
506
507 //psq_st trns1, 16(xPose), 0, 0 // [0][1], [1][1] -> [1][0], [1][1]
508 __PSQ_STX(xPose, 16, trns1, 0, 0);
509
510 //ps_merge00 trns0, row0a, c00 // [2][0], 0
511 trns0 = __PS_MERGE00(row0a, c00);
512
513 //psq_st trns2, 32(xPose), 0, 0 // [0][2], [1][2] -> [2][0], [2][1]
514 __PSQ_STX(xPose, 32, trns2, 0, 0);
515
516 //ps_merge10 trns1, row0a, c00 // [2][1], 0
517 trns1 = __PS_MERGE10(row0a, c00);
518
519 //psq_st trns0, 8(xPose), 0, 0 // [2][0], 0 -> [0][2], [0][3]
520 __PSQ_STX(xPose, 8, trns0, 0, 0);
521
522 //lfs row0b, 40(src) // [2][2]
523 row0b = __PSQ_LX(src, 40, 1, 0);
524
525 //psq_st trns1, 24(xPose), 0, 0 // [2][1], 0 -> [1][2], [1][3]
526 __PSQ_STX(xPose, 24, trns1, 0, 0);
527
528 //stfs row0b, 40(xPose) // [2][2] -> [2][2]
529 __PSQ_STX(xPose, 40, row0b, 1, 0);
530 }
531 #endif
532
533 /*---------------------------------------------------------------------*
534
535 Name: MTXInverse
536
537 Description: computes a fast inverse of a matrix.
538 this algorithm works for matrices with a fourth row of
539 (0,0,0,1).
540
541 for a matrix
542 M = | A C | where A is the upper 3x3 submatrix,
543 | 0 1 | C is a 3x1 column vector (the fourth column of src)
544
545 INV(M) = | inv(A) (inv(A))*(-C) |
546 | 0 1 |
547
548 Arguments: src source matrix.
549 inv destination (inverse) matrix.
550 ok if src == inv.
551
552 Return: 0 if src is not invertible.
553 1 on success.
554
555 *---------------------------------------------------------------------*/
556 /*---------------------------------------------------------------------*
557 C version
558 *---------------------------------------------------------------------*/
C_MTXInverse(MTX_CONST Mtx src,Mtx inv)559 u32 C_MTXInverse ( MTX_CONST Mtx src, Mtx inv )
560 {
561 Mtx mTmp;
562 MtxPtr m;
563 f32 det;
564
565 ASSERTMSG( (src != 0), MTX_INVERSE_1 );
566 ASSERTMSG( (inv != 0), MTX_INVERSE_2 );
567
568 if( src == inv )
569 {
570 m = mTmp;
571 }
572 else
573 {
574 m = inv;
575 }
576
577 // compute the determinant of the upper 3x3 submatrix
578 det = src[0][0]*src[1][1]*src[2][2] + src[0][1]*src[1][2]*src[2][0] + src[0][2]*src[1][0]*src[2][1]
579 - src[2][0]*src[1][1]*src[0][2] - src[1][0]*src[0][1]*src[2][2] - src[0][0]*src[2][1]*src[1][2];
580
581 // check if matrix is singular
582 if( det == 0.0f )
583 {
584 return 0;
585 }
586
587 // compute the inverse of the upper submatrix:
588
589 // find the transposed matrix of cofactors of the upper submatrix
590 // and multiply by (1/det)
591
592 det = 1.0f / det;
593
594 m[0][0] = (src[1][1]*src[2][2] - src[2][1]*src[1][2]) * det;
595 m[0][1] = -(src[0][1]*src[2][2] - src[2][1]*src[0][2]) * det;
596 m[0][2] = (src[0][1]*src[1][2] - src[1][1]*src[0][2]) * det;
597
598 m[1][0] = -(src[1][0]*src[2][2] - src[2][0]*src[1][2]) * det;
599 m[1][1] = (src[0][0]*src[2][2] - src[2][0]*src[0][2]) * det;
600 m[1][2] = -(src[0][0]*src[1][2] - src[1][0]*src[0][2]) * det;
601
602 m[2][0] = (src[1][0]*src[2][1] - src[2][0]*src[1][1]) * det;
603 m[2][1] = -(src[0][0]*src[2][1] - src[2][0]*src[0][1]) * det;
604 m[2][2] = (src[0][0]*src[1][1] - src[1][0]*src[0][1]) * det;
605
606 // compute (invA)*(-C)
607 m[0][3] = -m[0][0]*src[0][3] - m[0][1]*src[1][3] - m[0][2]*src[2][3];
608 m[1][3] = -m[1][0]*src[0][3] - m[1][1]*src[1][3] - m[1][2]*src[2][3];
609 m[2][3] = -m[2][0]*src[0][3] - m[2][1]*src[1][3] - m[2][2]*src[2][3];
610
611 // copy back if needed
612 if( m == mTmp )
613 {
614 C_MTXCopy( *((MTX_CONST Mtx *)&mTmp),inv );
615 }
616
617 return 1;
618 }
619
620 #if !defined(WIN32) && !defined(WIN64)
621 /*---------------------------------------------------------------------*
622 Paired-Single intrinsics version
623 *---------------------------------------------------------------------*
624 Note that this performs NO error checking.
625 Results may be a little bit different from the C version
626 because it doesn't perform exactly same calculation.
627 *---------------------------------------------------------------------*/
PSMTXInverse(MTX_CONST Mtx src,Mtx inv)628 u32 PSMTXInverse ( MTX_CONST Mtx src, Mtx inv )
629 {
630
631 f32x2 fp0;
632 f32x2 fp1;
633 f32x2 fp2;
634 f32x2 fp3;
635 f32x2 fp4;
636 f32x2 fp5;
637
638 f32x2 fp6;
639 f32x2 fp7;
640 f32x2 fp8;
641 f32x2 fp9;
642 f32x2 fp10;
643 f32x2 fp11;
644 f32x2 fp12;
645 f32x2 fp13;
646
647 // fp0 [ 00 ][ 1.0F ] : Load
648 fp0 = __PSQ_LX(src, 0, 1, 0);
649
650 // fp1 [ 01 ][ 02 ] : Load
651 fp1 = __PSQ_LX(src, 4, 0, 0);
652
653 // fp2 [ 10 ][ 1.0F ] : Load
654 fp2 = __PSQ_LX(src, 16, 1, 0);
655
656 // fp6 [ 02 ][ 00 ]
657 fp6 = __PS_MERGE10(fp1, fp0);
658
659 // fp3 [ 11 ][ 12 ] : Load
660 fp3 = __PSQ_LX(src, 20, 0, 0);
661
662 // fp4 [ 20 ][ 1.0F ] : Load
663 fp4 = __PSQ_LX(src, 32, 1, 0);
664
665 // fp7 [ 12 ][ 10 ]
666 fp7 = __PS_MERGE10(fp3, fp2);
667
668 // fp5 [ 21 ][ 22 ] : Load
669 fp5 = __PSQ_LX(src, 36, 0, 0);
670
671 // fp11[ 11*02 ][ 00*12 ]
672 fp11 = __PS_MUL(fp3, fp6);
673
674 // fp8 [ 22 ][ 20 ]
675 fp8 = __PS_MERGE10(fp5, fp4);
676
677 // fp13[ 21*12 ][ 10*22 ]
678 fp13 = __PS_MUL(fp5, fp7);
679
680 // fp11[ 01*12 - 11*02 ][ 10*02 - 00*12 ]
681 fp11 = __PS_MSUB(fp1, fp7, fp11);
682
683 // fp12[ 01*22 ][ 20*02 ]
684 fp12 = __PS_MUL(fp1, fp8);
685
686 // fp13[ 11*22 - 21*12 ][ 20*12 - 10*22 ]
687 fp13 = __PS_MSUB(fp3, fp8, fp13);
688
689 // fp10[ 20*11 ][ N/A ]
690 fp10 = __PS_MUL(fp3, fp4);
691
692 // fp12[ 21*02 - 01*22 ][ 00*22 - 20*02 ]
693 fp12 = __PS_MSUB(fp5, fp6, fp12);
694
695 // fp7 [ 00*(11*22-21*12) ][ N/A ]
696 fp7 = __PS_MUL(fp0, fp13);
697
698 // fp9 [ 00*21 ][ N/A ]
699 fp9 = __PS_MUL(fp0, fp5);
700
701 // fp8 [ 10*01 ][ N/A ]
702 fp8 = __PS_MUL(fp1, fp2);
703
704 // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) ][ N/A ]
705 fp7 = __PS_MADD(fp2, fp12, fp7);
706
707 // fp6 [ 0.0F ][ 0.0F ]
708 fp6 = __PS_SUB(fp6, fp6);
709
710 // fp10[ 10*21 - 20*11 ][ N/A ]
711 fp10 = __PS_MSUB(fp2, fp5, fp10);
712
713 // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) + 20*(01*12-11*02) ][ N/A ] : det
714 fp7 = __PS_MADD(fp4, fp11, fp7);
715
716 // fp9 [ 20*01 - 00*21 ][ N/A ]
717 fp9 = __PS_MSUB(fp1, fp4, fp9);
718
719 // fp8 [ 00*11 - 10*01 ][ N/A ]
720 fp8 = __PS_MSUB(fp0, fp3, fp8);
721
722 // check if matrix is singular
723 if( fp7[0] == 0.0f && fp7[1] == 0.0f)
724 {
725 return 0;
726 }
727
728 // compute the inverse of the upper submatrix:
729
730 // find the transposed matrix of cofactors of the upper submatrix
731 // and multiply by (1/det)
732
733 // fp0 [ 1/det ][ N/A ]
734 fp0 = __PS_RES(fp7);
735
736 // Newton's approximation
737 // Refinement : ( E = est. of 1/K ) -> ( E' = ( 2 - K * E ) * E )
738 fp6 = __PS_ADD(fp0, fp0);
739 fp5 = __PS_MUL(fp7, fp0);
740 fp0 = __PS_NMSUB(fp0, fp5, fp6);
741
742 // fp1 [ 03 ][ 03 ] : Load
743 fp1[0] = src[0][3];
744 fp1[1] = src[0][3];
745
746 // fp13[ ( 11*22 - 21*12 ) * rdet ][ ( 20*12 - 10*22 ) * rdet ] : i[0][0], i[1][0]
747 fp13 = __PS_MULS0(fp13, fp0);
748
749 // fp2 [ 13 ][ 13 ] : Load
750 fp2[0] = src[1][3];
751 fp2[1] = src[1][3];
752
753 // fp12[ ( 21*02 - 01*22 ) * rdet ][ ( 00*22 - 20*02 ) * rdet ] : i[0][1], i[1][1]
754 fp12 = __PS_MULS0(fp12, fp0);
755
756 // fp3 [ 23 ][ 23 ] : Load
757 fp3[0] = src[2][3];
758 fp3[1] = src[2][3];
759
760 // fp11[ ( 01*12 - 11*02 ) * rdet ][ ( 10*02 - 00*12 ) * rdet ] : i[0][2], i[1][2]
761 fp11 = __PS_MULS0(fp11, fp0);
762
763 // fp5 [ i00 ][ i01 ]
764 fp5 = __PS_MERGE00(fp13, fp12);
765
766 // fp4 [ i10 ][ i11 ]
767 fp4 = __PS_MERGE11(fp13, fp12);
768
769 // fp6 [ i00*03 ][ i10*03 ]
770 fp6 = __PS_MUL(fp13, fp1);
771
772 // [ i00 ][ i01 ] : Store fp5 -> free(fp5[ i00 ][ i01 ])
773 //inv[0][0] = fp5[0];
774 //inv[0][1] = fp5[1];
775 __PSQ_STX(inv, 0, fp5, 0, 0);
776
777 // [ i10 ][ i11 ] : Store fp4 -> free(fp4[ i10 ][ i11 ])
778 //inv[1][0] = fp4[0];
779 //inv[1][1] = fp4[1];
780 __PSQ_STX(inv, 16, fp4, 0, 0);
781
782 // fp10[ ( 10*21 - 20*11 ) * rdet ] : i[2][0]
783 fp10 = __PS_MULS0(fp10, fp0);
784
785 // fp9 [ ( 20*01 - 00*21 ) * rdet ] : i[2][1]
786 fp9 = __PS_MULS0(fp9, fp0);
787
788 // fp6 [ i00*03+i01*13 ][ i10*03+i11*13 ]
789 fp6 = __PS_MADD(fp12, fp2, fp6);
790
791 // [ i20 ] : Store fp10
792 //inv[2][0] = fp10[0];
793 __PSQ_STX(inv, 32, fp10, 1, 0);
794
795 // fp8 [ ( 00*11 - 10*01 ) * rdet ] : i[2][2]
796 fp8 = __PS_MULS0(fp8, fp0);
797
798 // fp6 [ -i00*03-i01*13-i02*23 ][ -i10*03-i11*13-i12*23 ] : i[0][3], i[1][3]
799 fp6 = __PS_NMADD(fp11, fp3, fp6);
800
801 // [ i21 ] : Store fp9
802 //inv[2][1] = fp9[0];
803 __PSQ_STX(inv, 36, fp9, 1, 0);
804
805 // fp7 [ i20*03 ][ N/A ]
806 fp7 = __PS_MUL(fp10, fp1);
807
808 // fp5 [ i02 ][ i03 ]
809 fp5 = __PS_MERGE00(fp11, fp6);
810
811 // [ i22 ] : Store fp8
812 //inv[2][2] = fp8[0];
813 __PSQ_STX(inv, 40, fp8, 1, 0);
814
815 // fp7 [ i20*03+i21*13 ][ N/A ]
816 fp7 = __PS_MADD(fp9, fp2, fp7);
817
818 // fp4 [ i12 ][ i13 ]
819 fp4 = __PS_MERGE11(fp11, fp6);
820
821 // [ i02 ][ i03 ] : Store fp5
822 //inv[0][2] = fp5[0];
823 //inv[0][3] = fp5[1];
824 __PSQ_STX(inv, 8, fp5, 0, 0);
825
826 // fp7 [ -i20*03-i21*13-i22*23 ][ N/A ] : i[2][3]
827 fp7 = __PS_NMADD(fp8, fp3, fp7);
828
829 // [ i12 ][ i13 ] : Store fp4
830 //inv[1][2] = fp4[0];
831 //inv[1][3] = fp4[1];
832 __PSQ_STX(inv, 24, fp4, 0, 0);
833
834 // [ i23 ] : Store fp7
835 //inv[2][3] = fp7[0];
836 __PSQ_STX(inv, 44, fp7, 1, 0);
837
838 return 1;
839 }
840 #endif
841
842 /*---------------------------------------------------------------------*
843
844 Name: MTXInvXpose
845
846 Description: computes a fast inverse-transpose of a matrix.
847 this algorithm works for matrices with a fourth row of
848 (0,0,0,1). Commonly used for calculating normal transform
849 matrices.
850
851 This function is equivalent to the combination of
852 two functions MTXInverse + MTXTranspose.
853
854 Arguments: src source matrix.
855 invX destination (inverse-transpose) matrix.
856 ok if src == invx.
857
858 Return: 0 if src is not invertible.
859 1 on success.
860
861 *---------------------------------------------------------------------*/
862 /*---------------------------------------------------------------------*
863 C version
864 *---------------------------------------------------------------------*/
C_MTXInvXpose(MTX_CONST Mtx src,Mtx invX)865 u32 C_MTXInvXpose ( MTX_CONST Mtx src, Mtx invX )
866 {
867 Mtx mTmp;
868 MtxPtr m;
869 f32 det;
870
871 ASSERTMSG( (src != 0), MTX_INVXPOSE_1 );
872 ASSERTMSG( (invX != 0), MTX_INVXPOSE_2 );
873
874 if( src == invX )
875 {
876 m = mTmp;
877 }
878 else
879 {
880 m = invX;
881 }
882
883 // compute the determinant of the upper 3x3 submatrix
884 det = src[0][0]*src[1][1]*src[2][2] + src[0][1]*src[1][2]*src[2][0] + src[0][2]*src[1][0]*src[2][1]
885 - src[2][0]*src[1][1]*src[0][2] - src[1][0]*src[0][1]*src[2][2] - src[0][0]*src[2][1]*src[1][2];
886
887 // check if matrix is singular
888 if( det == 0.0f )
889 {
890 return 0;
891 }
892
893 // compute the inverse-transpose of the upper submatrix:
894
895 // find the transposed matrix of cofactors of the upper submatrix
896 // and multiply by (1/det)
897
898 det = 1.0f / det;
899
900 m[0][0] = (src[1][1]*src[2][2] - src[2][1]*src[1][2]) * det;
901 m[0][1] = -(src[1][0]*src[2][2] - src[2][0]*src[1][2]) * det;
902 m[0][2] = (src[1][0]*src[2][1] - src[2][0]*src[1][1]) * det;
903
904 m[1][0] = -(src[0][1]*src[2][2] - src[2][1]*src[0][2]) * det;
905 m[1][1] = (src[0][0]*src[2][2] - src[2][0]*src[0][2]) * det;
906 m[1][2] = -(src[0][0]*src[2][1] - src[2][0]*src[0][1]) * det;
907
908 m[2][0] = (src[0][1]*src[1][2] - src[1][1]*src[0][2]) * det;
909 m[2][1] = -(src[0][0]*src[1][2] - src[1][0]*src[0][2]) * det;
910 m[2][2] = (src[0][0]*src[1][1] - src[1][0]*src[0][1]) * det;
911
912 // the fourth columns should be all zero
913 m[0][3] = 0.0F;
914 m[1][3] = 0.0F;
915 m[2][3] = 0.0F;
916
917 // copy back if needed
918 if( m == mTmp )
919 {
920 C_MTXCopy( *((MTX_CONST Mtx *)&mTmp),invX );
921 }
922
923 return 1;
924 }
925
926 #if !defined(WIN32) && !defined(WIN64)
927 /*---------------------------------------------------------------------*
928 Paired-Single intrinsics version
929 *---------------------------------------------------------------------*
930 Note that this performs NO error checking.
931 Results may be a little bit different from the C version
932 because it doesn't perform exactly same calculation.
933 *---------------------------------------------------------------------*/
PSMTXInvXpose(MTX_CONST Mtx src,Mtx invX)934 u32 PSMTXInvXpose ( MTX_CONST Mtx src, Mtx invX )
935 {
936 f32x2 fp0;
937 f32x2 fp1;
938 f32x2 fp2;
939 f32x2 fp3;
940 f32x2 fp4;
941 f32x2 fp5;
942
943 f32x2 fp6;
944 f32x2 fp7;
945 f32x2 fp8;
946 f32x2 fp9;
947 f32x2 fp10;
948 f32x2 fp11;
949 f32x2 fp12;
950 f32x2 fp13;
951
952 // fp0 [ 00 ][ 1.0F ] : Load
953 //fp0[0] = src[0][0];
954 //fp0[1] = 1.0F;
955 fp0 = __PSQ_LX(src, 0, 1, 0);
956
957 // fp1 [ 01 ][ 02 ] : Load
958 //fp1[0] = src[0][1];
959 //fp1[1] = src[0][2];
960 fp1 = __PSQ_LX(src, 4, 0, 0);
961
962 // fp2 [ 10 ][ 1.0F ] : Load
963 //fp2[0] = src[1][0];
964 //fp2[1] = 1.0F;
965 fp2 = __PSQ_LX(src, 16, 1, 0);
966
967 // fp6 [ 02 ][ 00 ]
968 fp6 = __PS_MERGE10(fp1, fp0);
969
970 // fp3 [ 11 ][ 12 ] : Load
971 //fp3[0] = src[1][1];
972 //fp3[1] = src[1][2];
973 fp3 = __PSQ_LX(src, 20, 0, 0);
974
975 // fp4 [ 20 ][ 1.0F ] : Load
976 //fp4[0] = src[2][0];
977 //fp4[1] = 1.0F;
978 fp4 = __PSQ_LX(src, 32, 1, 0);
979
980 // fp7 [ 12 ][ 10 ]
981 fp7 = __PS_MERGE10(fp3, fp2);
982
983 // fp5 [ 21 ][ 22 ] : Load
984 //fp5[0] = src[2][1];
985 //fp5[1] = src[2][2];
986 fp5 = __PSQ_LX(src, 36, 0, 0);
987
988 // fp11[ 11*02 ][ 00*12 ]
989 fp11 = __PS_MUL(fp3, fp6);
990
991 // fp8 [ 22 ][ 20 ]
992 fp8 = __PS_MERGE10(fp5, fp4);
993
994 // fp13[ 21*12 ][ 10*22 ]
995 fp13 = __PS_MUL(fp5, fp7);
996
997 // fp11[ 01*12 - 11*02 ][ 10*02 - 00*12 ]
998 fp11 = __PS_MSUB(fp1, fp7, fp11);
999
1000 // fp12[ 01*22 ][ 20*02 ]
1001 fp12 = __PS_MUL(fp1, fp8);
1002
1003 // fp13[ 11*22 - 21*12 ][ 20*12 - 10*22 ]
1004 fp13 = __PS_MSUB(fp3, fp8, fp13);
1005
1006 // fp10[ 20*11 ][ N/A ]
1007 fp10 = __PS_MUL(fp3, fp4);
1008
1009 // fp12[ 21*02 - 01*22 ][ 00*22 - 20*02 ]
1010 fp12 = __PS_MSUB(fp5, fp6, fp12);
1011
1012 // fp7 [ 00*(11*22-21*12) ][ N/A ]
1013 fp7 = __PS_MUL(fp0, fp13);
1014
1015 // fp9 [ 00*21 ][ N/A ]
1016 fp9 = __PS_MUL(fp0, fp5);
1017
1018 // fp8 [ 10*01 ][ N/A ]
1019 fp8 = __PS_MUL(fp1, fp2);
1020
1021 // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) ][ N/A ]
1022 fp7 = __PS_MADD(fp2, fp12, fp7);
1023
1024 // fp6 [ 0.0F ][ 0.0F ]
1025 fp6 = __PS_SUB(fp6, fp6);
1026
1027 // fp10[ 10*21 - 20*11 ][ N/A ]
1028 fp10 = __PS_MSUB(fp2, fp5, fp10);
1029
1030 // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) + 20*(01*12-11*02) ][ N/A ] : det
1031 fp7 = __PS_MADD(fp4, fp11, fp7);
1032
1033 // fp9 [ 20*01 - 00*21 ][ N/A ]
1034 fp9 = __PS_MSUB(fp1, fp4, fp9);
1035
1036 // fp8 [ 00*11 - 10*01 ][ N/A ]
1037 fp8 = __PS_MSUB(fp0, fp3, fp8);
1038
1039 // check if matrix is singular
1040 if( fp7[0] == 0.0f && fp7[1] == 0.0f)
1041 {
1042 return 0;
1043 }
1044
1045 // compute the inverse-transpose of the upper submatrix:
1046
1047 // find the transposed matrix of cofactors of the upper submatrix
1048 // and multiply by (1/det)
1049
1050 // fp0 [ 1/det ][ N/A ]
1051 fp0 = __PS_RES(fp7);
1052
1053 // [ ix03 ] : Store fp6
1054 invX[0][3] = fp6[0];
1055
1056 // Newton's approximation
1057 // Refinement : ( E = est. of 1/K ) -> ( E' = ( 2 - K * E ) * E )
1058 fp4 = __PS_ADD(fp0, fp0);
1059 fp5 = __PS_MUL(fp7, fp0);
1060
1061 // [ ix13 ] : Store fp6
1062 //invX[1][3] = fp6[0];
1063 __PSQ_STX(invX, 28, fp6, 1, 0);
1064
1065 fp0 = __PS_NMSUB(fp0, fp5, fp4);
1066
1067 // [ ix23 ] : Store fp6
1068 //invX[2][3] = fp6[0];
1069 __PSQ_STX(invX, 44, fp6, 1, 0);
1070
1071 // fp13[ ( 11*22 - 21*12 ) * rdet ][ ( 20*12 - 10*22 ) * rdet ] : ix[0][0], ix[0][1]
1072 fp13 = __PS_MULS0(fp13, fp0);
1073
1074 // fp12[ ( 21*02 - 01*22 ) * rdet ][ ( 00*22 - 20*02 ) * rdet ] : ix[1][0], ix[1][1]
1075 fp12 = __PS_MULS0(fp12, fp0);
1076
1077 // [ ix00 ][ ix01 ] : Store fp13
1078 //invX[0][0] = fp13[0];
1079 //invX[0][1] = fp13[1];
1080 __PSQ_STX(invX, 0, fp13, 0, 0);
1081
1082 // fp11[ ( 01*12 - 11*02 ) * rdet ][ ( 10*02 - 00*12 ) * rdet ] : ix[2][0], ix[2][1]
1083 fp11 = __PS_MULS0(fp11, fp0);
1084
1085 // [ ix10 ][ ix11 ] : Store fp12
1086 //invX[1][0] = fp12[0];
1087 //invX[1][1] = fp12[1];
1088 __PSQ_STX(invX, 16, fp12, 0, 0);
1089
1090 // fp10[ ( 10*21 - 20*11 ) * rdet ] : i[0][2]
1091 fp10 = __PS_MULS0(fp10, fp0);
1092
1093 // [ ix20 ][ ix21 ] : Store fp11
1094 //invX[2][0] = fp11[0];
1095 //invX[2][1] = fp11[1];
1096 __PSQ_STX(invX, 32, fp11, 0, 0);
1097
1098 // fp9 [ ( 20*01 - 00*21 ) * rdet ] : i[1][2]
1099 fp9 = __PS_MULS0(fp9, fp0);
1100
1101 // [ ix02 ] : Store fp10
1102 //invX[0][2] = fp10[0];
1103 __PSQ_STX(invX, 8, fp10, 1, 0);
1104
1105 // fp8 [ ( 00*11 - 10*01 ) * rdet ] : i[2][2]
1106 fp8 = __PS_MULS0(fp8, fp0);
1107
1108 // [ ix12 ] : Store fp9
1109 //invX[1][2] = fp9[0];
1110 __PSQ_STX(invX, 24, fp9, 1, 0);
1111
1112 // [ ix22 ] : Store fp8
1113 //invX[2][2] = fp8[0];
1114 __PSQ_STX(invX, 40, fp8, 1, 0);
1115
1116 return 1;
1117 }
1118 #endif
1119
1120 /*---------------------------------------------------------------------*
1121
1122
1123 MODEL SECTION
1124
1125
1126 *---------------------------------------------------------------------*/
1127
1128 /*---------------------------------------------------------------------*
1129
1130 Name: MTXRotDeg
1131
1132 Description: sets a rotation matrix about one of the X, Y or Z axes
1133
1134 Arguments: m matrix to be set
1135
1136 axis major axis about which to rotate.
1137 axis is passed in as a character.
1138 it must be one of 'X', 'x', 'Y', 'y', 'Z', 'z'
1139
1140 deg rotation angle in degrees.
1141
1142 note: counter-clockwise rotation is positive.
1143
1144 Return: none
1145
1146 *---------------------------------------------------------------------*/
1147
1148 /*---------------------------------------------------------------------*
1149
1150 Name: MTXRotRad
1151
1152 Description: sets a rotation matrix about one of the X, Y or Z axes
1153
1154 Arguments: m matrix to be set
1155
1156 axis major axis about which to rotate.
1157 axis is passed in as a character.
1158 it must be one of 'X', 'x', 'Y', 'y', 'Z', 'z'
1159
                rad         rotation angle in radians.
1161
1162 note: counter-clockwise rotation is positive.
1163
1164 Return: none
1165
1166 *---------------------------------------------------------------------*/
1167 /*---------------------------------------------------------------------*
1168 C version
1169 *---------------------------------------------------------------------*/
// Sets m to a rotation about a major axis, given the angle in radians.
// Only the sine and cosine are evaluated here; the matrix itself is
// filled in by C_MTXRotTrig.
void C_MTXRotRad ( Mtx m, char axis, f32 rad )
{
    ASSERTMSG( (m != 0), MTX_ROTRAD_1 );

    // "axis" is deliberately not checked here: C_MTXRotTrig verifies it.
    C_MTXRotTrig( m, axis, sinf(rad), cosf(rad) );
}
1184
1185 #if !defined(WIN32) && !defined(WIN64)
1186 /*---------------------------------------------------------------------*
1187 Paired-Single intrinsics version
1188 *---------------------------------------------------------------------*
1189 Note that this performs NO error checking.
1190 *---------------------------------------------------------------------*/
// Paired-single flavour of MTXRotRad: evaluates sin/cos of the angle
// (radians) and hands them to PSMTXRotTrig. No checking is done on m.
void PSMTXRotRad ( Mtx m, char axis, f32 rad )
{
    const f32 sine   = sinf(rad);
    const f32 cosine = cosf(rad);

    PSMTXRotTrig( m, axis, sine, cosine );
}
1200 #endif
1201
1202 /*---------------------------------------------------------------------*
1203
1204 Name: MTXRotTrig
1205
1206 Description: sets a rotation matrix about one of the X, Y or Z axes
1207 from specified trig ratios
1208
1209 Arguments: m matrix to be set
1210
1211 axis major axis about which to rotate.
1212 axis is passed in as a character.
1213 It must be one of 'X', 'x', 'Y', 'y', 'Z', 'z'
1214
1215 sinA sine of rotation angle.
1216
1217 cosA cosine of rotation angle.
1218
1219 note: counter-clockwise rotation is positive.
1220
1221 Return: none
1222
1223 *---------------------------------------------------------------------*/
1224 /*---------------------------------------------------------------------*
1225 C version
1226 *---------------------------------------------------------------------*/
// Sets m to a rotation about the X, Y or Z axis from a precomputed
// sine/cosine pair. "axis" may be given in either case; any other
// character trips the MTX_ROTTRIG_2 assertion and leaves m unwritten.
void C_MTXRotTrig ( Mtx m, char axis, f32 sinA, f32 cosA )
{
    ASSERTMSG( (m != 0), MTX_ROTTRIG_1 );

    if ( axis == 'x' || axis == 'X' )
    {
        // X axis: row 0 is the identity row, rows 1 and 2 carry the rotation
        m[0][0] = 1.0f;   m[0][1] = 0.0f;   m[0][2] = 0.0f;    m[0][3] = 0.0f;
        m[1][0] = 0.0f;   m[1][1] = cosA;   m[1][2] = -sinA;   m[1][3] = 0.0f;
        m[2][0] = 0.0f;   m[2][1] = sinA;   m[2][2] = cosA;    m[2][3] = 0.0f;
    }
    else if ( axis == 'y' || axis == 'Y' )
    {
        // Y axis: row 1 is the identity row, rows 0 and 2 carry the rotation
        m[0][0] = cosA;    m[0][1] = 0.0f;   m[0][2] = sinA;   m[0][3] = 0.0f;
        m[1][0] = 0.0f;    m[1][1] = 1.0f;   m[1][2] = 0.0f;   m[1][3] = 0.0f;
        m[2][0] = -sinA;   m[2][1] = 0.0f;   m[2][2] = cosA;   m[2][3] = 0.0f;
    }
    else if ( axis == 'z' || axis == 'Z' )
    {
        // Z axis: row 2 is the identity row, rows 0 and 1 carry the rotation
        m[0][0] = cosA;   m[0][1] = -sinA;   m[0][2] = 0.0f;   m[0][3] = 0.0f;
        m[1][0] = sinA;   m[1][1] = cosA;    m[1][2] = 0.0f;   m[1][3] = 0.0f;
        m[2][0] = 0.0f;   m[2][1] = 0.0f;    m[2][2] = 1.0f;   m[2][3] = 0.0f;
    }
    else
    {
        // unrecognised axis character
        ASSERTMSG( 0, MTX_ROTTRIG_2 );
    }
}
1261
1262 #if !defined(WIN32) && !defined(WIN64)
1263 /*---------------------------------------------------------------------*
1264 Paired-Single intrinsics version
1265 *---------------------------------------------------------------------*
1266 Note that this performs NO error checking.
1267 *---------------------------------------------------------------------*/
// Paired-single flavour of MTXRotTrig.
//
// Pair contents are written as [ ps0 ][ ps1 ]. Store offsets are byte
// offsets into m, so offset 4*(4*row + col) addresses m[row][col]. A store
// with W = 1 writes only ps0 (one float); W = 0 writes both floats.
// Within each case the stores hit disjoint locations that together cover
// the whole 3x4 matrix.
void PSMTXRotTrig ( Mtx m, char axis, f32 sinA, f32 cosA )
{
    f32x2 sinPair = {sinA, 0.0f};           // [ sinA ][ 0 ]
    f32x2 cosPair = {cosA, 0.0f};           // [ cosA ][ 0 ]
    f32x2 negSinPair;                       // [ -sinA ][ not used ]
    f32x2 sinCos, cosNegSin;                // rotation pairs (X and Z cases)
    f32x2 cosZero, zeroOne;                 // Y case pairs
    f32x2 negSinZero, sinZero;              // Y case pairs
    f32x2 oneZero;                          // Z case pair

    negSinPair = __PS_NEG(sinPair);

    switch(axis)
    {
    case 'x':
    case 'X':
        sinCos    = __PS_MERGE00(sinPair, cosPair);       // [ sinA ][ cosA ]
        cosNegSin = __PS_MERGE00(cosPair, negSinPair);    // [ cosA ][ -sinA ]

        __PSQ_ST(m, c11, 1, 0);                 // m00      = 1
        __PSQ_STX(m, 4, c00, 0, 0);             // m01, m02 = 0, 0
        __PSQ_STX(m, 12, c00, 0, 0);            // m03, m10 = 0, 0
        __PSQ_STX(m, 20, cosNegSin, 0, 0);      // m11, m12 = cosA, -sinA
        __PSQ_STX(m, 28, c00, 0, 0);            // m13, m20 = 0, 0
        __PSQ_STX(m, 36, sinCos, 0, 0);         // m21, m22 = sinA, cosA
        __PSQ_STX(m, 44, c00, 1, 0);            // m23      = 0
        break;

    case 'y':
    case 'Y':
        cosZero    = __PS_MERGE00(cosPair, c00);          // [ cosA ][ 0 ]
        zeroOne    = __PS_MERGE00(c00, c11);              // [ 0 ][ 1 ]
        negSinZero = __PS_MERGE00(negSinPair, c00);       // [ -sinA ][ 0 ]
        sinZero    = __PS_MERGE00(sinPair, c00);          // [ sinA ][ 0 ]

        __PSQ_ST(m, cosZero, 0, 0);             // m00, m01 = cosA, 0
        __PSQ_STX(m, 8, sinZero, 0, 0);         // m02, m03 = sinA, 0
        __PSQ_STX(m, 16, zeroOne, 0, 0);        // m10, m11 = 0, 1
        __PSQ_STX(m, 24, c00, 0, 0);            // m12, m13 = 0, 0
        __PSQ_STX(m, 32, negSinZero, 0, 0);     // m20, m21 = -sinA, 0
        __PSQ_STX(m, 40, cosZero, 0, 0);        // m22, m23 = cosA, 0
        break;

    case 'z':
    case 'Z':
        sinCos    = __PS_MERGE00(sinPair, cosPair);       // [ sinA ][ cosA ]
        cosNegSin = __PS_MERGE00(cosPair, negSinPair);    // [ cosA ][ -sinA ]
        oneZero   = __PS_MERGE00(c11, c00);               // [ 1 ][ 0 ]

        __PSQ_ST(m, cosNegSin, 0, 0);           // m00, m01 = cosA, -sinA
        __PSQ_STX(m, 8, c00, 0, 0);             // m02, m03 = 0, 0
        __PSQ_STX(m, 16, sinCos, 0, 0);         // m10, m11 = sinA, cosA
        __PSQ_STX(m, 24, c00, 0, 0);            // m12, m13 = 0, 0
        __PSQ_STX(m, 32, c00, 0, 0);            // m20, m21 = 0, 0
        __PSQ_STX(m, 40, oneZero, 0, 0);        // m22, m23 = 1, 0
        break;

    default:
        // unrecognised axis character
        ASSERTMSG( 0, MTX_ROTTRIG_2 );
        break;
    }
}
1382 #endif
1383
1384 /*---------------------------------------------------------------------*
1385
1386 Name: MTXRotAxisRad
1387
1388 Description: sets a rotation matrix about an arbitrary axis
1389
1390
1391 Arguments: m matrix to be set
1392
1393 axis ptr to a vector containing the x,y,z axis
1394 components.
1395 axis does not have to be a unit vector.
1396
                rad         rotation angle in radians.
1398
1399 note: counter-clockwise rotation is positive.
1400
1401 Return: none
1402
1403 *---------------------------------------------------------------------*/
1404 /*---------------------------------------------------------------------*
1405 C version
1406 *---------------------------------------------------------------------*/
// Sets m to a rotation of "rad" radians about an arbitrary axis.
// The axis is normalized into a local copy first, so the caller's vector
// is not modified and need not be unit length. The fourth column of m is
// cleared.
void C_MTXRotAxisRad( Mtx m, const Vec *axis, f32 rad )
{
    Vec unit;                       // normalized copy of *axis
    f32 sinT, cosT;                 // sine / cosine of the angle
    f32 oneMinusCos;                // 1 - cos
    f32 ux, uy, uz;                 // components of the unit axis
    f32 uxSq, uySq, uzSq;           // squared components

    ASSERTMSG( (m != 0), MTX_ROTAXIS_1 );
    ASSERTMSG( (axis != 0), MTX_ROTAXIS_2 );

    sinT        = sinf(rad);
    cosT        = cosf(rad);
    oneMinusCos = 1.0f - cosT;

    C_VECNormalize( axis, &unit );

    ux = unit.x;
    uy = unit.y;
    uz = unit.z;

    uxSq = ux * ux;
    uySq = uy * uy;
    uzSq = uz * uz;

    // row 0
    m[0][0] = ( oneMinusCos * uxSq ) + ( cosT );
    m[0][1] = ( oneMinusCos * ux * uy ) - ( sinT * uz );
    m[0][2] = ( oneMinusCos * ux * uz ) + ( sinT * uy );
    m[0][3] = 0.0f;

    // row 1
    m[1][0] = ( oneMinusCos * ux * uy ) + ( sinT * uz );
    m[1][1] = ( oneMinusCos * uySq ) + ( cosT );
    m[1][2] = ( oneMinusCos * uy * uz ) - ( sinT * ux );
    m[1][3] = 0.0f;

    // row 2
    m[2][0] = ( oneMinusCos * ux * uz ) - ( sinT * uy );
    m[2][1] = ( oneMinusCos * uy * uz ) + ( sinT * ux );
    m[2][2] = ( oneMinusCos * uzSq ) + ( cosT );
    m[2][3] = 0.0f;
}
1447
1448 #if !defined(WIN32) && !defined(WIN64)
1449 /*---------------------------------------------------------------------*
1450 Paired-Single intrinsics version
1451 *---------------------------------------------------------------------*
1452 Note that this performs NO error checking.
1453 *---------------------------------------------------------------------*/
// Paired-single core of MTXRotAxisRad: builds the axis/angle rotation
// matrix from an axis vector and a precomputed sine (sT) / cosine (cT).
//
// The axis is normalized with a reciprocal-square-root estimate plus one
// Newton-Raphson step. Pair contents are written as [ ps0 ][ ps1 ];
// n = (nx, ny, nz) is the normalized axis, s = sT, c = cT, t = 1 - cT.
// The whole axis is read before the first store to m.
static void _PSMTXRotAxisRadInternal(
    Mtx         m,
    const Vec  *axis,
    f32         sT,
    f32         cT )
{
    const f32 oneMinusCos = 1.0f - cT;

    f32x2 tT  = {oneMinusCos, oneMinusCos};     // [ t ][ t ]
    f32x2 sT2 = {sT, sT};                       // [ s ][ s ]
    f32x2 cT2 = {cT, cT};                       // [ c ][ c ]

    f32x2 axisXY, axisZZ;                       // raw axis components
    f32x2 sqXY, sqXYplusZZ, lenSq;              // squared-length terms
    f32x2 rsqEst, estSq, halfEst, corr, rsq;    // 1/sqrt(S) and refinement
    f32x2 nXY, nZZ;                             // normalized axis
    f32x2 tnXY, snXY, tnZZ;                     // t*n and s*n products
    f32x2 tnXY_nx, tnXY_ny, tnXY_nz, tnZZ_nz;   // second-order products
    f32x2 xMinusSnz, yPlusSnz, negSnXY, m20Pair;
    f32x2 row0lo, row0hi, row1lo, row1hi, row2lo, row2hi;

    // axisXY = [ x ][ y ] : LOAD
    axisXY = __PSQ_L(axis, 0, 0);

    // axisZZ = [ z ][ z ] : LOAD
    axisZZ[0] = axis->z;
    axisZZ[1] = axis->z;

    // sqXY = [ x*x ][ y*y ]
    sqXY = __PS_MUL(axisXY, axisXY);

    // sqXYplusZZ = [ x*x + z*z ][ y*y + z*z ]
    sqXYplusZZ = __PS_MADD(axisZZ, axisZZ, sqXY);

    // lenSq = [ S = x*x + y*y + z*z ][ z ]
    lenSq = __PS_SUM0(sqXYplusZZ, axisZZ, sqXY);

    // rsqEst = [ E ][ E ], E = estimate of 1/sqrt(S)
    rsqEst[0] = rsqEst[1] = __FRSQRTE(lenSq[0]);

    // Newton-Raphson refinement step: E' = (E/2) * (3 - E*E*S).
    // Only ps0 of the result is meaningful (ps1 of lenSq holds z, not S);
    // every later use goes through __PS_MULS0, which reads ps0 only.
    estSq   = __PS_MUL(rsqEst, rsqEst);             // E*E
    halfEst = __PS_MUL(rsqEst, c0505);              // E/2
    corr    = __PS_NMSUB(estSq, lenSq, c33);        // 3 - E*E*S
    rsq     = __PS_MUL(corr, halfEst);              // (E/2)(3 - E*E*S)

    // nXY = [ nx ][ ny ], nZZ = [ nz ][ nz ]
    nXY = __PS_MULS0(axisXY, rsq);
    nZZ = __PS_MULS0(axisZZ, rsq);

    // tnXY = [ t*nx ][ t*ny ]
    tnXY = __PS_MULS0(nXY, tT);

    // snXY = [ s*nx ][ s*ny ]
    snXY = __PS_MULS0(nXY, sT2);

    // tnZZ = [ t*nz ][ t*nz ]
    tnZZ = __PS_MULS0(nZZ, tT);

    // tnXY_ny = [ t*nx*ny ][ t*ny*ny ]
    tnXY_ny = __PS_MULS1(tnXY, nXY);

    // tnXY_nx = [ t*nx*nx ][ t*ny*nx ]
    tnXY_nx = __PS_MULS0(tnXY, nXY);

    // tnXY_nz = [ t*nx*nz ][ t*ny*nz ]
    tnXY_nz = __PS_MULS0(tnXY, nZZ);

    // xMinusSnz = [ t*nx*nx - s*nz ][ t*ny*nx - s*nz ]
    xMinusSnz = __PS_NMSUB(nZZ, sT2, tnXY_nx);

    // yPlusSnz = [ t*nx*ny + s*nz ][ t*ny*ny + s*nz ]
    yPlusSnz = __PS_MADD(nZZ, sT2, tnXY_ny);

    // negSnXY = [ -s*nx ][ -s*ny ]
    negSnXY = __PS_NEG(snXY);

    // row0hi = [ t*nx*nz + s*ny ][ 0 ] == [ m02 ][ m03 ]
    row0hi = __PS_SUM0(tnXY_nz, c00, snXY);

    // row0lo = [ t*nx*nx + c ][ t*nx*ny - s*nz ] == [ m00 ][ m01 ]
    row0lo = __PS_SUM0(tnXY_nx, xMinusSnz, cT2);

    // row1lo = [ t*nx*ny + s*nz ][ t*ny*ny + c ] == [ m10 ][ m11 ]
    row1lo = __PS_SUM1(cT2, yPlusSnz, tnXY_ny);

    // row1hi = [ t*ny*nz - s*nx ][ 0 ] == [ m12 ][ m13 ]
    row1hi = __PS_SUM0(negSnXY, c00, tnXY_nz);

    // m20Pair = [ t*nx*nz - s*ny ][ t*ny*nz ]
    m20Pair = __PS_SUM0(tnXY_nz, tnXY_nz, negSnXY);

    // tnZZ_nz = [ t*nz*nz ][ t*nz*nz ]
    tnZZ_nz = __PS_MULS0(tnZZ, nZZ);

    // row2lo = [ t*nx*nz - s*ny ][ t*ny*nz + s*nx ] == [ m20 ][ m21 ]
    row2lo = __PS_SUM1(snXY, m20Pair, tnXY_nz);

    // row2hi = [ t*nz*nz + c ][ 0 ] == [ m22 ][ m23 ]
    row2hi = __PS_SUM0(tnZZ_nz, c00, cT2);

    // STORE: two floats per store, byte offset = 4 * (4*row + col)
    __PSQ_STX(m, 0, row0lo, 0, 0);      // m00, m01
    __PSQ_STX(m, 8, row0hi, 0, 0);      // m02, m03
    __PSQ_STX(m, 16, row1lo, 0, 0);     // m10, m11
    __PSQ_STX(m, 24, row1hi, 0, 0);     // m12, m13
    __PSQ_STX(m, 32, row2lo, 0, 0);     // m20, m21
    __PSQ_STX(m, 40, row2hi, 0, 0);     // m22, m23
}
1595
1596 /*---------------------------------------------------------------------*
1597 Paired-Single intrinsics version
1598 *---------------------------------------------------------------------*
1599 Note that this performs NO error checking.
1600 *---------------------------------------------------------------------*/
// Paired-single flavour of MTXRotAxisRad: evaluates sin/cos of the angle
// (radians) and lets _PSMTXRotAxisRadInternal build the matrix.
// No checking is done on m or axis.
void PSMTXRotAxisRad(
    Mtx         m,
    const Vec  *axis,
    f32         rad )
{
    const f32 sine   = sinf(rad);
    const f32 cosine = cosf(rad);

    _PSMTXRotAxisRadInternal(m, axis, sine, cosine);
}
1613 #endif
1614
1615 /*---------------------------------------------------------------------*
1616
1617 Name: MTXTrans
1618
1619 Description: sets a translation matrix.
1620
1621 Arguments: m matrix to be set
1622
1623 xT x component of translation.
1624
1625 yT y component of translation.
1626
1627 zT z component of translation.
1628
1629 Return: none
1630
1631 *---------------------------------------------------------------------*/
1632 /*---------------------------------------------------------------------*
1633 C version
1634 *---------------------------------------------------------------------*/
// Sets m to a pure translation: identity in the upper-left 3x3 and
// (xT, yT, zT) in the fourth column.
void C_MTXTrans ( Mtx m, f32 xT, f32 yT, f32 zT )
{
    ASSERTMSG( (m != 0), MTX_TRANS_1 );

    // upper-left 3x3 : identity
    m[0][0] = 1.0f;
    m[0][1] = 0.0f;
    m[0][2] = 0.0f;

    m[1][0] = 0.0f;
    m[1][1] = 1.0f;
    m[1][2] = 0.0f;

    m[2][0] = 0.0f;
    m[2][1] = 0.0f;
    m[2][2] = 1.0f;

    // fourth column : translation
    m[0][3] = xT;
    m[1][3] = yT;
    m[2][3] = zT;
}
1643
1644 #if !defined(WIN32) && !defined(WIN64)
1645 /*---------------------------------------------------------------------*
1646 Paired-Single intrinsics version
1647 *---------------------------------------------------------------------*
1648 Note that this performs NO error checking.
1649 *---------------------------------------------------------------------*/
// Paired-single flavour of MTXTrans. Each store writes two consecutive
// floats of m; the byte offset 4*(4*row + col) selects m[row][col].
void PSMTXTrans( Mtx m, f32 xT, f32 yT, f32 zT )
{
    f32x2 row0hi = {0.0F, xT};          // [ m02 = 0 ][ m03 = xT ]
    f32x2 row1hi = {0.0F, yT};          // [ m12 = 0 ][ m13 = yT ]
    f32x2 row2hi = {1.0F, zT};          // [ m22 = 1 ][ m23 = zT ]

    // row 0
    __PSQ_ST(m, c10, 0, 0);             // m00, m01 = 1, 0
    __PSQ_STX(m, 8, row0hi, 0, 0);      // m02, m03

    // row 1
    __PSQ_STX(m, 16, c01, 0, 0);        // m10, m11 = 0, 1
    __PSQ_STX(m, 24, row1hi, 0, 0);     // m12, m13

    // row 2
    __PSQ_STX(m, 32, c00, 0, 0);        // m20, m21 = 0, 0
    __PSQ_STX(m, 40, row2hi, 0, 0);     // m22, m23
}
1662 #endif
1663
1664 /*---------------------------------------------------------------------*
1665
1666 Name: MTXTransApply
1667
1668 Description: This function performs the operation equivalent to
1669 MTXTrans + MTXConcat.
1670
1671 Arguments: src matrix to be operated.
1672
1673 dst resultant matrix from concat.
1674
1675 xT x component of translation.
1676
1677 yT y component of translation.
1678
1679 zT z component of translation.
1680
1681 Return: none
1682
1683 *---------------------------------------------------------------------*/
1684 /*---------------------------------------------------------------------*
1685 C version
1686 *---------------------------------------------------------------------*/
// Writes to dst the matrix src with (xT, yT, zT) added to its fourth
// column; the 3x3 part is carried over unchanged. src and dst may be the
// same matrix, in which case the 3x3 copy is skipped.
void C_MTXTransApply ( MTX_CONST Mtx src, Mtx dst, f32 xT, f32 yT, f32 zT )
{
    int row;

    // NOTE(review): both checks report MTX_TRANSAPPLY_1; a separate message
    // for 'dst' may have been intended -- confirm against mtxAssert.h.
    ASSERTMSG( (src != 0), MTX_TRANSAPPLY_1 );
    ASSERTMSG( (dst != 0), MTX_TRANSAPPLY_1 );

    // carry the upper-left 3x3 over, unless operating in place
    if ( src != dst )
    {
        for ( row = 0; row < 3; row++ )
        {
            dst[row][0] = src[row][0];
            dst[row][1] = src[row][1];
            dst[row][2] = src[row][2];
        }
    }

    // offset the translation column
    dst[0][3] = src[0][3] + xT;
    dst[1][3] = src[1][3] + yT;
    dst[2][3] = src[2][3] + zT;
}
1703
1704 #if !defined(WIN32) && !defined(WIN64)
1705 /*---------------------------------------------------------------------*
1706 Paired-Single intrinsics version
1707 *---------------------------------------------------------------------*
1708 Note that this performs NO error checking.
1709 *---------------------------------------------------------------------*/
// Paired-single flavour of MTXTransApply.
//
// Each row of the 3x4 matrix is handled as two pairs: "lo" = columns 0-1,
// "hi" = columns 2-3. The lo pairs are copied through untouched; the hi
// pairs get the translation added to their second element (column 3).
// Loads and stores are interleaved; every pair is loaded from src before
// the same offset is written in dst.
void PSMTXTransApply( Mtx src, Mtx dst, f32 xT, f32 yT, f32 zT )
{
    f32x2 row0lo, row0hi;
    f32x2 row1lo, row1hi;
    f32x2 row2lo, row2hi;
    f32x2 xAdd = {xT, 0.0f};            // [ xT ][ 0 ]
    f32x2 yAdd = {yT, 0.0f};            // [ yT ][ 0 ]
    f32x2 zAdd = {zT, 0.0f};            // [ zT ][ 0 ]

    // row0lo = [ s00 ][ s01 ] : LOAD
    row0lo = __PSQ_L(src, 0, 0);

    // row0hi = [ s02 ][ s03 ] : LOAD
    row0hi = __PSQ_LX(src, 8, 0, 0);

    // row1hi = [ s12 ][ s13 ] : LOAD
    row1hi = __PSQ_LX(src, 24, 0, 0);

    // row2hi = [ s22 ][ s23 ] : LOAD
    row2hi = __PSQ_LX(src, 40, 0, 0);

    // [ d00 ][ d01 ] : STORE
    __PSQ_ST(dst, row0lo, 0, 0);

    // row0hi = [ s02 ][ s03 + xT ]
    row0hi = __PS_SUM1(xAdd, row0hi, row0hi);

    // row1lo = [ s10 ][ s11 ] : LOAD
    row1lo = __PSQ_LX(src, 16, 0, 0);

    // [ d02 ][ d03 ] : STORE
    __PSQ_STX(dst, 8, row0hi, 0, 0);

    // row1hi = [ s12 ][ s13 + yT ]
    row1hi = __PS_SUM1(yAdd, row1hi, row1hi);

    // row2lo = [ s20 ][ s21 ] : LOAD
    row2lo = __PSQ_LX(src, 32, 0, 0);

    // [ d10 ][ d11 ] : STORE
    __PSQ_STX(dst, 16, row1lo, 0, 0);

    // row2hi = [ s22 ][ s23 + zT ]
    row2hi = __PS_SUM1(zAdd, row2hi, row2hi);

    // [ d12 ][ d13 ] : STORE
    __PSQ_STX(dst, 24, row1hi, 0, 0);

    // [ d20 ][ d21 ] : STORE
    __PSQ_STX(dst, 32, row2lo, 0, 0);

    // [ d22 ][ d23 ] : STORE
    __PSQ_STX(dst, 40, row2hi, 0, 0);
}
1765 #endif
1766
1767 /*---------------------------------------------------------------------*
1768
1769 Name: MTXScale
1770
1771 Description: sets a scaling matrix.
1772
1773
1774 Arguments: m matrix to be set
1775
1776 xS x scale factor.
1777
1778 yS y scale factor.
1779
1780 zS z scale factor.
1781
1782 Return: none
1783
1784 *---------------------------------------------------------------------*/
1785 /*---------------------------------------------------------------------*
1786 C version
1787 *---------------------------------------------------------------------*/
// Sets m to a scaling matrix: (xS, yS, zS) on the diagonal, zero
// everywhere else (including the fourth column).
void C_MTXScale ( Mtx m, f32 xS, f32 yS, f32 zS )
{
    ASSERTMSG( (m != 0), MTX_SCALE_1 );

    // row 0
    m[0][0] = xS;
    m[0][1] = 0.0f;
    m[0][2] = 0.0f;
    m[0][3] = 0.0f;

    // row 1
    m[1][0] = 0.0f;
    m[1][1] = yS;
    m[1][2] = 0.0f;
    m[1][3] = 0.0f;

    // row 2
    m[2][0] = 0.0f;
    m[2][1] = 0.0f;
    m[2][2] = zS;
    m[2][3] = 0.0f;
}
1797
1798 #if !defined(WIN32) && !defined(WIN64)
1799 /*---------------------------------------------------------------------*
1800 Paired-Single intrinsics version
1801 *---------------------------------------------------------------------*
1802 Note that this performs NO error checking.
1803 *---------------------------------------------------------------------*/
// Paired-single flavour of MTXScale. Each store writes two consecutive
// floats of m; the byte offset 4*(4*row + col) selects m[row][col].
void PSMTXScale( Mtx m, f32 xS, f32 yS, f32 zS )
{
    f32x2 row0lo = {xS, 0.0F};          // [ m00 = xS ][ m01 = 0 ]
    f32x2 row1lo = {0.0F, yS};          // [ m10 = 0 ][ m11 = yS ]
    f32x2 row2hi = {zS, 0.0F};          // [ m22 = zS ][ m23 = 0 ]

    // row 0
    __PSQ_ST(m, row0lo, 0, 0);          // m00, m01
    __PSQ_STX(m, 8, c00, 0, 0);         // m02, m03 = 0, 0

    // row 1
    __PSQ_STX(m, 16, row1lo, 0, 0);     // m10, m11
    __PSQ_STX(m, 24, c00, 0, 0);        // m12, m13 = 0, 0

    // row 2
    __PSQ_STX(m, 32, c00, 0, 0);        // m20, m21 = 0, 0
    __PSQ_STX(m, 40, row2hi, 0, 0);     // m22, m23
}
1817 #endif
1818
1819 /*---------------------------------------------------------------------*
1820
1821 Name: MTXScaleApply
1822
1823 Description: This function performs the operation equivalent to
1824 MTXScale + MTXConcat
1825
1826 Arguments: src matrix to be operated.
1827
1828 dst resultant matrix from concat.
1829
1830 xS x scale factor.
1831
1832 yS y scale factor.
1833
1834 zS z scale factor.
1835
1836 Return: none
1837
1838 *---------------------------------------------------------------------*/
1839 /*---------------------------------------------------------------------*
1840 C version
1841 *---------------------------------------------------------------------*/
// Writes to dst the matrix src with row 0 multiplied by xS, row 1 by yS
// and row 2 by zS (all four columns of each row, translation included).
// Elements are processed in row-major order.
void C_MTXScaleApply ( MTX_CONST Mtx src, Mtx dst, f32 xS, f32 yS, f32 zS )
{
    f32 rowScale[3];
    int row, col;

    ASSERTMSG( (src != 0), MTX_SCALEAPPLY_1 );
    ASSERTMSG( (dst != 0), MTX_SCALEAPPLY_2 );

    rowScale[0] = xS;
    rowScale[1] = yS;
    rowScale[2] = zS;

    for ( row = 0; row < 3; row++ )
    {
        for ( col = 0; col < 4; col++ )
        {
            dst[row][col] = src[row][col] * rowScale[row];
        }
    }
}
1856
1857 #if !defined(WIN32) && !defined(WIN64)
1858 /*---------------------------------------------------------------------*
1859 Paired-Single intrinsics version
1860 *---------------------------------------------------------------------*
1861 Note that this performs NO error checking.
1862 *---------------------------------------------------------------------*/
1863
// Paired-single flavour of MTXScaleApply.
//
// Each row of the 3x4 matrix is handled as two pairs: "lo" = columns 0-1,
// "hi" = columns 2-3. Both pairs of a row are multiplied by that row's
// scale factor, which is replicated into both elements of a pair.
// Loads and stores are interleaved; every pair is loaded from src before
// the same offset is written in dst.
void PSMTXScaleApply ( MTX_CONST Mtx src, Mtx dst, f32 xS, f32 yS, f32 zS )
{
    f32x2 row0lo, row0hi;
    f32x2 row1lo, row1hi;
    f32x2 row2lo, row2hi;

    f32x2 xPair = {xS, xS};             // [ xS ][ xS ]
    f32x2 yPair = {yS, yS};             // [ yS ][ yS ]
    f32x2 zPair = {zS, zS};             // [ zS ][ zS ]

    // row0lo = [ s00 ][ s01 ] : LOAD
    row0lo = __PSQ_LX(src, 0, 0, 0);

    // row0hi = [ s02 ][ s03 ] : LOAD
    row0hi = __PSQ_LX(src, 8, 0, 0);

    // row0lo = [ s00*xS ][ s01*xS ]
    row0lo = __PS_MUL(row0lo, xPair);

    // row1lo = [ s10 ][ s11 ] : LOAD
    row1lo = __PSQ_LX(src, 16, 0, 0);

    // row0hi = [ s02*xS ][ s03*xS ]
    row0hi = __PS_MUL(row0hi, xPair);

    // row1hi = [ s12 ][ s13 ] : LOAD
    row1hi = __PSQ_LX(src, 24, 0, 0);

    // row1lo = [ s10*yS ][ s11*yS ]
    row1lo = __PS_MUL(row1lo, yPair);

    // row2lo = [ s20 ][ s21 ] : LOAD
    row2lo = __PSQ_LX(src, 32, 0, 0);

    // [ d00 ][ d01 ] : STORE
    __PSQ_STX(dst, 0, row0lo, 0, 0);

    // row1hi = [ s12*yS ][ s13*yS ]
    row1hi = __PS_MUL(row1hi, yPair);

    // row2hi = [ s22 ][ s23 ] : LOAD
    row2hi = __PSQ_LX(src, 40, 0, 0);

    // [ d02 ][ d03 ] : STORE
    __PSQ_STX(dst, 8, row0hi, 0, 0);

    // row2lo = [ s20*zS ][ s21*zS ]
    row2lo = __PS_MUL(row2lo, zPair);

    // [ d10 ][ d11 ] : STORE
    __PSQ_STX(dst, 16, row1lo, 0, 0);

    // row2hi = [ s22*zS ][ s23*zS ]
    row2hi = __PS_MUL(row2hi, zPair);

    // [ d12 ][ d13 ] : STORE
    __PSQ_STX(dst, 24, row1hi, 0, 0);

    // [ d20 ][ d21 ] : STORE
    __PSQ_STX(dst, 32, row2lo, 0, 0);

    // [ d22 ][ d23 ] : STORE
    __PSQ_STX(dst, 40, row2hi, 0, 0);
}
1939 #endif
1940
1941 /*---------------------------------------------------------------------*
1942
1943 Name: MTXReflect
1944
  Description:  sets a reflection matrix with respect to a plane.
1946
1947 Arguments: m matrix to be set
1948
1949 p point on the planar reflector.
1950
1951 n normal of the planar reflector.
1952
1953 Return: none
1954
1955 *---------------------------------------------------------------------*/
1956 /*---------------------------------------------------------------------*
1957 C version
1958 *---------------------------------------------------------------------*/
// Sets m to a reflection about the plane through point p with normal n.
// The 3x3 part is I - 2*n*n^T and the fourth column is 2*(p.n)*n.
// n is used as given (it is not normalized here), and no argument
// checking is performed.
void C_MTXReflect ( Mtx m, const Vec *p, const Vec *n )
{
    f32 offXY, offXZ, offYZ;    // off-diagonal terms -2*ni*nj
    f32 twoPdotN;               // 2 * (p . n)

    offXY    = -2.0f * n->x * n->y;
    offXZ    = -2.0f * n->x * n->z;
    offYZ    = -2.0f * n->y * n->z;
    twoPdotN = 2.0f * C_VECDotProduct(p, n);

    // row 0
    m[0][0] = 1.0f - 2.0f * n->x * n->x;
    m[0][1] = offXY;
    m[0][2] = offXZ;
    m[0][3] = twoPdotN * n->x;

    // row 1
    m[1][0] = offXY;
    m[1][1] = 1.0f - 2.0f * n->y * n->y;
    m[1][2] = offYZ;
    m[1][3] = twoPdotN * n->y;

    // row 2
    m[2][0] = offXZ;
    m[2][1] = offYZ;
    m[2][2] = 1.0f - 2.0f * n->z * n->z;
    m[2][3] = twoPdotN * n->z;
}
1983
1984 #if !defined(WIN32) && !defined(WIN64)
1985 /*---------------------------------------------------------------------*
1986 Paired-Single intrinsics version
1987 *---------------------------------------------------------------------*/
// Paired-single flavour of MTXReflect.
//
// Pair contents are written as [ ps0 ][ ps1 ]. A load with W = 1 reads one
// float into ps0 and leaves 1.0 in ps1. Both vectors are fully loaded
// before the first store to m. Store offsets are byte offsets into m, so
// offset 4*(4*row + col) addresses m[row][col].
void PSMTXReflect ( Mtx m, const Vec *p, const Vec *n )
{
    f32x2 nXY, nZ1;                     // normal : [ nx ][ ny ], [ nz ][ 1 ]
    f32x2 pXY, pZ1;                     // point  : [ px ][ py ], [ pz ][ 1 ]
    f32x2 neg2nXY, neg2nZ1;             // normal scaled by -2
    f32x2 dotXY, dotSum, twoPdotN;      // steps towards 2*(p.n)
    f32x2 xTerms, yTerms;               // -2*nx*n.. and -2*ny*n.. products
    f32x2 transXY, zMix, zScaled;
    f32x2 row0lo, row0hi, row1lo, row1hi, row2lo, row2hi;

    // nZ1 = [ nz ][ 1.0F ] : LOAD
    nZ1 = __PSQ_LX(n, 8, 1, 0);

    // nXY = [ nx ][ ny ] : LOAD
    nXY = __PSQ_LX(n, 0, 0, 0);

    // pXY = [ px ][ py ] : LOAD
    pXY = __PSQ_LX(p, 0, 0, 0);

    // neg2nZ1 = [ -2nz ][ -2.0F ]
    neg2nZ1 = __PS_NMADD(nZ1, c11, nZ1);

    // pZ1 = [ pz ][ 1.0F ] : LOAD
    pZ1 = __PSQ_LX(p, 8, 1, 0);

    // neg2nXY = [ -2nx ][ -2ny ]
    neg2nXY = __PS_NMADD(nXY, c11, nXY);

    // row2lo = [ -2nx*nz ][ -2ny*nz ] == [ m20 ][ m21 ]
    row2lo = __PS_MULS0(nXY, neg2nZ1);

    // dotXY = [ -2(px*nx) ][ -2(py*ny) ]
    dotXY = __PS_MUL(neg2nXY, pXY);

    // xTerms = [ -2nx*nx ][ -2nx*ny ]
    xTerms = __PS_MULS0(nXY, neg2nXY);

    // dotSum = [ -2(px*nx + py*ny) ][ not used ]
    dotSum = __PS_SUM0(dotXY, dotXY, dotXY);

    // yTerms = [ -2nx*ny ][ -2ny*ny ]
    yTerms = __PS_MULS1(nXY, neg2nXY);

    // row0lo = [ 1 - 2nx*nx ][ -2nx*ny ] == [ m00 ][ m01 ]
    row0lo = __PS_SUM0(xTerms, xTerms, c11);

    // twoPdotN = [ 2(px*nx + py*ny + pz*nz) ][ not used ]
    twoPdotN = __PS_NMADD(neg2nZ1, pZ1, dotSum);

    // row1lo = [ -2nx*ny ][ 1 - 2ny*ny ] == [ m10 ][ m11 ]
    row1lo = __PS_SUM1(c11, yTerms, yTerms);

    // transXY = [ 2(p.n)*nx ][ 2(p.n)*ny ]
    transXY = __PS_MULS0(nXY, twoPdotN);

    // zMix = [ -2nz ][ 2(p.n) ]
    zMix = __PS_MERGE00(neg2nZ1, twoPdotN);

    // row0hi = [ -2nx*nz ][ 2(p.n)*nx ] == [ m02 ][ m03 ]
    row0hi = __PS_MERGE00(row2lo, transXY);

    // zScaled = [ -2nz*nz ][ 2(p.n)*nz ]
    zScaled = __PS_MULS0(zMix, nZ1);

    // row1hi = [ -2ny*nz ][ 2(p.n)*ny ] == [ m12 ][ m13 ]
    row1hi = __PS_MERGE11(row2lo, transXY);

    // row2hi = [ 1 - 2nz*nz ][ 2(p.n)*nz ] == [ m22 ][ m23 ]
    row2hi = __PS_SUM0(zScaled, zScaled, c11);

    // STORE: two floats per store
    __PSQ_STX(m, 0, row0lo, 0, 0);      // m00, m01
    __PSQ_STX(m, 8, row0hi, 0, 0);      // m02, m03
    __PSQ_STX(m, 16, row1lo, 0, 0);     // m10, m11
    __PSQ_STX(m, 24, row1hi, 0, 0);     // m12, m13
    __PSQ_STX(m, 32, row2lo, 0, 0);     // m20, m21
    __PSQ_STX(m, 40, row2hi, 0, 0);     // m22, m23
}
2093 #endif
2094
2095
2096 /*---------------------------------------------------------------------*
2097
2098 VIEW SECTION
2099
2100 *---------------------------------------------------------------------*/
2101
2102 /*---------------------------------------------------------------------*
2103
2104 Name: MTXLookAt
2105
2106 Description: compute a matrix to transform points to camera coordinates.
2107
2108 Arguments: m matrix to be set
2109
2110 camPos camera position.
2111
2112 camUp camera 'up' direction.
2113
2114 target camera aim point.
2115
2116 Return: none
2117
2118 *---------------------------------------------------------------------*/
2119 /*---------------------------------------------------------------------*
2120 C version
2121 *---------------------------------------------------------------------*/
// Sets m to a view matrix for a camera at camPos aimed at target, with
// camUp giving the approximate 'up' direction.
// The rows of the 3x3 part are the camera's right, up and backward
// (camPos - target) unit vectors; the fourth column holds minus the dot
// product of camPos with each of those vectors.
void C_MTXLookAt ( Mtx m, const Point3d *camPos, const Vec *camUp, const Point3d *target )
{
    Vec zAxis;      // unit vector from target towards the camera
    Vec xAxis;      // camera right
    Vec yAxis;      // camera up

    ASSERTMSG( (m != 0), MTX_LOOKAT_1 );
    ASSERTMSG( (camPos != 0), MTX_LOOKAT_2 );
    ASSERTMSG( (camUp != 0), MTX_LOOKAT_3 );
    ASSERTMSG( (target != 0), MTX_LOOKAT_4 );

    // The camera looks down its own -Z axis, so the Z basis vector points
    // from the target back towards the camera.
    zAxis.x = camPos->x - target->x;
    zAxis.y = camPos->y - target->y;
    zAxis.z = camPos->z - target->z;
    VECNormalize( &zAxis, &zAxis );

    // xAxis = camUp x zAxis
    VECCrossProduct ( camUp, &zAxis, &xAxis );
    VECNormalize( &xAxis, &xAxis );

    // yAxis = zAxis x xAxis
    // (not normalized: the cross product of two perpendicular unit
    //  vectors should already be unit length)
    VECCrossProduct( &zAxis, &xAxis, &yAxis );

    // row 0 : right
    m[0][0] = xAxis.x;
    m[0][1] = xAxis.y;
    m[0][2] = xAxis.z;
    m[0][3] = -( camPos->x * xAxis.x + camPos->y * xAxis.y + camPos->z * xAxis.z );

    // row 1 : up
    m[1][0] = yAxis.x;
    m[1][1] = yAxis.y;
    m[1][2] = yAxis.z;
    m[1][3] = -( camPos->x * yAxis.x + camPos->y * yAxis.y + camPos->z * yAxis.z );

    // row 2 : backward
    m[2][0] = zAxis.x;
    m[2][1] = zAxis.y;
    m[2][2] = zAxis.z;
    m[2][3] = -( camPos->x * zAxis.x + camPos->y * zAxis.y + camPos->z * zAxis.z );
}
2162
2163 /*---------------------------------------------------------------------*
2164
2165
2166 TEXTURE PROJECTION SECTION
2167
2168
2169 *---------------------------------------------------------------------*/
2170
2171 /*---------------------------------------------------------------------*
2172
2173 Name: MTXLightFrustum
2174
2175 Description: Compute a 3x4 projection matrix for texture projection
2176
2177 Arguments: m 3x4 matrix to be set
2178
2179 t top coord. of view volume at the near clipping plane
2180
2181 b bottom coord of view volume at the near clipping plane
2182
2183 lf left coord. of view volume at near clipping plane
2184
2185 r right coord. of view volume at near clipping plane
2186
2187 n positive distance from camera to near clipping plane
2188
2189 scaleS scale in the S direction for projected coordinates
2190 (usually 0.5)
2191
2192 scaleT scale in the T direction for projected coordinates
2193 (usually 0.5)
2194
2195 transS translate in the S direction for projected coordinates
2196 (usually 0.5)
2197
2198 transT translate in the T direction for projected coordinates
2199 (usually 0.5)
2200
2201 Return: none.
2202
2203 *---------------------------------------------------------------------*/
2204 /*---------------------------------------------------------------------*
2205 C version
2206 *---------------------------------------------------------------------*/
void C_MTXLightFrustum ( Mtx m, float t, float b, float lf, float r, float n,
                         float scaleS, float scaleT, float transS,
                         float transT )
{
    // Reciprocals of the view volume extents at the near plane.
    f32 invWidth;
    f32 invHeight;

    ASSERTMSG( (m != 0),  MTX_LIGHT_FRUSTUM_1 );
    ASSERTMSG( (t != b),  MTX_LIGHT_FRUSTUM_2 );
    ASSERTMSG( (lf != r), MTX_LIGHT_FRUSTUM_3 );

    invWidth  = 1.0f / (r - lf);
    invHeight = 1.0f / (t - b);

    // Row 0 (S): scaled x term, plus a z term that carries the off-center
    // offset and the negated S translation.
    m[0][0] = ((2*n) * invWidth) * scaleS;
    m[0][1] = 0.0f;
    m[0][2] = (((r + lf) * invWidth) * scaleS) - transS;
    m[0][3] = 0.0f;

    // Row 1 (T): same construction using the vertical extents.
    m[1][0] = 0.0f;
    m[1][1] = ((2*n) * invHeight) * scaleT;
    m[1][2] = (((t + b) * invHeight) * scaleT) - transT;
    m[1][3] = 0.0f;

    // Row 2: (0, 0, -1, 0), i.e. the third output component is -z.
    m[2][0] = 0.0f;
    m[2][1] = 0.0f;
    m[2][2] = -1.0f;
    m[2][3] = 0.0f;
}
2234
2235 /*---------------------------------------------------------------------*
2236
2237 Name: MTXLightPerspective
2238
2239 Description: compute a 3x4 perspective projection matrix from
2240 field of view and aspect ratio for texture projection.
2241
2242 Arguments: m 3x4 matrix to be set
2243
2244 fovY total field of view in degrees in the YZ plane
2245
2246 aspect ratio of view window width:height (X / Y)
2247
2248 scaleS scale in the S direction for projected coordinates
2249 (usually 0.5)
2250
2251 scaleT scale in the T direction for projected coordinates
2252 (usually 0.5)
2253
2254 transS translate in the S direction for projected coordinates
2255 (usually 0.5)
2256
2257 transT translate in the T direction for projected coordinates
2258 (usually 0.5)
2259
2260 Return: none
2261
2262 *---------------------------------------------------------------------*/
2263 /*---------------------------------------------------------------------*
2264 C version
2265 *---------------------------------------------------------------------*/
void C_MTXLightPerspective ( Mtx m, f32 fovY, f32 aspect, float scaleS,
                             float scaleT, float transS, float transT )
{
    f32 halfFovDeg;
    f32 halfFovRad;
    f32 cotHalfFov;

    ASSERTMSG( (m != 0),                            MTX_LIGHT_PERSPECTIVE_1 );
    ASSERTMSG( ( (fovY > 0.0) && ( fovY < 180.0) ), MTX_LIGHT_PERSPECTIVE_2 );
    ASSERTMSG( (aspect != 0),                       MTX_LIGHT_PERSPECTIVE_3 );

    // Cotangent of half the vertical field of view; fovY is given in degrees.
    halfFovDeg = fovY * 0.5f;
    halfFovRad = MTXDegToRad( halfFovDeg );
    cotHalfFov = 1.0f / tanf(halfFovRad);

    // Row 0 (S): the horizontal scale is additionally divided by the
    // aspect ratio.
    m[0][0] = (cotHalfFov / aspect) * scaleS;
    m[0][1] = 0.0f;
    m[0][2] = -transS;
    m[0][3] = 0.0f;

    // Row 1 (T)
    m[1][0] = 0.0f;
    m[1][1] = cotHalfFov * scaleT;
    m[1][2] = -transT;
    m[1][3] = 0.0f;

    // Row 2: (0, 0, -1, 0), i.e. the third output component is -z.
    m[2][0] = 0.0f;
    m[2][1] = 0.0f;
    m[2][2] = -1.0f;
    m[2][3] = 0.0f;
}
2297
2298 /*---------------------------------------------------------------------*
2299
2300 Name: MTXLightOrtho
2301
2302 Description: compute a 3x4 orthographic projection matrix.
2303
2304 Arguments: m matrix to be set
2305
2306 t top coord. of parallel view volume
2307
2308 b bottom coord of parallel view volume
2309
2310 lf left coord. of parallel view volume
2311
2312 r right coord. of parallel view volume
2313
2314 scaleS scale in the S direction for projected coordinates
2315 (usually 0.5)
2316
2317 scaleT scale in the T direction for projected coordinates
2318 (usually 0.5)
2319
2320 transS translate in the S direction for projected coordinates
2321 (usually 0.5)
2322
2323 transT translate in the T direction for projected coordinates
2324 (usually 0.5)
2325
2326 Return: none
2327
2328 *---------------------------------------------------------------------*/
2329 /*---------------------------------------------------------------------*
2330 C version
2331 *---------------------------------------------------------------------*/
void C_MTXLightOrtho ( Mtx m, f32 t, f32 b, f32 lf, f32 r, float scaleS,
                       float scaleT, float transS, float transT )
{
    // Reciprocals of the view volume width and height.
    f32 invWidth;
    f32 invHeight;

    ASSERTMSG( (m != 0),  MTX_LIGHT_ORTHO_1 );
    ASSERTMSG( (t != b),  MTX_LIGHT_ORTHO_2 );
    ASSERTMSG( (lf != r), MTX_LIGHT_ORTHO_3 );

    invWidth  = 1.0f / (r - lf);
    invHeight = 1.0f / (t - b);

    // Row 0 (S): scaled x term; the centering offset and the S translation
    // go in the constant (fourth) column.
    m[0][0] = (2.0f * invWidth * scaleS);
    m[0][1] = 0.0f;
    m[0][2] = 0.0f;
    m[0][3] = ((-(r + lf) * invWidth) * scaleS) + transS;

    // Row 1 (T): same construction using the vertical extents.
    m[1][0] = 0.0f;
    m[1][1] = (2.0f * invHeight) * scaleT;
    m[1][2] = 0.0f;
    m[1][3] = ((-(t + b) * invHeight) * scaleT) + transT;

    // Row 2: (0, 0, 0, 1), i.e. the third output component is the constant 1.
    m[2][0] = 0.0f;
    m[2][1] = 0.0f;
    m[2][2] = 0.0f;
    m[2][3] = 1.0f;
}
2358
2359 /*---------------------------------------------------------------------*
2360
2361 Name: MTXReorder
2362
2363 Description: Creates a reordered (column-major) matrix from a
2364 row-major matrix, using paired single operations.
2365 Reordered matrices are required for the MTXRO*
2366 functions, which operate faster than their non-reordered
2367 counterparts.
2368
2369 Arguments: src source matrix.
2370 dest destination matrix, note type is ROMtx.
2371
2372 Return: none
2373
2374 *---------------------------------------------------------------------*/
2375 /*---------------------------------------------------------------------*
2376 C version
2377 *---------------------------------------------------------------------*/
void C_MTXReorder(MTX_CONST Mtx src, ROMtx dst)
{
    int col;
    int row;

    // dst[col][row] receives src[row][col]: each of the four source columns
    // becomes one three-element row of the destination. The destination is
    // filled row by row.
    for ( col = 0; col < 4; col++ )
    {
        for ( row = 0; row < 3; row++ )
        {
            dst[col][row] = src[row][col];
        }
    }
}
2385
2386 #if !defined(WIN32) && !defined(WIN64)
2387 /*---------------------------------------------------------------------*
2388 Paired-Single intrinsics version
2389 *---------------------------------------------------------------------*/
void PSMTXReorder(MTX_CONST Mtx src, register ROMtx dest)
{
    // Source pairs, one per 8-byte step through src. With 16 bytes per
    // source row, rowN_c01 holds {src[N][0], src[N][1]} and rowN_c23 holds
    // {src[N][2], src[N][3]}.
    f32x2 row0_c01, row0_c23;
    f32x2 row1_c01, row1_c23;
    f32x2 row2_c01, row2_c23;

    // Destination pairs, in the order they are stored (byte offsets 0..40).
    f32x2 out0, out1, out2, out3, out4, out5;

    // Phase 1: read the whole source matrix before anything is written.
    row0_c01 = __PSQ_L(src, 0, 0);
    row0_c23 = __PSQ_LX(src, 8, 0, 0);
    row1_c01 = __PSQ_LX(src, 16, 0, 0);
    row1_c23 = __PSQ_LX(src, 24, 0, 0);
    row2_c01 = __PSQ_LX(src, 32, 0, 0);
    row2_c23 = __PSQ_LX(src, 40, 0, 0);

    // Phase 2: recombine pair halves. MERGExy(a, b) is used here as
    // "element x of a followed by element y of b".
    out0 = __PS_MERGE00(row0_c01, row1_c01);   // src[0][0], src[1][0]
    out1 = __PS_MERGE01(row2_c01, row0_c01);   // src[2][0], src[0][1]
    out2 = __PS_MERGE11(row1_c01, row2_c01);   // src[1][1], src[2][1]
    out3 = __PS_MERGE00(row0_c23, row1_c23);   // src[0][2], src[1][2]
    out4 = __PS_MERGE01(row2_c23, row0_c23);   // src[2][2], src[0][3]
    out5 = __PS_MERGE11(row1_c23, row2_c23);   // src[1][3], src[2][3]

    // Phase 3: write the six pairs to consecutive 8-byte slots of dest.
    __PSQ_ST(dest, out0, 0, 0);
    __PSQ_STX(dest, 8, out1, 0, 0);
    __PSQ_STX(dest, 16, out2, 0, 0);
    __PSQ_STX(dest, 24, out3, 0, 0);
    __PSQ_STX(dest, 32, out4, 0, 0);
    __PSQ_STX(dest, 40, out5, 0, 0);
}
2449
2450 /*===========================================================================*/
2451
2452
2453 extern void _ASM_MTXRotAxisRadInternal(Mtx m, const Vec *axis, f32 sT, f32 cT);
2454
ASM_MTXRotAxisRad(Mtx m,const Vec * axis,f32 rad)2455 void ASM_MTXRotAxisRad(Mtx m,
2456 const Vec *axis,
2457 f32 rad ) {
2458 f32 sinT, cosT;
2459
2460 sinT = sinf(rad);
2461 cosT = cosf(rad);
2462
2463 _ASM_MTXRotAxisRadInternal(m, axis, sinT, cosT);
2464 }
2465
// Evaluates the sine and cosine of 'rad' and forwards them, together with
// 'm' and the 'axis' selector, to ASM_MTXRotTrig.
void ASM_MTXRotRad ( Mtx m, char axis, f32 rad )
{
    f32 sinRad;
    f32 cosRad;

    sinRad = sinf(rad);
    cosRad = cosf(rad);

    ASM_MTXRotTrig( m, axis, sinRad, cosRad );
}
2475
// Computes r as the product (inverse of q) * p, using a local temporary for
// the inverse so that q itself is never written.
void ASM_QUATDivide( const Quaternion *p, const Quaternion *q, Quaternion *r)
{
    Quaternion qInverse;

    ASM_QUATInverse(q, &qInverse);
    ASM_QUATMultiply(&qInverse, p, r);
}
2483 #endif
2484