1 /*---------------------------------------------------------------------------*
2 Project: matrix vector Library
3 File: mtx44.c
4
5 Copyright (C) Nintendo. All rights reserved.
6
7 These coded instructions, statements, and computer programs contain
8 proprietary information of Nintendo of America Inc. and/or Nintendo
9 Company Ltd., and are protected by Federal copyright law. They may
10 not be disclosed to third parties or copied or duplicated in any form,
11 in whole or in part, without the prior written consent of Nintendo.
12
13 *---------------------------------------------------------------------------*/
14
15 #include <math.h>
16 #include <stdio.h>
17 #include <cafe/mtx.h>
18 #include <cafe/mtx/mtx44.h>
19 #include "mtxAssert.h"
20 #include "mtx44Assert.h"
21
22 /*---------------------------------------------------------------------*
23 Constants
24 *---------------------------------------------------------------------*/
25 static const f32x2 c00 = {0.0F, 0.0F};
26 static const f32x2 c01 = {0.0F, 1.0F};
27 static const f32x2 c10 = {1.0F, 0.0F};
28 static const f32x2 c11 = {1.0F, 1.0F};
29 //static const f32x2 c22 = {2.0F, 2.0F};
30 static const f32x2 c33 = {3.0F, 3.0F};
31 static const f32x2 c0505 = {0.5F, 0.5F};
32
33 /*---------------------------------------------------------------------*
34
35
36 PROJECTION SECTION
37
38
39 *---------------------------------------------------------------------*/
40
41 /*---------------------------------------------------------------------*
42
43 Name: MTXFrustum
44
45 Description: compute a 4x4 perspective projection matrix from a
46 specified view volume.
47
48
49 Arguments: m 4x4 matrix to be set
50
51 t top coord. of view volume at the near clipping plane
52
53 b bottom coord of view volume at the near clipping plane
54
55 lf left coord. of view volume at near clipping plane
56
57 r right coord. of view volume at near clipping plane
58
59 n positive distance from camera to near clipping plane
60
61 f positive distance from camera to far clipping plane
62
63
64 Return: none
65
66 *---------------------------------------------------------------------*/
67 /*---------------------------------------------------------------------*
68 C version
69 *---------------------------------------------------------------------*/
/*
 * C_MTXFrustum
 *
 * Fills m with a perspective projection matrix for the view volume whose
 * near-plane rectangle is bounded by lf / r / b / t and whose depth runs
 * from n to f.  z is scaled to the (-w, w) range.
 *
 * m must be non-NULL, and t/b, lf/r, n/f must each be distinct because
 * their differences are used as divisors; these conditions are only
 * checked through ASSERTMSG.
 */
void C_MTXFrustum ( Mtx44 m, f32 t, f32 b, f32 lf, f32 r, f32 n, f32 f )
{
    f32 invWidth;
    f32 invHeight;
    f32 invDepth;
    int row;
    int col;

    ASSERTMSG( (m != 0), MTX_FRUSTUM_1 );
    ASSERTMSG( (t != b), MTX_FRUSTUM_2 );
    ASSERTMSG( (lf != r), MTX_FRUSTUM_3 );
    ASSERTMSG( (n != f), MTX_FRUSTUM_4 );

    invWidth  = 1.0f / (r - lf);
    invHeight = 1.0f / (t - b);
    invDepth  = 1.0f / (f - n);

    // clear everything first; only seven entries are non-zero
    for ( row = 0 ; row < 4 ; ++row )
    {
        for ( col = 0 ; col < 4 ; ++col )
        {
            m[row][col] = 0.0f;
        }
    }

    // x row
    m[0][0] = (2*n) * invWidth;
    m[0][2] = (r + lf) * invWidth;

    // y row
    m[1][1] = (2*n) * invHeight;
    m[1][2] = (t + b) * invHeight;

    // z row: (-w, w) depth range
    m[2][2] = -(f + n) * invDepth;
    m[2][3] = -(2*f*n) * invDepth;

    // w row: w receives -z
    m[3][2] = -1.0f;
}
105
106 /*---------------------------------------------------------------------*
107
108 Name: MTXPerspective
109
110 Description: compute a 4x4 perspective projection matrix from
111 field of view and aspect ratio.
112
113
114 Arguments: m 4x4 matrix to be set
115
116 fovY total field of view in degrees in the YZ plane
117
118 aspect ratio of view window width:height (X / Y)
119
120 n positive distance from camera to near clipping plane
121
122 f positive distance from camera to far clipping plane
123
124
125 Return: none
126
127 *---------------------------------------------------------------------*/
128 /*---------------------------------------------------------------------*
129 C version
130 *---------------------------------------------------------------------*/
/*
 * C_MTXPerspective
 *
 * Fills m with a perspective projection matrix built from a vertical
 * field of view (fovY, in degrees), an aspect ratio (width / height) and
 * the near / far distances n and f.  z is scaled to the (-w, +w) range.
 *
 * ASSERTMSG checks that m is non-NULL, that 0 < fovY < 180 and that
 * aspect is non-zero.  n == f is NOT asserted here even though (f - n)
 * is used as a divisor.
 */
void C_MTXPerspective ( Mtx44 m, f32 fovY, f32 aspect, f32 n, f32 f )
{
    f32 halfFovDeg;
    f32 halfFovRad;
    f32 cotHalfFov;
    f32 invDepth;
    int row;
    int col;

    ASSERTMSG( (m != 0), MTX_PERSPECTIVE_1 );
    ASSERTMSG( ( (fovY > 0.0) && ( fovY < 180.0) ), MTX_PERSPECTIVE_2 );
    ASSERTMSG( (aspect != 0), MTX_PERSPECTIVE_3 );

    // cotangent of half the (YZ) field of view
    halfFovDeg = fovY * 0.5f;
    halfFovRad = MTXDegToRad( halfFovDeg );
    cotHalfFov = 1.0f / tanf(halfFovRad);

    invDepth = 1.0f / (f - n);

    // clear everything first; only five entries are non-zero
    for ( row = 0 ; row < 4 ; ++row )
    {
        for ( col = 0 ; col < 4 ; ++col )
        {
            m[row][col] = 0.0f;
        }
    }

    m[0][0] = cotHalfFov / aspect;
    m[1][1] = cotHalfFov;

    // z row: (-w, +w) depth range
    m[2][2] = -(f + n) * invDepth;
    m[2][3] = -(2*f*n) * invDepth;

    // w row: w receives -z
    m[3][2] = -1.0f;
}
171
172 /*---------------------------------------------------------------------*
173
174 Name: MTXOrtho
175
176 Description: compute a 4x4 orthographic projection matrix.
177
178
179 Arguments: m 4x4 matrix to be set
180
181 t top coord. of parallel view volume
182
183 b bottom coord of parallel view volume
184
185 lf left coord. of parallel view volume
186
187 r right coord. of parallel view volume
188
189 n positive distance from camera to near clipping plane
190
191 f positive distance from camera to far clipping plane
192
193
194 Return: none
195
196 *---------------------------------------------------------------------*/
197 /*---------------------------------------------------------------------*
198 C version
199 *---------------------------------------------------------------------*/
/*
 * C_MTXOrtho
 *
 * Fills m with an orthographic projection matrix for the parallel view
 * volume bounded by lf / r / b / t and the near / far distances n and f.
 * z is scaled to the (-1, 1) range.
 *
 * m must be non-NULL, and t/b, lf/r, n/f must each be distinct because
 * their differences are used as divisors; these conditions are only
 * checked through ASSERTMSG.
 */
void C_MTXOrtho ( Mtx44 m, f32 t, f32 b, f32 lf, f32 r, f32 n, f32 f )
{
    f32 invWidth;
    f32 invHeight;
    f32 invDepth;
    int row;
    int col;

    ASSERTMSG( (m != 0), MTX_ORTHO_1 );
    ASSERTMSG( (t != b), MTX_ORTHO_2 );
    ASSERTMSG( (lf != r), MTX_ORTHO_3 );
    ASSERTMSG( (n != f), MTX_ORTHO_4 );

    invWidth  = 1.0f / (r - lf);
    invHeight = 1.0f / (t - b);
    invDepth  = 1.0f / (f - n);

    // clear everything first; only seven entries are non-zero
    for ( row = 0 ; row < 4 ; ++row )
    {
        for ( col = 0 ; col < 4 ; ++col )
        {
            m[row][col] = 0.0f;
        }
    }

    // x row: scale and offset
    m[0][0] = 2.0f * invWidth;
    m[0][3] = -(r + lf) * invWidth;

    // y row: scale and offset
    m[1][1] = 2.0f * invHeight;
    m[1][3] = -(t + b) * invHeight;

    // z row: (-1, 1) depth range
    m[2][2] = -2.0f * invDepth;
    m[2][3] = -(f + n) * invDepth;

    // w row: w stays 1
    m[3][3] = 1.0f;
}
235
236 /*---------------------------------------------------------------------*
237
238
239 GENERAL SECTION
240
241
242 *---------------------------------------------------------------------*/
243
244 /* NOTE: Prototypes for these functions are defined in "mtx44ext.h". */
245
246 /*---------------------------------------------------------------------*
247 Name: MTX44Identity
248
249 Description: sets a matrix to identity
250
251 Arguments: m : matrix to be set
252
253 Return: none
254
255 *---------------------------------------------------------------------*/
256 /*---------------------------------------------------------------------*
257 C version
258 *---------------------------------------------------------------------*/
/*
 * C_MTX44Identity
 *
 * Overwrites m with the 4x4 identity matrix: 1.0f on the diagonal,
 * 0.0f everywhere else.  m must be non-NULL (checked via ASSERTMSG).
 */
void C_MTX44Identity( Mtx44 m )
{
    int row;
    int col;

    ASSERTMSG( (m != 0), MTX44_IDENTITY_1 );

    for ( row = 0 ; row < 4 ; ++row )
    {
        for ( col = 0 ; col < 4 ; ++col )
        {
            m[row][col] = ( row == col ) ? 1.0f : 0.0f;
        }
    }
}
268
269 #if !defined(WIN32) && !defined(WIN64)
270 /*---------------------------------------------------------------------*
271 Paired-Single assembler version
272 *---------------------------------------------------------------------*
273 Note that this performs NO error checking.
274 *---------------------------------------------------------------------*/
275
PSMTX44Identity(register Mtx44 m)276 void PSMTX44Identity( register Mtx44 m )
277 {
278 __PSQ_ST(m, c10, 0, 0);
279 __PSQ_STX(m, 8, c00, 0, 0);
280 __PSQ_STX(m, 16, c01, 0, 0);
281 __PSQ_STX(m, 24, c00, 0, 0);
282 __PSQ_STX(m, 32, c00, 0, 0);
283 __PSQ_STX(m, 40, c10, 0, 0);
284 __PSQ_STX(m, 48, c00, 0, 0);
285 __PSQ_STX(m, 56, c01, 0, 0);
286 }
287 #endif
288
289 /*---------------------------------------------------------------------*
290 Name: MTX44Copy
291
292 Description: copies the contents of one matrix into another
293
294 Arguments: src source matrix for copy
295 dst destination matrix for copy
296
297
298 Return: none
299 *---------------------------------------------------------------------*/
300 /*---------------------------------------------------------------------*
301 C version
302 *---------------------------------------------------------------------*/
/*
 * C_MTX44Copy
 *
 * Copies all sixteen elements of src into dst, row by row.  When src and
 * dst are the same matrix nothing is written.  Both pointers must be
 * non-NULL (checked via ASSERTMSG).
 */
void C_MTX44Copy( MTX_CONST Mtx44 src, Mtx44 dst )
{
    int row;
    int col;

    ASSERTMSG( (src != 0) , MTX44_COPY_1 );
    ASSERTMSG( (dst != 0) , MTX44_COPY_2 );

    if( src != dst )
    {
        for ( row = 0 ; row < 4 ; ++row )
        {
            for ( col = 0 ; col < 4 ; ++col )
            {
                dst[row][col] = src[row][col];
            }
        }
    }
}
318
319 #if !defined(WIN32) && !defined(WIN64)
320 /*---------------------------------------------------------------------*
321 Paired-Single assembler version
322 *---------------------------------------------------------------------*
323 Note that this performs NO error checking.
324 *---------------------------------------------------------------------*/
/*
 * PSMTX44Copy
 *
 * Paired-single version of the matrix copy.  Moves the matrix as eight
 * pairs: each pair is loaded from src at byte offset 0, 8, ..., 56 and
 * immediately stored to dst at the same offset.  No argument checking is
 * done and, unlike the C version, there is no src == dst shortcut.
 */
void PSMTX44Copy( MTX_CONST Mtx44 src, Mtx44 dst )
{
    f32x2 pair;

    // row 0
    pair = __PSQ_L(src, 0, 0);
    __PSQ_ST(dst, pair, 0, 0);
    pair = __PSQ_LX(src, 8, 0, 0);
    __PSQ_STX(dst, 8, pair, 0, 0);

    // row 1
    pair = __PSQ_LX(src, 16, 0, 0);
    __PSQ_STX(dst, 16, pair, 0, 0);
    pair = __PSQ_LX(src, 24, 0, 0);
    __PSQ_STX(dst, 24, pair, 0, 0);

    // row 2
    pair = __PSQ_LX(src, 32, 0, 0);
    __PSQ_STX(dst, 32, pair, 0, 0);
    pair = __PSQ_LX(src, 40, 0, 0);
    __PSQ_STX(dst, 40, pair, 0, 0);

    // row 3
    pair = __PSQ_LX(src, 48, 0, 0);
    __PSQ_STX(dst, 48, pair, 0, 0);
    pair = __PSQ_LX(src, 56, 0, 0);
    __PSQ_STX(dst, 56, pair, 0, 0);
}
377 #endif
378
379 /*---------------------------------------------------------------------*
380 Name: MTX44Concat
381
382 Description: concatenates two matrices.
383 order of operation is A x B = AB.
384 ok for any of ab == a == b.
385
386 saves a MTX44Copy operation if ab is neither a nor b.
387
388 Arguments: a first matrix for concat.
389 b second matrix for concat.
390 ab resultant matrix from concat.
391
392 Return: none
393 *---------------------------------------------------------------------*/
394 /*---------------------------------------------------------------------*
395 C version
396 *---------------------------------------------------------------------*/
/*
 * C_MTX44Concat
 *
 * Computes the matrix product ab = a x b.
 *
 * ab may be the same matrix as a and/or b: in that case the product is
 * built in a local scratch matrix and copied into ab at the end, so the
 * inputs are never overwritten while they are still being read.  When ab
 * is distinct from both inputs the product is written straight into ab.
 *
 * All three pointers must be non-NULL (checked via ASSERTMSG).
 */
void C_MTX44Concat( MTX_CONST Mtx44 a, MTX_CONST Mtx44 b, Mtx44 ab )
{
    Mtx44    scratch;
    Mtx44Ptr out;
    int      row;
    int      col;

    ASSERTMSG( (a != 0), MTX44_CONCAT_1 );
    ASSERTMSG( (b != 0), MTX44_CONCAT_2 );
    ASSERTMSG( (ab != 0), MTX44_CONCAT_3 );

    // go through the scratch matrix only when the output aliases an input
    out = ( (ab == a) || (ab == b) ) ? scratch : ab;

    // out[row][col] = (row of a) . (column of b)
    for ( row = 0 ; row < 4 ; ++row )
    {
        for ( col = 0 ; col < 4 ; ++col )
        {
            out[row][col] = a[row][0]*b[0][col] + a[row][1]*b[1][col] + a[row][2]*b[2][col] + a[row][3]*b[3][col];
        }
    }

    // publish the result if it was built in the scratch matrix
    if( out == scratch )
    {
        C_MTX44Copy( *((MTX_CONST Mtx44 *)&scratch), ab );
    }
}
443
444
445 #if !defined(WIN32) && !defined(WIN64)
446 /*---------------------------------------------------------------------*
447 Paired-Single assembler version
448 *---------------------------------------------------------------------*
449 Note that this performs NO error checking.
450 *---------------------------------------------------------------------*/
451
/*
 * PSMTX44Concat
 *
 * Paired-single version of ab = a x b.  No argument checking is done.
 *
 * The product is built two columns at a time.  For each output row i the
 * accumulator starts as a[i][0] * (row 0 pair of b) and then adds
 * a[i][1], a[i][2], a[i][3] times the row 1, 2, 3 pairs of b.  The first
 * pass uses columns 0-1 of b (byte offsets 0/16/32/48) and produces the
 * left half of ab; the second pass uses columns 2-3 of b (offsets
 * 8/24/40/56) and produces the right half.
 *
 * Statement order matters.  In source order, every load from b, and the
 * final load of each pair of a, comes before the store to the same
 * offset of ab, so calling this with ab == a or ab == b does not read
 * back freshly written results.  Do not reorder loads and stores.
 */
void PSMTX44Concat( MTX_CONST Mtx44 a, MTX_CONST Mtx44 b, Mtx44 ab )
{
    f32x2 aLo, aHi;                      // current row of a: columns 0-1 / columns 2-3
    f32x2 bRow0, bRow1, bRow2, bRow3;    // current column pair of b, one per row of b
    f32x2 outL0, outL1, outL2, outL3;    // ab columns 0-1, rows 0..3
    f32x2 outR0, outR1, outR2, outR3;    // ab columns 2-3, rows 0..3

    // ---------------- left half: ab[i][0], ab[i][1] ----------------

    // row 0
    aLo   = __PSQ_L(a, 0, 0);                     // a00,a01
    bRow0 = __PSQ_L(b, 0, 0);                     // b00,b01
    outL0 = __PS_MULS0(bRow0, aLo);               // a00 * (b00,b01)
    bRow1 = __PSQ_LX(b, 16, 0, 0);                // b10,b11
    bRow2 = __PSQ_LX(b, 32, 0, 0);                // b20,b21
    outL0 = __PS_MADDS1(bRow1, aLo, outL0);       // + a01 * (b10,b11)
    aHi   = __PSQ_LX(a, 8, 0, 0);                 // a02,a03
    bRow3 = __PSQ_LX(b, 48, 0, 0);                // b30,b31
    outL0 = __PS_MADDS0(bRow2, aHi, outL0);       // + a02 * (b20,b21)
    aLo   = __PSQ_LX(a, 16, 0, 0);                // a10,a11 (prefetch for row 1)
    outL0 = __PS_MADDS1(bRow3, aHi, outL0);       // + a03 * (b30,b31)

    // row 1
    aHi   = __PSQ_LX(a, 24, 0, 0);                // a12,a13
    outL1 = __PS_MULS0(bRow0, aLo);               // a10 * (b00,b01)
    outL1 = __PS_MADDS1(bRow1, aLo, outL1);       // + a11 * (b10,b11)
    aLo   = __PSQ_LX(a, 32, 0, 0);                // a20,a21 (prefetch for row 2)
    outL1 = __PS_MADDS0(bRow2, aHi, outL1);       // + a12 * (b20,b21)
    outL1 = __PS_MADDS1(bRow3, aHi, outL1);       // + a13 * (b30,b31)

    // row 2
    aHi   = __PSQ_LX(a, 40, 0, 0);                // a22,a23
    outL2 = __PS_MULS0(bRow0, aLo);               // a20 * (b00,b01)
    outL2 = __PS_MADDS1(bRow1, aLo, outL2);       // + a21 * (b10,b11)
    aLo   = __PSQ_LX(a, 48, 0, 0);                // a30,a31 (prefetch for row 3)
    outL2 = __PS_MADDS0(bRow2, aHi, outL2);       // + a22 * (b20,b21)
    outL2 = __PS_MADDS1(bRow3, aHi, outL2);       // + a23 * (b30,b31)

    // row 3, interleaved with the reload of b for the right half
    aHi   = __PSQ_LX(a, 56, 0, 0);                // a32,a33
    outL3 = __PS_MULS0(bRow0, aLo);               // a30 * (b00,b01)
    bRow0 = __PSQ_LX(b, 8, 0, 0);                 // b02,b03
    outL3 = __PS_MADDS1(bRow1, aLo, outL3);       // + a31 * (b10,b11)
    aLo   = __PSQ_LX(a, 0, 0, 0);                 // a00,a01 (again, for the right half)
    outL3 = __PS_MADDS0(bRow2, aHi, outL3);       // + a32 * (b20,b21)
    bRow1 = __PSQ_LX(b, 24, 0, 0);                // b12,b13
    outL3 = __PS_MADDS1(bRow3, aHi, outL3);       // + a33 * (b30,b31)

    // ---------------- right half: ab[i][2], ab[i][3] ----------------

    // row 0
    aHi   = __PSQ_LX(a, 8, 0, 0);                 // a02,a03
    outR0 = __PS_MULS0(bRow0, aLo);               // a00 * (b02,b03)
    bRow2 = __PSQ_LX(b, 40, 0, 0);                // b22,b23
    outR0 = __PS_MADDS1(bRow1, aLo, outR0);       // + a01 * (b12,b13)
    bRow3 = __PSQ_LX(b, 56, 0, 0);                // b32,b33 (last load from b)
    outR0 = __PS_MADDS0(bRow2, aHi, outR0);       // + a02 * (b22,b23)
    aLo   = __PSQ_LX(a, 16, 0, 0);                // a10,a11
    outR0 = __PS_MADDS1(bRow3, aHi, outR0);       // + a03 * (b32,b33)

    // row 1; from here on finished pairs are stored between the multiplies
    aHi   = __PSQ_LX(a, 24, 0, 0);                // a12,a13
    outR1 = __PS_MULS0(bRow0, aLo);               // a10 * (b02,b03)
    __PSQ_STX(ab, 0, outL0, 0, 0);                // ab00,ab01
    outR1 = __PS_MADDS1(bRow1, aLo, outR1);       // + a11 * (b12,b13)
    aLo   = __PSQ_LX(a, 32, 0, 0);                // a20,a21
    outR1 = __PS_MADDS0(bRow2, aHi, outR1);       // + a12 * (b22,b23)
    __PSQ_STX(ab, 16, outL1, 0, 0);               // ab10,ab11
    outR1 = __PS_MADDS1(bRow3, aHi, outR1);       // + a13 * (b32,b33)

    // row 2
    aHi   = __PSQ_LX(a, 40, 0, 0);                // a22,a23
    outR2 = __PS_MULS0(bRow0, aLo);               // a20 * (b02,b03)
    __PSQ_STX(ab, 32, outL2, 0, 0);               // ab20,ab21
    outR2 = __PS_MADDS1(bRow1, aLo, outR2);       // + a21 * (b12,b13)
    aLo   = __PSQ_LX(a, 48, 0, 0);                // a30,a31
    outR2 = __PS_MADDS0(bRow2, aHi, outR2);       // + a22 * (b22,b23)
    __PSQ_STX(ab, 48, outL3, 0, 0);               // ab30,ab31
    outR2 = __PS_MADDS1(bRow3, aHi, outR2);       // + a23 * (b32,b33)

    // row 3
    aHi   = __PSQ_LX(a, 56, 0, 0);                // a32,a33 (last load from a)
    outR3 = __PS_MULS0(bRow0, aLo);               // a30 * (b02,b03)
    __PSQ_STX(ab, 8, outR0, 0, 0);                // ab02,ab03
    outR3 = __PS_MADDS1(bRow1, aLo, outR3);       // + a31 * (b12,b13)
    __PSQ_STX(ab, 24, outR1, 0, 0);               // ab12,ab13
    outR3 = __PS_MADDS0(bRow2, aHi, outR3);       // + a32 * (b22,b23)
    __PSQ_STX(ab, 40, outR2, 0, 0);               // ab22,ab23
    outR3 = __PS_MADDS1(bRow3, aHi, outR3);       // + a33 * (b32,b33)
    __PSQ_STX(ab, 56, outR3, 0, 0);               // ab32,ab33
}
728 #endif
729
730
731 /*---------------------------------------------------------------------*
732 Name: MTX44Transpose
733
734 Description: computes the transpose of a matrix.
735
736 Arguments: src source matrix.
737 xPose destination (transposed) matrix.
738 ok if src == xPose.
739
740 Return: none
741 *---------------------------------------------------------------------*/
742 /*---------------------------------------------------------------------*
743 C version
744 *---------------------------------------------------------------------*/
/*
 * C_MTX44Transpose
 *
 * Writes the transpose of src into xPose (xPose[r][c] = src[c][r]).
 *
 * src and xPose may be the same matrix: in that case the transpose is
 * built in a local scratch matrix and copied back afterwards.  Both
 * pointers must be non-NULL (checked via ASSERTMSG).
 */
void C_MTX44Transpose ( MTX_CONST Mtx44 src, Mtx44 xPose )
{
    Mtx44    scratch;
    Mtx44Ptr out;
    int      row;
    int      col;

    ASSERTMSG( (src != 0), MTX44_TRANSPOSE_1 );
    ASSERTMSG( (xPose != 0), MTX44_TRANSPOSE_2 );

    // an in-place transpose has to go through the scratch matrix
    out = (src == xPose) ? scratch : xPose;

    for ( row = 0 ; row < 4 ; ++row )
    {
        for ( col = 0 ; col < 4 ; ++col )
        {
            out[row][col] = src[col][row];
        }
    }

    // copy back if the scratch matrix was used
    if( out == scratch )
    {
        C_MTX44Copy( *((MTX_CONST Mtx44 *)&scratch), xPose );
    }
}
773
774 #if !defined(WIN32) && !defined(WIN64)
775 /*---------------------------------------------------------------------*
776 Paired-Single assembler version
777 *---------------------------------------------------------------------*
778 Note that this performs NO error checking.
779 *---------------------------------------------------------------------*/
/*
 * PSMTX44Transpose
 *
 * Paired-single version of the transpose.  No argument checking is done.
 *
 * The matrix is handled as four 2x2 tiles.  For each tile the two source
 * pairs (one from an even row, one from the following odd row) are
 * merged into a pair of first elements and a pair of second elements,
 * which are stored as two rows of the mirrored tile in xPose.
 *
 * Statement order matters.  In source order each source pair is loaded
 * before the store that writes the same byte offset of xPose, so the
 * routine can be used with src == xPose.  Do not reorder loads and
 * stores.
 */
void PSMTX44Transpose ( MTX_CONST Mtx44 src, Mtx44 xPose )
{
    f32x2 evenL, oddL;     // columns 0-1 of an even row and of the next odd row
    f32x2 evenR, oddR;     // columns 2-3 of the same two rows
    f32x2 heads, tails;    // merged first elements / merged second elements

    // tile (rows 0-1, cols 0-1) -> xPose rows 0-1, cols 0-1
    evenL = __PSQ_L(src, 0, 0);              // s00,s01
    oddL  = __PSQ_LX(src, 16, 0, 0);         // s10,s11
    heads = __PS_MERGE00(evenL, oddL);       // s00,s10
    evenR = __PSQ_LX(src, 8, 0, 0);          // s02,s03
    __PSQ_ST(xPose, heads, 0, 0);            // x00,x01
    tails = __PS_MERGE11(evenL, oddL);       // s01,s11
    oddR  = __PSQ_LX(src, 24, 0, 0);         // s12,s13
    __PSQ_STX(xPose, 16, tails, 0, 0);       // x10,x11

    // tile (rows 0-1, cols 2-3) -> xPose rows 2-3, cols 0-1
    heads = __PS_MERGE00(evenR, oddR);       // s02,s12
    evenL = __PSQ_LX(src, 32, 0, 0);         // s20,s21
    __PSQ_STX(xPose, 32, heads, 0, 0);       // x20,x21
    tails = __PS_MERGE11(evenR, oddR);       // s03,s13
    oddL  = __PSQ_LX(src, 48, 0, 0);         // s30,s31
    __PSQ_STX(xPose, 48, tails, 0, 0);       // x30,x31

    // tile (rows 2-3, cols 0-1) -> xPose rows 0-1, cols 2-3
    heads = __PS_MERGE00(evenL, oddL);       // s20,s30
    evenR = __PSQ_LX(src, 40, 0, 0);         // s22,s23
    __PSQ_STX(xPose, 8, heads, 0, 0);        // x02,x03
    tails = __PS_MERGE11(evenL, oddL);       // s21,s31
    oddR  = __PSQ_LX(src, 56, 0, 0);         // s32,s33
    __PSQ_STX(xPose, 24, tails, 0, 0);       // x12,x13

    // tile (rows 2-3, cols 2-3) -> xPose rows 2-3, cols 2-3
    heads = __PS_MERGE00(evenR, oddR);       // s22,s32
    __PSQ_STX(xPose, 40, heads, 0, 0);       // x22,x23
    tails = __PS_MERGE11(evenR, oddR);       // s23,s33
    __PSQ_STX(xPose, 56, tails, 0, 0);       // x32,x33
}
856 #endif
857
858 /*---------------------------------------------------------------------*
859 Name: MTX44Inverse
860
861 Description: computes a fast inverse of a matrix.
862 uses Gauss-Jordan(with partial pivoting)
863
864 Arguments: src source matrix.
865 inv destination (inverse) matrix.
866 ok if src == inv.
867
868 Return: 0 if src is not invertible.
869 1 on success.
870 *---------------------------------------------------------------------*/
871 /*---------------------------------------------------------------------*
872 C version only
873 *---------------------------------------------------------------------*/
874 #define NUM 4
875 #define SWAPF(a,b) { f32 tmp; tmp = (a); (a) = (b); (b)=tmp; }
876
C_MTX44Inverse(MTX_CONST Mtx44 src,Mtx44 inv)877 u32 C_MTX44Inverse( MTX_CONST Mtx44 src, Mtx44 inv )
878 {
879 Mtx44 gjm;
880 s32 i, j, k;
881 f32 w;
882
883 ASSERTMSG( (src != 0), MTX44_INVERSE_1 );
884 ASSERTMSG( (inv != 0), MTX44_INVERSE_2 );
885
886 C_MTX44Copy(src, gjm);
887 C_MTX44Identity(inv);
888
889 for ( i = 0 ; i < NUM ; ++i )
890 {
891 f32 max = 0.0f;
892 s32 swp = i;
893
894 // ---- partial pivoting -----
895 for( k = i ; k < NUM ; k++ )
896 {
897 f32 ftmp;
898 ftmp = fabsf(gjm[k][i]);
899 if ( ftmp > max )
900 {
901 max = ftmp;
902 swp = k;
903 }
904 }
905
906 // check singular matrix
907 //(or can't solve inverse matrix with this algorithm)
908 if ( max == 0.0f )
909 {
910 return 0;
911 }
912
913 // swap row
914 if( swp != i )
915 {
916 for ( k = 0 ; k < NUM ; k++ )
917 {
918 SWAPF(gjm[i][k], gjm[swp][k]);
919 SWAPF(inv[i][k], inv[swp][k]);
920 }
921 }
922
923 // ---- pivoting end ----
924
925 w = 1.0F / gjm[i][i];
926 for ( j = 0 ; j < NUM ; ++j )
927 {
928 gjm[i][j] *= w;
929 inv[i][j] *= w;
930 }
931
932 for ( k = 0 ; k < NUM ; ++k )
933 {
934 if ( k == i )
935 continue;
936
937 w = gjm[k][i];
938 for ( j = 0 ; j < NUM ; ++j )
939 {
940 gjm[k][j] -= gjm[i][j] * w;
941 inv[k][j] -= inv[i][j] * w;
942 }
943 }
944 }
945
946 return 1;
947 }
948
949 #undef SWAPF
950 #undef NUM
951
952 /*---------------------------------------------------------------------*
953
954
955 MODEL SECTION
956
957
958 *---------------------------------------------------------------------*/
959
960 /* NOTE: Prototypes for these functions are defined in "mtx44ext.h". */
961
962 /*---------------------------------------------------------------------*
963 Name: MTX44Trans
964
965 Description: sets a translation matrix.
966
967 Arguments: m matrix to be set
968 xT x component of translation.
969 yT y component of translation.
970 zT z component of translation.
971
972 Return: none
973 *---------------------------------------------------------------------*/
974 /*---------------------------------------------------------------------*
975 C version
976 *---------------------------------------------------------------------*/
/*
 * C_MTX44Trans
 *
 * Overwrites m with a translation matrix: the identity with
 * (xT, yT, zT) placed in the last column of rows 0..2.
 * m must be non-NULL (checked via ASSERTMSG).
 */
void C_MTX44Trans ( Mtx44 m, f32 xT, f32 yT, f32 zT )
{
    int row;
    int col;

    ASSERTMSG( (m != 0), MTX44_TRANS_1 );

    // identity first ...
    for ( row = 0 ; row < 4 ; ++row )
    {
        for ( col = 0 ; col < 4 ; ++col )
        {
            m[row][col] = ( row == col ) ? 1.0f : 0.0f;
        }
    }

    // ... then the translation column
    m[0][3] = xT;
    m[1][3] = yT;
    m[2][3] = zT;
}
986
987 #if !defined(WIN32) && !defined(WIN64)
988 /*---------------------------------------------------------------------*
989 Paired-Single assembler version
990 *---------------------------------------------------------------------*
991 Note that this performs NO error checking.
992 *---------------------------------------------------------------------*/
/*
 * PSMTX44Trans
 *
 * Paired-single version of the translation-matrix setter.  Writes the
 * matrix as eight pairs at byte offsets 0, 8, ..., 56 from m; the pairs
 * that hold columns 2-3 of rows 0..2 are built from xT / yT / zT, the
 * rest are shared constants.  No argument checking is done.
 */
void PSMTX44Trans( Mtx44 m, f32 xT, f32 yT, f32 zT )
{
    f32x2 row0Hi = {0.0F, xT};   // m02,m03
    f32x2 row1Hi = {0.0F, yT};   // m12,m13
    f32x2 row2Hi = {1.0F, zT};   // m22,m23

    // row 0 : 1 0 | 0 xT
    __PSQ_ST(m, c10, 0, 0);
    __PSQ_STX(m, 8, row0Hi, 0, 0);

    // row 1 : 0 1 | 0 yT
    __PSQ_STX(m, 16, c01, 0, 0);
    __PSQ_STX(m, 24, row1Hi, 0, 0);

    // row 2 : 0 0 | 1 zT
    __PSQ_STX(m, 32, c00, 0, 0);
    __PSQ_STX(m, 40, row2Hi, 0, 0);

    // row 3 : 0 0 | 0 1
    __PSQ_STX(m, 48, c00, 0, 0);
    __PSQ_STX(m, 56, c01, 0, 0);
}
1007 #endif
1008
1009 /*---------------------------------------------------------------------*
1010 Name: MTX44TransApply
1011
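1012 Description: This function performs the operation equivalent to
1013 MTX44Trans + MTX44Concat (translation applied on the left); the result is exact only when the bottom row of src is (0, 0, 0, 1).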
1014
1015 Arguments: src matrix to be operated.
1016 dst resultant matrix from concat.
1017 xT x component of translation.
1018 yT y component of translation.
1019 zT z component of translation.
1020
1021 Return: none
1022 *---------------------------------------------------------------------*/
1023 /*---------------------------------------------------------------------*
1024 C version
1025 *---------------------------------------------------------------------*/
/*
 * C_MTX44TransApply
 *
 * Writes into dst a copy of src whose last column has (xT, yT, zT)
 * added to rows 0..2.  Row 3 and columns 0..2 are copied unchanged.
 * Only the last column is adjusted, so this matches "translation
 * matrix x src" only when the bottom row of src is (0, 0, 0, 1).
 *
 * src and dst may be the same matrix; in that case only the three
 * translation elements are written.  Both pointers must be non-NULL
 * (checked via ASSERTMSG).
 */
void C_MTX44TransApply ( MTX_CONST Mtx44 src, Mtx44 dst, f32 xT, f32 yT, f32 zT )
{
    int row;
    int col;

    ASSERTMSG( (src != 0), MTX44_TRANSAPPLY_1 );
    // NOTE(review): the dst check reuses the _1 message constant; a
    // dedicated constant may have been intended - confirm against
    // mtx44Assert.h.
    ASSERTMSG( (dst != 0), MTX44_TRANSAPPLY_1 );

    if ( src != dst )
    {
        // copy everything except the translation elements set below
        for ( row = 0 ; row < 4 ; ++row )
        {
            for ( col = 0 ; col < 3 ; ++col )
            {
                dst[row][col] = src[row][col];
            }
        }
        dst[3][3] = src[3][3];
    }

    dst[0][3] = src[0][3] + xT;
    dst[1][3] = src[1][3] + yT;
    dst[2][3] = src[2][3] + zT;
}
1044
1045 #if !defined(WIN32) && !defined(WIN64)
1046 /*---------------------------------------------------------------------*
1047 Paired-Single assembler version
1048 *---------------------------------------------------------------------*
1049 Note that this performs NO error checking.
1050 *---------------------------------------------------------------------*/
void PSMTX44TransApply ( MTX_CONST Mtx44 src, Mtx44 dst, f32 xT, f32 yT, f32 zT )
{
    // Paired-single counterpart of C_MTX44TransApply.
    // Each matrix row is handled as two pairs: "lo" = columns 0,1 and
    // "hi" = columns 2,3. The translation components sit in slot 0 of
    // their pairs; __PS_SUM1 (ps_sum1) keeps slot 0 of the row pair and
    // adds the translation to slot 1, i.e. to the fourth column.
    // Loads and stores are interleaved in the same order as the original
    // assembler schedule; every offset is read before it is written.
    f32x2 transX = {xT, 0.0F};
    f32x2 transY = {yT, 0.0F};
    f32x2 transZ = {zT, 0.0F};
    f32x2 row0lo, row0hi;
    f32x2 row1lo, row1hi;
    f32x2 row2lo, row2hi;
    f32x2 row3lo, row3hi;

    // Fetch rows 0 and 1.
    row0lo = __PSQ_L(src, 0, 0);
    row0hi = __PSQ_LX(src, 8, 0, 0);
    row1lo = __PSQ_LX(src, 16, 0, 0);
    row1hi = __PSQ_LX(src, 24, 0, 0);

    // dst00,dst01 are a straight copy.
    __PSQ_ST(dst, row0lo, 0, 0);

    // row0hi <- src02, src03 + xT
    row0hi = __PS_SUM1(transX, row0hi, row0hi);

    row2hi = __PSQ_LX(src, 40, 0, 0);

    // dst10,dst11 are a straight copy.
    __PSQ_STX(dst, 16, row1lo, 0, 0);

    // row1hi <- src12, src13 + yT
    row1hi = __PS_SUM1(transY, row1hi, row1hi);

    row2lo = __PSQ_LX(src, 32, 0, 0);

    __PSQ_STX(dst, 8, row0hi, 0, 0);

    // row2hi <- src22, src23 + zT
    row2hi = __PS_SUM1(transZ, row2hi, row2hi);

    __PSQ_STX(dst, 24, row1hi, 0, 0);
    __PSQ_STX(dst, 32, row2lo, 0, 0);

    // Row 3 is copied through unchanged.
    row3lo = __PSQ_LX(src, 48, 0, 0);
    row3hi = __PSQ_LX(src, 56, 0, 0);

    __PSQ_STX(dst, 40, row2hi, 0, 0);
    __PSQ_STX(dst, 48, row3lo, 0, 0);
    __PSQ_STX(dst, 56, row3hi, 0, 0);
}
1118 #endif
1119
1120 /*---------------------------------------------------------------------*
1121 Name: MTX44Scale
1122
1123 Description: sets a scaling matrix.
1124
1125 Arguments: m matrix to be set
1126 xS x scale factor.
1127 yS y scale factor.
1128 zS z scale factor.
1129
1130 Return: none
1131 *---------------------------------------------------------------------*/
1132 /*---------------------------------------------------------------------*
1133 C version
1134 *---------------------------------------------------------------------*/
void C_MTX44Scale ( Mtx44 m, f32 xS, f32 yS, f32 zS )
{
    // Fills m with a pure scaling matrix: diagonal (xS, yS, zS, 1) and
    // zero everywhere else.
    f32 diagonal[4];
    int row, col;

    ASSERTMSG( (m != 0), MTX44_SCALE_1 );

    diagonal[0] = xS;
    diagonal[1] = yS;
    diagonal[2] = zS;
    diagonal[3] = 1.0f;

    for ( row = 0; row < 4; row++ )
    {
        for ( col = 0; col < 4; col++ )
        {
            m[row][col] = ( row == col ) ? diagonal[row] : 0.0f;
        }
    }
}
1144
1145
1146 #if !defined(WIN32) && !defined(WIN64)
1147 /*---------------------------------------------------------------------*
1148 Paired-Single assembler version
1149 *---------------------------------------------------------------------*
1150 Note that this performs NO error checking.
1151 *---------------------------------------------------------------------*/
void PSMTX44Scale( Mtx44 m, f32 xS, f32 yS, f32 zS )
{
    // Paired-single counterpart of C_MTX44Scale: the matrix is written as
    // eight two-float pairs at byte offsets 0, 8, ..., 56.
    f32x2 pairXS0 = {xS, 0.0F};     // m00,m01
    f32x2 pair0YS = {0.0F, yS};     // m10,m11
    f32x2 pairZS0 = {zS, 0.0F};     // m22,m23

    __PSQ_ST(m, pairXS0, 0, 0);         // m00,m01 <= xS,0.0
    __PSQ_STX(m, 8, c00, 0, 0);         // m02,m03 <= 0.0,0.0
    __PSQ_STX(m, 16, pair0YS, 0, 0);    // m10,m11 <= 0.0,yS
    __PSQ_STX(m, 24, c00, 0, 0);        // m12,m13 <= 0.0,0.0
    __PSQ_STX(m, 32, c00, 0, 0);        // m20,m21 <= 0.0,0.0
    __PSQ_STX(m, 40, pairZS0, 0, 0);    // m22,m23 <= zS,0.0
    __PSQ_STX(m, 48, c00, 0, 0);        // m30,m31 <= 0.0,0.0
    __PSQ_STX(m, 56, c01, 0, 0);        // m32,m33 <= 0.0,1.0
}
1167 #endif
1168
1169 /*---------------------------------------------------------------------*
1170 Name: MTX44ScaleApply
1171
1172 Description: This function performs the operation equivalent to
1173 MTXScale + MTXConcat
1174
1175 Arguments: src matrix to be operated.
1176 dst resultant matrix from concat.
1177 xS x scale factor.
1178 yS y scale factor.
1179 zS z scale factor.
1180
1181 Return: none
1182 *---------------------------------------------------------------------*/
1183 /*---------------------------------------------------------------------*
1184 C version
1185 *---------------------------------------------------------------------*/
void C_MTX44ScaleApply ( MTX_CONST Mtx44 src, Mtx44 dst, f32 xS, f32 yS, f32 zS )
{
    // Multiplies row 0 of src by xS, row 1 by yS and row 2 by zS, and
    // copies row 3 unchanged, writing the result to dst. Each element is
    // read before it is written, so src and dst may be the same matrix.
    f32 factor[3];
    int row, col;

    ASSERTMSG( (src != 0), MTX44_SCALEAPPLY_1 );
    ASSERTMSG( (dst != 0), MTX44_SCALEAPPLY_2 );

    factor[0] = xS;
    factor[1] = yS;
    factor[2] = zS;

    for ( row = 0; row < 3; row++ )
    {
        for ( col = 0; col < 4; col++ )
        {
            dst[row][col] = src[row][col] * factor[row];
        }
    }

    for ( col = 0; col < 4; col++ )
    {
        dst[3][col] = src[3][col];
    }
}
1203
1204 #if !defined(WIN32) && !defined(WIN64)
1205 /*---------------------------------------------------------------------*
1206 Paired-Single assembler version
1207 *---------------------------------------------------------------------*
1208 Note that this performs NO error checking.
1209 *---------------------------------------------------------------------*/
1210
void PSMTX44ScaleApply ( MTX_CONST Mtx44 src, Mtx44 dst, f32 xS, f32 yS, f32 zS )
{
    // Paired-single counterpart of C_MTX44ScaleApply.
    // Each row is handled as two pairs: "lo" = columns 0,1 and
    // "hi" = columns 2,3. __PS_MULS0 (ps_muls0) multiplies both slots of
    // a row pair by slot 0 of the scale pair.
    // Loads, multiplies and stores are interleaved in the same order as
    // the original assembler schedule; every offset is read before it is
    // written.
    f32x2 scaleX = {xS, xS};
    f32x2 scaleY = {yS, yS};
    f32x2 scaleZ = {zS, zS};
    f32x2 r0lo, r0hi;
    f32x2 r1lo, r1hi;
    f32x2 r2lo, r2hi;
    f32x2 r3lo, r3hi;

    r0lo = __PSQ_L(src, 0, 0);              // src00,src01
    r0hi = __PSQ_LX(src, 8, 0, 0);          // src02,src03
    r1lo = __PSQ_LX(src, 16, 0, 0);         // src10,src11

    r0lo = __PS_MULS0(r0lo, scaleX);        // src00*xS,src01*xS

    r1hi = __PSQ_LX(src, 24, 0, 0);         // src12,src13

    r0hi = __PS_MULS0(r0hi, scaleX);        // src02*xS,src03*xS

    r2lo = __PSQ_LX(src, 32, 0, 0);         // src20,src21

    __PSQ_ST(dst, r0lo, 0, 0);              // dst00,dst01

    r1lo = __PS_MULS0(r1lo, scaleY);        // src10*yS,src11*yS

    r2hi = __PSQ_LX(src, 40, 0, 0);         // src22,src23

    __PSQ_STX(dst, 8, r0hi, 0, 0);          // dst02,dst03

    r1hi = __PS_MULS0(r1hi, scaleY);        // src12*yS,src13*yS

    r3lo = __PSQ_LX(src, 48, 0, 0);         // src30,src31

    __PSQ_STX(dst, 16, r1lo, 0, 0);         // dst10,dst11

    r2lo = __PS_MULS0(r2lo, scaleZ);        // src20*zS,src21*zS

    r3hi = __PSQ_LX(src, 56, 0, 0);         // src32,src33

    __PSQ_STX(dst, 24, r1hi, 0, 0);         // dst12,dst13

    r2hi = __PS_MULS0(r2hi, scaleZ);        // src22*zS,src23*zS

    __PSQ_STX(dst, 32, r2lo, 0, 0);         // dst20,dst21
    __PSQ_STX(dst, 40, r2hi, 0, 0);         // dst22,dst23

    // Row 3 is copied through unscaled.
    __PSQ_STX(dst, 48, r3lo, 0, 0);         // dst30,dst31
    __PSQ_STX(dst, 56, r3hi, 0, 0);         // dst32,dst33
}
1319 #endif
1320
1321
1322 /*---------------------------------------------------------------------*
1323 Name: MTX44RotRad
1324
1325 Description: sets a rotation matrix about one of the X, Y or Z axes
1326
1327 Arguments: m matrix to be set
1328 axis major axis about which to rotate.
1329 axis is passed in as a character.
1330 it must be one of 'X', 'x', 'Y', 'y', 'Z', 'z'
1331 rad rotation angle in radians.
1332 note: counter-clockwise rotation is positive.
1333
1334 Return: none
1335 *---------------------------------------------------------------------*/
1336 /*---------------------------------------------------------------------*
1337 C version
1338 *---------------------------------------------------------------------*/
void C_MTX44RotRad ( Mtx44 m, char axis, f32 rad )
{
    // Converts the angle to its sine/cosine and lets C_MTX44RotTrig build
    // the matrix; "axis" is validated there, not here.
    f32 sine;
    f32 cosine;

    ASSERTMSG( (m != 0), MTX44_ROTRAD_1 );

    sine   = sinf(rad);
    cosine = cosf(rad);

    C_MTX44RotTrig( m, axis, sine, cosine );
}
1352
1353 #if !defined(WIN32) && !defined(WIN64)
1354 /*---------------------------------------------------------------------*
1355 Paired-Single assembler version
1356 *---------------------------------------------------------------------*
1357 Note that this performs NO error checking.
1358 *---------------------------------------------------------------------*/
1359
void PSMTX44RotRad ( Mtx44 m, char axis, f32 rad )
{
    // Computes sine and cosine of the angle, then defers to
    // PSMTX44RotTrig to write the matrix.
    f32 sine;
    f32 cosine;

    sine   = sinf(rad);
    cosine = cosf(rad);

    PSMTX44RotTrig( m, axis, sine, cosine );
}
1369 #endif
1370
1371 /*---------------------------------------------------------------------*
1372 Name: MTX44RotTrig
1373
1374 Arguments: m matrix to be set
1375 axis major axis about which to rotate.
1376 axis is passed in as a character.
1377 It must be one of 'X', 'x', 'Y', 'y', 'Z', 'z'
1378 sinA sine of rotation angle.
1379 cosA cosine of rotation angle.
1380 note: counter-clockwise rotation is positive.
1381
1382 Return: none
1383 *---------------------------------------------------------------------*/
1384 /*---------------------------------------------------------------------*
1385 C version
1386 *---------------------------------------------------------------------*/
void C_MTX44RotTrig ( Mtx44 m, char axis, f32 sinA, f32 cosA )
{
    // Builds a rotation matrix about the X, Y or Z axis from a
    // precomputed sine and cosine. An unrecognised axis triggers the
    // assert and leaves m unmodified.
    int a, b;       // the two axes spanning the rotation plane
    int row, col;

    ASSERTMSG( (m != 0), MTX44_ROTTRIG_1 );

    // OR-ing in 0x20 maps 'X', 'Y', 'Z' onto 'x', 'y', 'z'.
    switch ( (char)( axis | 0x20 ) )
    {
        case 'x':
            a = 1; b = 2;
            break;

        case 'y':
            a = 2; b = 0;
            break;

        case 'z':
            a = 0; b = 1;
            break;

        default:
            ASSERTMSG( 0, MTX44_ROTTRIG_2 );
            return;
    }

    // Start from identity ...
    for ( row = 0; row < 4; row++ )
    {
        for ( col = 0; col < 4; col++ )
        {
            m[row][col] = ( row == col ) ? 1.0f : 0.0f;
        }
    }

    // ... then drop the 2x2 rotation into the (a, b) plane.
    m[a][a] = cosA;
    m[a][b] = -sinA;
    m[b][a] = sinA;
    m[b][b] = cosA;
}
1421
1422 #if !defined(WIN32) && !defined(WIN64)
1423 /*---------------------------------------------------------------------*
1424 Paired-Single assembler version
1425 *---------------------------------------------------------------------*
1426 Note that this performs NO error checking.
1427 *---------------------------------------------------------------------*/
void PSMTX44RotTrig( Mtx44 m, char axis, f32 sinA, f32 cosA )
{
    // Paired-single counterpart of C_MTX44RotTrig: builds a rotation
    // matrix about the X, Y or Z axis from a precomputed sine and cosine.
    // The matrix is written with paired stores addressed by byte offset
    // (element [r][c] lives at offset 16*r + 4*c). Both upper- and
    // lower-case axis letters are accepted; any other value hits the
    // assert and leaves m unwritten.
    f32x2 ftmp0, ftmp1, ftmp4; //ftmp2, ftmp3,
    f32x2 sinA10 = {sinA, 0.0F};
    f32x2 cosA10 = {cosA, 0.0F};

    switch(axis)
    {
    case 'x':
    case 'X':
        //psq_st      c_one,  0(m), 1, 0;         // m00 <= 1.0
        __PSQ_ST(m, c11, 1, 0);

        //psq_st      c_zero, 4(m), 0, 0;         // m01,m02 <= 0.0,0.0
        __PSQ_STX(m, 4, c00, 0, 0);

        //ps_neg      ftmp0, sinA;                // ftmp0 <= -sinA
        ftmp0 = __PS_NEG(sinA10);

        //psq_st      c_zero, 12(m), 0, 0;        // m03,m10 <= 0.0,0.0
        __PSQ_STX(m, 12, c00, 0, 0);

        //ps_merge00  ftmp1, sinA, cosA;          // ftmp1 <= sinA,cosA
        ftmp1 = __PS_MERGE00(sinA10, cosA10);

        //psq_st      c_zero, 28(m), 0, 0;        // m13,m20 <= 0.0,0.0
        // Offset must be 28: a second store at 12 would leave m13 and
        // m20 unwritten.
        __PSQ_STX(m, 28, c00, 0, 0);

        //ps_merge00  ftmp0, cosA, ftmp0;         // ftmp0 <= cosA,-sinA
        ftmp0 = __PS_MERGE00(cosA10, ftmp0);

        //psq_st      c_zero, 44(m), 0, 0;        // m23,m30 <= 0.0,0.0
        __PSQ_STX(m, 44, c00, 0, 0);

        //psq_st      c_zero, 52(m), 0, 0;        // m31,m32 <= 0.0,0.0
        __PSQ_STX(m, 52, c00, 0, 0);

        //psq_st      ftmp1, 36(m), 0, 0;         // m21,m22 <= sinA,cosA
        __PSQ_STX(m, 36, ftmp1, 0, 0);

        //psq_st      ftmp0, 20(m), 0, 0;         // m11,m12 <= cosA,-sinA
        __PSQ_STX(m, 20, ftmp0, 0, 0);

        //psq_st      c_one,  60(m), 1, 0;        // m33 <= 1.0
        __PSQ_STX(m, 60, c11, 1, 0);

        break;

    case 'y':
    case 'Y':

        //psq_st      c_zero, 48(m), 0, 0;        // m30,m31 <= 0.0,0.0
        __PSQ_STX(m, 48, c00, 0, 0);

        //ps_neg      ftmp0, sinA;                // ftmp0 <= -sinA,0.0
        // NOTE(review): slot 1 is negated as well, so m21 presumably
        // receives -0.0F rather than +0.0F; numerically equal, but
        // confirm if bit-exactness with the C version matters.
        ftmp0 = __PS_NEG(sinA10);

        //psq_st      c_zero, 24(m), 0, 0;        // m12,m13 <= 0.0,0.0
        __PSQ_STX(m, 24, c00, 0, 0);

        //psq_st      cosA10, 0(m), 0, 0;         // m00,m01 <= cosA,0.0
        __PSQ_ST(m, cosA10, 0, 0);

        //psq_st      c01, 16(m), 0, 0;           // m10,m11 <= 0.0,1.0
        __PSQ_STX(m, 16, c01, 0, 0);

        //psq_st      sinA10, 8(m), 0, 0;         // m02,m03 <= sinA,0.0
        __PSQ_STX(m, 8, sinA10, 0, 0);

        //psq_st      ftmp0, 32(m), 0, 0;         // m20,m21 <= -sinA,0.0
        __PSQ_STX(m, 32, ftmp0, 0, 0);

        //psq_st      cosA10, 40(m), 0, 0;        // m22,m23 <= cosA,0.0
        __PSQ_STX(m, 40, cosA10, 0, 0);

        //psq_st      c01, 56(m), 0, 0;           // m32,m33 <= 0.0,1.0
        __PSQ_STX(m, 56, c01, 0, 0);

        break;

    case 'z':
    case 'Z':
        //psq_st      c_zero, 8(m), 0, 0;         // m02,m03 <= 0.0,0.0
        __PSQ_STX(m, 8, c00, 0, 0);

        //ps_neg      ftmp0, sinA;                // ftmp0 <= -sinA
        ftmp0 = __PS_NEG(sinA10);

        //psq_st      c_zero, 24(m), 0, 0;        // m12,m13 <= 0.0,0.0
        __PSQ_STX(m, 24, c00, 0, 0);

        //ps_merge00  ftmp1, sinA, cosA;          // ftmp1 <= sinA,cosA
        ftmp1 = __PS_MERGE00(sinA10, cosA10);

        //psq_st      c_zero, 32(m), 0, 0;        // m20,m21 <= 0.0,0.0
        __PSQ_STX(m, 32, c00, 0, 0);

        //psq_st      c_zero, 48(m), 0, 0;        // m30,m31 <= 0.0,0.0
        __PSQ_STX(m, 48, c00, 0, 0);

        //psq_st      ftmp1, 16(m), 0, 0;         // m10,m11 <= sinA,cosA
        __PSQ_STX(m, 16, ftmp1, 0, 0);

        //ps_merge00  ftmp4, cosA, ftmp0;         // ftmp4 <= cosA,-sinA
        ftmp4 = __PS_MERGE00(cosA10, ftmp0);

        //psq_st      ftmp2, 40(m), 0, 0;         // m22,m23 <= 1.0,0.0
        __PSQ_STX(m, 40, c10, 0, 0);

        //psq_st      ftmp3, 56(m), 0, 0;         // m32,m33 <= 0.0,1.0
        __PSQ_STX(m, 56, c01, 0, 0);

        //psq_st      ftmp4, 0(m), 0, 0;          // m00,m01 <= cosA,-sinA
        __PSQ_ST(m, ftmp4, 0, 0);

        break;

    default:
        ASSERTMSG( 0, MTX44_ROTTRIG_2 );
        break;
    }
}
1550 #endif
1551
1552 /*---------------------------------------------------------------------*
1553 Name: C_MTX44RotAxisRad
1554 *---------------------------------------------------------------------*/
1555 /*---------------------------------------------------------------------*
1556 C version
1557 *---------------------------------------------------------------------*/
void C_MTX44RotAxisRad( Mtx44 m, const Vec *axis, f32 rad )
{
    // Builds a rotation matrix of "rad" radians about an arbitrary axis.
    // The axis is normalized with C_VECNormalize first, so it does not
    // have to be unit length on entry.
    Vec unit;               // normalized copy of *axis
    f32 sinT, cosT;         // sine / cosine of the angle
    f32 versT;              // 1 - cosine
    f32 nx, ny, nz;         // components of the normalized axis
    f32 nxSq, nySq, nzSq;   // squared components

    ASSERTMSG( (m != 0), MTX44_ROTAXIS_1 );
    ASSERTMSG( (axis != 0), MTX44_ROTAXIS_2 );

    sinT  = sinf(rad);
    cosT  = cosf(rad);
    versT = 1.0f - cosT;

    C_VECNormalize( axis, &unit );

    nx = unit.x;
    ny = unit.y;
    nz = unit.z;

    nxSq = nx * nx;
    nySq = ny * ny;
    nzSq = nz * nz;

    // Row 0
    m[0][0] = versT * nxSq + cosT;
    m[0][1] = versT * nx * ny - sinT * nz;
    m[0][2] = versT * nx * nz + sinT * ny;
    m[0][3] = 0.0f;

    // Row 1
    m[1][0] = versT * nx * ny + sinT * nz;
    m[1][1] = versT * nySq + cosT;
    m[1][2] = versT * ny * nz - sinT * nx;
    m[1][3] = 0.0f;

    // Row 2
    m[2][0] = versT * nx * nz - sinT * ny;
    m[2][1] = versT * ny * nz + sinT * nx;
    m[2][2] = versT * nzSq + cosT;
    m[2][3] = 0.0f;

    // Row 3: no translation, no projection.
    m[3][0] = 0.0f;
    m[3][1] = 0.0f;
    m[3][2] = 0.0f;
    m[3][3] = 1.0f;
}
1603
1604 #if !defined(WIN32) && !defined(WIN64)
1605 /*---------------------------------------------------------------------*
1606 Paired-Single assembler version
1607 *---------------------------------------------------------------------*
1608 Note that this performs NO error checking.
1609 *---------------------------------------------------------------------*/
1610
static void __PSMTX44RotAxisRadInternal(
    Mtx44 m,
    const Vec *axis,
    f32 sT,
    f32 cT )
{
    // Paired-single construction of the axis/angle rotation matrix from a
    // precomputed sine (sT) and cosine (cT). The axis is normalized here
    // using a reciprocal-square-root estimate refined by one
    // Newton-Raphson step. Bracketed comments give [slot0][slot1] of each
    // pair. Statement order follows the original assembler schedule.
    f32x2 sT2 = {sT, sT};
    f32x2 cT2 = {cT, cT};
    f32x2 versT2;                       // [1-c][1-c]
    f32x2 xy, zz;                       // raw axis components
    f32x2 xySq, sqSum, lenSq;           // squared-length accumulation
    f32x2 est, estSq, halfEst, corr;    // rsqrt estimate and refinement
    f32x2 invLen;                       // refined 1/sqrt(S) in slot 0
    f32x2 nxy, nzz;                     // normalized axis components
    f32x2 tNxy, sNxy, tNzz;             // axis scaled by t and by s
    f32x2 tNxyNy, tNxyNx, tNxyNz, tNzNz;
    f32x2 diffSNz, sumSNz, negSNxy, row2part;
    f32x2 row0lo, row0hi, row1lo, row1hi, row2lo, row2hi, row3hi;

    // xy = [x][y]
    xy = __PSQ_LX(axis, 0, 0, 0);

    // zz = [z][z]
    zz[0] = axis->z;
    zz[1] = axis->z;

    // xySq = [x*x][y*y]
    xySq = __PS_MUL(xy, xy);

    // sqSum = [x*x+z*z][y*y+z*z]
    sqSum = __PS_MADD(zz, zz, xySq);

    // lenSq = [S = x*x+y*y+z*z][z]
    lenSq = __PS_SUM0(sqSum, zz, xySq);

    // versT2 = 1.0F - cT
    versT2 = __PS_SUB(c11, cT2);

    // est = estimate E of 1.0/sqrt(S) (slot 0 is the one used below)
    est = __PS_RSQRTE(lenSq);

    // row3hi = [0][1]
    row3hi = __PS_MERGE00(c00, c11);

    // Newton-Raphson refinement: E' = (E/2) * (3.0 - E*E*S)
    estSq   = __PS_MUL(est, est);       // E*E
    halfEst = __PS_MUL(est, c0505);     // E/2

    // [m30=0][m31=0] : STORE
    __PSQ_STX(m, 48, c00, 0, 0);

    corr   = __PS_NMSUB(estSq, lenSq, c33);     // 3 - E*E*S
    invLen = __PS_MUL(corr, halfEst);           // (E/2)(3 - E*E*S)

    // [m32=0][m33=1] : STORE
    __PSQ_STX(m, 56, row3hi, 0, 0);

    // nxy = [nx = x/sqrt(S)][ny = y/sqrt(S)]
    nxy = __PS_MULS0(xy, invLen);

    // nzz = [nz = z/sqrt(S)][nz]
    nzz = __PS_MULS0(zz, invLen);

    // tNxy = [t*nx][t*ny]
    tNxy = __PS_MULS0(nxy, versT2);

    // sNxy = [s*nx][s*ny]
    sNxy = __PS_MULS0(nxy, sT2);

    // tNzz = [t*nz][t*nz]
    tNzz = __PS_MULS0(nzz, versT2);

    // tNxyNy = [t*nx*ny][t*ny*ny]
    tNxyNy = __PS_MULS1(tNxy, nxy);

    // tNxyNx = [t*nx*nx][t*ny*nx]
    tNxyNx = __PS_MULS0(tNxy, nxy);

    // tNxyNz = [t*nx*nz][t*ny*nz]
    tNxyNz = __PS_MULS0(tNxy, nzz);

    // diffSNz = [t*nx*nx-s*nz][t*ny*nx-s*nz]
    diffSNz = __PS_NMSUB(nzz, sT2, tNxyNx);

    // sumSNz = [t*nx*ny+s*nz][t*ny*ny+s*nz]
    sumSNz = __PS_MADD(nzz, sT2, tNxyNy);

    // negSNxy = [-s*nx][-s*ny]
    negSNxy = __PS_NEG(sNxy);

    // row0hi = [t*nx*nz+s*ny][0] == [m02][m03]
    row0hi = __PS_SUM0(tNxyNz, c00, sNxy);

    // row0lo = [t*nx*nx+c][t*nx*ny-s*nz] == [m00][m01]
    row0lo = __PS_SUM0(tNxyNx, diffSNz, cT2);

    // row1lo = [t*nx*ny+s*nz][t*ny*ny+c] == [m10][m11]
    row1lo = __PS_SUM1(cT2, sumSNz, tNxyNy);

    // row1hi = [t*ny*nz-s*nx][0] == [m12][m13]
    row1hi = __PS_SUM0(negSNxy, c00, tNxyNz);

    // [m02][m03] : STORE
    __PSQ_STX(m, 8, row0hi, 0, 0);

    // row2part = [t*nx*nz-s*ny][t*ny*nz]
    row2part = __PS_SUM0(tNxyNz, tNxyNz, negSNxy);

    // [m00][m01] : STORE
    __PSQ_STX(m, 0, row0lo, 0, 0);

    // tNzNz = [t*nz*nz][t*nz*nz]
    tNzNz = __PS_MULS0(tNzz, nzz);

    // [m10][m11] : STORE
    __PSQ_STX(m, 16, row1lo, 0, 0);

    // row2lo = [t*nx*nz-s*ny][t*ny*nz+s*nx] == [m20][m21]
    row2lo = __PS_SUM1(sNxy, row2part, tNxyNz);

    // [m12][m13] : STORE
    __PSQ_STX(m, 24, row1hi, 0, 0);

    // row2hi = [t*nz*nz+c][0] == [m22][m23]
    row2hi = __PS_SUM0(tNzNz, c00, cT2);

    // [m20][m21] : STORE
    __PSQ_STX(m, 32, row2lo, 0, 0);

    // [m22][m23] : STORE
    __PSQ_STX(m, 40, row2hi, 0, 0);
}
1806
void PSMTX44RotAxisRad( Mtx44 m, const Vec *axis, f32 rad )
{
    // Evaluates sine and cosine of the angle and hands them to the
    // paired-single worker that fills in the matrix.
    f32 sine;
    f32 cosine;

    sine   = sinf(rad);
    cosine = cosf(rad);

    __PSMTX44RotAxisRadInternal(m, axis, sine, cosine);
}
1816 #endif
1817
1818
1819 /*---------------------------------------------------------------------------*
1820 MATRIX CONVERSION
1821 *---------------------------------------------------------------------------*/
void C_MTX34To44 ( MTX_CONST Mtx src, Mtx44 dst)
{
    // Expands a 3x4 matrix to 4x4: the three source rows are copied
    // verbatim and a fourth row (0, 0, 0, 1) is appended.
    int row, col;

    for ( row = 0; row < 3; row++ )
    {
        for ( col = 0; col < 4; col++ )
        {
            dst[row][col] = src[row][col];
        }
    }

    for ( col = 0; col < 4; col++ )
    {
        dst[3][col] = ( col == 3 ) ? 1.0f : 0.0f;
    }
}
1829
1830
1831 #if !defined(WIN32) && !defined(WIN64)
1832 /*---------------------------------------------------------------------*
1833 Paired-Single assembler version
1834 *---------------------------------------------------------------------*
1835 Note that this performs NO error checking.
1836 *---------------------------------------------------------------------*/
void PSMTX34To44( MTX_CONST Mtx src, Mtx44 dst )
{
    // Paired-single counterpart of C_MTX34To44. The 48 bytes of the 3x4
    // source are moved two floats at a time through one scratch pair,
    // then the fourth row (0, 0, 0, 1) is written from constants.
    f32x2 pair;

    // Row 0
    pair = __PSQ_L(src, 0, 0);
    __PSQ_ST(dst, pair, 0, 0);

    pair = __PSQ_LX(src, 8, 0, 0);
    __PSQ_STX(dst, 8, pair, 0, 0);

    // Row 1
    pair = __PSQ_LX(src, 16, 0, 0);
    __PSQ_STX(dst, 16, pair, 0, 0);

    pair = __PSQ_LX(src, 24, 0, 0);
    __PSQ_STX(dst, 24, pair, 0, 0);

    // Row 2
    pair = __PSQ_LX(src, 32, 0, 0);
    __PSQ_STX(dst, 32, pair, 0, 0);

    pair = __PSQ_LX(src, 40, 0, 0);
    __PSQ_STX(dst, 40, pair, 0, 0);

    // Row 3 <= 0.0, 0.0, 0.0, 1.0
    __PSQ_STX(dst, 48, c00, 0, 0);
    __PSQ_STX(dst, 56, c01, 0, 0);
}
1883
1884 /*===========================================================================*/
1885
1886
1887 extern void _ASM_MTX44RotAxisRadInternal(Mtx m, const Vec *axis, f32 sT, f32 cT);
1888
void ASM_MTX44RotAxisRad(Mtx44 m, const Vec *axis, f32 rad)
{
    // Evaluates sine and cosine of the angle and forwards them to the
    // externally defined _ASM_MTX44RotAxisRadInternal routine.
    f32 sine;
    f32 cosine;

    sine   = sinf(rad);
    cosine = cosf(rad);

    _ASM_MTX44RotAxisRadInternal(m, axis, sine, cosine);
}
1897
void ASM_MTX44RotRad ( Mtx44 m, char axis, f32 rad )
{
    // Evaluates sine and cosine of the angle and forwards them to
    // ASM_MTX44RotTrig, which writes the matrix.
    f32 sine;
    f32 cosine;

    sine   = sinf(rad);
    cosine = cosf(rad);

    ASM_MTX44RotTrig( m, axis, sine, cosine );
}
1907 #endif
1908