1 /*---------------------------------------------------------------------------*
2 Project: matrix vector Library
3 File: mtx.c
4
5 Copyright 1998-2006 Nintendo. All rights reserved.
6
7 These coded instructions, statements, and computer programs contain
8 proprietary information of Nintendo of America Inc. and/or Nintendo
9 Company Ltd., and are protected by Federal copyright law. They may
10 not be disclosed to third parties or copied or duplicated in any form,
11 in whole or in part, without the prior written consent of Nintendo.
12
13
14 $Log: mtx.c,v $
15 Revision 1.3 02/20/2006 04:25:42 mitu
16 changed include path from dolphin/ to revolution/.
17
18 Revision 1.2 12/30/2005 06:01:51 hirose
19 Temporary workaround for CW3.0 Alpha3 problem.
20
21
22 30 02/06/26 13:49 Hirose
23 Fixed a register allocation problem with DEBUG build.
24
25 29 02/06/20 11:21 Hirose
26 Added MTXConcatArray.
27
28 28 02/04/11 13:10 Hirose
29 const type specifier support. (worked by Hiratsu@IRD)
30
31 27 02/02/18 9:27 Hirose
32 Workaround for floating-point precision mismatch issue.
33
34 26 9/13/01 4:28p Hirose
35 Removed unnecessary possibilities of zero division exception.
36
37 25 8/28/01 3:48p Hirose
38 Removed one refinement process in the calculation of square root.
39
40 24 7/30/01 10:21p Hirose
41 Changes for function definitions.
42
43 23 7/23/01 8:45p Hirose
44 Now all model matrix functions and matrix-vector functions have PS
45 version.
46
47 22 7/07/01 7:37p Hirose
48 added PS versions of MTXTrans* / MTXScale* functions.
49
50 21 3/29/01 8:23p Hirose
51 fixed assertion messages
52
53 20 3/29/01 3:06p Hirose
54 added MTXInvXpose
55
56 19 3/16/01 11:43p Hirose
57 added PSMTXInverse
58
59 18 2/22/01 11:58p Hirose
60 Moved some sections into other files.
61 Added some new Paired-Single functions. Etc.
62
63 17 7/31/00 2:07p Carl
64 Removed redundant verify in MTXRotRad.
65 Minor potential optimization in VECNormalize.
66
67 16 7/21/00 4:19p Carl
68 Removed as many divides as possible.
69
70 15 7/21/00 2:58p Carl
71 Removed unnecessary VECNormalize in MTXLookAt.
72
73 14 7/12/00 4:41p John
74 Substitutes MTXConcat and MTXMultVecArray with their paired-singles
75 equivalent for Gekko non-debug builds.
76
77 13 7/07/00 7:09p Dante
78 PC Compatibility
79
80 12 5/10/00 1:50p Hirose
81 added radian-based rotation matrix functions
82 moved paired-single matrix stuff into an another source file
83
84 11 4/13/00 11:26a Danm
85 Restore high performance version of ROMulVecArray.
86
87 10 4/10/00 11:56a Danm
88 Added 2 matrix skinning support.
89
90 9 3/27/00 6:38p Carl
91 Fixed other comments.
92
93 8 3/27/00 6:20p Carl
94 Changed comment regarding z scale in MTXFrustum.
95
96 7 3/22/00 2:01p John
97 Added VECSquareDistance and VECDistance.
98
99 6 2/14/00 2:04p Mikepc
100 de-tabbed code.
101
102 5 2/11/00 4:22p Tian
103 Cleaned up reordered paired single code
104
105 4 1/27/00 5:21p Tian
106 Documented paired single ops.
107
108 3 1/27/00 4:56p Tian
109 Implemented fast paired single matrix ops : PSMTXReorder, PSMTXConcat,
110 PSMTXROMultVecArray, PSMTXMultVecArray, PSMTXMultS16VecArray,
111 PSMTXROMultS16VecArray
112
113 2 12/06/99 12:40p Alligator
114 modify proj mtx to scale/translate z to (0, w) range for hardware
115
116 20 11/17/99 4:32p Ryan
117 update to fin win32 compiler warnings
118
119 19 11/12/99 12:08p Mikepc
120 changed #include<dolphin/mtx/mtxAssert.h> to #include"mtxAssert.h" to
121 reflect new location of mtxAssert.h file.
122
123 18 11/10/99 7:21p Hirose
124 added singular case check for VECHalfAngle
125
126 17 11/10/99 4:40p Alligator
127 added MTXReflect
128
129 16 10/06/99 12:28p Yasu
130 Appended MTXMultVecSR and MTXMultVecArraySR
131
132 15 8/31/99 5:41p Shiki
133 Revised to use sqrtf() instead of sqrt().
134
135 14 8/31/99 5:18p Shiki
136 Revised to use sinf()/cosf() instead of sin()/cos().
137
138 13 7/28/99 3:38p Ryan
139
140 12 7/28/99 11:30a Ryan
141 Added Texture Projection functions
142
143 11 7/02/99 12:53p Mikepc
144 changed mtx row/col
145
146 $NoKeywords: $
147 *---------------------------------------------------------------------------*/
148 #ifdef WIN32
149 #include <win32/win32.h>
150 #endif
151
152 #include <math.h>
153 #include <revolution/mtx.h>
154 #include "mtxAssert.h"
155
156 #if ( __MWERKS__ == 0x00004100 )
157 #pragma defer_codegen on // This will be removed when compiler is fixed.
158 #endif
159
160 /*---------------------------------------------------------------------*
161 Constants
162 *---------------------------------------------------------------------*/
163 static f32 Unit01[] = { 0.0f, 1.0f };
164
165
166 /*---------------------------------------------------------------------*
167
168
169
170
171 GENERAL SECTION
172
173
174
175
176 *---------------------------------------------------------------------*/
177
178
179 /*---------------------------------------------------------------------*
180
181 Name: MTXIdentity
182
183 Description: sets a matrix to identity
184
185 Arguments: m : matrix to be set
186
187 Return : none
188
189 *---------------------------------------------------------------------*/
190 /*---------------------------------------------------------------------*
191 C version
192 *---------------------------------------------------------------------*/
C_MTXIdentity(Mtx m)193 void C_MTXIdentity ( Mtx m )
194 {
195
196 ASSERTMSG( (m != 0), MTX_IDENTITY_1 );
197
198
199 m[0][0] = 1.0f; m[0][1] = 0.0f; m[0][2] = 0.0f; m[0][3] = 0.0f;
200
201 m[1][0] = 0.0f; m[1][1] = 1.0f; m[1][2] = 0.0f; m[1][3] = 0.0f;
202
203 m[2][0] = 0.0f; m[2][1] = 0.0f; m[2][2] = 1.0f; m[2][3] = 0.0f;
204
205 }
206
207 /*---------------------------------------------------------------------*
208 Paired-Single assembler version
209 *---------------------------------------------------------------------*
210 Note that this performs NO error checking.
211 Actually there is not much performance advantage.
212 *---------------------------------------------------------------------*/
213 #ifdef GEKKO
PSMTXIdentity(register Mtx m)214 void PSMTXIdentity
215 (
216 register Mtx m // r3
217 )
218 {
219 register f32 c_zero = 0.0F; // { 0.0F, 0.0F };
220 register f32 c_one = 1.0F; // { 1.0.0F, 1.0.0F };
221 register f32 c_01;
222 register f32 c_10;
223
224 asm
225 {
226 psq_st c_zero, 8(m), 0, 0 // m[0][2], m[0][3]
227 ps_merge01 c_01, c_zero, c_one // { 0.1F, 1.0F }
228 psq_st c_zero, 24(m), 0, 0 // m[1][2], m[1][3]
229 ps_merge10 c_10, c_one, c_zero // fp2 = { 1.0F, 0.0F }
230 psq_st c_zero, 32(m), 0, 0 // m[2][0], m[2][1]
231 psq_st c_01, 16(m), 0, 0 // m[1][0], m[1][1]
232 psq_st c_10, 0(m), 0, 0 // m[0][0], m[0][1]
233 psq_st c_10, 40(m), 0, 0 // m[2][2], m[2][3]
234 }
235 }
236 #endif // GEKKO
237
238 /*---------------------------------------------------------------------*
239
240 Name: MTXCopy
241
242 Description: copies the contents of one matrix into another
243
244 Arguments: src source matrix for copy
245 dst destination matrix for copy
246
247
248 Return : none
249
250 *---------------------------------------------------------------------*/
251 /*---------------------------------------------------------------------*
252 C version
253 *---------------------------------------------------------------------*/
C_MTXCopy(const Mtx src,Mtx dst)254 void C_MTXCopy ( const Mtx src, Mtx dst )
255 {
256
257 ASSERTMSG( (src != 0) , MTX_COPY_1 );
258 ASSERTMSG( (dst != 0) , MTX_COPY_2 );
259
260 if( src == dst )
261 {
262 return;
263 }
264
265
266 dst[0][0] = src[0][0]; dst[0][1] = src[0][1]; dst[0][2] = src[0][2]; dst[0][3] = src[0][3];
267
268 dst[1][0] = src[1][0]; dst[1][1] = src[1][1]; dst[1][2] = src[1][2]; dst[1][3] = src[1][3];
269
270 dst[2][0] = src[2][0]; dst[2][1] = src[2][1]; dst[2][2] = src[2][2]; dst[2][3] = src[2][3];
271
272 }
273
274 /*---------------------------------------------------------------------*
275 Paired-Single assembler version
276 *---------------------------------------------------------------------*
277 Note that this performs NO error checking.
278 *---------------------------------------------------------------------*/
279 #ifdef GEKKO
PSMTXCopy(const register Mtx src,register Mtx dst)280 asm void PSMTXCopy
281 (
282 const register Mtx src, // r3
283 register Mtx dst // r4
284 )
285 {
286 nofralloc
287
288 psq_l fp0, 0(src), 0, 0
289 psq_st fp0, 0(dst), 0, 0
290 psq_l fp1, 8(src), 0, 0
291 psq_st fp1, 8(dst), 0, 0
292 psq_l fp2, 16(src), 0, 0
293 psq_st fp2, 16(dst), 0, 0
294 psq_l fp3, 24(src), 0, 0
295 psq_st fp3, 24(dst), 0, 0
296 psq_l fp4, 32(src), 0, 0
297 psq_st fp4, 32(dst), 0, 0
298 psq_l fp5, 40(src), 0, 0
299 psq_st fp5, 40(dst), 0, 0
300
301 blr
302 }
303 #endif // GEKKO
304
305 /*---------------------------------------------------------------------*
306
307 Name: MTXConcat
308
309 Description: concatenates two matrices.
310 order of operation is A x B = AB.
311 ok for any of ab == a == b.
312
313 saves a MTXCopy operation if ab != to a or b.
314
315 Arguments: a first matrix for concat.
316 b second matrix for concat.
317 ab resultant matrix from concat.
318
319 Return : none
320
321 *---------------------------------------------------------------------*/
322
323 /*---------------------------------------------------------------------*
324 C version
325 *---------------------------------------------------------------------*/
C_MTXConcat(const Mtx a,const Mtx b,Mtx ab)326 void C_MTXConcat ( const Mtx a, const Mtx b, Mtx ab )
327 {
328 Mtx mTmp;
329 MtxPtr m;
330
331 ASSERTMSG( (a != 0), MTX_CONCAT_1 );
332 ASSERTMSG( (b != 0), MTX_CONCAT_2 );
333 ASSERTMSG( (ab != 0), MTX_CONCAT_3 );
334
335 if( (ab == a) || (ab == b) )
336 {
337 m = mTmp;
338 }
339
340 else
341 {
342 m = ab;
343 }
344
345 // compute (a x b) -> m
346
347 m[0][0] = a[0][0]*b[0][0] + a[0][1]*b[1][0] + a[0][2]*b[2][0];
348 m[0][1] = a[0][0]*b[0][1] + a[0][1]*b[1][1] + a[0][2]*b[2][1];
349 m[0][2] = a[0][0]*b[0][2] + a[0][1]*b[1][2] + a[0][2]*b[2][2];
350 m[0][3] = a[0][0]*b[0][3] + a[0][1]*b[1][3] + a[0][2]*b[2][3] + a[0][3];
351
352 m[1][0] = a[1][0]*b[0][0] + a[1][1]*b[1][0] + a[1][2]*b[2][0];
353 m[1][1] = a[1][0]*b[0][1] + a[1][1]*b[1][1] + a[1][2]*b[2][1];
354 m[1][2] = a[1][0]*b[0][2] + a[1][1]*b[1][2] + a[1][2]*b[2][2];
355 m[1][3] = a[1][0]*b[0][3] + a[1][1]*b[1][3] + a[1][2]*b[2][3] + a[1][3];
356
357 m[2][0] = a[2][0]*b[0][0] + a[2][1]*b[1][0] + a[2][2]*b[2][0];
358 m[2][1] = a[2][0]*b[0][1] + a[2][1]*b[1][1] + a[2][2]*b[2][1];
359 m[2][2] = a[2][0]*b[0][2] + a[2][1]*b[1][2] + a[2][2]*b[2][2];
360 m[2][3] = a[2][0]*b[0][3] + a[2][1]*b[1][3] + a[2][2]*b[2][3] + a[2][3];
361
362
363 // overwrite a or b if needed
364 if(m == mTmp)
365 {
366 C_MTXCopy( mTmp, ab );
367 }
368 }
369
370 /*---------------------------------------------------------------------*
371 Paired-Single assembler version
372 *---------------------------------------------------------------------*
373 Note that this performs NO error checking.
374 *---------------------------------------------------------------------*/
375 #ifdef GEKKO
PSMTXConcat(const register Mtx mA,const register Mtx mB,register Mtx mAB)376 asm void PSMTXConcat
377 (
378 const register Mtx mA, // r3
379 const register Mtx mB, // r4
380 register Mtx mAB // r5
381 )
382 {
383 nofralloc
384
385 #define A00_A01 fp0
386 #define A02_A03 fp1
387 #define A10_A11 fp2
388 #define A12_A13 fp3
389 #define A20_A21 fp4
390 #define A22_A23 fp5
391
392 #define B00_B01 fp6
393 #define B02_B03 fp7
394 #define B10_B11 fp8
395 #define B12_B13 fp9
396 #define B20_B21 fp10
397 #define B22_B23 fp11
398
399 #define D00_D01 fp12
400 #define D02_D03 fp13
401 #define D10_D11 fp14
402 #define D12_D13 fp15
403 #define D20_D21 fp2
404 #define D22_D23 fp0
405
406 #define UNIT01 fp31
407
408 // don't save LR since we don't make any function calls
409 // mflr r0
410 // stw r0, 4(r1)
411 stwu r1, -64(r1)
412 psq_l A00_A01, 0(mA), 0, 0
413 stfd fp14, 8(r1)
414 psq_l B00_B01, 0(mB), 0, 0
415 addis r6, 0, Unit01@ha
416 psq_l B02_B03, 8(mB), 0, 0
417 stfd fp15, 16(r1)
418 addi r6, r6, Unit01@l
419 stfd fp31, 40(r1)
420 psq_l B10_B11, 16(mB), 0, 0
421 // D00_D01 = b00a00 , b01a00
422 ps_muls0 D00_D01, B00_B01, A00_A01
423 psq_l A10_A11, 16(mA), 0, 0
424 // D02_D03 = b02a00 , b03a00
425 ps_muls0 D02_D03, B02_B03, A00_A01
426 psq_l UNIT01, 0(r6), 0, 0
427 // D10_D11 = a10b00 , a10b01
428 ps_muls0 D10_D11, B00_B01, A10_A11
429 psq_l B12_B13, 24(mB), 0, 0
430 // D12_D13 = a10b02 , a10b03
431 ps_muls0 D12_D13, B02_B03, A10_A11
432 psq_l A02_A03, 8(mA), 0, 0
433 // fp12 = b10a01 + b00a00 , b11a01 + b01a00
434 ps_madds1 D00_D01, B10_B11, A00_A01, D00_D01
435 psq_l A12_A13, 24(mA), 0, 0
436 // D10_D11 = a10b00 + a11b10 , a10b01 + a11b11
437 ps_madds1 D10_D11, B10_B11, A10_A11, D10_D11
438 psq_l B20_B21, 32(mB), 0, 0
439 // D02_D03 = b12a01 + b02a00 , b13a01 + b03a00
440 ps_madds1 D02_D03, B12_B13, A00_A01, D02_D03 // YYY LAST TIME FP0 IS USED
441 psq_l B22_B23, 40(mB), 0, 0
442 // D12_D13 = a10b02 + a11b12, a10b03+a11b13
443 ps_madds1 D12_D13, B12_B13, A10_A11, D12_D13 // YYY LAST TIME FP2 IS USED
444 psq_l A20_A21, 32(mA), 0, 0
445 psq_l A22_A23, 40(mA), 0, 0
446 // D00_D01 = b20a02 + b10a01 + b00a00 , b21a02 + b11a01 + b01a00
447 ps_madds0 D00_D01, B20_B21, A02_A03, D00_D01 // m00, m01 computed
448 // D02_D03 = b12a01 + b02a00 + b22a02 , b13a01 + b03a00 + b23a02
449 ps_madds0 D02_D03, B22_B23, A02_A03, D02_D03
450 // D10_D11 = a10b00 + a11b10 +a12b20, a10b01 + a11b11 + a12b21
451 ps_madds0 D10_D11, B20_B21, A12_A13, D10_D11 // m10, m11 computed
452 // D12_D13 = a10b02 + a11b12 + a12b22, a10b03+a11b13 + a12b23 + a13
453 ps_madds0 D12_D13, B22_B23, A12_A13, D12_D13
454
455
456 // store m00m01
457 psq_st D00_D01, 0(mAB), 0, 0 // YYY LAST TIME FP12 IS USED
458
459 // D20_D21 = a20b00, a20b01
460 ps_muls0 D20_D21, B00_B01, A20_A21 // YYY LAST TIME FP6 IS USED
461 // get a03 from fp1 and add to D02_D03
462 ps_madds1 D02_D03, UNIT01, A02_A03, D02_D03 // m02, m03 computed
463 // YYY LAST TIME FP1 IS USED
464 // D22_D23 = a20b02, a20b03
465 ps_muls0 D22_D23, B02_B03, A20_A21 // YYY LAST TIME FP7 IS USED
466 // store m10m11
467 psq_st D10_D11, 16(mAB), 0, 0
468 // get a13 from fp3 and add to D12_D13
469 ps_madds1 D12_D13, UNIT01, A12_A13, D12_D13 // m12, m13 computed
470 // store m02m03
471 psq_st D02_D03, 8(mAB), 0, 0 // YYY LAST TIME D02_D03 IS USED
472
473 // D20_D21 = a20b00 + a21b10, a20b01 + a21b11
474 ps_madds1 D20_D21, B10_B11, A20_A21, D20_D21 // YYY LAST TIME FP8 IS USED
475 // D22_D23 = a20b02 + a21b12, a20b03 + a21b13
476 ps_madds1 D22_D23, B12_B13, A20_A21, D22_D23
477 // D20_D21 = a20b00 + a21b10 + a22b20, a20b01 + a21b11 + a22b21
478 ps_madds0 D20_D21, B20_B21, A22_A23, D20_D21
479 // Restore fp14
480 lfd fp14, 8(r1) // D10_D11
481 // store m12m13
482 psq_st D12_D13, 24(mAB), 0, 0
483 // D22_D23 = a20b02 + a21b12 + a22b22, a20b03 + a21b13 + a22b23 + a23
484 ps_madds0 D22_D23, B22_B23, A22_A23, D22_D23
485 // store m20m21
486 psq_st D20_D21, 32(mAB), 0, 0
487 // get a23 from fp5 and add to fp17
488 ps_madds1 D22_D23, UNIT01, A22_A23, D22_D23
489 // restore stack frame
490 lfd fp15, 16(r1) // D12_D13
491 // store m22m23
492 psq_st D22_D23, 40(mAB), 0, 0
493
494 lfd fp31, 40(r1)
495 addi r1, r1, 64
496
497
498 blr
499
500
501 #undef A00_A01
502 #undef A02_A03
503 #undef A10_A11
504 #undef A12_A13
505 #undef A20_A21
506 #undef A22_A23
507
508 #undef B00_B01
509 #undef B02_B03
510 #undef B10_B11
511 #undef B12_B13
512 #undef B20_B21
513 #undef B22_B23
514
515 #undef D00_D01
516 #undef D02_D03
517 #undef D10_D11
518 #undef D12_D13
519 #undef D20_D21
520 #undef D22_D23
521
522 #undef UNIT01
523
524
525 /*
526 / m[0][0] = a[0][0]*b[0][0] + a[0][1]*b[1][0] + a[0][2]*b[2][0];
527 fp12m[0][1] = a[0][0]*b[0][1] + a[0][1]*b[1][1] + a[0][2]*b[2][1];
528 / m[0][2] = a[0][0]*b[0][2] + a[0][1]*b[1][2] + a[0][2]*b[2][2];
529 fp13m[0][3] = a[0][0]*b[0][3] + a[0][1]*b[1][3] + a[0][2]*b[2][3] + a[0][3];
530
531 / m[1][0] = a[1][0]*b[0][0] + a[1][1]*b[1][0] + a[1][2]*b[2][0];
532 fp14m[1][1] = a[1][0]*b[0][1] + a[1][1]*b[1][1] + a[1][2]*b[2][1];
533 / m[1][2] = a[1][0]*b[0][2] + a[1][1]*b[1][2] + a[1][2]*b[2][2];
534 fp15m[1][3] = a[1][0]*b[0][3] + a[1][1]*b[1][3] + a[1][2]*b[2][3] + a[1][3];
535
536 / m[2][0] = a[2][0]*b[0][0] + a[2][1]*b[1][0] + a[2][2]*b[2][0];
537 fp16m[2][1] = a[2][0]*b[0][1] + a[2][1]*b[1][1] + a[2][2]*b[2][1];
538 / m[2][2] = a[2][0]*b[0][2] + a[2][1]*b[1][2] + a[2][2]*b[2][2];
539 fp17m[2][3] = a[2][0]*b[0][3] + a[2][1]*b[1][3] + a[2][2]*b[2][3] + a[2][3];
540 OPTIMIZATIONS :
541 fp0 instead of fp17
542 fp2 instead of fp16
543 psq_l fp0, 0(mA), 0, 0 // a[0][0], a[0][1]
544 psq_l fp1, 8(mA), 0, 0 // a[0][2], a[0][3]
545 psq_l fp2, 16(mA), 0, 0 // a[1][0], a[1][1]
546 psq_l fp3, 24(mA), 0, 0 // a[1][2], a[1][3]
547 psq_l fp4, 32(mA), 0, 0 // a[2][0], a[2][1]
548 psq_l fp5, 40(mA), 0, 0 // a[2][2], a[2][3]
549
550 psq_l fp6, 0(mB), 0, 0 // b[0][0], b[0][1]
551 psq_l fp7, 8(mB), 0, 0 // b[0][2], b[0][3]
552 psq_l fp8, 16(mB), 0, 0 // b[1][0], b[1][1]
553 psq_l fp9, 24(mB), 0, 0 // b[1][2], b[1][3]
554 psq_l fp10, 32(mB), 0, 0 // b[2][0], b[2][1]
555 psq_l fp11, 40(mB), 0, 0 // b[2][2], b[2][3]
556
557 */
558 }
559 #endif // GEKKO
560
561
562 /*---------------------------------------------------------------------*
563
564 Name: MTXConcatArray
565
566 Description: concatenates a matrix to an array of matrices.
567 order of operation is A x B(array) = AB(array).
568
569 Arguments: a first matrix for concat.
570 srcBase array base of second matrix for concat.
571 dstBase array base of resultant matrix from concat.
572 count number of matrices in srcBase, dstBase arrays.
573
574 note: cannot check for array overflow
575
576 Return : none
577
578 *---------------------------------------------------------------------*/
579
580 /*---------------------------------------------------------------------*
581 C version
582 *---------------------------------------------------------------------*/
C_MTXConcatArray(const Mtx a,const Mtx * srcBase,Mtx * dstBase,u32 count)583 void C_MTXConcatArray ( const Mtx a, const Mtx* srcBase, Mtx* dstBase, u32 count )
584 {
585 u32 i;
586
587 ASSERTMSG( (a != 0), "MTXConcatArray(): NULL MtxPtr 'a' " );
588 ASSERTMSG( (srcBase != 0), "MTXConcatArray(): NULL MtxPtr 'srcBase' " );
589 ASSERTMSG( (dstBase != 0), "MTXConcatArray(): NULL MtxPtr 'dstBase' " );
590 ASSERTMSG( (count > 1), "MTXConcatArray(): count must be greater than 1." );
591
592 for ( i = 0 ; i < count ; i++ )
593 {
594 C_MTXConcat(a, *srcBase, *dstBase);
595
596 srcBase++;
597 dstBase++;
598 }
599 }
600
601 /*---------------------------------------------------------------------*
602 Paired-Single assembler version
603 *---------------------------------------------------------------------*
604 Note that this performs NO error checking.
605 *---------------------------------------------------------------------*/
606 #ifdef GEKKO
607
608 // This code can be compiled only with register allocation optimization.
609 #if ( defined(__MWERKS__) && defined(_DEBUG) )
610 #pragma global_optimizer on
611 #pragma optimization_level 1
612 #endif
613
PSMTXConcatArray(const register Mtx a,const register Mtx * srcBase,register Mtx * dstBase,register u32 count)614 void PSMTXConcatArray (
615 const register Mtx a,
616 const register Mtx* srcBase,
617 register Mtx* dstBase,
618 register u32 count )
619 {
620 register f32 va0, va1, va2, va3, va4, va5;
621 register f32 vb0, vb1, vb2, vb3, vb4, vb5;
622 register f32 vd0, vd1, vd2, vd3, vd4, vd5;
623 register f32 u01;
624 register f32* u01Ptr = Unit01;
625
626 asm
627 {
628 // [a00][a01]
629 psq_l va0, 0(a), 0, 0
630 // [a02][a03]
631 psq_l va1, 8(a), 0, 0
632 // [a10][a11]
633 psq_l va2, 16(a), 0, 0
634 // [a12][a13]
635 psq_l va3, 24(a), 0, 0
636 // count--
637 subi count, count, 1
638 // [a20][a21]
639 psq_l va4, 32(a), 0, 0
640 // [a22][a23]
641 psq_l va5, 40(a), 0, 0
642 // Loop count
643 mtctr count
644 // [0][1]
645 psq_l u01, 0(u01Ptr), 0, 0
646
647 //---------------------------------
648 // [b00][b01]
649 psq_l vb0, 0(srcBase), 0, 0
650 // [b10][b11]
651 psq_l vb2, 16(srcBase), 0, 0
652
653 // [a00*b00][a00*b01]
654 ps_muls0 vd0, vb0, va0
655 // [a10*b00][a10*b01]
656 ps_muls0 vd2, vb0, va2
657 // [a20*b00][a20*b01]
658 ps_muls0 vd4, vb0, va4
659
660 // [b20][b21]
661 psq_l vb4, 32(srcBase), 0, 0
662
663 // [a00*b00 + a01*b10][a00*b01 + a01*b11]
664 ps_madds1 vd0, vb2, va0, vd0
665 // [a10*b00 + a11*b10][a10*b01 + a11*b11]
666 ps_madds1 vd2, vb2, va2, vd2
667 // [a20*b00 + a21*b10][a20*b01 + a21*b11]
668 ps_madds1 vd4, vb2, va4, vd4
669
670 // [b02][b03]
671 psq_l vb1, 8(srcBase), 0, 0
672
673 // [a00*b00 + a01*b10 + a02*b20][a00*b01 + a01*b11 + a02*b21]
674 ps_madds0 vd0, vb4, va1, vd0
675 // [a10*b00 + a11*b10 + a12*b20][a10*b01 + a11*b11 + a12*b21]
676 ps_madds0 vd2, vb4, va3, vd2
677 // [a20*b00 + a21*b10 + a22*b20][a20*b01 + a21*b11 + a22*b21]
678 ps_madds0 vd4, vb4, va5, vd4
679
680 // [b12][b13]
681 psq_l vb3, 24(srcBase), 0, 0
682 // [a00*b00 + a01*b10 + a02*b20][a00*b01 + a01*b11 + a02*b21]
683 psq_st vd0, 0(dstBase), 0, 0
684
685 // [a00*b02][a00*b03]
686 ps_muls0 vd1, vb1, va0
687 // [a10*b02][a10*b03]
688 ps_muls0 vd3, vb1, va2
689 // [a20*b02][a20*b03]
690 ps_muls0 vd5, vb1, va4
691
692 // [b22][b23]
693 psq_l vb5, 40(srcBase), 0, 0
694 // [a10*b00 + a11*b10 + a12*b20][a10*b01 + a11*b11 + a12*b21]
695 psq_st vd2, 16(dstBase), 0, 0
696
697 // [a00*b02 + a01*b12][a00*b03 + a01*b13]
698 ps_madds1 vd1, vb3, va0, vd1
699 // [a10*b02 + a11*b12][a10*b03 + a11*b13]
700 ps_madds1 vd3, vb3, va2, vd3
701 // [a20*b02 + a21*b12][a20*b03 + a21*b13]
702 ps_madds1 vd5, vb3, va4, vd5
703
704 _loop:
705
706 // ++srcBase
707 addi srcBase, srcBase, sizeof(Mtx)
708
709 // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23]
710 ps_madds0 vd1, vb5, va1, vd1
711 // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23]
712 ps_madds0 vd3, vb5, va3, vd3
713 // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23]
714 ps_madds0 vd5, vb5, va5, vd5
715
716 // [b00][b01]
717 psq_l vb0, 0(srcBase), 0, 0
718 // [a20*b00 + a21*b10 + a22*b20][a20*b01 + a21*b11 + a22*b21]
719 psq_st vd4, 32(dstBase), 0, 0
720
721 // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23 + a03]
722 ps_madd vd1, u01, va1, vd1
723 // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23 + a13]
724 ps_madd vd3, u01, va3, vd3
725 // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23 + a23]
726 ps_madd vd5, u01, va5, vd5
727
728 // [b10][b11]
729 psq_l vb2, 16(srcBase), 0, 0
730 // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23 + a03]
731 psq_st vd1, 8(dstBase), 0, 0
732
733 // [a00*b00][a00*b01]
734 ps_muls0 vd0, vb0, va0
735 // [a10*b00][a10*b01]
736 ps_muls0 vd2, vb0, va2
737 // [a20*b00][a20*b01]
738 ps_muls0 vd4, vb0, va4
739
740 // [b20][b21]
741 psq_l vb4, 32(srcBase), 0, 0
742 // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23 + a13]
743 psq_st vd3, 24(dstBase), 0, 0
744
745 // [a00*b00 + a01*b10][a00*b01 + a01*b11]
746 ps_madds1 vd0, vb2, va0, vd0
747 // [a10*b00 + a11*b10][a10*b01 + a11*b11]
748 ps_madds1 vd2, vb2, va2, vd2
749 // [a20*b00 + a21*b10][a20*b01 + a21*b11]
750 ps_madds1 vd4, vb2, va4, vd4
751
752 // [b02][b03]
753 psq_l vb1, 8(srcBase), 0, 0
754 // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23 + a23]
755 psq_st vd5, 40(dstBase), 0, 0
756 // ++dstBase
757 addi dstBase, dstBase, sizeof(Mtx)
758
759 // [a00*b00 + a01*b10 + a02*b20][a00*b01 + a01*b11 + a02*b21]
760 ps_madds0 vd0, vb4, va1, vd0
761 // [a10*b00 + a11*b10 + a12*b20][a10*b01 + a11*b11 + a12*b21]
762 ps_madds0 vd2, vb4, va3, vd2
763 // [a20*b00 + a21*b10 + a22*b20][a20*b01 + a21*b11 + a22*b21]
764 ps_madds0 vd4, vb4, va5, vd4
765
766 // [b12][b13]
767 psq_l vb3, 24(srcBase), 0, 0
768 // [a00*b00 + a01*b10 + a02*b20][a00*b01 + a01*b11 + a02*b21]
769 psq_st vd0, 0(dstBase), 0, 0
770
771 // [a00*b02][a00*b03]
772 ps_muls0 vd1, vb1, va0
773 // [a10*b02][a10*b03]
774 ps_muls0 vd3, vb1, va2
775 // [a20*b02][a20*b03]
776 ps_muls0 vd5, vb1, va4
777
778 // [b22][b23]
779 psq_l vb5, 40(srcBase), 0, 0
780 // [a10*b00 + a11*b10 + a12*b20][a10*b01 + a11*b11 + a12*b21]
781 psq_st vd2, 16(dstBase), 0, 0
782
783 // [a00*b02 + a01*b12][a00*b03 + a01*b13]
784 ps_madds1 vd1, vb3, va0, vd1
785 // [a10*b02 + a11*b12][a10*b03 + a11*b13]
786 ps_madds1 vd3, vb3, va2, vd3
787 // [a20*b02 + a21*b12][a20*b03 + a21*b13]
788 ps_madds1 vd5, vb3, va4, vd5
789
790 // LOOP
791 bdnz _loop
792
793 // [a20*b00 + a21*b10 + a22*b20][a20*b01 + a21*b11 + a22*b21]
794 psq_st vd4, 32(dstBase), 0, 0
795
796 // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23]
797 ps_madds0 vd1, vb5, va1, vd1
798 // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23]
799 ps_madds0 vd3, vb5, va3, vd3
800 // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23]
801 ps_madds0 vd5, vb5, va5, vd5
802
803 // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23 + a03]
804 ps_madd vd1, u01, va1, vd1
805 // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23 + a13]
806 ps_madd vd3, u01, va3, vd3
807 // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23 + a23]
808 ps_madd vd5, u01, va5, vd5
809
810 // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23 + a03]
811 psq_st vd1, 8(dstBase), 0, 0
812 // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23 + a13]
813 psq_st vd3, 24(dstBase), 0, 0
814 // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23 + a23]
815 psq_st vd5, 40(dstBase), 0, 0
816 //---------------------------------
817 }
818 }
819
820 #if ( defined(__MWERKS__) && defined(_DEBUG) )
821 #pragma optimization_level 0
822 #pragma global_optimizer reset
823 #endif
824
825 #endif
826
827 /*---------------------------------------------------------------------*
828
829 Name: MTXTranspose
830
831 Description: computes the transpose of a matrix.
832 As matrices are 3x4, fourth column (translation component) is
833 lost and becomes (0,0,0).
834
835 This function is intended for use in computing an
836 inverse-transpose matrix to transform normals for lighting.
837 In this case, lost translation component doesn't matter.
838
839
840 Arguments: src source matrix.
841 xPose destination (transposed) matrix.
842 ok if src == xPose.
843
844
845
846 Return : none
847
848 *---------------------------------------------------------------------*/
849 /*---------------------------------------------------------------------*
850 C version
851 *---------------------------------------------------------------------*/
C_MTXTranspose(const Mtx src,Mtx xPose)852 void C_MTXTranspose ( const Mtx src, Mtx xPose )
853 {
854 Mtx mTmp;
855 MtxPtr m;
856
857
858 ASSERTMSG( (src != 0), MTX_TRANSPOSE_1 );
859 ASSERTMSG( (xPose != 0), MTX_TRANSPOSE_2 );
860
861
862 if(src == xPose)
863 {
864 m = mTmp;
865 }
866 else
867 {
868 m = xPose;
869 }
870
871
872 m[0][0] = src[0][0]; m[0][1] = src[1][0]; m[0][2] = src[2][0]; m[0][3] = 0.0f;
873 m[1][0] = src[0][1]; m[1][1] = src[1][1]; m[1][2] = src[2][1]; m[1][3] = 0.0f;
874 m[2][0] = src[0][2]; m[2][1] = src[1][2]; m[2][2] = src[2][2]; m[2][3] = 0.0f;
875
876
877 // copy back if needed
878 if( m == mTmp )
879 {
880 C_MTXCopy( mTmp, xPose );
881 }
882 }
883
884 /*---------------------------------------------------------------------*
885 Paired-Single assembler version
886 *---------------------------------------------------------------------*
887 Note that this performs NO error checking.
888 *---------------------------------------------------------------------*/
889 #ifdef GEKKO
PSMTXTranspose(const register Mtx src,register Mtx xPose)890 void PSMTXTranspose ( const register Mtx src, register Mtx xPose )
891 {
892 register f32 c_zero = 0.0F;
893 register f32 row0a, row1a, row0b, row1b;
894 register f32 trns0, trns1, trns2;
895
896 asm
897 {
898 psq_l row0a, 0(src), 0, 0 // [0][0], [0][1]
899 stfs c_zero, 44(xPose) // 0 -> [2][3]
900 psq_l row1a, 16(src), 0, 0 // [1][0], [1][1]
901 ps_merge00 trns0, row0a, row1a // [0][0], [1][0]
902 psq_l row0b, 8(src), 1, 0 // [0][2], 1
903 ps_merge11 trns1, row0a, row1a // [0][1], [1][1]
904 psq_l row1b, 24(src), 1, 0 // [1][2], 1
905 psq_st trns0, 0(xPose), 0, 0 // [0][0], [1][0] -> [0][0], [0][1]
906 psq_l row0a, 32(src), 0, 0 // [2][0], [2][1]
907 ps_merge00 trns2, row0b, row1b // [0][2], [1][2]
908 psq_st trns1, 16(xPose), 0, 0 // [0][1], [1][1] -> [1][0], [1][1]
909 ps_merge00 trns0, row0a, c_zero // [2][0], 0
910 psq_st trns2, 32(xPose), 0, 0 // [0][2], [1][2] -> [2][0], [2][1]
911 ps_merge10 trns1, row0a, c_zero // [2][1], 0
912 psq_st trns0, 8(xPose), 0, 0 // [2][0], 0 -> [0][2], [0][3]
913 lfs row0b, 40(src) // [2][2]
914 psq_st trns1, 24(xPose), 0, 0 // [2][1], 0 -> [1][2], [1][3]
915 stfs row0b, 40(xPose) // [2][2] -> [2][2]
916 }
917 }
918 #endif // GEKKO
919
920 /*---------------------------------------------------------------------*
921
922 Name: MTXInverse
923
924 Description: computes a fast inverse of a matrix.
925 this algorithm works for matrices with a fourth row of
926 (0,0,0,1).
927
928 for a matrix
929 M = | A C | where A is the upper 3x3 submatrix,
930 | 0 1 | C is a 1x3 column vector
931
932
933 INV(M) = | inv(A) (inv(A))*(-C) |
934 | 0 1 |
935
936
937
938 Arguments: src source matrix.
939 inv destination (inverse) matrix.
940 ok if src == inv.
941
942
943
944 Return : 0 if src is not invertible.
945 1 on success.
946
947 *---------------------------------------------------------------------*/
948 /*---------------------------------------------------------------------*
949 C version
950 *---------------------------------------------------------------------*/
C_MTXInverse(const Mtx src,Mtx inv)951 u32 C_MTXInverse ( const Mtx src, Mtx inv )
952 {
953 Mtx mTmp;
954 MtxPtr m;
955 f32 det;
956
957 ASSERTMSG( (src != 0), MTX_INVERSE_1 );
958 ASSERTMSG( (inv != 0), MTX_INVERSE_2 );
959
960 if( src == inv )
961 {
962 m = mTmp;
963 }
964 else
965 {
966 m = inv;
967 }
968
969
970 // compute the determinant of the upper 3x3 submatrix
971 det = src[0][0]*src[1][1]*src[2][2] + src[0][1]*src[1][2]*src[2][0] + src[0][2]*src[1][0]*src[2][1]
972 - src[2][0]*src[1][1]*src[0][2] - src[1][0]*src[0][1]*src[2][2] - src[0][0]*src[2][1]*src[1][2];
973
974
975 // check if matrix is singular
976 if( det == 0.0f )
977 {
978 return 0;
979 }
980
981
982 // compute the inverse of the upper submatrix:
983
984 // find the transposed matrix of cofactors of the upper submatrix
985 // and multiply by (1/det)
986
987 det = 1.0f / det;
988
989
990 m[0][0] = (src[1][1]*src[2][2] - src[2][1]*src[1][2]) * det;
991 m[0][1] = -(src[0][1]*src[2][2] - src[2][1]*src[0][2]) * det;
992 m[0][2] = (src[0][1]*src[1][2] - src[1][1]*src[0][2]) * det;
993
994 m[1][0] = -(src[1][0]*src[2][2] - src[2][0]*src[1][2]) * det;
995 m[1][1] = (src[0][0]*src[2][2] - src[2][0]*src[0][2]) * det;
996 m[1][2] = -(src[0][0]*src[1][2] - src[1][0]*src[0][2]) * det;
997
998 m[2][0] = (src[1][0]*src[2][1] - src[2][0]*src[1][1]) * det;
999 m[2][1] = -(src[0][0]*src[2][1] - src[2][0]*src[0][1]) * det;
1000 m[2][2] = (src[0][0]*src[1][1] - src[1][0]*src[0][1]) * det;
1001
1002
1003 // compute (invA)*(-C)
1004 m[0][3] = -m[0][0]*src[0][3] - m[0][1]*src[1][3] - m[0][2]*src[2][3];
1005 m[1][3] = -m[1][0]*src[0][3] - m[1][1]*src[1][3] - m[1][2]*src[2][3];
1006 m[2][3] = -m[2][0]*src[0][3] - m[2][1]*src[1][3] - m[2][2]*src[2][3];
1007
1008 // copy back if needed
1009 if( m == mTmp )
1010 {
1011 C_MTXCopy( mTmp,inv );
1012 }
1013
1014 return 1;
1015 }
1016
1017 /*---------------------------------------------------------------------*
1018 Paired-Single assembler version
1019 *---------------------------------------------------------------------*
1020 Note that this performs NO error checking.
1021 Results may be a little bit different from the C version
1022 because it doesn't perform exactly same calculation.
1023 *---------------------------------------------------------------------*/
1024 #ifdef GEKKO
PSMTXInverse(const register Mtx src,register Mtx inv)1025 asm u32 PSMTXInverse ( const register Mtx src, register Mtx inv )
1026 {
1027 nofralloc
1028
1029 // fp0 [ 00 ][ 1.0F ] : Load
1030 psq_l fp0, 0(src), 1, 0
1031 // fp1 [ 01 ][ 02 ] : Load
1032 psq_l fp1, 4(src), 0, 0
1033 // fp2 [ 10 ][ 1.0F ] : Load
1034 psq_l fp2, 16(src), 1, 0
1035 // fp6 [ 02 ][ 00 ]
1036 ps_merge10 fp6, fp1, fp0
1037 // fp3 [ 11 ][ 12 ] : Load
1038 psq_l fp3, 20(src), 0, 0
1039 // fp4 [ 20 ][ 1.0F ] : Load
1040 psq_l fp4, 32(src), 1, 0
1041 // fp7 [ 12 ][ 10 ]
1042 ps_merge10 fp7, fp3, fp2
1043 // fp5 [ 21 ][ 22 ] : Load
1044 psq_l fp5, 36(src), 0, 0
1045
1046 // fp11[ 11*02 ][ 00*12 ]
1047 ps_mul fp11, fp3, fp6
1048 // fp13[ 21*12 ][ 10*22 ]
1049 ps_mul fp13, fp5, fp7
1050 // fp8 [ 22 ][ 20 ]
1051 ps_merge10 fp8, fp5, fp4
1052 // fp11[ 01*12 - 11*02 ][ 10*02 - 00*12 ] -> free(fp7[ 12 ][ 10 ])
1053 ps_msub fp11, fp1, fp7, fp11
1054 // fp12[ 01*22 ][ 20*02 ]
1055 ps_mul fp12, fp1, fp8
1056 // fp13[ 11*22 - 21*12 ][ 20*12 - 10*22 ] -> free(fp8[ 22 ][ 20 ])
1057 ps_msub fp13, fp3, fp8, fp13
1058 // fp10[ 20*11 ][ N/A ]
1059 ps_mul fp10, fp3, fp4
1060 // fp12[ 21*02 - 01*22 ][ 00*22 - 20*02 ] -> free(fp6[ 02 ][ 00 ])
1061 ps_msub fp12, fp5, fp6, fp12
1062 // fp9 [ 00*21 ][ N/A ]
1063 ps_mul fp9, fp0, fp5
1064 // fp8 [ 10*01 ][ N/A ]
1065 ps_mul fp8, fp1, fp2
1066
1067 // fp6 [ 0.0F ][ 0.0F ]
1068 ps_sub fp6, fp6, fp6
1069 // fp10[ 10*21 - 20*11 ][ N/A ] -> free(fp5[ 21 ][ 22 ])
1070 ps_msub fp10, fp2, fp5, fp10
1071 // fp7 [ 00*(11*22-21*12) ][ N/A ]
1072 ps_mul fp7, fp0, fp13
1073 // fp9 [ 20*01 - 00*21 ][ N/A ]
1074 ps_msub fp9, fp1, fp4, fp9
1075 // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) ][ N/A ]
1076 ps_madd fp7, fp2, fp12, fp7
1077 // fp8 [ 00*11 - 10*01 ][ N/A ]
1078 ps_msub fp8, fp0, fp3, fp8
1079 // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) + 20*(01*12-11*02) ][ N/A ] : det
1080 ps_madd fp7, fp4, fp11, fp7
1081
1082 // ( det == 0 ) ?
1083 ps_cmpo0 cr0, fp7, fp6
1084 bne _regular
1085
1086 // return value (singular)
1087 addi r3, 0, 0
1088 blr
1089
1090 _regular:
1091
1092 // fp0 [ rdet = 1/det ][ N/A ]
1093 fres fp0, fp7
1094
1095 // Refinement : ( E = est. of 1/K ) -> ( E' = 2E - K*E*E )
1096 ps_add fp6, fp0, fp0
1097 ps_mul fp5, fp0, fp0
1098 ps_nmsub fp0, fp7, fp5, fp6
1099
1100
1101 // fp1 [ 03 ][ 03 ] : Load
1102 lfs fp1, 12(src)
1103 // fp13[ ( 11*22 - 21*12 ) * rdet ][ ( 20*12 - 10*22 ) * rdet ] : i[0][0], i[1][0]
1104 ps_muls0 fp13, fp13, fp0
1105 // fp2 [ 13 ][ 13 ] : Load
1106 lfs fp2, 28(src)
1107 // fp12[ ( 21*02 - 01*22 ) * rdet ][ ( 00*22 - 20*02 ) * rdet ] : i[0][1], i[1][1]
1108 ps_muls0 fp12, fp12, fp0
1109 // fp3 [ 23 ][ 23 ] : Load
1110 lfs fp3, 44(src)
1111 // fp11[ ( 01*12 - 11*02 ) * rdet ][ ( 10*02 - 00*12 ) * rdet ] : i[0][2], i[1][2]
1112 ps_muls0 fp11, fp11, fp0
1113 // fp5 [ i00 ][ i01 ]
1114 ps_merge00 fp5, fp13, fp12
1115 // fp10[ ( 10*21 - 20*11 ) * rdet ] : i[2][0]
1116 ps_muls0 fp10, fp10, fp0
1117 // fp4 [ i10 ][ i11 ]
1118 ps_merge11 fp4, fp13, fp12
1119 // fp9 [ ( 20*01 - 00*21 ) * rdet ] : i[2][1]
1120 ps_muls0 fp9, fp9, fp0
1121
1122 // [ i00 ][ i01 ] : Store fp5 -> free(fp5[ i00 ][ i01 ])
1123 psq_st fp5, 0(inv), 0, 0
1124 // fp6 [ i00*03 ][ i10*03 ]
1125 ps_mul fp6, fp13, fp1
1126 // [ i10 ][ i11 ] : Store fp4 -> free(fp4[ i10 ][ i11 ])
1127 psq_st fp4, 16(inv), 0, 0
1128 // fp8 [ ( 00*11 - 10*01 ) * rdet ] : i[2][2]
1129 ps_muls0 fp8, fp8, fp0
1130 // fp6 [ i00*03+i01*13 ][ i10*03+i11*13 ]
1131 ps_madd fp6, fp12, fp2, fp6
1132 // [ i20 ] : Store fp10
1133 psq_st fp10, 32(inv), 1, 0
1134 // fp6 [ -i00*03-i01*13-i02*23 ][ -i10*03-i11*13-i12*23 ] : i[0][3], i[1][3]
1135 ps_nmadd fp6, fp11, fp3, fp6
1136 // [ i21 ] : Store fp9
1137 psq_st fp9, 36(inv), 1, 0
1138 // fp7 [ i20*03 ][ N/A ]
1139 ps_mul fp7, fp10, fp1
1140 // fp5 [ i02 ][ i03 ]
1141 ps_merge00 fp5, fp11, fp6
1142 // [ i22 ] : Store fp8
1143 psq_st fp8, 40(inv), 1, 0
1144 // fp4 [ i12 ][ i13 ]
1145 ps_merge11 fp4, fp11, fp6
1146 // [ i02 ][ i03 ] : Store fp5
1147 psq_st fp5, 8(inv), 0, 0
1148 // fp7 [ i20*03+i21*13 ][ N/A ]
1149 ps_madd fp7, fp9, fp2, fp7
1150 // [ i12 ][ i13 ] : Store fp4
1151 psq_st fp4, 24(inv), 0, 0
1152 // fp7 [ -i20*03-i21*13-i22*23 ][ N/A ] : i[2][3]
1153 ps_nmadd fp7, fp8, fp3, fp7
1154 // return value (regular)
1155 addi r3, 0, 1
1156 // [ i23 ] : Store fp7
1157 psq_st fp7, 44(inv), 1, 0
1158
1159 blr
1160 }
1161 #endif // GEKKO
1162
1163 /*---------------------------------------------------------------------*
1164
1165 Name: MTXInvXpose
1166
1167 Description: computes a fast inverse-transpose of a matrix.
1168 this algorithm works for matrices with a fourth row of
1169 (0,0,0,1). Commonly used for calculating normal transform
1170 matrices.
1171
1172 This function is equivalent to the combination of
1173 two functions MTXInverse + MTXTranspose.
1174
1175 Arguments: src source matrix.
1176 invx destination (inverse-transpose) matrix.
1177 ok if src == invx.
1178
1179 Return : 0 if src is not invertible.
1180 1 on success.
1181
1182 *---------------------------------------------------------------------*/
1183 /*---------------------------------------------------------------------*
1184 C version
1185 *---------------------------------------------------------------------*/
C_MTXInvXpose(const Mtx src,Mtx invX)1186 u32 C_MTXInvXpose ( const Mtx src, Mtx invX )
1187 {
1188 Mtx mTmp;
1189 MtxPtr m;
1190 f32 det;
1191
1192 ASSERTMSG( (src != 0), MTX_INVXPOSE_1 );
1193 ASSERTMSG( (invX != 0), MTX_INVXPOSE_2 );
1194
1195 if( src == invX )
1196 {
1197 m = mTmp;
1198 }
1199 else
1200 {
1201 m = invX;
1202 }
1203
1204 // compute the determinant of the upper 3x3 submatrix
1205 det = src[0][0]*src[1][1]*src[2][2] + src[0][1]*src[1][2]*src[2][0] + src[0][2]*src[1][0]*src[2][1]
1206 - src[2][0]*src[1][1]*src[0][2] - src[1][0]*src[0][1]*src[2][2] - src[0][0]*src[2][1]*src[1][2];
1207
1208 // check if matrix is singular
1209 if( det == 0.0f )
1210 {
1211 return 0;
1212 }
1213
1214 // compute the inverse-transpose of the upper submatrix:
1215
1216 // find the transposed matrix of cofactors of the upper submatrix
1217 // and multiply by (1/det)
1218
1219 det = 1.0f / det;
1220
1221 m[0][0] = (src[1][1]*src[2][2] - src[2][1]*src[1][2]) * det;
1222 m[0][1] = -(src[1][0]*src[2][2] - src[2][0]*src[1][2]) * det;
1223 m[0][2] = (src[1][0]*src[2][1] - src[2][0]*src[1][1]) * det;
1224
1225 m[1][0] = -(src[0][1]*src[2][2] - src[2][1]*src[0][2]) * det;
1226 m[1][1] = (src[0][0]*src[2][2] - src[2][0]*src[0][2]) * det;
1227 m[1][2] = -(src[0][0]*src[2][1] - src[2][0]*src[0][1]) * det;
1228
1229 m[2][0] = (src[0][1]*src[1][2] - src[1][1]*src[0][2]) * det;
1230 m[2][1] = -(src[0][0]*src[1][2] - src[1][0]*src[0][2]) * det;
1231 m[2][2] = (src[0][0]*src[1][1] - src[1][0]*src[0][1]) * det;
1232
1233
1234 // the fourth columns should be all zero
1235 m[0][3] = 0.0F;
1236 m[1][3] = 0.0F;
1237 m[2][3] = 0.0F;
1238
1239 // copy back if needed
1240 if( m == mTmp )
1241 {
1242 C_MTXCopy( mTmp,invX );
1243 }
1244
1245 return 1;
1246 }
1247
1248 /*---------------------------------------------------------------------*
1249 Paired-Single assembler version
1250 *---------------------------------------------------------------------*
1251 Note that this performs NO error checking.
1252 Results may be a little bit different from the C version
1253 because it doesn't perform exactly same calculation.
1254 *---------------------------------------------------------------------*/
1255 #ifdef GEKKO
PSMTXInvXpose(const register Mtx src,register Mtx invX)1256 asm u32 PSMTXInvXpose ( const register Mtx src, register Mtx invX )
1257 {
1258 nofralloc
1259
1260 // fp0 [ 00 ][ 1.0F ] : Load
1261 psq_l fp0, 0(src), 1, 0
1262 // fp1 [ 01 ][ 02 ] : Load
1263 psq_l fp1, 4(src), 0, 0
1264 // fp2 [ 10 ][ 1.0F ] : Load
1265 psq_l fp2, 16(src), 1, 0
1266 // fp6 [ 02 ][ 00 ]
1267 ps_merge10 fp6, fp1, fp0
1268 // fp3 [ 11 ][ 12 ] : Load
1269 psq_l fp3, 20(src), 0, 0
1270 // fp4 [ 20 ][ 1.0F ] : Load
1271 psq_l fp4, 32(src), 1, 0
1272 // fp7 [ 12 ][ 10 ]
1273 ps_merge10 fp7, fp3, fp2
1274 // fp5 [ 21 ][ 22 ] : Load
1275 psq_l fp5, 36(src), 0, 0
1276
1277 // fp11[ 11*02 ][ 00*12 ]
1278 ps_mul fp11, fp3, fp6
1279
1280 // fp8 [ 22 ][ 20 ]
1281 ps_merge10 fp8, fp5, fp4
1282
1283 // fp13[ 21*12 ][ 10*22 ]
1284 ps_mul fp13, fp5, fp7
1285 // fp11[ 01*12 - 11*02 ][ 10*02 - 00*12 ]
1286 ps_msub fp11, fp1, fp7, fp11
1287 // fp12[ 01*22 ][ 20*02 ]
1288 ps_mul fp12, fp1, fp8
1289 // fp13[ 11*22 - 21*12 ][ 20*12 - 10*22 ]
1290 ps_msub fp13, fp3, fp8, fp13
1291 // fp12[ 21*02 - 01*22 ][ 00*22 - 20*02 ]
1292 ps_msub fp12, fp5, fp6, fp12
1293
1294 // fp10[ 20*11 ][ N/A ]
1295 ps_mul fp10, fp3, fp4
1296 // fp9 [ 00*21 ][ N/A ]
1297 ps_mul fp9, fp0, fp5
1298 // fp8 [ 10*01 ][ N/A ]
1299 ps_mul fp8, fp1, fp2
1300 // fp10[ 10*21 - 20*11 ][ N/A ]
1301 ps_msub fp10, fp2, fp5, fp10
1302 // fp9 [ 20*01 - 00*21 ][ N/A ]
1303 ps_msub fp9, fp1, fp4, fp9
1304 // fp8 [ 00*11 - 10*01 ][ N/A ]
1305 ps_msub fp8, fp0, fp3, fp8
1306
1307 // fp7 [ 00*(11*22-21*12) ][ N/A ]
1308 ps_mul fp7, fp0, fp13
1309 // fp1 [ 0.0F ][ 0.0F ]
1310 ps_sub fp1, fp1, fp1
1311 // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) ][ N/A ]
1312 ps_madd fp7, fp2, fp12, fp7
1313 // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) + 20*(01*12-11*02) ][ N/A ] : det
1314 ps_madd fp7, fp4, fp11, fp7
1315
1316 // ( det == 0 ) ?
1317 ps_cmpo0 cr0, fp7, fp1
1318 bne _regular
1319
1320 // return value (singular)
1321 addi r3, 0, 0
1322 blr
1323
1324 _regular:
1325
1326 // fp0 [ 1/det ][ N/A ]
1327 fres fp0, fp7
1328
1329 // [ ix03 ] : Store fp1
1330 psq_st fp1, 12(invX), 1, 0
1331
1332 // Refinement : ( E = est. of 1/N ) -> ( E' = 2E - N*E*E )
1333 ps_add fp6, fp0, fp0
1334 ps_mul fp5, fp0, fp0
1335
1336 // [ ix13 ] : Store fp1
1337 psq_st fp1, 28(invX), 1, 0
1338
1339 ps_nmsub fp0, fp7, fp5, fp6
1340
1341 // [ ix23 ] : Store fp1
1342 psq_st fp1, 44(invX), 1, 0
1343
1344 // fp13[ ( 11*22 - 21*12 ) * rdet ][ ( 20*12 - 10*22 ) * rdet ] : ix[0][0], ix[0][1]
1345 ps_muls0 fp13, fp13, fp0
1346 // fp12[ ( 21*02 - 01*22 ) * rdet ][ ( 00*22 - 20*02 ) * rdet ] : ix[1][0], ix[1][1]
1347 ps_muls0 fp12, fp12, fp0
1348 // fp11[ ( 01*12 - 11*02 ) * rdet ][ ( 10*02 - 00*12 ) * rdet ] : ix[2][0], ix[2][1]
1349 ps_muls0 fp11, fp11, fp0
1350
1351 // [ ix00 ][ ix01 ] : Store fp13
1352 psq_st fp13, 0(invX), 0, 0
1353 // [ ix10 ][ ix11 ] : Store fp12
1354 psq_st fp12, 16(invX), 0, 0
1355
1356 // fp10[ ( 10*21 - 20*11 ) * rdet ] : i[0][2]
1357 ps_muls0 fp10, fp10, fp0
1358
1359 // fp9 [ ( 20*01 - 00*21 ) * rdet ] : i[1][2]
1360 ps_muls0 fp9, fp9, fp0
1361
1362 // [ ix20 ][ ix21 ] : Store fp11
1363 psq_st fp11, 32(invX), 0, 0
1364 // [ ix02 ] : Store fp10
1365 psq_st fp10, 8(invX), 1, 0
1366
1367 // fp8 [ ( 00*11 - 10*01 ) * rdet ] : i[2][2]
1368 ps_muls0 fp8, fp8, fp0
1369
1370 // return value (regular)
1371 addi r3, 0, 1
1372
1373 // [ ix12 ] : Store fp9
1374 psq_st fp9, 24(invX), 1, 0
1375 // [ ix22 ] : Store fp8
1376 psq_st fp8, 40(invX), 1, 0
1377
1378 blr
1379 }
1380 #endif
1381
1382 /*---------------------------------------------------------------------*
1383
1384
1385
1386
1387
1388 MODEL SECTION
1389
1390
1391
1392
1393
1394 *---------------------------------------------------------------------*/
1395
1396
1397
1398
1399
1400 /*---------------------------------------------------------------------*
1401
1402 Name: MTXRotDeg
1403
1404 Description: sets a rotation matrix about one of the X, Y or Z axes
1405
1406 Arguments: m matrix to be set
1407
1408 axis major axis about which to rotate.
1409 axis is passed in as a character.
1410 it must be one of 'X', 'x', 'Y', 'y', 'Z', 'z'
1411
1412 deg rotation angle in degrees.
1413
1414 note: counter-clockwise rotation is positive.
1415
1416 Return : none
1417
1418 *---------------------------------------------------------------------*/
1419 // The function is defined as a macro in mtx.h
1420 /*
1421 void MTXRotDeg ( Mtx m, char axis, f32 deg )
1422 {
1423 MTXRotRad( m, axis, MTXDegToRad( deg ) );
1424 }
1425 */
1426
1427 /*---------------------------------------------------------------------*
1428
1429 Name: MTXRotRad
1430
1431 Description: sets a rotation matrix about one of the X, Y or Z axes
1432
1433 Arguments: m matrix to be set
1434
1435 axis major axis about which to rotate.
1436 axis is passed in as a character.
1437 it must be one of 'X', 'x', 'Y', 'y', 'Z', 'z'
1438
1439 deg rotation angle in radians.
1440
1441 note: counter-clockwise rotation is positive.
1442
1443 Return : none
1444
1445 *---------------------------------------------------------------------*/
1446 /*---------------------------------------------------------------------*
1447 C version
1448 *---------------------------------------------------------------------*/
C_MTXRotRad(Mtx m,char axis,f32 rad)1449 void C_MTXRotRad ( Mtx m, char axis, f32 rad )
1450 {
1451
1452 f32 sinA, cosA;
1453
1454 ASSERTMSG( (m != 0), MTX_ROTRAD_1 );
1455
1456 // verification of "axis" will occur in MTXRotTrig
1457
1458 sinA = sinf(rad);
1459 cosA = cosf(rad);
1460
1461 C_MTXRotTrig( m, axis, sinA, cosA );
1462 }
1463
1464 /*---------------------------------------------------------------------*
1465 Paired-Single assembler version
1466 *---------------------------------------------------------------------*
1467 Note that this performs NO error checking.
1468 *---------------------------------------------------------------------*/
1469 #ifdef GEKKO
PSMTXRotRad(Mtx m,char axis,f32 rad)1470 void PSMTXRotRad ( Mtx m, char axis, f32 rad )
1471 {
1472 f32 sinA, cosA;
1473
1474 sinA = sinf(rad);
1475 cosA = cosf(rad);
1476
1477 PSMTXRotTrig( m, axis, sinA, cosA );
1478 }
1479 #endif // GEKKO
1480
1481 /*---------------------------------------------------------------------*
1482
1483 Name: MTXRotTrig
1484
1485 Description: sets a rotation matrix about one of the X, Y or Z axes
1486 from specified trig ratios
1487
1488 Arguments: m matrix to be set
1489
1490 axis major axis about which to rotate.
1491 axis is passed in as a character.
1492 It must be one of 'X', 'x', 'Y', 'y', 'Z', 'z'
1493
1494 sinA sine of rotation angle.
1495
1496 cosA cosine of rotation angle.
1497
1498 note: counter-clockwise rotation is positive.
1499
1500 Return : none
1501
1502 *---------------------------------------------------------------------*/
1503 /*---------------------------------------------------------------------*
1504 C version
1505 *---------------------------------------------------------------------*/
C_MTXRotTrig(Mtx m,char axis,f32 sinA,f32 cosA)1506 void C_MTXRotTrig ( Mtx m, char axis, f32 sinA, f32 cosA )
1507 {
1508
1509 ASSERTMSG( (m != 0), MTX_ROTTRIG_1 );
1510
1511 switch(axis)
1512 {
1513
1514 case 'x':
1515 case 'X':
1516 m[0][0] = 1.0f; m[0][1] = 0.0f; m[0][2] = 0.0f; m[0][3] = 0.0f;
1517 m[1][0] = 0.0f; m[1][1] = cosA; m[1][2] = -sinA; m[1][3] = 0.0f;
1518 m[2][0] = 0.0f; m[2][1] = sinA; m[2][2] = cosA; m[2][3] = 0.0f;
1519 break;
1520
1521 case 'y':
1522 case 'Y':
1523 m[0][0] = cosA; m[0][1] = 0.0f; m[0][2] = sinA; m[0][3] = 0.0f;
1524 m[1][0] = 0.0f; m[1][1] = 1.0f; m[1][2] = 0.0f; m[1][3] = 0.0f;
1525 m[2][0] = -sinA; m[2][1] = 0.0f; m[2][2] = cosA; m[2][3] = 0.0f;
1526 break;
1527
1528 case 'z':
1529 case 'Z':
1530 m[0][0] = cosA; m[0][1] = -sinA; m[0][2] = 0.0f; m[0][3] = 0.0f;
1531 m[1][0] = sinA; m[1][1] = cosA; m[1][2] = 0.0f; m[1][3] = 0.0f;
1532 m[2][0] = 0.0f; m[2][1] = 0.0f; m[2][2] = 1.0f; m[2][3] = 0.0f;
1533 break;
1534
1535 default:
1536 ASSERTMSG( 0, MTX_ROTTRIG_2 );
1537 break;
1538
1539 }
1540
1541 }
1542
1543 /*---------------------------------------------------------------------*
1544 Paired-Single assembler version
1545 *---------------------------------------------------------------------*
1546 Note that this performs NO error checking.
1547 *---------------------------------------------------------------------*/
1548 #ifdef GEKKO
PSMTXRotTrig(register Mtx m,register char axis,register f32 sinA,register f32 cosA)1549 void PSMTXRotTrig (
1550 register Mtx m,
1551 register char axis,
1552 register f32 sinA,
1553 register f32 cosA )
1554 {
1555 register f32 fc0, fc1, nsinA;
1556 register f32 fw0, fw1, fw2, fw3;
1557
1558 asm
1559 {
1560 frsp sinA, sinA // to make sure sinA = single precision
1561 frsp cosA, cosA // to make sure cosA = single precision
1562 }
1563
1564 fc0 = 0.0F;
1565 fc1 = 1.0F;
1566
1567 asm
1568 {
1569 // always lower case
1570 ori axis, axis, 0x20
1571 ps_neg nsinA, sinA
1572
1573 // branches
1574 cmplwi axis, 'x'
1575 beq _case_x
1576 cmplwi axis, 'y'
1577 beq _case_y
1578 cmplwi axis, 'z'
1579 beq _case_z
1580 b _end
1581
1582 _case_x:
1583 psq_st fc1, 0(m), 1, 0
1584 psq_st fc0, 4(m), 0, 0
1585 ps_merge00 fw0, sinA, cosA
1586 psq_st fc0, 12(m), 0, 0
1587 ps_merge00 fw1, cosA, nsinA
1588 psq_st fc0, 28(m), 0, 0
1589 psq_st fc0, 44(m), 1, 0
1590 psq_st fw0, 36(m), 0, 0
1591 psq_st fw1, 20(m), 0, 0
1592 b _end;
1593
1594 _case_y:
1595 ps_merge00 fw0, cosA, fc0
1596 ps_merge00 fw1, fc0, fc1
1597 psq_st fc0, 24(m), 0, 0
1598 psq_st fw0, 0(m), 0, 0
1599 ps_merge00 fw2, nsinA, fc0
1600 ps_merge00 fw3, sinA, fc0
1601 psq_st fw0, 40(m), 0, 0;
1602 psq_st fw1, 16(m), 0, 0;
1603 psq_st fw3, 8(m), 0, 0;
1604 psq_st fw2, 32(m), 0, 0;
1605 b _end;
1606
1607 _case_z:
1608 psq_st fc0, 8(m), 0, 0
1609 ps_merge00 fw0, sinA, cosA
1610 ps_merge00 fw2, cosA, nsinA
1611 psq_st fc0, 24(m), 0, 0
1612 psq_st fc0, 32(m), 0, 0
1613 ps_merge00 fw1, fc1, fc0
1614 psq_st fw0, 16(m), 0, 0
1615 psq_st fw2, 0(m), 0, 0
1616 psq_st fw1, 40(m), 0, 0
1617
1618 _end:
1619
1620 }
1621 }
1622 #endif // GEKKO
1623
1624 /*---------------------------------------------------------------------*
1625
1626 Name: MTXRotAxisDeg
1627
1628 Description: Sets a rotation matrix about an arbitrary axis
1629
1630
1631 Arguments: m matrix to be set
1632
1633 axis ptr to a vector containing the x,y,z axis
1634 components.
1635 axis does not have to be a unit vector.
1636
1637 deg rotation angle in degrees.
1638
1639 note: counter-clockwise rotation is positive.
1640
1641 Return : none
1642
1643 *---------------------------------------------------------------------*/
1644 // The function is defined as a macro in mtx.h
1645 /*
1646 void MTXRotAxisDeg ( Mtx m, Vec *axis, f32 deg )
1647 {
1648 MTXRotAxisRad( m, axis, MTXDegToRad( deg ) );
1649 }
1650 */
1651
1652 /*---------------------------------------------------------------------*
1653
1654 Name: MTXRotAxisRad
1655
1656 Description: Sets a rotation matrix about an arbitrary axis
1657
1658
1659 Arguments: m matrix to be set
1660
1661 axis ptr to a vector containing the x,y,z axis
1662 components.
1663 axis does not have to be a unit vector.
1664
1665 deg rotation angle in radians.
1666
1667 note: counter-clockwise rotation is positive.
1668
1669 Return : none
1670
1671 *---------------------------------------------------------------------*/
1672 /*---------------------------------------------------------------------*
1673 C version
1674 *---------------------------------------------------------------------*/
C_MTXRotAxisRad(Mtx m,const Vec * axis,f32 rad)1675 void C_MTXRotAxisRad( Mtx m, const Vec *axis, f32 rad )
1676 {
1677 Vec vN;
1678 f32 s, c; // sinTheta, cosTheta
1679 f32 t; // ( 1 - cosTheta )
1680 f32 x, y, z; // x, y, z components of normalized axis
1681 f32 xSq, ySq, zSq; // x, y, z squared
1682
1683
1684 ASSERTMSG( (m != 0), MTX_ROTAXIS_1 );
1685 ASSERTMSG( (axis != 0), MTX_ROTAXIS_2 );
1686
1687
1688 s = sinf(rad);
1689 c = cosf(rad);
1690 t = 1.0f - c;
1691
1692 C_VECNormalize( axis, &vN );
1693
1694 x = vN.x;
1695 y = vN.y;
1696 z = vN.z;
1697
1698 xSq = x * x;
1699 ySq = y * y;
1700 zSq = z * z;
1701
1702 m[0][0] = ( t * xSq ) + ( c );
1703 m[0][1] = ( t * x * y ) - ( s * z );
1704 m[0][2] = ( t * x * z ) + ( s * y );
1705 m[0][3] = 0.0f;
1706
1707 m[1][0] = ( t * x * y ) + ( s * z );
1708 m[1][1] = ( t * ySq ) + ( c );
1709 m[1][2] = ( t * y * z ) - ( s * x );
1710 m[1][3] = 0.0f;
1711
1712 m[2][0] = ( t * x * z ) - ( s * y );
1713 m[2][1] = ( t * y * z ) + ( s * x );
1714 m[2][2] = ( t * zSq ) + ( c );
1715 m[2][3] = 0.0f;
1716
1717 }
1718
1719 /*---------------------------------------------------------------------*
1720 Paired-Single assembler version
1721 *---------------------------------------------------------------------*
1722 Note that this performs NO error checking.
1723 *---------------------------------------------------------------------*/
1724 #ifdef GEKKO
1725
__PSMTXRotAxisRadInternal(register Mtx m,const register Vec * axis,register f32 sT,register f32 cT)1726 static void __PSMTXRotAxisRadInternal(
1727 register Mtx m,
1728 const register Vec *axis,
1729 register f32 sT,
1730 register f32 cT )
1731 {
1732 register f32 tT, fc0;
1733 register f32 tmp0, tmp1, tmp2, tmp3, tmp4;
1734 register f32 tmp5, tmp6, tmp7, tmp8, tmp9;
1735
1736 tmp9 = 0.5F;
1737 tmp8 = 3.0F;
1738
1739 asm
1740 {
1741 // to make sure cT = (single precision float value)
1742 frsp cT, cT
1743 // tmp0 = [x][y] : LOAD
1744 psq_l tmp0, 0(axis), 0, 0
1745 // to make sure sT = (single precision float value)
1746 frsp sT, sT
1747 // tmp1 = [z][z] : LOAD
1748 lfs tmp1, 8(axis)
1749
1750 // tmp2 = [x*x][y*y]
1751 ps_mul tmp2, tmp0, tmp0
1752 // tmp7 = [1.0F]
1753 fadds tmp7, tmp9, tmp9
1754 // tmp3 = [x*x+z*z][y*y+z*z]
1755 ps_madd tmp3, tmp1, tmp1, tmp2
1756 // fc0 = [0.0F]
1757 fsubs fc0, tmp9, tmp9
1758 // tmp4 = [S = x*x+y*y+z*z][z]
1759 ps_sum0 tmp4, tmp3, tmp1, tmp2
1760
1761 // tT = 1.0F - cT
1762 fsubs tT, tmp7, cT
1763
1764 // tmp5 = [1.0/sqrt(S)] :estimation[E]
1765 frsqrte tmp5, tmp4
1766 // Newton-Rapson refinement step
1767 // E' = E/2(3.0 - E*E*S)
1768 fmuls tmp2, tmp5, tmp5 // E*E
1769 fmuls tmp3, tmp5, tmp9 // E/2
1770 fnmsubs tmp2, tmp2, tmp4, tmp8 // (3-E*E*S)
1771 fmuls tmp5, tmp2, tmp3 // (E/2)(3-E*E*S)
1772
1773 // cT = [c][c]
1774 ps_merge00 cT, cT, cT
1775
1776 // tmp0 = [nx = x/sqrt(S)][ny = y/sqrt(S)]
1777 ps_muls0 tmp0, tmp0, tmp5
1778 // tmp1 = [nz = z/sqrt(S)][nz = z/sqrt(S)]
1779 ps_muls0 tmp1, tmp1, tmp5
1780
1781 // tmp4 = [t*nx][t*ny]
1782 ps_muls0 tmp4, tmp0, tT
1783 // tmp9 = [s*nx][s*ny]
1784 ps_muls0 tmp9, tmp0, sT
1785 // tmp5 = [t*nz][t*nz]
1786 ps_muls0 tmp5, tmp1, tT
1787
1788 // tmp3 = [t*nx*ny][t*ny*ny]
1789 ps_muls1 tmp3, tmp4, tmp0
1790 // tmp2 = [t*nx*nx][t*ny*nx]
1791 ps_muls0 tmp2, tmp4, tmp0
1792 // tmp4 = [t*nx*nz][t*ny*nz]
1793 ps_muls0 tmp4, tmp4, tmp1
1794
1795 // tmp6 = [t*nx*ny-s*nz][t*nx*ny-s*nz]
1796 fnmsubs tmp6, tmp1, sT, tmp3
1797 // tmp7 = [t*nx*ny+s*nz][t*ny*ny+s*nz]
1798 fmadds tmp7, tmp1, sT, tmp3
1799
1800 // tmp0 = [-s*nx][-s*ny]
1801 ps_neg tmp0, tmp9
1802 // tmp8 = [t*nx*nz+s*ny][0] == [m02][m03]
1803 ps_sum0 tmp8, tmp4, fc0, tmp9
1804 // tmp2 = [t*nx*nx+c][t*nx*ny-s*nz] == [m00][m01]
1805 ps_sum0 tmp2, tmp2, tmp6, cT
1806 // tmp3 = [t*nx*ny+s*nz][t*ny*ny+c] == [m10][m11]
1807 ps_sum1 tmp3, cT, tmp7, tmp3
1808 // tmp6 = [t*ny*nz-s*nx][0] == [m12][m13]
1809 ps_sum0 tmp6, tmp0, fc0 ,tmp4
1810
1811 // tmp8 [m02][m03] : STORE
1812 psq_st tmp8, 8(m), 0, 0
1813 // tmp0 = [t*nx*nz-s*ny][t*ny*nz]
1814 ps_sum0 tmp0, tmp4, tmp4, tmp0
1815 // tmp2 [m00][m01] : STORE
1816 psq_st tmp2, 0(m), 0, 0
1817 // tmp5 = [t*nz*nz][t*nz*nz]
1818 ps_muls0 tmp5, tmp5, tmp1
1819 // tmp3 [m10][m11] : STORE
1820 psq_st tmp3, 16(m), 0, 0
1821 // tmp4 = [t*nx*nz-s*ny][t*ny*nz+s*nx] == [m20][m21]
1822 ps_sum1 tmp4, tmp9, tmp0, tmp4
1823 // tmp6 [m12][m13] : STORE
1824 psq_st tmp6, 24(m), 0, 0
1825 // tmp5 = [t*nz*nz+c][0] == [m22][m23]
1826 ps_sum0 tmp5, tmp5, fc0, cT
1827 // tmp4 [m20][m21] : STORE
1828 psq_st tmp4, 32(m), 0, 0
1829 // tmp5 [m22][m23] : STORE
1830 psq_st tmp5, 40(m), 0, 0
1831 }
1832 }
1833
PSMTXRotAxisRad(Mtx m,const Vec * axis,f32 rad)1834 void PSMTXRotAxisRad(
1835 Mtx m,
1836 const Vec *axis,
1837 f32 rad )
1838 {
1839 f32 sinT, cosT;
1840
1841 sinT = sinf(rad);
1842 cosT = cosf(rad);
1843
1844 __PSMTXRotAxisRadInternal(m, axis, sinT, cosT);
1845 }
1846
1847 #endif // GEKKO
1848
1849 /*---------------------------------------------------------------------*
1850
1851 Name: MTXTrans
1852
1853 Description: sets a translation matrix.
1854
1855
1856 Arguments: m matrix to be set
1857
1858 xT x component of translation.
1859
1860 yT y component of translation.
1861
1862 zT z component of translation.
1863
1864 Return : none
1865
1866 *---------------------------------------------------------------------*/
1867 /*---------------------------------------------------------------------*
1868 C version
1869 *---------------------------------------------------------------------*/
C_MTXTrans(Mtx m,f32 xT,f32 yT,f32 zT)1870 void C_MTXTrans ( Mtx m, f32 xT, f32 yT, f32 zT )
1871 {
1872
1873 ASSERTMSG( (m != 0), MTX_TRANS_1 );
1874
1875
1876 m[0][0] = 1.0f; m[0][1] = 0.0f; m[0][2] = 0.0f; m[0][3] = xT;
1877 m[1][0] = 0.0f; m[1][1] = 1.0f; m[1][2] = 0.0f; m[1][3] = yT;
1878 m[2][0] = 0.0f; m[2][1] = 0.0f; m[2][2] = 1.0f; m[2][3] = zT;
1879
1880 }
1881
1882 /*---------------------------------------------------------------------*
1883 Paired-Single assembler version
1884 *---------------------------------------------------------------------*
1885 Note that this performs NO error checking.
1886 *---------------------------------------------------------------------*/
1887 #ifdef GEKKO
PSMTXTrans(register Mtx m,register f32 xT,register f32 yT,register f32 zT)1888 void PSMTXTrans(
1889 register Mtx m,
1890 register f32 xT,
1891 register f32 yT,
1892 register f32 zT
1893 )
1894 {
1895 register f32 c0 = 0.0F;
1896 register f32 c1 = 1.0F;
1897
1898 asm
1899 {
1900 stfs xT, 12(m)
1901 stfs yT, 28(m)
1902 psq_st c0, 4(m), 0, 0
1903 psq_st c0, 32(m), 0, 0
1904 stfs c0, 16(m)
1905 stfs c1, 20(m)
1906 stfs c0, 24(m)
1907 stfs c1, 40(m)
1908 stfs zT, 44(m)
1909 stfs c1, 0(m)
1910 }
1911 }
1912 #endif // GEKKO
1913
1914 /*---------------------------------------------------------------------*
1915
1916 Name: MTXTransApply
1917
1918 Description: This function performs the operation equivalent to
1919 MTXTrans + MTXConcat.
1920
1921
1922 Arguments: src matrix to be operated.
1923
1924 dst resultant matrix from concat.
1925
1926 xT x component of translation.
1927
1928 yT y component of translation.
1929
1930 zT z component of translation.
1931
1932 Return : none
1933
1934 *---------------------------------------------------------------------*/
1935 /*---------------------------------------------------------------------*
1936 C version
1937 *---------------------------------------------------------------------*/
C_MTXTransApply(const Mtx src,Mtx dst,f32 xT,f32 yT,f32 zT)1938 void C_MTXTransApply ( const Mtx src, Mtx dst, f32 xT, f32 yT, f32 zT )
1939 {
1940 ASSERTMSG( (src != 0), MTX_TRANSAPPLY_1 );
1941 ASSERTMSG( (dst != 0), MTX_TRANSAPPLY_1 );
1942
1943 if ( src != dst )
1944 {
1945 dst[0][0] = src[0][0]; dst[0][1] = src[0][1]; dst[0][2] = src[0][2];
1946 dst[1][0] = src[1][0]; dst[1][1] = src[1][1]; dst[1][2] = src[1][2];
1947 dst[2][0] = src[2][0]; dst[2][1] = src[2][1]; dst[2][2] = src[2][2];
1948 }
1949
1950 dst[0][3] = src[0][3] + xT;
1951 dst[1][3] = src[1][3] + yT;
1952 dst[2][3] = src[2][3] + zT;
1953 }
1954
1955 /*---------------------------------------------------------------------*
1956 Paired-Single assembler version
1957 *---------------------------------------------------------------------*
1958 Note that this performs NO error checking.
1959 *---------------------------------------------------------------------*/
1960 #ifdef GEKKO
PSMTXTransApply(const register Mtx src,register Mtx dst,register f32 xT,register f32 yT,register f32 zT)1961 asm void PSMTXTransApply(
1962 const register Mtx src,
1963 register Mtx dst,
1964 register f32 xT,
1965 register f32 yT,
1966 register f32 zT )
1967 {
1968 nofralloc;
1969 psq_l fp4, 0(src), 0, 0;
1970 frsp xT, xT; // to make sure xT = single precision
1971 psq_l fp5, 8(src), 0, 0;
1972 frsp yT, yT; // to make sure yT = single precision
1973 psq_l fp7, 24(src), 0, 0;
1974 frsp zT, zT; // to make sure zT = single precision
1975 psq_l fp8, 40(src), 0, 0;
1976 psq_st fp4, 0(dst), 0, 0;
1977 ps_sum1 fp5, xT, fp5, fp5;
1978 psq_l fp6, 16(src), 0, 0;
1979 psq_st fp5, 8(dst), 0, 0;
1980 ps_sum1 fp7, yT, fp7, fp7;
1981 psq_l fp9, 32(src), 0, 0;
1982 psq_st fp6, 16(dst), 0, 0;
1983 ps_sum1 fp8, zT, fp8, fp8;
1984 psq_st fp7, 24(dst), 0, 0;
1985 psq_st fp9, 32(dst), 0, 0;
1986 psq_st fp8, 40(dst), 0, 0;
1987 blr;
1988 }
1989 #endif // GEKKO
1990
1991
1992 /*---------------------------------------------------------------------*
1993
1994 Name: MTXScale
1995
1996 Description: sets a scaling matrix.
1997
1998
1999 Arguments: m matrix to be set
2000
2001 xS x scale factor.
2002
2003 yS y scale factor.
2004
2005 zS z scale factor.
2006
2007 Return : none
2008
2009 *---------------------------------------------------------------------*/
2010 /*---------------------------------------------------------------------*
2011 C version
2012 *---------------------------------------------------------------------*/
C_MTXScale(Mtx m,f32 xS,f32 yS,f32 zS)2013 void C_MTXScale ( Mtx m, f32 xS, f32 yS, f32 zS )
2014 {
2015 ASSERTMSG( (m != 0), MTX_SCALE_1 );
2016
2017
2018 m[0][0] = xS; m[0][1] = 0.0f; m[0][2] = 0.0f; m[0][3] = 0.0f;
2019 m[1][0] = 0.0f; m[1][1] = yS; m[1][2] = 0.0f; m[1][3] = 0.0f;
2020 m[2][0] = 0.0f; m[2][1] = 0.0f; m[2][2] = zS; m[2][3] = 0.0f;
2021 }
2022
2023 /*---------------------------------------------------------------------*
2024 Paired-Single assembler version
2025 *---------------------------------------------------------------------*
2026 Note that this performs NO error checking.
2027 *---------------------------------------------------------------------*/
2028 #ifdef GEKKO
PSMTXScale(register Mtx m,register f32 xS,register f32 yS,register f32 zS)2029 void PSMTXScale(
2030 register Mtx m,
2031 register f32 xS,
2032 register f32 yS,
2033 register f32 zS
2034 )
2035 {
2036 register f32 c0 = 0.0F;
2037
2038 asm
2039 {
2040 stfs xS, 0(m)
2041 psq_st c0, 4(m), 0, 0
2042 psq_st c0, 12(m), 0, 0
2043 stfs yS, 20(m)
2044 psq_st c0, 24(m), 0, 0
2045 psq_st c0, 32(m), 0, 0
2046 stfs zS, 40(m)
2047 stfs c0, 44(m)
2048 }
2049 }
2050 #endif // GEKKO
2051
2052 /*---------------------------------------------------------------------*
2053
2054 Name: MTXScaleApply
2055
2056 Description: This function performs the operation equivalent to
2057 MTXScale + MTXConcat
2058
2059 Arguments: src matrix to be operated.
2060
2061 dst resultant matrix from concat.
2062
2063 xS x scale factor.
2064
2065 yS y scale factor.
2066
2067 zS z scale factor.
2068
2069 Return : none
2070
2071 *---------------------------------------------------------------------*/
2072 /*---------------------------------------------------------------------*
2073 C version
2074 *---------------------------------------------------------------------*/
C_MTXScaleApply(const Mtx src,Mtx dst,f32 xS,f32 yS,f32 zS)2075 void C_MTXScaleApply ( const Mtx src, Mtx dst, f32 xS, f32 yS, f32 zS )
2076 {
2077 ASSERTMSG( (src != 0), MTX_SCALEAPPLY_1 );
2078 ASSERTMSG( (dst != 0), MTX_SCALEAPPLY_2 );
2079
2080 dst[0][0] = src[0][0] * xS; dst[0][1] = src[0][1] * xS;
2081 dst[0][2] = src[0][2] * xS; dst[0][3] = src[0][3] * xS;
2082
2083 dst[1][0] = src[1][0] * yS; dst[1][1] = src[1][1] * yS;
2084 dst[1][2] = src[1][2] * yS; dst[1][3] = src[1][3] * yS;
2085
2086 dst[2][0] = src[2][0] * zS; dst[2][1] = src[2][1] * zS;
2087 dst[2][2] = src[2][2] * zS; dst[2][3] = src[2][3] * zS;
2088 }
2089
2090 /*---------------------------------------------------------------------*
2091 Paired-Single assembler version
2092 *---------------------------------------------------------------------*
2093 Note that this performs NO error checking.
2094 *---------------------------------------------------------------------*/
2095 #ifdef GEKKO
PSMTXScaleApply(const register Mtx src,register Mtx dst,register f32 xS,register f32 yS,register f32 zS)2096 asm void PSMTXScaleApply (
2097 const register Mtx src,
2098 register Mtx dst,
2099 register f32 xS,
2100 register f32 yS,
2101 register f32 zS )
2102 {
2103 nofralloc;
2104 frsp xS, xS; // to make sure xS = single precision
2105 psq_l fp4, 0(src), 0, 0;
2106 frsp yS, yS; // to make sure yS = single precision
2107 psq_l fp5, 8(src), 0, 0;
2108 frsp zS, zS; // to make sure zS = single precision
2109 ps_muls0 fp4, fp4, xS;
2110 psq_l fp6, 16(src), 0, 0;
2111 ps_muls0 fp5, fp5, xS;
2112 psq_l fp7, 24(src), 0, 0;
2113 ps_muls0 fp6, fp6, yS;
2114 psq_l fp8, 32(src), 0, 0;
2115 psq_st fp4, 0(dst), 0, 0;
2116 ps_muls0 fp7, fp7, yS;
2117 psq_l fp2, 40(src), 0, 0;
2118 psq_st fp5, 8(dst), 0, 0;
2119 ps_muls0 fp8, fp8, zS;
2120 psq_st fp6, 16(dst), 0, 0;
2121 ps_muls0 fp2, fp2, zS;
2122 psq_st fp7, 24(dst), 0, 0;
2123 psq_st fp8, 32(dst), 0, 0;
2124 psq_st fp2, 40(dst), 0, 0;
2125 blr;
2126 }
2127 #endif // GEKKO
2128
2129 /*---------------------------------------------------------------------*
2130
2131 Name: MTXQuat
2132
2133 Description: sets a rotation matrix from a quaternion.
2134
2135
2136 Arguments: m matrix to be set
2137
2138 q ptr to quaternion.
2139
2140 Return : none
2141
2142 *---------------------------------------------------------------------*/
2143 /*---------------------------------------------------------------------*
2144 C version
2145 *---------------------------------------------------------------------*/
C_MTXQuat(Mtx m,const Quaternion * q)2146 void C_MTXQuat ( Mtx m, const Quaternion *q )
2147 {
2148
2149 f32 s,xs,ys,zs,wx,wy,wz,xx,xy,xz,yy,yz,zz;
2150
2151
2152 ASSERTMSG( (m != 0), MTX_QUAT_1 );
2153 ASSERTMSG( (q != 0), MTX_QUAT_2 );
2154 ASSERTMSG( ( q->x || q->y || q->z || q->w ), MTX_QUAT_3 );
2155
2156
2157 s = 2.0f / ( (q->x * q->x) + (q->y * q->y) + (q->z * q->z) + (q->w * q->w) );
2158
2159 xs = q->x * s; ys = q->y * s; zs = q->z * s;
2160 wx = q->w * xs; wy = q->w * ys; wz = q->w * zs;
2161 xx = q->x * xs; xy = q->x * ys; xz = q->x * zs;
2162 yy = q->y * ys; yz = q->y * zs; zz = q->z * zs;
2163
2164
2165 m[0][0] = 1.0f - (yy + zz);
2166 m[0][1] = xy - wz;
2167 m[0][2] = xz + wy;
2168 m[0][3] = 0.0f;
2169
2170 m[1][0] = xy + wz;
2171 m[1][1] = 1.0f - (xx + zz);
2172 m[1][2] = yz - wx;
2173 m[1][3] = 0.0f;
2174
2175 m[2][0] = xz - wy;
2176 m[2][1] = yz + wx;
2177 m[2][2] = 1.0f - (xx + yy);
2178 m[2][3] = 0.0f;
2179
2180 }
2181
2182 /*---------------------------------------------------------------------*
2183 Paired-Single assembler version
2184 *---------------------------------------------------------------------*
2185 Note that this performs NO error checking.
2186 *---------------------------------------------------------------------*/
2187 #ifdef GEKKO
PSMTXQuat(register Mtx m,const register Quaternion * q)2188 void PSMTXQuat ( register Mtx m, const register Quaternion *q )
2189 {
2190 register f32 c_zero, c_one, c_two, scale;
2191 register f32 tmp0, tmp1, tmp2, tmp3, tmp4;
2192 register f32 tmp5, tmp6, tmp7, tmp8, tmp9;
2193
2194 c_one = 1.0F;
2195
2196 asm
2197 {
2198 // tmp0 = [qx][qy] : LOAD
2199 psq_l tmp0, 0(q), 0, 0
2200 // tmp1 = [qz][qw] : LOAD
2201 psq_l tmp1, 8(q), 0, 0
2202 // c_zero = [0.0F][0.0F]
2203 fsubs c_zero, c_one, c_one
2204 // c_two = [2.0F][2.0F]
2205 fadds c_two, c_one, c_one
2206 // tmp2 = [qx*qx][qy*qy]
2207 ps_mul tmp2, tmp0, tmp0
2208 // tmp5 = [qy][qx]
2209 ps_merge10 tmp5, tmp0, tmp0
2210 // tmp4 = [qx*qx+qz*qz][qy*qy+qw*qw]
2211 ps_madd tmp4, tmp1, tmp1, tmp2
2212 // tmp3 = [qz*qz][qw*qw]
2213 ps_mul tmp3, tmp1, tmp1
2214 // scale = [qx*qx+qy*qy+qz*qz+qw*qw][?]
2215 ps_sum0 scale, tmp4, tmp4, tmp4
2216 // tmp7 = [qy*qw][qx*qw]
2217 ps_muls1 tmp7, tmp5, tmp1
2218 // Newton-Rapson refinement (1/X) : E' = 2E-X*E*E
2219 // tmp9 = [E = Est.(1/X)]
2220 fres tmp9, scale
2221 // tmp4 = [qx*qx+qz*qz][qy*qy+qz*qz]
2222 ps_sum1 tmp4, tmp3, tmp4, tmp2
2223 // scale = [2-X*E]
2224 ps_nmsub scale, scale, tmp9, c_two
2225 // tmp6 = [qz*qw][?]
2226 ps_muls1 tmp6, tmp1, tmp1
2227 // scale = [E(2-scale*E) = E']
2228 ps_mul scale, tmp9, scale
2229 // tmp2 = [qx*qx+qy*qy]
2230 ps_sum0 tmp2, tmp2, tmp2, tmp2
2231 // scale = [s = 2E' = 2.0F/(qx*qx+qy*qy+qz*qz+qw*qw)]
2232 fmuls scale, scale, c_two
2233 // tmp8 = [qx*qy+qz*qw][?]
2234 ps_madd tmp8, tmp0, tmp5, tmp6
2235 // tmp6 = [qx*qy-qz*qw][?]
2236 ps_msub tmp6, tmp0, tmp5, tmp6
2237 // c_zero [m03] : STORE
2238 psq_st c_zero, 12(m), 1, 0
2239 // tmp2 = [1-s(qx*qx+qy*qy)] : [m22]
2240 ps_nmsub tmp2, tmp2, scale, c_one
2241 // tmp4 = [1-s(qx*qx+qz*qz)][1-s(qy*qy+qz*qz)] : [m11][m00]
2242 ps_nmsub tmp4, tmp4, scale, c_one
2243 // c_zero [m23] : STORE
2244 psq_st c_zero, 44(m), 1, 0
2245 // tmp8 = [s(qx*qy+qz*qw)][?] : [m10]
2246 ps_mul tmp8, tmp8, scale
2247 // tmp6 = [s(qx*qy-qz*qw)][?] : [m01]
2248 ps_mul tmp6, tmp6, scale
2249 // tmp2 [m22] : STORE
2250 psq_st tmp2, 40(m), 1, 0
2251 // tmp5 = [qx*qz+qy*qw][qy*qz+qx*qw]
2252 ps_madds0 tmp5, tmp0, tmp1, tmp7
2253 // tmp1 = [m10][m11]
2254 ps_merge00 tmp1, tmp8, tmp4
2255 // tmp7 = [qx*qz-qy*qw][qy*qz-qx*qw]
2256 ps_nmsub tmp7, tmp7, c_two, tmp5
2257 // tmp0 = [m00][m01]
2258 ps_merge10 tmp0, tmp4, tmp6
2259 // tmp1 [m10][m11] : STORE
2260 psq_st tmp1, 16(m), 0, 0
2261 // tmp5 = [s(qx*qz+qy*qw)][s(qy*qz+qx*qw)] : [m02][m21]
2262 ps_mul tmp5, tmp5, scale
2263 // tmp7 = [s(qx*qz-qy*qw)][s(qy*qz-qx*qw)] : [m20][m12]
2264 ps_mul tmp7, tmp7, scale
2265 // tmp0 [m00][m01] : STORE
2266 psq_st tmp0, 0(m), 0, 0
2267 // tmp5 [m02] : STORE
2268 psq_st tmp5, 8(m), 1, 0
2269 // tmp3 = [m12][m13]
2270 ps_merge10 tmp3, tmp7, c_zero
2271 // tmp9 = [m20][m21]
2272 ps_merge01 tmp9, tmp7, tmp5
2273 // tmp3 [m12][m13] : STORE
2274 psq_st tmp3, 24(m), 0, 0
2275 // tmp9 [m20][m21] : STORE
2276 psq_st tmp9, 32(m), 0, 0
2277
2278 }
2279 }
2280 #endif // GEKKO
2281
2282 /*---------------------------------------------------------------------*
2283
2284 Name: MTXReflect
2285
2286 Description: reflect a rotation matrix with respect to a plane.
2287
2288
2289 Arguments: m matrix to be set
2290
2291 p point on the planar reflector.
2292
2293 n normal of the planar reflector.
2294
2295 Return : none
2296
2297 *---------------------------------------------------------------------*/
2298 /*---------------------------------------------------------------------*
2299 C version
2300 *---------------------------------------------------------------------*/
C_MTXReflect(Mtx m,const Vec * p,const Vec * n)2301 void C_MTXReflect ( Mtx m, const Vec *p, const Vec *n )
2302 {
2303 f32 vxy, vxz, vyz, pdotn;
2304
2305 vxy = -2.0f * n->x * n->y;
2306 vxz = -2.0f * n->x * n->z;
2307 vyz = -2.0f * n->y * n->z;
2308 pdotn = 2.0f * C_VECDotProduct(p, n);
2309
2310 m[0][0] = 1.0f - 2.0f * n->x * n->x;
2311 m[0][1] = vxy;
2312 m[0][2] = vxz;
2313 m[0][3] = pdotn * n->x;
2314
2315 m[1][0] = vxy;
2316 m[1][1] = 1.0f - 2.0f * n->y * n->y;
2317 m[1][2] = vyz;
2318 m[1][3] = pdotn * n->y;
2319
2320 m[2][0] = vxz;
2321 m[2][1] = vyz;
2322 m[2][2] = 1.0f - 2.0f * n->z * n->z;
2323 m[2][3] = pdotn * n->z;
2324 }
2325
2326 /*---------------------------------------------------------------------*
2327 Paired-Single assembler version
2328 *---------------------------------------------------------------------*/
2329 #ifdef GEKKO
PSMTXReflect(register Mtx m,const register Vec * p,const register Vec * n)2330 void PSMTXReflect( register Mtx m, const register Vec *p, const register Vec *n )
2331 {
2332 register f32 c_one = 1.0F;
2333 register f32 vn_xy, vn_z1, n2vn_xy, n2vn_z1, pdotn;
2334 register f32 tmp0, tmp1, tmp2, tmp3;
2335 register f32 tmp4, tmp5, tmp6, tmp7;
2336
2337 asm
2338 {
2339 // vn_z1 = [nz][1.0F] : LOAD
2340 psq_l vn_z1, 8(n), 1, 0
2341 // vn_xy = [nx][ny] : LOAD
2342 psq_l vn_xy, 0(n), 0, 0
2343
2344 // tmp0 = [px][py] : LOAD
2345 psq_l tmp0, 0(p), 0, 0
2346 // n2vn_z1 = [-2nz][-2.0F]
2347 ps_nmadd n2vn_z1, vn_z1, c_one, vn_z1
2348 // tmp1 = [pz][1.0F] : LOAD
2349 psq_l tmp1, 8(p), 1, 0
2350 // n2vn_xy = [-2nx][-2ny]
2351 ps_nmadd n2vn_xy, vn_xy, c_one, vn_xy
2352
2353 // tmp4 = [-2nx*nz][-2ny*nz] : [m20][m21]
2354 ps_muls0 tmp4, vn_xy, n2vn_z1
2355 // pdotn = [-2(px*nx)][-2(py*ny)]
2356 ps_mul pdotn, n2vn_xy, tmp0
2357 // tmp2 = [-2nx*nx][-2nx*ny]
2358 ps_muls0 tmp2, vn_xy, n2vn_xy
2359 // pdotn = [-2(px*nx+py*ny)][?]
2360 ps_sum0 pdotn, pdotn, pdotn, pdotn
2361 // tmp3 = [-2nx*ny][-2ny*ny]
2362 ps_muls1 tmp3, vn_xy, n2vn_xy
2363 // tmp4 = [m20][m21] : STORE
2364 psq_st tmp4, 32(m), 0, 0
2365 // tmp2 = [1-2nx*nx][-2nx*ny] : [m00][m01]
2366 ps_sum0 tmp2, tmp2, tmp2, c_one
2367 // pdotn = [2(px*nx+py*ny+pz*nz)][?]
2368 ps_nmadd pdotn, n2vn_z1, tmp1, pdotn
2369 // tmp3 = [-2nx*ny][1-2ny*ny] : [m10][m11]
2370 ps_sum1 tmp3, c_one, tmp3, tmp3
2371 // tmp2 = [m00][m01] : STORE
2372 psq_st tmp2, 0(m), 0, 0
2373 // tmp5 = [pdotn*nx][pdotn*ny]
2374 ps_muls0 tmp5, vn_xy, pdotn
2375 // tmp6 = [-2nz][pdotn]
2376 ps_merge00 tmp6, n2vn_z1, pdotn
2377 // tmp3 = [m10][m11] : STORE
2378 psq_st tmp3, 16(m), 0, 0
2379
2380 // tmp7 = [-2nx*nz][pdotn*nx] : [m02][m03]
2381 ps_merge00 tmp7, tmp4, tmp5
2382 // tmp6 = [-2nz*nz][pdotn*nz]
2383 ps_muls0 tmp6, tmp6, vn_z1
2384 // tmp5 = [-2ny*nz][pdotn*ny] : [m12][m13]
2385 ps_merge11 tmp5, tmp4, tmp5
2386 // tmp7 = [m02][m03] : STORE
2387 psq_st tmp7, 8(m), 0, 0
2388 // tmp6 = [1-2nz*nz][pdotn*nz] : [m22][m23]
2389 ps_sum0 tmp6, tmp6, tmp6, c_one
2390 // tmp5 = [m12][m13] : STORE
2391 psq_st tmp5, 24(m), 0, 0
2392 // tmp6 = [m22][m23] : STORE
2393 psq_st tmp6, 40(m), 0, 0
2394 }
2395 }
2396 #endif // GEKKO
2397
2398 /*---------------------------------------------------------------------*
2399
2400
2401
2402
2403
2404 VIEW SECTION
2405
2406
2407
2408
2409
2410 *---------------------------------------------------------------------*/
2411
2412
2413
2414
2415
2416
2417 /*---------------------------------------------------------------------*
2418
2419 Name: MTXLookAt
2420
2421 Description: compute a matrix to transform points to camera coordinates.
2422
2423
2424 Arguments: m matrix to be set
2425
2426 camPos camera position.
2427
2428 camUp camera 'up' direction.
2429
2430 target camera aim point.
2431
2432
2433 Return : none
2434
2435 *---------------------------------------------------------------------*/
2436 /*---------------------------------------------------------------------*
2437 C version
2438 *---------------------------------------------------------------------*/
C_MTXLookAt(Mtx m,const Point3d * camPos,const Vec * camUp,const Point3d * target)2439 void C_MTXLookAt ( Mtx m, const Point3d *camPos, const Vec *camUp, const Point3d *target )
2440 {
2441
2442 Vec vLook,vRight,vUp;
2443
2444
2445 ASSERTMSG( (m != 0), MTX_LOOKAT_1 );
2446 ASSERTMSG( (camPos != 0), MTX_LOOKAT_2 );
2447 ASSERTMSG( (camUp != 0), MTX_LOOKAT_3 );
2448 ASSERTMSG( (target != 0), MTX_LOOKAT_4 );
2449
2450
2451 // compute unit target vector
2452 // use negative value to look down (-Z) axis
2453 vLook.x = camPos->x - target->x;
2454 vLook.y = camPos->y - target->y;
2455 vLook.z = camPos->z - target->z;
2456 VECNormalize( &vLook,&vLook );
2457
2458
2459 // vRight = camUp x vLook
2460 VECCrossProduct ( camUp, &vLook, &vRight );
2461 VECNormalize( &vRight,&vRight );
2462
2463
2464 // vUp = vLook x vRight
2465 VECCrossProduct( &vLook, &vRight, &vUp );
2466 // Don't need to normalize vUp since it should already be unit length
2467 // VECNormalize( &vUp, &vUp );
2468
2469
2470 m[0][0] = vRight.x;
2471 m[0][1] = vRight.y;
2472 m[0][2] = vRight.z;
2473 m[0][3] = -( camPos->x * vRight.x + camPos->y * vRight.y + camPos->z * vRight.z );
2474
2475 m[1][0] = vUp.x;
2476 m[1][1] = vUp.y;
2477 m[1][2] = vUp.z;
2478 m[1][3] = -( camPos->x * vUp.x + camPos->y * vUp.y + camPos->z * vUp.z );
2479
2480 m[2][0] = vLook.x;
2481 m[2][1] = vLook.y;
2482 m[2][2] = vLook.z;
2483 m[2][3] = -( camPos->x * vLook.x + camPos->y * vLook.y + camPos->z * vLook.z );
2484
2485
2486 }
2487
2488 /*---------------------------------------------------------------------*
2489
2490
2491
2492
2493
2494 TEXTURE PROJECTION SECTION
2495
2496
2497
2498
2499
2500 *---------------------------------------------------------------------*/
2501
2502
2503 /*---------------------------------------------------------------------*
2504
2505 Name: MTXLightFrustum
2506
2507 Description: Compute a 3x4 projection matrix for texture projection
2508
2509
2510 Arguments: m 3x4 matrix to be set
2511
2512 t top coord. of view volume at the near clipping plane
2513
2514 b bottom coord of view volume at the near clipping plane
2515
2516 l left coord. of view volume at near clipping plane
2517
2518 r right coord. of view volume at near clipping plane
2519
2520 n positive distance from camera to near clipping plane
2521
2522 scaleS scale in the S direction for projected coordinates
2523 (usually 0.5)
2524
2525 scaleT scale in the T direction for projected coordinates
2526 (usually 0.5)
2527
2528 transS translate in the S direction for projected coordinates
2529 (usually 0.5)
2530
2531 transT translate in the T direction for projected coordinates
2532 (usually 0.5)
2533
2534
2535 Return : none.
2536
2537 *---------------------------------------------------------------------*/
2538 /*---------------------------------------------------------------------*
2539 C version
2540 *---------------------------------------------------------------------*/
C_MTXLightFrustum(Mtx m,float t,float b,float l,float r,float n,float scaleS,float scaleT,float transS,float transT)2541 void C_MTXLightFrustum ( Mtx m, float t, float b, float l, float r, float n,
2542 float scaleS, float scaleT, float transS,
2543 float transT )
2544 {
2545 f32 tmp;
2546
2547
2548 ASSERTMSG( (m != 0), MTX_LIGHT_FRUSTUM_1 );
2549 ASSERTMSG( (t != b), MTX_LIGHT_FRUSTUM_2 );
2550 ASSERTMSG( (l != r), MTX_LIGHT_FRUSTUM_3 );
2551
2552
2553 // NOTE: Be careful about "l" vs. "1" below!!!
2554
2555 tmp = 1.0f / (r - l);
2556 m[0][0] = ((2*n) * tmp) * scaleS;
2557 m[0][1] = 0.0f;
2558 m[0][2] = (((r + l) * tmp) * scaleS) - transS;
2559 m[0][3] = 0.0f;
2560
2561 tmp = 1.0f / (t - b);
2562 m[1][0] = 0.0f;
2563 m[1][1] = ((2*n) * tmp) * scaleT;
2564 m[1][2] = (((t + b) * tmp) * scaleT) - transT;
2565 m[1][3] = 0.0f;
2566
2567 m[2][0] = 0.0f;
2568 m[2][1] = 0.0f;
2569 m[2][2] = -1.0f;
2570 m[2][3] = 0.0f;
2571 }
2572
2573 /*---------------------------------------------------------------------*
2574
2575 Name: MTXLightPerspective
2576
2577 Description: compute a 3x4 perspective projection matrix from
2578 field of view and aspect ratio for texture projection.
2579
2580
2581 Arguments: m 3x4 matrix to be set
2582
2583 fovy total field of view in in degrees in the YZ plane
2584
2585 aspect ratio of view window width:height (X / Y)
2586
2587 scaleS scale in the S direction for projected coordinates
2588 (usually 0.5)
2589
2590 scaleT scale in the T direction for projected coordinates
2591 (usually 0.5)
2592
2593 transS translate in the S direction for projected coordinates
2594 (usually 0.5)
2595
2596 transT translate in the T direction for projected coordinates
2597 (usually 0.5)
2598
2599
2600 Return : none
2601
2602 *---------------------------------------------------------------------*/
2603 /*---------------------------------------------------------------------*
2604 C version
2605 *---------------------------------------------------------------------*/
C_MTXLightPerspective(Mtx m,f32 fovY,f32 aspect,float scaleS,float scaleT,float transS,float transT)2606 void C_MTXLightPerspective ( Mtx m, f32 fovY, f32 aspect, float scaleS,
2607 float scaleT, float transS, float transT )
2608 {
2609 f32 angle;
2610 f32 cot;
2611
2612 ASSERTMSG( (m != 0), MTX_LIGHT_PERSPECTIVE_1 );
2613 ASSERTMSG( ( (fovY > 0.0) && ( fovY < 180.0) ), MTX_LIGHT_PERSPECTIVE_2 );
2614 ASSERTMSG( (aspect != 0), MTX_LIGHT_PERSPECTIVE_3 );
2615
2616 // find the cotangent of half the (YZ) field of view
2617 angle = fovY * 0.5f;
2618 angle = MTXDegToRad( angle );
2619
2620 cot = 1.0f / tanf(angle);
2621
2622 m[0][0] = (cot / aspect) * scaleS;
2623 m[0][1] = 0.0f;
2624 m[0][2] = -transS;
2625 m[0][3] = 0.0f;
2626
2627 m[1][0] = 0.0f;
2628 m[1][1] = cot * scaleT;
2629 m[1][2] = -transT;
2630 m[1][3] = 0.0f;
2631
2632 m[2][0] = 0.0f;
2633 m[2][1] = 0.0f;
2634 m[2][2] = -1.0f;
2635 m[2][3] = 0.0f;
2636 }
2637
2638 /*---------------------------------------------------------------------*
2639
2640 Name: MTXLightOrtho
2641
2642 Description: compute a 3x4 orthographic projection matrix.
2643
2644
2645 Arguments: m matrix to be set
2646
2647 t top coord. of parallel view volume
2648
2649 b bottom coord of parallel view volume
2650
2651 l left coord. of parallel view volume
2652
2653 r right coord. of parallel view volume
2654
2655 scaleS scale in the S direction for projected coordinates
2656 (usually 0.5)
2657
2658 scaleT scale in the T direction for projected coordinates
2659 (usually 0.5)
2660
2661 transS translate in the S direction for projected coordinates
2662 (usually 0.5)
2663
2664 transT translate in the T direction for projected coordinates
2665 (usually 0.5)
2666
2667
2668 Return : none
2669
2670 *---------------------------------------------------------------------*/
2671 /*---------------------------------------------------------------------*
2672 C version
2673 *---------------------------------------------------------------------*/
C_MTXLightOrtho(Mtx m,f32 t,f32 b,f32 l,f32 r,float scaleS,float scaleT,float transS,float transT)2674 void C_MTXLightOrtho ( Mtx m, f32 t, f32 b, f32 l, f32 r, float scaleS,
2675 float scaleT, float transS, float transT )
2676 {
2677 f32 tmp;
2678
2679
2680 ASSERTMSG( (m != 0), MTX_LIGHT_ORTHO_1 );
2681 ASSERTMSG( (t != b), MTX_LIGHT_ORTHO_2 );
2682 ASSERTMSG( (l != r), MTX_LIGHT_ORTHO_3 );
2683
2684
2685 // NOTE: Be careful about "l" vs. "1" below!!!
2686
2687 tmp = 1.0f / (r - l);
2688 m[0][0] = (2.0f * tmp * scaleS);
2689 m[0][1] = 0.0f;
2690 m[0][2] = 0.0f;
2691 m[0][3] = ((-(r + l) * tmp) * scaleS) + transS;
2692
2693 tmp = 1.0f / (t - b);
2694 m[1][0] = 0.0f;
2695 m[1][1] = (2.0f * tmp) * scaleT;
2696 m[1][2] = 0.0f;
2697 m[1][3] = ((-(t + b) * tmp)* scaleT) + transT;
2698
2699 m[2][0] = 0.0f;
2700 m[2][1] = 0.0f;
2701 m[2][2] = 0.0f;
2702 m[2][3] = 1.0f;
2703 }
2704
2705 /*===========================================================================*/
2706
2707 #if ( __MWERKS__ == 0x00004100 )
2708 #pragma defer_codegen reset
2709 #endif
2710
2711