1 /*---------------------------------------------------------------------------*
2 Project: matrix vector Library
3 File: mtx.c
4
5 Copyright 1998 - 2006 Nintendo. All rights reserved.
6
7 These coded instructions, statements, and computer programs contain
8 proprietary information of Nintendo of America Inc. and/or Nintendo
9 Company Ltd., and are protected by Federal copyright law. They may
10 not be disclosed to third parties or copied or duplicated in any form,
11 in whole or in part, without the prior written consent of Nintendo.
12
13
14 $Log: mtx.c,v $
15 Revision 1.6 2008/05/23 08:21:52 nakano_yoshinobu
16 Improved the performance of PSMTXInverse and PSInvXpose
17
18 Revision 1.5 2008/05/23 01:50:36 nakano_yoshinobu
19 Fixed overflow in PSMTXInverse.
20 Changed ( E' = 2E - K*E*E ) to ( E' = ( 2 - K * E ) * E ).
21
22 Revision 1.4 2007/01/11 00:45:26 aka
23 Removed win32.h.
24
25 Revision 1.3 2006/02/20 04:25:42 mitu
26 changed include path from dolphin/ to revolution/.
27
28 Revision 1.2 2005/12/30 06:01:51 hirose
29 Temporary workaround for CW3.0 Alpha3 problem.
30
31
32 30 02/06/26 13:49 Hirose
33 Fixed a register allocation problem with DEBUG build.
34
35 29 02/06/20 11:21 Hirose
36 Added MTXConcatArray.
37
38 28 02/04/11 13:10 Hirose
39 const type specifier support. (worked by Hiratsu@IRD)
40
41 27 02/02/18 9:27 Hirose
42 Workaround for floating-point precision mismatch issue.
43
44 26 9/13/01 4:28p Hirose
45 Removed unnecessary possibilities of zero division exception.
46
47 25 8/28/01 3:48p Hirose
48 Removed one refinement process in the calculation of square root.
49
50 24 7/30/01 10:21p Hirose
51 Changes for function definitions.
52
53 23 7/23/01 8:45p Hirose
54 Now all model matrix functions and matrix-vector functions have PS
55 version.
56
57 22 7/07/01 7:37p Hirose
58 added PS versions of MTXTrans* / MTXScale* functions.
59
60 21 3/29/01 8:23p Hirose
61 fixed assertion messages
62
63 20 3/29/01 3:06p Hirose
64 added MTXInvXpose
65
66 19 3/16/01 11:43p Hirose
67 added PSMTXInverse
68
69 18 2/22/01 11:58p Hirose
70 Moved some sections into other files.
71 Added some new Paired-Single functions. Etc.
72
73 17 7/31/00 2:07p Carl
74 Removed redundant verify in MTXRotRad.
75 Minor potential optimization in VECNormalize.
76
77 16 7/21/00 4:19p Carl
78 Removed as many divides as possible.
79
80 15 7/21/00 2:58p Carl
81 Removed unnecessary VECNormalize in MTXLookAt.
82
83 14 7/12/00 4:41p John
84 Substitues MTXConcat and MTXMultVecArray with their paired-singles
85 equivalent for Gekko nondebug builds.
86
87 13 7/07/00 7:09p Dante
88 PC Compatibility
89
90 12 5/10/00 1:50p Hirose
91 added radian-based rotation matrix functions
92 moved paired-single matrix stuff into an another source file
93
94 11 4/13/00 11:26a Danm
95 Restore hgh performance version of ROMulVecArray.
96
97 10 4/10/00 11:56a Danm
98 Added 2 matrix skinning support.
99
100 9 3/27/00 6:38p Carl
101 Fixed other comments.
102
103 8 3/27/00 6:20p Carl
104 Changed comment regarding z scale in MTXFrustum.
105
106 7 3/22/00 2:01p John
107 Added VECSquareDistance and VECDistance.
108
109 6 2/14/00 2:04p Mikepc
110 detabbed code.
111
112 5 2/11/00 4:22p Tian
113 Cleaned up reordered paired single code
114
115 4 1/27/00 5:21p Tian
116 Documented paired single ops.
117
118 3 1/27/00 4:56p Tian
119 Implemented fast paired single matrix ops : PSMTXReorder, PSMTXConcat,
120 PSMTXROMultVecArray, PSMTXMultVecArray, PSMTXMultS16VecArray,
121 PSMTXROMultS16VecArray
122
123 2 12/06/99 12:40p Alligator
124 modify proj mtx to scale/translate z to (0, w) range for hardware
125
126 20 11/17/99 4:32p Ryan
127 update to fin win32 compiler warnings
128
129 19 11/12/99 12:08p Mikepc
130 changed #include<dolphin/mtx/mtxAssert.h> to #include"mtxAssert.h" to
131 reflect new location of mtxAssert.h file.
132
133 18 11/10/99 7:21p Hirose
134 added singular case check for VECHalfAngle
135
136 17 11/10/99 4:40p Alligator
137 added MTXReflect
138
139 16 10/06/99 12:28p Yasu
140 Appended MTXMultVecSR and MTXMultVecArraySR
141
142 15 8/31/99 5:41p Shiki
143 Revised to use sqrtf() instead of sqrt().
144
145 14 8/31/99 5:18p Shiki
146 Revised to use sinf()/cosf() instead of sin()/cos().
147
148 13 7/28/99 3:38p Ryan
149
150 12 7/28/99 11:30a Ryan
151 Added Texture Projection functions
152
153 11 7/02/99 12:53p Mikepc
154 changed mtx row/col
155
156 $NoKeywords: $
157 *---------------------------------------------------------------------------*/
158
159 #include <math.h>
160 #include <revolution/mtx.h>
161 #include "mtxAssert.h"
162
163 #if ( __MWERKS__ == 0x00004100 )
164 #pragma defer_codegen on // This will be removed when compiler is fixed.
165 #endif
166
167 /*---------------------------------------------------------------------*
168 Constants
169 *---------------------------------------------------------------------*/
170 static f32 Unit01[] = { 0.0f, 1.0f };
171
172
173 /*---------------------------------------------------------------------*
174
175
176
177
178 GENERAL SECTION
179
180
181
182
183 *---------------------------------------------------------------------*/
184
185
186 /*---------------------------------------------------------------------*
187
188 Name: MTXIdentity
189
190 Description: sets a matrix to identity
191
192 Arguments: m : matrix to be set
193
194 Return: none
195
196 *---------------------------------------------------------------------*/
197 /*---------------------------------------------------------------------*
198 C version
199 *---------------------------------------------------------------------*/
C_MTXIdentity(Mtx m)200 void C_MTXIdentity ( Mtx m )
201 {
202
203 ASSERTMSG( (m != 0), MTX_IDENTITY_1 );
204
205
206 m[0][0] = 1.0f; m[0][1] = 0.0f; m[0][2] = 0.0f; m[0][3] = 0.0f;
207
208 m[1][0] = 0.0f; m[1][1] = 1.0f; m[1][2] = 0.0f; m[1][3] = 0.0f;
209
210 m[2][0] = 0.0f; m[2][1] = 0.0f; m[2][2] = 1.0f; m[2][3] = 0.0f;
211
212 }
213
214 /*---------------------------------------------------------------------*
215 Paired-Single assembler version
216 *---------------------------------------------------------------------*
217 Note that this performs NO error checking.
218 Actually there is not much performance advantage.
219 *---------------------------------------------------------------------*/
220 #ifdef GEKKO
PSMTXIdentity(register Mtx m)221 void PSMTXIdentity
222 (
223 register Mtx m // r3
224 )
225 {
226 register f32 c_zero = 0.0F; // { 0.0F, 0.0F };
227 register f32 c_one = 1.0F; // { 1.0F, 1.0F };
228 register f32 c_01;
229 register f32 c_10;
230
231 asm
232 {
233 psq_st c_zero, 8(m), 0, 0 // m[0][2], m[0][3]
234 ps_merge01 c_01, c_zero, c_one // { 0.1F, 1.0F }
235 psq_st c_zero, 24(m), 0, 0 // m[1][2], m[1][3]
236 ps_merge10 c_10, c_one, c_zero // fp2 = { 1.0F, 0.0F }
237 psq_st c_zero, 32(m), 0, 0 // m[2][0], m[2][1]
238 psq_st c_01, 16(m), 0, 0 // m[1][0], m[1][1]
239 psq_st c_10, 0(m), 0, 0 // m[0][0], m[0][1]
240 psq_st c_10, 40(m), 0, 0 // m[2][2], m[2][3]
241 }
242 }
243 #endif // GEKKO
244
245 /*---------------------------------------------------------------------*
246
247 Name: MTXCopy
248
249 Description: copies the contents of one matrix into another
250
251 Arguments: src source matrix for copy
252 dst destination matrix for copy
253
254
255 Return: none
256
257 *---------------------------------------------------------------------*/
258 /*---------------------------------------------------------------------*
259 C version
260 *---------------------------------------------------------------------*/
C_MTXCopy(const Mtx src,Mtx dst)261 void C_MTXCopy ( const Mtx src, Mtx dst )
262 {
263
264 ASSERTMSG( (src != 0) , MTX_COPY_1 );
265 ASSERTMSG( (dst != 0) , MTX_COPY_2 );
266
267 if( src == dst )
268 {
269 return;
270 }
271
272
273 dst[0][0] = src[0][0]; dst[0][1] = src[0][1]; dst[0][2] = src[0][2]; dst[0][3] = src[0][3];
274
275 dst[1][0] = src[1][0]; dst[1][1] = src[1][1]; dst[1][2] = src[1][2]; dst[1][3] = src[1][3];
276
277 dst[2][0] = src[2][0]; dst[2][1] = src[2][1]; dst[2][2] = src[2][2]; dst[2][3] = src[2][3];
278
279 }
280
281 /*---------------------------------------------------------------------*
282 Paired-Single assembler version
283 *---------------------------------------------------------------------*
284 Note that this performs NO error checking.
285 *---------------------------------------------------------------------*/
286 #ifdef GEKKO
PSMTXCopy(const register Mtx src,register Mtx dst)287 asm void PSMTXCopy
288 (
289 const register Mtx src, // r3
290 register Mtx dst // r4
291 )
292 {
293 nofralloc
294
295 psq_l fp0, 0(src), 0, 0
296 psq_st fp0, 0(dst), 0, 0
297 psq_l fp1, 8(src), 0, 0
298 psq_st fp1, 8(dst), 0, 0
299 psq_l fp2, 16(src), 0, 0
300 psq_st fp2, 16(dst), 0, 0
301 psq_l fp3, 24(src), 0, 0
302 psq_st fp3, 24(dst), 0, 0
303 psq_l fp4, 32(src), 0, 0
304 psq_st fp4, 32(dst), 0, 0
305 psq_l fp5, 40(src), 0, 0
306 psq_st fp5, 40(dst), 0, 0
307
308 blr
309 }
310 #endif // GEKKO
311
312 /*---------------------------------------------------------------------*
313
314 Name: MTXConcat
315
316 Description: concatenates two matrices.
317 order of operation is A x B = AB.
318 ok for any of ab == a == b.
319
320 saves a MTXCopy operation if ab != to a or b.
321
322 Arguments: a first matrix for concat.
323 b second matrix for concat.
324 ab resultant matrix from concat.
325
326 Return: none
327
328 *---------------------------------------------------------------------*/
329
330 /*---------------------------------------------------------------------*
331 C version
332 *---------------------------------------------------------------------*/
C_MTXConcat(const Mtx a,const Mtx b,Mtx ab)333 void C_MTXConcat ( const Mtx a, const Mtx b, Mtx ab )
334 {
335 Mtx mTmp;
336 MtxPtr m;
337
338 ASSERTMSG( (a != 0), MTX_CONCAT_1 );
339 ASSERTMSG( (b != 0), MTX_CONCAT_2 );
340 ASSERTMSG( (ab != 0), MTX_CONCAT_3 );
341
342 if( (ab == a) || (ab == b) )
343 {
344 m = mTmp;
345 }
346
347 else
348 {
349 m = ab;
350 }
351
352 // compute (a x b) -> m
353
354 m[0][0] = a[0][0]*b[0][0] + a[0][1]*b[1][0] + a[0][2]*b[2][0];
355 m[0][1] = a[0][0]*b[0][1] + a[0][1]*b[1][1] + a[0][2]*b[2][1];
356 m[0][2] = a[0][0]*b[0][2] + a[0][1]*b[1][2] + a[0][2]*b[2][2];
357 m[0][3] = a[0][0]*b[0][3] + a[0][1]*b[1][3] + a[0][2]*b[2][3] + a[0][3];
358
359 m[1][0] = a[1][0]*b[0][0] + a[1][1]*b[1][0] + a[1][2]*b[2][0];
360 m[1][1] = a[1][0]*b[0][1] + a[1][1]*b[1][1] + a[1][2]*b[2][1];
361 m[1][2] = a[1][0]*b[0][2] + a[1][1]*b[1][2] + a[1][2]*b[2][2];
362 m[1][3] = a[1][0]*b[0][3] + a[1][1]*b[1][3] + a[1][2]*b[2][3] + a[1][3];
363
364 m[2][0] = a[2][0]*b[0][0] + a[2][1]*b[1][0] + a[2][2]*b[2][0];
365 m[2][1] = a[2][0]*b[0][1] + a[2][1]*b[1][1] + a[2][2]*b[2][1];
366 m[2][2] = a[2][0]*b[0][2] + a[2][1]*b[1][2] + a[2][2]*b[2][2];
367 m[2][3] = a[2][0]*b[0][3] + a[2][1]*b[1][3] + a[2][2]*b[2][3] + a[2][3];
368
369
370 // overwrite a or b if needed
371 if(m == mTmp)
372 {
373 C_MTXCopy( mTmp, ab );
374 }
375 }
376
377 /*---------------------------------------------------------------------*
378 Paired-Single assembler version
379 *---------------------------------------------------------------------*
380 Note that this performs NO error checking.
381 *---------------------------------------------------------------------*/
382 #ifdef GEKKO
PSMTXConcat(const register Mtx mA,const register Mtx mB,register Mtx mAB)383 asm void PSMTXConcat
384 (
385 const register Mtx mA, // r3
386 const register Mtx mB, // r4
387 register Mtx mAB // r5
388 )
389 {
390 nofralloc
391
392 #define A00_A01 fp0
393 #define A02_A03 fp1
394 #define A10_A11 fp2
395 #define A12_A13 fp3
396 #define A20_A21 fp4
397 #define A22_A23 fp5
398
399 #define B00_B01 fp6
400 #define B02_B03 fp7
401 #define B10_B11 fp8
402 #define B12_B13 fp9
403 #define B20_B21 fp10
404 #define B22_B23 fp11
405
406 #define D00_D01 fp12
407 #define D02_D03 fp13
408 #define D10_D11 fp14
409 #define D12_D13 fp15
410 #define D20_D21 fp2
411 #define D22_D23 fp0
412
413 #define UNIT01 fp31
414
415 // don't save LR since we don't make any function calls
416 // mflr r0
417 // stw r0, 4(r1)
418 stwu r1, -64(r1)
419 psq_l A00_A01, 0(mA), 0, 0
420 stfd fp14, 8(r1)
421 psq_l B00_B01, 0(mB), 0, 0
422 addis r6, 0, Unit01@ha
423 psq_l B02_B03, 8(mB), 0, 0
424 stfd fp15, 16(r1)
425 addi r6, r6, Unit01@l
426 stfd fp31, 40(r1)
427 psq_l B10_B11, 16(mB), 0, 0
428 // D00_D01 = b00a00 , b01a00
429 ps_muls0 D00_D01, B00_B01, A00_A01
430 psq_l A10_A11, 16(mA), 0, 0
431 // D02_D03 = b02a00 , b03a00
432 ps_muls0 D02_D03, B02_B03, A00_A01
433 psq_l UNIT01, 0(r6), 0, 0
434 // D10_D11 = a10b00 , a10b01
435 ps_muls0 D10_D11, B00_B01, A10_A11
436 psq_l B12_B13, 24(mB), 0, 0
437 // D12_D13 = a10b02 , a10b03
438 ps_muls0 D12_D13, B02_B03, A10_A11
439 psq_l A02_A03, 8(mA), 0, 0
440 // fp12 = b10a01 + b00a00 , b11a01 + b01a00
441 ps_madds1 D00_D01, B10_B11, A00_A01, D00_D01
442 psq_l A12_A13, 24(mA), 0, 0
443 // D10_D11 = a10b00 + a11b10 , a10b01 + a11b11
444 ps_madds1 D10_D11, B10_B11, A10_A11, D10_D11
445 psq_l B20_B21, 32(mB), 0, 0
446 // D02_D03 = b12a01 + b02a00 , b13a01 + b03a00
447 ps_madds1 D02_D03, B12_B13, A00_A01, D02_D03 // YYY LAST TIME FP0 IS USED
448 psq_l B22_B23, 40(mB), 0, 0
449 // D12_D13 = a10b02 + a11b12, a10b03+a11b13
450 ps_madds1 D12_D13, B12_B13, A10_A11, D12_D13 // YYY LAST TIME FP2 IS USED
451 psq_l A20_A21, 32(mA), 0, 0
452 psq_l A22_A23, 40(mA), 0, 0
453 // D00_D01 = b20a02 + b10a01 + b00a00 , b21a02 + b11a01 + b01a00
454 ps_madds0 D00_D01, B20_B21, A02_A03, D00_D01 // m00, m01 computed
455 // D02_D03 = b12a01 + b02a00 + b22a02 , b13a01 + b03a00 + b23a02
456 ps_madds0 D02_D03, B22_B23, A02_A03, D02_D03
457 // D10_D11 = a10b00 + a11b10 +a12b20, a10b01 + a11b11 + a12b21
458 ps_madds0 D10_D11, B20_B21, A12_A13, D10_D11 // m10, m11 computed
459 // D12_D13 = a10b02 + a11b12 + a12b22, a10b03+a11b13 + a12b23 + a13
460 ps_madds0 D12_D13, B22_B23, A12_A13, D12_D13
461
462
463 // store m00m01
464 psq_st D00_D01, 0(mAB), 0, 0 // YYY LAST TIME FP12 IS USED
465
466 // D20_D21 = a20b00, a20b01
467 ps_muls0 D20_D21, B00_B01, A20_A21 // YYY LAST TIME FP6 IS USED
468 // get a03 from fp1 and add to D02_D03
469 ps_madds1 D02_D03, UNIT01, A02_A03, D02_D03 // m02, m03 computed
470 // YYY LAST TIME FP1 IS USED
471 // D22_D23 = a20b02, a20b03
472 ps_muls0 D22_D23, B02_B03, A20_A21 // YYY LAST TIME FP7 IS USED
473 // store m10m11
474 psq_st D10_D11, 16(mAB), 0, 0
475 // get a13 from fp3 and add to D12_D13
476 ps_madds1 D12_D13, UNIT01, A12_A13, D12_D13 // m12, m13 computed
477 // store m02m03
478 psq_st D02_D03, 8(mAB), 0, 0 // YYY LAST TIME D02_D03 IS USED
479
480 // D20_D21 = a20b00 + a21b10, a20b01 + a21b11
481 ps_madds1 D20_D21, B10_B11, A20_A21, D20_D21 // YYY LAST TIME FP8 IS USED
482 // D22_D23 = a20b02 + a21b12, a20b03 + a21b13
483 ps_madds1 D22_D23, B12_B13, A20_A21, D22_D23
484 // D20_D21 = a20b00 + a21b10 + a22b20, a20b01 + a21b11 + a22b21
485 ps_madds0 D20_D21, B20_B21, A22_A23, D20_D21
486 // Restore fp14
487 lfd fp14, 8(r1) // D10_D11
488 // store m12m13
489 psq_st D12_D13, 24(mAB), 0, 0
490 // D22_D23 = a20b02 + a21b12 + a22b22, a20b03 + a21b13 + a22b23 + a23
491 ps_madds0 D22_D23, B22_B23, A22_A23, D22_D23
492 // store m20m21
493 psq_st D20_D21, 32(mAB), 0, 0
494 // get a23 from fp5 and add to fp17
495 ps_madds1 D22_D23, UNIT01, A22_A23, D22_D23
496 // restore stack frame
497 lfd fp15, 16(r1) // D12_D13
498 // store m22m23
499 psq_st D22_D23, 40(mAB), 0, 0
500
501 lfd fp31, 40(r1)
502 addi r1, r1, 64
503
504
505 blr
506
507
508 #undef A00_A01
509 #undef A02_A03
510 #undef A10_A11
511 #undef A12_A13
512 #undef A20_A21
513 #undef A22_A23
514
515 #undef B00_B01
516 #undef B02_B03
517 #undef B10_B11
518 #undef B12_B13
519 #undef B20_B21
520 #undef B22_B23
521
522 #undef D00_D01
523 #undef D02_D03
524 #undef D10_D11
525 #undef D12_D13
526 #undef D20_D21
527 #undef D22_D23
528
529 #undef UNIT01
530
531
532 /*
533 / m[0][0] = a[0][0]*b[0][0] + a[0][1]*b[1][0] + a[0][2]*b[2][0];
534 fp12m[0][1] = a[0][0]*b[0][1] + a[0][1]*b[1][1] + a[0][2]*b[2][1];
535 / m[0][2] = a[0][0]*b[0][2] + a[0][1]*b[1][2] + a[0][2]*b[2][2];
536 fp13m[0][3] = a[0][0]*b[0][3] + a[0][1]*b[1][3] + a[0][2]*b[2][3] + a[0][3];
537
538 / m[1][0] = a[1][0]*b[0][0] + a[1][1]*b[1][0] + a[1][2]*b[2][0];
539 fp14m[1][1] = a[1][0]*b[0][1] + a[1][1]*b[1][1] + a[1][2]*b[2][1];
540 / m[1][2] = a[1][0]*b[0][2] + a[1][1]*b[1][2] + a[1][2]*b[2][2];
541 fp15m[1][3] = a[1][0]*b[0][3] + a[1][1]*b[1][3] + a[1][2]*b[2][3] + a[1][3];
542
543 / m[2][0] = a[2][0]*b[0][0] + a[2][1]*b[1][0] + a[2][2]*b[2][0];
544 fp16m[2][1] = a[2][0]*b[0][1] + a[2][1]*b[1][1] + a[2][2]*b[2][1];
545 / m[2][2] = a[2][0]*b[0][2] + a[2][1]*b[1][2] + a[2][2]*b[2][2];
546 fp17m[2][3] = a[2][0]*b[0][3] + a[2][1]*b[1][3] + a[2][2]*b[2][3] + a[2][3];
547 OPTIMIZATIONS :
548 fp0 instead of fp17
549 fp2 instead of fp16
550 psq_l fp0, 0(mA), 0, 0 // a[0][0], a[0][1]
551 psq_l fp1, 8(mA), 0, 0 // a[0][2], a[0][3]
552 psq_l fp2, 16(mA), 0, 0 // a[1][0], a[1][1]
553 psq_l fp3, 24(mA), 0, 0 // a[1][2], a[1][3]
554 psq_l fp4, 32(mA), 0, 0 // a[2][0], a[2][1]
555 psq_l fp5, 40(mA), 0, 0 // a[2][2], a[2][3]
556
557 psq_l fp6, 0(mB), 0, 0 // b[0][0], b[0][1]
558 psq_l fp7, 8(mB), 0, 0 // b[0][2], b[0][3]
559 psq_l fp8, 16(mB), 0, 0 // b[1][0], b[1][1]
560 psq_l fp9, 24(mB), 0, 0 // b[1][2], b[1][3]
561 psq_l fp10, 32(mB), 0, 0 // b[2][0], b[2][1]
562 psq_l fp11, 40(mB), 0, 0 // b[2][2], b[2][3]
563
564 */
565 }
566 #endif // GEKKO
567
568
569 /*---------------------------------------------------------------------*
570
571 Name: MTXConcatArray
572
573 Description: concatenates a matrix to an array of matrices.
574 order of operation is A x B(array) = AB(array).
575
576 Arguments: a first matrix for concat.
577 srcBase array base of second matrix for concat.
578 dstBase array base of resultant matrix from concat.
579 count number of matrices in srcBase, dstBase arrays.
580
581 note: cannot check for array overflow
582
583 Return: none
584
585 *---------------------------------------------------------------------*/
586
587 /*---------------------------------------------------------------------*
588 C version
589 *---------------------------------------------------------------------*/
C_MTXConcatArray(const Mtx a,const Mtx * srcBase,Mtx * dstBase,u32 count)590 void C_MTXConcatArray ( const Mtx a, const Mtx* srcBase, Mtx* dstBase, u32 count )
591 {
592 u32 i;
593
594 ASSERTMSG( (a != 0), "MTXConcatArray(): NULL MtxPtr 'a' " );
595 ASSERTMSG( (srcBase != 0), "MTXConcatArray(): NULL MtxPtr 'srcBase' " );
596 ASSERTMSG( (dstBase != 0), "MTXConcatArray(): NULL MtxPtr 'dstBase' " );
597 ASSERTMSG( (count > 1), "MTXConcatArray(): count must be greater than 1." );
598
599 for ( i = 0 ; i < count ; i++ )
600 {
601 C_MTXConcat(a, *srcBase, *dstBase);
602
603 srcBase++;
604 dstBase++;
605 }
606 }
607
608 /*---------------------------------------------------------------------*
609 Paired-Single assembler version
610 *---------------------------------------------------------------------*
611 Note that this performs NO error checking.
612 *---------------------------------------------------------------------*/
613 #ifdef GEKKO
614
615 // This code can be compiled only with register allocation optimization.
616 #if ( defined(__MWERKS__) && defined(_DEBUG) )
617 #pragma global_optimizer on
618 #pragma optimization_level 1
619 #endif
620
PSMTXConcatArray(const register Mtx a,const register Mtx * srcBase,register Mtx * dstBase,register u32 count)621 void PSMTXConcatArray (
622 const register Mtx a,
623 const register Mtx* srcBase,
624 register Mtx* dstBase,
625 register u32 count )
626 {
627 register f32 va0, va1, va2, va3, va4, va5;
628 register f32 vb0, vb1, vb2, vb3, vb4, vb5;
629 register f32 vd0, vd1, vd2, vd3, vd4, vd5;
630 register f32 u01;
631 register f32* u01Ptr = Unit01;
632
633 asm
634 {
635 // [a00][a01]
636 psq_l va0, 0(a), 0, 0
637 // [a02][a03]
638 psq_l va1, 8(a), 0, 0
639 // [a10][a11]
640 psq_l va2, 16(a), 0, 0
641 // [a12][a13]
642 psq_l va3, 24(a), 0, 0
643 // count--
644 subi count, count, 1
645 // [a20][a21]
646 psq_l va4, 32(a), 0, 0
647 // [a22][a23]
648 psq_l va5, 40(a), 0, 0
649 // Loop count
650 mtctr count
651 // [0][1]
652 psq_l u01, 0(u01Ptr), 0, 0
653
654 //---------------------------------
655 // [b00][b01]
656 psq_l vb0, 0(srcBase), 0, 0
657 // [b10][b11]
658 psq_l vb2, 16(srcBase), 0, 0
659
660 // [a00*b00][a00*b01]
661 ps_muls0 vd0, vb0, va0
662 // [a10*b00][a10*b01]
663 ps_muls0 vd2, vb0, va2
664 // [a20*b00][a20*b01]
665 ps_muls0 vd4, vb0, va4
666
667 // [b20][b21]
668 psq_l vb4, 32(srcBase), 0, 0
669
670 // [a00*b00 + a01*b10][a00*b01 + a01*b11]
671 ps_madds1 vd0, vb2, va0, vd0
672 // [a10*b00 + a11*b10][a10*b01 + a11*b11]
673 ps_madds1 vd2, vb2, va2, vd2
674 // [a20*b00 + a21*b10][a20*b01 + a21*b11]
675 ps_madds1 vd4, vb2, va4, vd4
676
677 // [b02][b03]
678 psq_l vb1, 8(srcBase), 0, 0
679
680 // [a00*b00 + a01*b10 + a02*b20][a00*b01 + a01*b11 + a02*b21]
681 ps_madds0 vd0, vb4, va1, vd0
682 // [a10*b00 + a11*b10 + a12*b20][a10*b01 + a11*b11 + a12*b21]
683 ps_madds0 vd2, vb4, va3, vd2
684 // [a20*b00 + a21*b10 + a22*b20][a20*b01 + a21*b11 + a22*b21]
685 ps_madds0 vd4, vb4, va5, vd4
686
687 // [b12][b13]
688 psq_l vb3, 24(srcBase), 0, 0
689 // [a00*b00 + a01*b10 + a02*b20][a00*b01 + a01*b11 + a02*b21]
690 psq_st vd0, 0(dstBase), 0, 0
691
692 // [a00*b02][a00*b03]
693 ps_muls0 vd1, vb1, va0
694 // [a10*b02][a10*b03]
695 ps_muls0 vd3, vb1, va2
696 // [a20*b02][a20*b03]
697 ps_muls0 vd5, vb1, va4
698
699 // [b22][b23]
700 psq_l vb5, 40(srcBase), 0, 0
701 // [a10*b00 + a11*b10 + a12*b20][a10*b01 + a11*b11 + a12*b21]
702 psq_st vd2, 16(dstBase), 0, 0
703
704 // [a00*b02 + a01*b12][a00*b03 + a01*b13]
705 ps_madds1 vd1, vb3, va0, vd1
706 // [a10*b02 + a11*b12][a10*b03 + a11*b13]
707 ps_madds1 vd3, vb3, va2, vd3
708 // [a20*b02 + a21*b12][a20*b03 + a21*b13]
709 ps_madds1 vd5, vb3, va4, vd5
710
711 _loop:
712
713 // ++srcBase
714 addi srcBase, srcBase, sizeof(Mtx)
715
716 // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23]
717 ps_madds0 vd1, vb5, va1, vd1
718 // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23]
719 ps_madds0 vd3, vb5, va3, vd3
720 // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23]
721 ps_madds0 vd5, vb5, va5, vd5
722
723 // [b00][b01]
724 psq_l vb0, 0(srcBase), 0, 0
725 // [a20*b00 + a21*b10 + a22*b20][a20*b01 + a21*b11 + a22*b21]
726 psq_st vd4, 32(dstBase), 0, 0
727
728 // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23 + a03]
729 ps_madd vd1, u01, va1, vd1
730 // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23 + a13]
731 ps_madd vd3, u01, va3, vd3
732 // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23 + a23]
733 ps_madd vd5, u01, va5, vd5
734
735 // [b10][b11]
736 psq_l vb2, 16(srcBase), 0, 0
737 // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23 + a03]
738 psq_st vd1, 8(dstBase), 0, 0
739
740 // [a00*b00][a00*b01]
741 ps_muls0 vd0, vb0, va0
742 // [a10*b00][a10*b01]
743 ps_muls0 vd2, vb0, va2
744 // [a20*b00][a20*b01]
745 ps_muls0 vd4, vb0, va4
746
747 // [b20][b21]
748 psq_l vb4, 32(srcBase), 0, 0
749 // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23 + a13]
750 psq_st vd3, 24(dstBase), 0, 0
751
752 // [a00*b00 + a01*b10][a00*b01 + a01*b11]
753 ps_madds1 vd0, vb2, va0, vd0
754 // [a10*b00 + a11*b10][a10*b01 + a11*b11]
755 ps_madds1 vd2, vb2, va2, vd2
756 // [a20*b00 + a21*b10][a20*b01 + a21*b11]
757 ps_madds1 vd4, vb2, va4, vd4
758
759 // [b02][b03]
760 psq_l vb1, 8(srcBase), 0, 0
761 // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23 + a23]
762 psq_st vd5, 40(dstBase), 0, 0
763 // ++dstBase
764 addi dstBase, dstBase, sizeof(Mtx)
765
766 // [a00*b00 + a01*b10 + a02*b20][a00*b01 + a01*b11 + a02*b21]
767 ps_madds0 vd0, vb4, va1, vd0
768 // [a10*b00 + a11*b10 + a12*b20][a10*b01 + a11*b11 + a12*b21]
769 ps_madds0 vd2, vb4, va3, vd2
770 // [a20*b00 + a21*b10 + a22*b20][a20*b01 + a21*b11 + a22*b21]
771 ps_madds0 vd4, vb4, va5, vd4
772
773 // [b12][b13]
774 psq_l vb3, 24(srcBase), 0, 0
775 // [a00*b00 + a01*b10 + a02*b20][a00*b01 + a01*b11 + a02*b21]
776 psq_st vd0, 0(dstBase), 0, 0
777
778 // [a00*b02][a00*b03]
779 ps_muls0 vd1, vb1, va0
780 // [a10*b02][a10*b03]
781 ps_muls0 vd3, vb1, va2
782 // [a20*b02][a20*b03]
783 ps_muls0 vd5, vb1, va4
784
785 // [b22][b23]
786 psq_l vb5, 40(srcBase), 0, 0
787 // [a10*b00 + a11*b10 + a12*b20][a10*b01 + a11*b11 + a12*b21]
788 psq_st vd2, 16(dstBase), 0, 0
789
790 // [a00*b02 + a01*b12][a00*b03 + a01*b13]
791 ps_madds1 vd1, vb3, va0, vd1
792 // [a10*b02 + a11*b12][a10*b03 + a11*b13]
793 ps_madds1 vd3, vb3, va2, vd3
794 // [a20*b02 + a21*b12][a20*b03 + a21*b13]
795 ps_madds1 vd5, vb3, va4, vd5
796
797 // LOOP
798 bdnz _loop
799
800 // [a20*b00 + a21*b10 + a22*b20][a20*b01 + a21*b11 + a22*b21]
801 psq_st vd4, 32(dstBase), 0, 0
802
803 // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23]
804 ps_madds0 vd1, vb5, va1, vd1
805 // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23]
806 ps_madds0 vd3, vb5, va3, vd3
807 // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23]
808 ps_madds0 vd5, vb5, va5, vd5
809
810 // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23 + a03]
811 ps_madd vd1, u01, va1, vd1
812 // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23 + a13]
813 ps_madd vd3, u01, va3, vd3
814 // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23 + a23]
815 ps_madd vd5, u01, va5, vd5
816
817 // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23 + a03]
818 psq_st vd1, 8(dstBase), 0, 0
819 // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23 + a13]
820 psq_st vd3, 24(dstBase), 0, 0
821 // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23 + a23]
822 psq_st vd5, 40(dstBase), 0, 0
823 //---------------------------------
824 }
825 }
826
827 #if ( defined(__MWERKS__) && defined(_DEBUG) )
828 #pragma optimization_level 0
829 #pragma global_optimizer reset
830 #endif
831
832 #endif
833
834 /*---------------------------------------------------------------------*
835
836 Name: MTXTranspose
837
838 Description: computes the transpose of a matrix.
839 As matrices are 3x4, fourth column (translation component) is
840 lost and becomes (0,0,0).
841
842 This function is intended for use in computing an
843 inverse-transpose matrix to transform normals for lighting.
844 In this case, lost translation component doesn't matter.
845
846
847 Arguments: src source matrix.
848 xPose destination (transposed) matrix.
849 ok if src == xPose.
850
851
852
853 Return: none
854
855 *---------------------------------------------------------------------*/
856 /*---------------------------------------------------------------------*
857 C version
858 *---------------------------------------------------------------------*/
C_MTXTranspose(const Mtx src,Mtx xPose)859 void C_MTXTranspose ( const Mtx src, Mtx xPose )
860 {
861 Mtx mTmp;
862 MtxPtr m;
863
864
865 ASSERTMSG( (src != 0), MTX_TRANSPOSE_1 );
866 ASSERTMSG( (xPose != 0), MTX_TRANSPOSE_2 );
867
868
869 if(src == xPose)
870 {
871 m = mTmp;
872 }
873 else
874 {
875 m = xPose;
876 }
877
878
879 m[0][0] = src[0][0]; m[0][1] = src[1][0]; m[0][2] = src[2][0]; m[0][3] = 0.0f;
880 m[1][0] = src[0][1]; m[1][1] = src[1][1]; m[1][2] = src[2][1]; m[1][3] = 0.0f;
881 m[2][0] = src[0][2]; m[2][1] = src[1][2]; m[2][2] = src[2][2]; m[2][3] = 0.0f;
882
883
884 // copy back if needed
885 if( m == mTmp )
886 {
887 C_MTXCopy( mTmp, xPose );
888 }
889 }
890
891 /*---------------------------------------------------------------------*
892 Paired-Single assembler version
893 *---------------------------------------------------------------------*
894 Note that this performs NO error checking.
895 *---------------------------------------------------------------------*/
896 #ifdef GEKKO
PSMTXTranspose(const register Mtx src,register Mtx xPose)897 void PSMTXTranspose ( const register Mtx src, register Mtx xPose )
898 {
899 register f32 c_zero = 0.0F;
900 register f32 row0a, row1a, row0b, row1b;
901 register f32 trns0, trns1, trns2;
902
903 asm
904 {
905 psq_l row0a, 0(src), 0, 0 // [0][0], [0][1]
906 stfs c_zero, 44(xPose) // 0 -> [2][3]
907 psq_l row1a, 16(src), 0, 0 // [1][0], [1][1]
908 ps_merge00 trns0, row0a, row1a // [0][0], [1][0]
909 psq_l row0b, 8(src), 1, 0 // [0][2], 1
910 ps_merge11 trns1, row0a, row1a // [0][1], [1][1]
911 psq_l row1b, 24(src), 1, 0 // [1][2], 1
912 psq_st trns0, 0(xPose), 0, 0 // [0][0], [1][0] -> [0][0], [0][1]
913 psq_l row0a, 32(src), 0, 0 // [2][0], [2][1]
914 ps_merge00 trns2, row0b, row1b // [0][2], [1][2]
915 psq_st trns1, 16(xPose), 0, 0 // [0][1], [1][1] -> [1][0], [1][1]
916 ps_merge00 trns0, row0a, c_zero // [2][0], 0
917 psq_st trns2, 32(xPose), 0, 0 // [0][2], [1][2] -> [2][0], [2][1]
918 ps_merge10 trns1, row0a, c_zero // [2][1], 0
919 psq_st trns0, 8(xPose), 0, 0 // [2][0], 0 -> [0][2], [0][3]
920 lfs row0b, 40(src) // [2][2]
921 psq_st trns1, 24(xPose), 0, 0 // [2][1], 0 -> [1][2], [1][3]
922 stfs row0b, 40(xPose) // [2][2] -> [2][2]
923 }
924 }
925 #endif // GEKKO
926
927 /*---------------------------------------------------------------------*
928
929 Name: MTXInverse
930
931 Description: computes a fast inverse of a matrix.
932 this algorithm works for matrices with a fourth row of
933 (0,0,0,1).
934
935 for a matrix
936 M = | A C | where A is the upper 3x3 submatrix,
937 | 0 1 | C is a 1x3 column vector
938
939
940 INV(M) = | inv(A) (inv(A))*(-C) |
941 | 0 1 |
942
943
944
945 Arguments: src source matrix.
946 inv destination (inverse) matrix.
947 ok if src == inv.
948
949
950
951 Return: 0 if src is not invertible.
952 1 on success.
953
954 *---------------------------------------------------------------------*/
955 /*---------------------------------------------------------------------*
956 C version
957 *---------------------------------------------------------------------*/
C_MTXInverse(const Mtx src,Mtx inv)958 u32 C_MTXInverse ( const Mtx src, Mtx inv )
959 {
960 Mtx mTmp;
961 MtxPtr m;
962 f32 det;
963
964 ASSERTMSG( (src != 0), MTX_INVERSE_1 );
965 ASSERTMSG( (inv != 0), MTX_INVERSE_2 );
966
967 if( src == inv )
968 {
969 m = mTmp;
970 }
971 else
972 {
973 m = inv;
974 }
975
976
977 // compute the determinant of the upper 3x3 submatrix
978 det = src[0][0]*src[1][1]*src[2][2] + src[0][1]*src[1][2]*src[2][0] + src[0][2]*src[1][0]*src[2][1]
979 - src[2][0]*src[1][1]*src[0][2] - src[1][0]*src[0][1]*src[2][2] - src[0][0]*src[2][1]*src[1][2];
980
981
982 // check if matrix is singular
983 if( det == 0.0f )
984 {
985 return 0;
986 }
987
988
989 // compute the inverse of the upper submatrix:
990
991 // find the transposed matrix of cofactors of the upper submatrix
992 // and multiply by (1/det)
993
994 det = 1.0f / det;
995
996
997 m[0][0] = (src[1][1]*src[2][2] - src[2][1]*src[1][2]) * det;
998 m[0][1] = -(src[0][1]*src[2][2] - src[2][1]*src[0][2]) * det;
999 m[0][2] = (src[0][1]*src[1][2] - src[1][1]*src[0][2]) * det;
1000
1001 m[1][0] = -(src[1][0]*src[2][2] - src[2][0]*src[1][2]) * det;
1002 m[1][1] = (src[0][0]*src[2][2] - src[2][0]*src[0][2]) * det;
1003 m[1][2] = -(src[0][0]*src[1][2] - src[1][0]*src[0][2]) * det;
1004
1005 m[2][0] = (src[1][0]*src[2][1] - src[2][0]*src[1][1]) * det;
1006 m[2][1] = -(src[0][0]*src[2][1] - src[2][0]*src[0][1]) * det;
1007 m[2][2] = (src[0][0]*src[1][1] - src[1][0]*src[0][1]) * det;
1008
1009
1010 // compute (invA)*(-C)
1011 m[0][3] = -m[0][0]*src[0][3] - m[0][1]*src[1][3] - m[0][2]*src[2][3];
1012 m[1][3] = -m[1][0]*src[0][3] - m[1][1]*src[1][3] - m[1][2]*src[2][3];
1013 m[2][3] = -m[2][0]*src[0][3] - m[2][1]*src[1][3] - m[2][2]*src[2][3];
1014
1015 // copy back if needed
1016 if( m == mTmp )
1017 {
1018 C_MTXCopy( mTmp,inv );
1019 }
1020
1021 return 1;
1022 }
1023
1024 /*---------------------------------------------------------------------*
1025 Paired-Single assembler version
1026 *---------------------------------------------------------------------*
1027 Note that this performs NO error checking.
1028 Results may be a little bit different from the C version
1029 because it doesn't perform exactly same calculation.
1030 *---------------------------------------------------------------------*/
1031 #ifdef GEKKO
PSMTXInverse(const register Mtx src,register Mtx inv)1032 asm u32 PSMTXInverse ( const register Mtx src, register Mtx inv )
1033 {
1034 nofralloc
1035
1036 // fp0 [ 00 ][ 1.0F ] : Load
1037 psq_l fp0, 0( src ), 1, 0;
1038 // fp1 [ 01 ][ 02 ] : Load
1039 psq_l fp1, 4( src ), 0, 0;
1040 // fp2 [ 10 ][ 1.0F ] : Load
1041 psq_l fp2, 16( src ), 1, 0;
1042 // fp6 [ 02 ][ 00 ]
1043 ps_merge10 fp6, fp1, fp0;
1044 // fp3 [ 11 ][ 12 ] : Load
1045 psq_l fp3, 20( src ), 0, 0;
1046 // fp4 [ 20 ][ 1.0F ] : Load
1047 psq_l fp4, 32( src ), 1, 0;
1048 // fp7 [ 12 ][ 10 ]
1049 ps_merge10 fp7, fp3, fp2;
1050 // fp5 [ 21 ][ 22 ] : Load
1051 psq_l fp5, 36( src ), 0, 0;
1052 // fp11[ 11*02 ][ 00*12 ]
1053 ps_mul fp11, fp3, fp6;
1054 // fp8 [ 22 ][ 20 ]
1055 ps_merge10 fp8, fp5, fp4;
1056 // fp13[ 21*12 ][ 10*22 ]
1057 ps_mul fp13, fp5, fp7;
1058 // fp11[ 01*12 - 11*02 ][ 10*02 - 00*12 ]
1059 ps_msub fp11, fp1, fp7, fp11;
1060 // fp12[ 01*22 ][ 20*02 ]
1061 ps_mul fp12, fp1, fp8;
1062 // fp13[ 11*22 - 21*12 ][ 20*12 - 10*22 ]
1063 ps_msub fp13, fp3, fp8, fp13;
1064 // fp10[ 20*11 ][ N/A ]
1065 ps_mul fp10, fp3, fp4;
1066 // fp12[ 21*02 - 01*22 ][ 00*22 - 20*02 ]
1067 ps_msub fp12, fp5, fp6, fp12;
1068 // fp7 [ 00*(11*22-21*12) ][ N/A ]
1069 ps_mul fp7, fp0, fp13;
1070 // fp9 [ 00*21 ][ N/A ]
1071 ps_mul fp9, fp0, fp5;
1072 // fp8 [ 10*01 ][ N/A ]
1073 ps_mul fp8, fp1, fp2;
1074 // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) ][ N/A ]
1075 ps_madd fp7, fp2, fp12, fp7;
1076 // fp6 [ 0.0F ][ 0.0F ]
1077 ps_sub fp6, fp6, fp6;
1078 // fp10[ 10*21 - 20*11 ][ N/A ]
1079 ps_msub fp10, fp2, fp5, fp10;
1080 // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) + 20*(01*12-11*02) ][ N/A ] : det
1081 ps_madd fp7, fp4, fp11, fp7;
1082 // fp9 [ 20*01 - 00*21 ][ N/A ]
1083 ps_msub fp9, fp1, fp4, fp9;
1084 // fp8 [ 00*11 - 10*01 ][ N/A ]
1085 ps_msub fp8, fp0, fp3, fp8;
1086
1087 // ( det == 0 ) ?
1088 ps_cmpo0 cr0, fp7, fp6;
1089 bne _regular;
1090
1091 // return value (singular)
1092 addi r3, 0, 0;
1093
1094 blr;
1095
1096 _regular:
1097
1098 // fp0 [ 1/det ][ N/A ]
1099 fres fp0, fp7;
1100
1101 // Newton's approximation
1102 // Refinement : ( E = est. of 1/K ) -> ( E' = ( 2 - K * E ) * E )
1103 ps_add fp6, fp0, fp0
1104 ps_mul fp5, fp7, fp0
1105 ps_nmsub fp0, fp0, fp5, fp6
1106
1107 // fp1 [ 03 ][ 03 ] : Load
1108 lfs fp1, 12(src);
1109 // fp13[ ( 11*22 - 21*12 ) * rdet ][ ( 20*12 - 10*22 ) * rdet ] : i[0][0], i[1][0]
1110 ps_muls0 fp13, fp13, fp0;
1111 // fp2 [ 13 ][ 13 ] : Load
1112 lfs fp2, 28(src);
1113 // fp12[ ( 21*02 - 01*22 ) * rdet ][ ( 00*22 - 20*02 ) * rdet ] : i[0][1], i[1][1]
1114 ps_muls0 fp12, fp12, fp0;
1115 // fp3 [ 23 ][ 23 ] : Load
1116 lfs fp3, 44(src);
1117 // fp11[ ( 01*12 - 11*02 ) * rdet ][ ( 10*02 - 00*12 ) * rdet ] : i[0][2], i[1][2]
1118 ps_muls0 fp11, fp11, fp0;
1119 // fp5 [ i00 ][ i01 ]
1120 ps_merge00 fp5, fp13, fp12;
1121 // fp4 [ i10 ][ i11 ]
1122 ps_merge11 fp4, fp13, fp12;
1123 // fp6 [ i00*03 ][ i10*03 ]
1124 ps_mul fp6, fp13, fp1;
1125 // [ i00 ][ i01 ] : Store fp5 -> free(fp5[ i00 ][ i01 ])
1126 psq_st fp5, 0(inv), 0, 0;
1127 // [ i10 ][ i11 ] : Store fp4 -> free(fp4[ i10 ][ i11 ])
1128 psq_st fp4, 16(inv), 0, 0;
1129 // fp10[ ( 10*21 - 20*11 ) * rdet ] : i[2][0]
1130 ps_muls0 fp10, fp10, fp0;
1131 // fp9 [ ( 20*01 - 00*21 ) * rdet ] : i[2][1]
1132 ps_muls0 fp9, fp9, fp0;
1133 // fp6 [ i00*03+i01*13 ][ i10*03+i11*13 ]
1134 ps_madd fp6, fp12, fp2, fp6;
1135 // [ i20 ] : Store fp10
1136 psq_st fp10, 32(inv), 1, 0;
1137 // fp8 [ ( 00*11 - 10*01 ) * rdet ] : i[2][2]
1138 ps_muls0 fp8, fp8, fp0;
1139 // fp6 [ -i00*03-i01*13-i02*23 ][ -i10*03-i11*13-i12*23 ] : i[0][3], i[1][3]
1140 ps_nmadd fp6, fp11, fp3, fp6;
1141 // [ i21 ] : Store fp9
1142 psq_st fp9, 36(inv), 1, 0;
1143 // fp7 [ i20*03 ][ N/A ]
1144 ps_mul fp7, fp10, fp1;
1145 // fp5 [ i02 ][ i03 ]
1146 ps_merge00 fp5, fp11, fp6;
1147 // [ i22 ] : Store fp8
1148 psq_st fp8, 40(inv), 1, 0;
1149 // fp7 [ i20*03+i21*13 ][ N/A ]
1150 ps_madd fp7, fp9, fp2, fp7;
1151 // fp4 [ i12 ][ i13 ]
1152 ps_merge11 fp4, fp11, fp6;
1153 // [ i02 ][ i03 ] : Store fp5
1154 psq_st fp5, 8(inv), 0, 0;
1155 // fp7 [ -i20*03-i21*13-i22*23 ][ N/A ] : i[2][3]
1156 ps_nmadd fp7, fp8, fp3, fp7;
1157 // [ i12 ][ i13 ] : Store fp4
1158 psq_st fp4, 24(inv), 0, 0;
1159 // [ i23 ] : Store fp7
1160 psq_st fp7, 44(inv), 1, 0;
1161
1162 // return value (regular)
1163 addi r3, 0, 1;
1164
1165 blr;
1166 }
1167 #endif // GEKKO
1168
1169 /*---------------------------------------------------------------------*
1170
1171 Name: MTXInvXpose
1172
1173 Description: computes a fast inverse-transpose of a matrix.
1174 this algorithm works for matrices with a fourth row of
1175 (0,0,0,1). Commonly used for calculating normal transform
1176 matrices.
1177
1178 This function is equivalent to the combination of
1179 two functions MTXInverse + MTXTranspose.
1180
1181 Arguments: src source matrix.
1182 invx destination (inverse-transpose) matrix.
1183 ok if src == invx.
1184
1185 Return: 0 if src is not invertible.
1186 1 on success.
1187
1188 *---------------------------------------------------------------------*/
1189 /*---------------------------------------------------------------------*
1190 C version
1191 *---------------------------------------------------------------------*/
C_MTXInvXpose(const Mtx src,Mtx invX)1192 u32 C_MTXInvXpose ( const Mtx src, Mtx invX )
1193 {
1194 Mtx mTmp;
1195 MtxPtr m;
1196 f32 det;
1197
1198 ASSERTMSG( (src != 0), MTX_INVXPOSE_1 );
1199 ASSERTMSG( (invX != 0), MTX_INVXPOSE_2 );
1200
1201 if( src == invX )
1202 {
1203 m = mTmp;
1204 }
1205 else
1206 {
1207 m = invX;
1208 }
1209
1210 // compute the determinant of the upper 3x3 submatrix
1211 det = src[0][0]*src[1][1]*src[2][2] + src[0][1]*src[1][2]*src[2][0] + src[0][2]*src[1][0]*src[2][1]
1212 - src[2][0]*src[1][1]*src[0][2] - src[1][0]*src[0][1]*src[2][2] - src[0][0]*src[2][1]*src[1][2];
1213
1214 // check if matrix is singular
1215 if( det == 0.0f )
1216 {
1217 return 0;
1218 }
1219
1220 // compute the inverse-transpose of the upper submatrix:
1221
1222 // find the transposed matrix of cofactors of the upper submatrix
1223 // and multiply by (1/det)
1224
1225 det = 1.0f / det;
1226
1227 m[0][0] = (src[1][1]*src[2][2] - src[2][1]*src[1][2]) * det;
1228 m[0][1] = -(src[1][0]*src[2][2] - src[2][0]*src[1][2]) * det;
1229 m[0][2] = (src[1][0]*src[2][1] - src[2][0]*src[1][1]) * det;
1230
1231 m[1][0] = -(src[0][1]*src[2][2] - src[2][1]*src[0][2]) * det;
1232 m[1][1] = (src[0][0]*src[2][2] - src[2][0]*src[0][2]) * det;
1233 m[1][2] = -(src[0][0]*src[2][1] - src[2][0]*src[0][1]) * det;
1234
1235 m[2][0] = (src[0][1]*src[1][2] - src[1][1]*src[0][2]) * det;
1236 m[2][1] = -(src[0][0]*src[1][2] - src[1][0]*src[0][2]) * det;
1237 m[2][2] = (src[0][0]*src[1][1] - src[1][0]*src[0][1]) * det;
1238
1239
1240 // the fourth columns should be all zero
1241 m[0][3] = 0.0F;
1242 m[1][3] = 0.0F;
1243 m[2][3] = 0.0F;
1244
1245 // copy back if needed
1246 if( m == mTmp )
1247 {
1248 C_MTXCopy( mTmp,invX );
1249 }
1250
1251 return 1;
1252 }
1253
1254 /*---------------------------------------------------------------------*
1255 Paired-Single assembler version
1256 *---------------------------------------------------------------------*
1257 Note that this performs NO error checking.
1258 Results may be a little bit different from the C version
1259 because it doesn't perform exactly same calculation.
1260 *---------------------------------------------------------------------*/
1261 #ifdef GEKKO
PSMTXInvXpose(const register Mtx src,register Mtx invX)1262 asm u32 PSMTXInvXpose ( const register Mtx src, register Mtx invX )
1263 {
1264 nofralloc
1265
1266 // fp0 [ 00 ][ 1.0F ] : Load
1267 psq_l fp0, 0( src ), 1, 0;
1268 // fp1 [ 01 ][ 02 ] : Load
1269 psq_l fp1, 4( src ), 0, 0;
1270 // fp2 [ 10 ][ 1.0F ] : Load
1271 psq_l fp2, 16( src ), 1, 0;
1272 // fp6 [ 02 ][ 00 ]
1273 ps_merge10 fp6, fp1, fp0;
1274 // fp3 [ 11 ][ 12 ] : Load
1275 psq_l fp3, 20( src ), 0, 0;
1276 // fp4 [ 20 ][ 1.0F ] : Load
1277 psq_l fp4, 32( src ), 1, 0;
1278 // fp7 [ 12 ][ 10 ]
1279 ps_merge10 fp7, fp3, fp2;
1280 // fp5 [ 21 ][ 22 ] : Load
1281 psq_l fp5, 36( src ), 0, 0;
1282 // fp11[ 11*02 ][ 00*12 ]
1283 ps_mul fp11, fp3, fp6;
1284 // fp8 [ 22 ][ 20 ]
1285 ps_merge10 fp8, fp5, fp4;
1286 // fp13[ 21*12 ][ 10*22 ]
1287 ps_mul fp13, fp5, fp7;
1288 // fp11[ 01*12 - 11*02 ][ 10*02 - 00*12 ]
1289 ps_msub fp11, fp1, fp7, fp11;
1290 // fp12[ 01*22 ][ 20*02 ]
1291 ps_mul fp12, fp1, fp8;
1292 // fp13[ 11*22 - 21*12 ][ 20*12 - 10*22 ]
1293 ps_msub fp13, fp3, fp8, fp13;
1294 // fp10[ 20*11 ][ N/A ]
1295 ps_mul fp10, fp3, fp4;
1296 // fp12[ 21*02 - 01*22 ][ 00*22 - 20*02 ]
1297 ps_msub fp12, fp5, fp6, fp12;
1298 // fp7 [ 00*(11*22-21*12) ][ N/A ]
1299 ps_mul fp7, fp0, fp13;
1300 // fp9 [ 00*21 ][ N/A ]
1301 ps_mul fp9, fp0, fp5;
1302 // fp8 [ 10*01 ][ N/A ]
1303 ps_mul fp8, fp1, fp2;
1304 // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) ][ N/A ]
1305 ps_madd fp7, fp2, fp12, fp7;
1306 // fp6 [ 0.0F ][ 0.0F ]
1307 ps_sub fp6, fp6, fp6;
1308 // fp10[ 10*21 - 20*11 ][ N/A ]
1309 ps_msub fp10, fp2, fp5, fp10;
1310 // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) + 20*(01*12-11*02) ][ N/A ] : det
1311 ps_madd fp7, fp4, fp11, fp7;
1312 // fp9 [ 20*01 - 00*21 ][ N/A ]
1313 ps_msub fp9, fp1, fp4, fp9;
1314 // fp8 [ 00*11 - 10*01 ][ N/A ]
1315 ps_msub fp8, fp0, fp3, fp8;
1316
1317 // ( det == 0 ) ?
1318 ps_cmpo0 cr0, fp7, fp6;
1319 //bne _regular;
1320 bne _regular;
1321
1322 // return value (singular)
1323 addi r3, 0, 0;
1324
1325 blr;
1326
1327 _regular:
1328
1329 // fp0 [ 1/det ][ N/A ]
1330 fres fp0, fp7;
1331
1332 psq_st fp6, 12(invX),1, 0
1333
1334 // Newton's approximation
1335 // Refinement : ( E = est. of 1/K ) -> ( E' = ( 2 - K * E ) * E )
1336 ps_add fp4, fp0, fp0
1337 ps_mul fp5, fp7, fp0
1338 psq_st fp6, 28(invX),1, 0
1339 ps_nmsub fp0, fp0, fp5, fp4
1340 psq_st fp6, 44(invX),1, 0
1341
1342 // fp13[ ( 11*22 - 21*12 ) * rdet ][ ( 20*12 - 10*22 ) * rdet ] : ix[0][0], ix[0][1]
1343 ps_muls0 fp13, fp13, fp0;
1344 // fp12[ ( 21*02 - 01*22 ) * rdet ][ ( 00*22 - 20*02 ) * rdet ] : ix[1][0], ix[1][1]
1345 ps_muls0 fp12, fp12, fp0;
1346 // [ ix00 ][ ix01 ] : Store fp13
1347 psq_st fp13, 0( invX ), 0, 0;
1348 // fp11[ ( 01*12 - 11*02 ) * rdet ][ ( 10*02 - 00*12 ) * rdet ] : ix[2][0], ix[2][1]
1349 ps_muls0 fp11, fp11, fp0;
1350 // [ ix10 ][ ix11 ] : Store fp12
1351 psq_st fp12, 16( invX ), 0, 0;
1352 // fp10[ ( 10*21 - 20*11 ) * rdet ] : i[0][2]
1353 ps_muls0 fp10, fp10, fp0;
1354 // [ ix20 ][ ix21 ] : Store fp11
1355 psq_st fp11, 32( invX ), 0, 0;
1356 // fp9 [ ( 20*01 - 00*21 ) * rdet ] : i[1][2]
1357 ps_muls0 fp9, fp9, fp0;
1358 // [ ix02 ] : Store fp10
1359 psq_st fp10, 8( invX ), 1, 0;
1360 // fp8 [ ( 00*11 - 10*01 ) * rdet ] : i[2][2]
1361 ps_muls0 fp8, fp8, fp0;
1362 // [ ix12 ] : Store fp9
1363 psq_st fp9, 24( invX ), 1, 0;
1364 // [ ix22 ] : Store fp8
1365 psq_st fp8, 40( invX ), 1, 0;
1366
1367 // return value (regular)
1368 addi r3, 0, 1;
1369
1370 blr;
1371 }
1372 #endif
1373
1374 /*---------------------------------------------------------------------*
1375
1376
1377
1378
1379
1380 MODEL SECTION
1381
1382
1383
1384
1385
1386 *---------------------------------------------------------------------*/
1387
1388
1389
1390
1391
1392 /*---------------------------------------------------------------------*
1393
1394 Name: MTXRotDeg
1395
1396 Description: sets a rotation matrix about one of the X, Y or Z axes
1397
1398 Arguments: m matrix to be set
1399
1400 axis major axis about which to rotate.
1401 axis is passed in as a character.
1402 it must be one of 'X', 'x', 'Y', 'y', 'Z', 'z'
1403
1404 deg rotation angle in degrees.
1405
1406 note: counter-clockwise rotation is positive.
1407
1408 Return: none
1409
1410 *---------------------------------------------------------------------*/
1411 // The function is defined as a macro in mtx.h
1412 /*
1413 void MTXRotDeg ( Mtx m, char axis, f32 deg )
1414 {
1415 MTXRotRad( m, axis, MTXDegToRad( deg ) );
1416 }
1417 */
1418
1419 /*---------------------------------------------------------------------*
1420
1421 Name: MTXRotRad
1422
1423 Description: sets a rotation matrix about one of the X, Y or Z axes
1424
1425 Arguments: m matrix to be set
1426
1427 axis major axis about which to rotate.
1428 axis is passed in as a character.
1429 it must be one of 'X', 'x', 'Y', 'y', 'Z', 'z'
1430
1431 deg rotation angle in radians.
1432
1433 note: counter-clockwise rotation is positive.
1434
1435 Return: none
1436
1437 *---------------------------------------------------------------------*/
1438 /*---------------------------------------------------------------------*
1439 C version
1440 *---------------------------------------------------------------------*/
C_MTXRotRad(Mtx m,char axis,f32 rad)1441 void C_MTXRotRad ( Mtx m, char axis, f32 rad )
1442 {
1443
1444 f32 sinA, cosA;
1445
1446 ASSERTMSG( (m != 0), MTX_ROTRAD_1 );
1447
1448 // verification of "axis" will occur in MTXRotTrig
1449
1450 sinA = sinf(rad);
1451 cosA = cosf(rad);
1452
1453 C_MTXRotTrig( m, axis, sinA, cosA );
1454 }
1455
1456 /*---------------------------------------------------------------------*
1457 Paired-Single assembler version
1458 *---------------------------------------------------------------------*
1459 Note that this performs NO error checking.
1460 *---------------------------------------------------------------------*/
1461 #ifdef GEKKO
PSMTXRotRad(Mtx m,char axis,f32 rad)1462 void PSMTXRotRad ( Mtx m, char axis, f32 rad )
1463 {
1464 f32 sinA, cosA;
1465
1466 sinA = sinf(rad);
1467 cosA = cosf(rad);
1468
1469 PSMTXRotTrig( m, axis, sinA, cosA );
1470 }
1471 #endif // GEKKO
1472
1473 /*---------------------------------------------------------------------*
1474
1475 Name: MTXRotTrig
1476
1477 Description: sets a rotation matrix about one of the X, Y or Z axes
1478 from specified trig ratios
1479
1480 Arguments: m matrix to be set
1481
1482 axis major axis about which to rotate.
1483 axis is passed in as a character.
1484 It must be one of 'X', 'x', 'Y', 'y', 'Z', 'z'
1485
1486 sinA sine of rotation angle.
1487
1488 cosA cosine of rotation angle.
1489
1490 note: counter-clockwise rotation is positive.
1491
1492 Return: none
1493
1494 *---------------------------------------------------------------------*/
1495 /*---------------------------------------------------------------------*
1496 C version
1497 *---------------------------------------------------------------------*/
C_MTXRotTrig(Mtx m,char axis,f32 sinA,f32 cosA)1498 void C_MTXRotTrig ( Mtx m, char axis, f32 sinA, f32 cosA )
1499 {
1500
1501 ASSERTMSG( (m != 0), MTX_ROTTRIG_1 );
1502
1503 switch(axis)
1504 {
1505
1506 case 'x':
1507 case 'X':
1508 m[0][0] = 1.0f; m[0][1] = 0.0f; m[0][2] = 0.0f; m[0][3] = 0.0f;
1509 m[1][0] = 0.0f; m[1][1] = cosA; m[1][2] = -sinA; m[1][3] = 0.0f;
1510 m[2][0] = 0.0f; m[2][1] = sinA; m[2][2] = cosA; m[2][3] = 0.0f;
1511 break;
1512
1513 case 'y':
1514 case 'Y':
1515 m[0][0] = cosA; m[0][1] = 0.0f; m[0][2] = sinA; m[0][3] = 0.0f;
1516 m[1][0] = 0.0f; m[1][1] = 1.0f; m[1][2] = 0.0f; m[1][3] = 0.0f;
1517 m[2][0] = -sinA; m[2][1] = 0.0f; m[2][2] = cosA; m[2][3] = 0.0f;
1518 break;
1519
1520 case 'z':
1521 case 'Z':
1522 m[0][0] = cosA; m[0][1] = -sinA; m[0][2] = 0.0f; m[0][3] = 0.0f;
1523 m[1][0] = sinA; m[1][1] = cosA; m[1][2] = 0.0f; m[1][3] = 0.0f;
1524 m[2][0] = 0.0f; m[2][1] = 0.0f; m[2][2] = 1.0f; m[2][3] = 0.0f;
1525 break;
1526
1527 default:
1528 ASSERTMSG( 0, MTX_ROTTRIG_2 );
1529 break;
1530
1531 }
1532
1533 }
1534
1535 /*---------------------------------------------------------------------*
1536 Paired-Single assembler version
1537 *---------------------------------------------------------------------*
1538 Note that this performs NO error checking.
1539 *---------------------------------------------------------------------*/
1540 #ifdef GEKKO
PSMTXRotTrig(register Mtx m,register char axis,register f32 sinA,register f32 cosA)1541 void PSMTXRotTrig (
1542 register Mtx m,
1543 register char axis,
1544 register f32 sinA,
1545 register f32 cosA )
1546 {
1547 register f32 fc0, fc1, nsinA;
1548 register f32 fw0, fw1, fw2, fw3;
1549
1550 asm
1551 {
1552 frsp sinA, sinA // to make sure sinA = single precision
1553 frsp cosA, cosA // to make sure cosA = single precision
1554 }
1555
1556 fc0 = 0.0F;
1557 fc1 = 1.0F;
1558
1559 asm
1560 {
1561 // always lower case
1562 ori axis, axis, 0x20
1563 ps_neg nsinA, sinA
1564
1565 // branches
1566 cmplwi axis, 'x'
1567 beq _case_x
1568 cmplwi axis, 'y'
1569 beq _case_y
1570 cmplwi axis, 'z'
1571 beq _case_z
1572 b _end
1573
1574 _case_x:
1575 psq_st fc1, 0(m), 1, 0
1576 psq_st fc0, 4(m), 0, 0
1577 ps_merge00 fw0, sinA, cosA
1578 psq_st fc0, 12(m), 0, 0
1579 ps_merge00 fw1, cosA, nsinA
1580 psq_st fc0, 28(m), 0, 0
1581 psq_st fc0, 44(m), 1, 0
1582 psq_st fw0, 36(m), 0, 0
1583 psq_st fw1, 20(m), 0, 0
1584 b _end;
1585
1586 _case_y:
1587 ps_merge00 fw0, cosA, fc0
1588 ps_merge00 fw1, fc0, fc1
1589 psq_st fc0, 24(m), 0, 0
1590 psq_st fw0, 0(m), 0, 0
1591 ps_merge00 fw2, nsinA, fc0
1592 ps_merge00 fw3, sinA, fc0
1593 psq_st fw0, 40(m), 0, 0;
1594 psq_st fw1, 16(m), 0, 0;
1595 psq_st fw3, 8(m), 0, 0;
1596 psq_st fw2, 32(m), 0, 0;
1597 b _end;
1598
1599 _case_z:
1600 psq_st fc0, 8(m), 0, 0
1601 ps_merge00 fw0, sinA, cosA
1602 ps_merge00 fw2, cosA, nsinA
1603 psq_st fc0, 24(m), 0, 0
1604 psq_st fc0, 32(m), 0, 0
1605 ps_merge00 fw1, fc1, fc0
1606 psq_st fw0, 16(m), 0, 0
1607 psq_st fw2, 0(m), 0, 0
1608 psq_st fw1, 40(m), 0, 0
1609
1610 _end:
1611
1612 }
1613 }
1614 #endif // GEKKO
1615
1616 /*---------------------------------------------------------------------*
1617
1618 Name: MTXRotAxisDeg
1619
1620 Description: sets a rotation matrix about an arbitrary axis
1621
1622
1623 Arguments: m matrix to be set
1624
1625 axis ptr to a vector containing the x,y,z axis
1626 components.
1627 axis does not have to be a unit vector.
1628
1629 deg rotation angle in degrees.
1630
1631 note: counter-clockwise rotation is positive.
1632
1633 Return: none
1634
1635 *---------------------------------------------------------------------*/
1636 // The function is defined as a macro in mtx.h
1637 /*
1638 void MTXRotAxisDeg ( Mtx m, Vec *axis, f32 deg )
1639 {
1640 MTXRotAxisRad( m, axis, MTXDegToRad( deg ) );
1641 }
1642 */
1643
1644 /*---------------------------------------------------------------------*
1645
1646 Name: MTXRotAxisRad
1647
1648 Description: sets a rotation matrix about an arbitrary axis
1649
1650
1651 Arguments: m matrix to be set
1652
1653 axis ptr to a vector containing the x,y,z axis
1654 components.
1655 axis does not have to be a unit vector.
1656
1657 deg rotation angle in radians.
1658
1659 note: counter-clockwise rotation is positive.
1660
1661 Return: none
1662
1663 *---------------------------------------------------------------------*/
1664 /*---------------------------------------------------------------------*
1665 C version
1666 *---------------------------------------------------------------------*/
C_MTXRotAxisRad(Mtx m,const Vec * axis,f32 rad)1667 void C_MTXRotAxisRad( Mtx m, const Vec *axis, f32 rad )
1668 {
1669 Vec vN;
1670 f32 s, c; // sinTheta, cosTheta
1671 f32 t; // ( 1 - cosTheta )
1672 f32 x, y, z; // x, y, z components of normalized axis
1673 f32 xSq, ySq, zSq; // x, y, z squared
1674
1675
1676 ASSERTMSG( (m != 0), MTX_ROTAXIS_1 );
1677 ASSERTMSG( (axis != 0), MTX_ROTAXIS_2 );
1678
1679
1680 s = sinf(rad);
1681 c = cosf(rad);
1682 t = 1.0f - c;
1683
1684 C_VECNormalize( axis, &vN );
1685
1686 x = vN.x;
1687 y = vN.y;
1688 z = vN.z;
1689
1690 xSq = x * x;
1691 ySq = y * y;
1692 zSq = z * z;
1693
1694 m[0][0] = ( t * xSq ) + ( c );
1695 m[0][1] = ( t * x * y ) - ( s * z );
1696 m[0][2] = ( t * x * z ) + ( s * y );
1697 m[0][3] = 0.0f;
1698
1699 m[1][0] = ( t * x * y ) + ( s * z );
1700 m[1][1] = ( t * ySq ) + ( c );
1701 m[1][2] = ( t * y * z ) - ( s * x );
1702 m[1][3] = 0.0f;
1703
1704 m[2][0] = ( t * x * z ) - ( s * y );
1705 m[2][1] = ( t * y * z ) + ( s * x );
1706 m[2][2] = ( t * zSq ) + ( c );
1707 m[2][3] = 0.0f;
1708
1709 }
1710
1711 /*---------------------------------------------------------------------*
1712 Paired-Single assembler version
1713 *---------------------------------------------------------------------*
1714 Note that this performs NO error checking.
1715 *---------------------------------------------------------------------*/
1716 #ifdef GEKKO
1717
__PSMTXRotAxisRadInternal(register Mtx m,const register Vec * axis,register f32 sT,register f32 cT)1718 static void __PSMTXRotAxisRadInternal(
1719 register Mtx m,
1720 const register Vec *axis,
1721 register f32 sT,
1722 register f32 cT )
1723 {
1724 register f32 tT, fc0;
1725 register f32 tmp0, tmp1, tmp2, tmp3, tmp4;
1726 register f32 tmp5, tmp6, tmp7, tmp8, tmp9;
1727
1728 tmp9 = 0.5F;
1729 tmp8 = 3.0F;
1730
1731 asm
1732 {
1733 // to make sure cT = (single precision float value)
1734 frsp cT, cT
1735 // tmp0 = [x][y] : LOAD
1736 psq_l tmp0, 0(axis), 0, 0
1737 // to make sure sT = (single precision float value)
1738 frsp sT, sT
1739 // tmp1 = [z][z] : LOAD
1740 lfs tmp1, 8(axis)
1741
1742 // tmp2 = [x*x][y*y]
1743 ps_mul tmp2, tmp0, tmp0
1744 // tmp7 = [1.0F]
1745 fadds tmp7, tmp9, tmp9
1746 // tmp3 = [x*x+z*z][y*y+z*z]
1747 ps_madd tmp3, tmp1, tmp1, tmp2
1748 // fc0 = [0.0F]
1749 fsubs fc0, tmp9, tmp9
1750 // tmp4 = [S = x*x+y*y+z*z][z]
1751 ps_sum0 tmp4, tmp3, tmp1, tmp2
1752
1753 // tT = 1.0F - cT
1754 fsubs tT, tmp7, cT
1755
1756 // tmp5 = [1.0/sqrt(S)] :estimation[E]
1757 frsqrte tmp5, tmp4
1758 // Newton-Rapson refinement step
1759 // E' = E/2(3.0 - E*E*S)
1760 fmuls tmp2, tmp5, tmp5 // E*E
1761 fmuls tmp3, tmp5, tmp9 // E/2
1762 fnmsubs tmp2, tmp2, tmp4, tmp8 // (3-E*E*S)
1763 fmuls tmp5, tmp2, tmp3 // (E/2)(3-E*E*S)
1764
1765 // cT = [c][c]
1766 ps_merge00 cT, cT, cT
1767
1768 // tmp0 = [nx = x/sqrt(S)][ny = y/sqrt(S)]
1769 ps_muls0 tmp0, tmp0, tmp5
1770 // tmp1 = [nz = z/sqrt(S)][nz = z/sqrt(S)]
1771 ps_muls0 tmp1, tmp1, tmp5
1772
1773 // tmp4 = [t*nx][t*ny]
1774 ps_muls0 tmp4, tmp0, tT
1775 // tmp9 = [s*nx][s*ny]
1776 ps_muls0 tmp9, tmp0, sT
1777 // tmp5 = [t*nz][t*nz]
1778 ps_muls0 tmp5, tmp1, tT
1779
1780 // tmp3 = [t*nx*ny][t*ny*ny]
1781 ps_muls1 tmp3, tmp4, tmp0
1782 // tmp2 = [t*nx*nx][t*ny*nx]
1783 ps_muls0 tmp2, tmp4, tmp0
1784 // tmp4 = [t*nx*nz][t*ny*nz]
1785 ps_muls0 tmp4, tmp4, tmp1
1786
1787 // tmp6 = [t*nx*ny-s*nz][t*nx*ny-s*nz]
1788 fnmsubs tmp6, tmp1, sT, tmp3
1789 // tmp7 = [t*nx*ny+s*nz][t*ny*ny+s*nz]
1790 fmadds tmp7, tmp1, sT, tmp3
1791
1792 // tmp0 = [-s*nx][-s*ny]
1793 ps_neg tmp0, tmp9
1794 // tmp8 = [t*nx*nz+s*ny][0] == [m02][m03]
1795 ps_sum0 tmp8, tmp4, fc0, tmp9
1796 // tmp2 = [t*nx*nx+c][t*nx*ny-s*nz] == [m00][m01]
1797 ps_sum0 tmp2, tmp2, tmp6, cT
1798 // tmp3 = [t*nx*ny+s*nz][t*ny*ny+c] == [m10][m11]
1799 ps_sum1 tmp3, cT, tmp7, tmp3
1800 // tmp6 = [t*ny*nz-s*nx][0] == [m12][m13]
1801 ps_sum0 tmp6, tmp0, fc0 ,tmp4
1802
1803 // tmp8 [m02][m03] : STORE
1804 psq_st tmp8, 8(m), 0, 0
1805 // tmp0 = [t*nx*nz-s*ny][t*ny*nz]
1806 ps_sum0 tmp0, tmp4, tmp4, tmp0
1807 // tmp2 [m00][m01] : STORE
1808 psq_st tmp2, 0(m), 0, 0
1809 // tmp5 = [t*nz*nz][t*nz*nz]
1810 ps_muls0 tmp5, tmp5, tmp1
1811 // tmp3 [m10][m11] : STORE
1812 psq_st tmp3, 16(m), 0, 0
1813 // tmp4 = [t*nx*nz-s*ny][t*ny*nz+s*nx] == [m20][m21]
1814 ps_sum1 tmp4, tmp9, tmp0, tmp4
1815 // tmp6 [m12][m13] : STORE
1816 psq_st tmp6, 24(m), 0, 0
1817 // tmp5 = [t*nz*nz+c][0] == [m22][m23]
1818 ps_sum0 tmp5, tmp5, fc0, cT
1819 // tmp4 [m20][m21] : STORE
1820 psq_st tmp4, 32(m), 0, 0
1821 // tmp5 [m22][m23] : STORE
1822 psq_st tmp5, 40(m), 0, 0
1823 }
1824 }
1825
PSMTXRotAxisRad(Mtx m,const Vec * axis,f32 rad)1826 void PSMTXRotAxisRad(
1827 Mtx m,
1828 const Vec *axis,
1829 f32 rad )
1830 {
1831 f32 sinT, cosT;
1832
1833 sinT = sinf(rad);
1834 cosT = cosf(rad);
1835
1836 __PSMTXRotAxisRadInternal(m, axis, sinT, cosT);
1837 }
1838
1839 #endif // GEKKO
1840
1841 /*---------------------------------------------------------------------*
1842
1843 Name: MTXTrans
1844
1845 Description: sets a translation matrix.
1846
1847
1848 Arguments: m matrix to be set
1849
1850 xT x component of translation.
1851
1852 yT y component of translation.
1853
1854 zT z component of translation.
1855
1856 Return: none
1857
1858 *---------------------------------------------------------------------*/
1859 /*---------------------------------------------------------------------*
1860 C version
1861 *---------------------------------------------------------------------*/
C_MTXTrans(Mtx m,f32 xT,f32 yT,f32 zT)1862 void C_MTXTrans ( Mtx m, f32 xT, f32 yT, f32 zT )
1863 {
1864
1865 ASSERTMSG( (m != 0), MTX_TRANS_1 );
1866
1867
1868 m[0][0] = 1.0f; m[0][1] = 0.0f; m[0][2] = 0.0f; m[0][3] = xT;
1869 m[1][0] = 0.0f; m[1][1] = 1.0f; m[1][2] = 0.0f; m[1][3] = yT;
1870 m[2][0] = 0.0f; m[2][1] = 0.0f; m[2][2] = 1.0f; m[2][3] = zT;
1871
1872 }
1873
1874 /*---------------------------------------------------------------------*
1875 Paired-Single assembler version
1876 *---------------------------------------------------------------------*
1877 Note that this performs NO error checking.
1878 *---------------------------------------------------------------------*/
1879 #ifdef GEKKO
PSMTXTrans(register Mtx m,register f32 xT,register f32 yT,register f32 zT)1880 void PSMTXTrans(
1881 register Mtx m,
1882 register f32 xT,
1883 register f32 yT,
1884 register f32 zT
1885 )
1886 {
1887 register f32 c0 = 0.0F;
1888 register f32 c1 = 1.0F;
1889
1890 asm
1891 {
1892 stfs xT, 12(m)
1893 stfs yT, 28(m)
1894 psq_st c0, 4(m), 0, 0
1895 psq_st c0, 32(m), 0, 0
1896 stfs c0, 16(m)
1897 stfs c1, 20(m)
1898 stfs c0, 24(m)
1899 stfs c1, 40(m)
1900 stfs zT, 44(m)
1901 stfs c1, 0(m)
1902 }
1903 }
1904 #endif // GEKKO
1905
1906 /*---------------------------------------------------------------------*
1907
1908 Name: MTXTransApply
1909
1910 Description: This function performs the operation equivalent to
1911 MTXTrans + MTXConcat.
1912
1913
1914 Arguments: src matrix to be operated.
1915
1916 dst resultant matrix from concat.
1917
1918 xT x component of translation.
1919
1920 yT y component of translation.
1921
1922 zT z component of translation.
1923
1924 Return: none
1925
1926 *---------------------------------------------------------------------*/
1927 /*---------------------------------------------------------------------*
1928 C version
1929 *---------------------------------------------------------------------*/
C_MTXTransApply(const Mtx src,Mtx dst,f32 xT,f32 yT,f32 zT)1930 void C_MTXTransApply ( const Mtx src, Mtx dst, f32 xT, f32 yT, f32 zT )
1931 {
1932 ASSERTMSG( (src != 0), MTX_TRANSAPPLY_1 );
1933 ASSERTMSG( (dst != 0), MTX_TRANSAPPLY_1 );
1934
1935 if ( src != dst )
1936 {
1937 dst[0][0] = src[0][0]; dst[0][1] = src[0][1]; dst[0][2] = src[0][2];
1938 dst[1][0] = src[1][0]; dst[1][1] = src[1][1]; dst[1][2] = src[1][2];
1939 dst[2][0] = src[2][0]; dst[2][1] = src[2][1]; dst[2][2] = src[2][2];
1940 }
1941
1942 dst[0][3] = src[0][3] + xT;
1943 dst[1][3] = src[1][3] + yT;
1944 dst[2][3] = src[2][3] + zT;
1945 }
1946
1947 /*---------------------------------------------------------------------*
1948 Paired-Single assembler version
1949 *---------------------------------------------------------------------*
1950 Note that this performs NO error checking.
1951 *---------------------------------------------------------------------*/
1952 #ifdef GEKKO
PSMTXTransApply(const register Mtx src,register Mtx dst,register f32 xT,register f32 yT,register f32 zT)1953 asm void PSMTXTransApply(
1954 const register Mtx src,
1955 register Mtx dst,
1956 register f32 xT,
1957 register f32 yT,
1958 register f32 zT )
1959 {
1960 nofralloc;
1961 psq_l fp4, 0(src), 0, 0;
1962 frsp xT, xT; // to make sure xT = single precision
1963 psq_l fp5, 8(src), 0, 0;
1964 frsp yT, yT; // to make sure yT = single precision
1965 psq_l fp7, 24(src), 0, 0;
1966 frsp zT, zT; // to make sure zT = single precision
1967 psq_l fp8, 40(src), 0, 0;
1968 psq_st fp4, 0(dst), 0, 0;
1969 ps_sum1 fp5, xT, fp5, fp5;
1970 psq_l fp6, 16(src), 0, 0;
1971 psq_st fp5, 8(dst), 0, 0;
1972 ps_sum1 fp7, yT, fp7, fp7;
1973 psq_l fp9, 32(src), 0, 0;
1974 psq_st fp6, 16(dst), 0, 0;
1975 ps_sum1 fp8, zT, fp8, fp8;
1976 psq_st fp7, 24(dst), 0, 0;
1977 psq_st fp9, 32(dst), 0, 0;
1978 psq_st fp8, 40(dst), 0, 0;
1979 blr;
1980 }
1981 #endif // GEKKO
1982
1983
1984 /*---------------------------------------------------------------------*
1985
1986 Name: MTXScale
1987
1988 Description: sets a scaling matrix.
1989
1990
1991 Arguments: m matrix to be set
1992
1993 xS x scale factor.
1994
1995 yS y scale factor.
1996
1997 zS z scale factor.
1998
1999 Return: none
2000
2001 *---------------------------------------------------------------------*/
2002 /*---------------------------------------------------------------------*
2003 C version
2004 *---------------------------------------------------------------------*/
C_MTXScale(Mtx m,f32 xS,f32 yS,f32 zS)2005 void C_MTXScale ( Mtx m, f32 xS, f32 yS, f32 zS )
2006 {
2007 ASSERTMSG( (m != 0), MTX_SCALE_1 );
2008
2009
2010 m[0][0] = xS; m[0][1] = 0.0f; m[0][2] = 0.0f; m[0][3] = 0.0f;
2011 m[1][0] = 0.0f; m[1][1] = yS; m[1][2] = 0.0f; m[1][3] = 0.0f;
2012 m[2][0] = 0.0f; m[2][1] = 0.0f; m[2][2] = zS; m[2][3] = 0.0f;
2013 }
2014
2015 /*---------------------------------------------------------------------*
2016 Paired-Single assembler version
2017 *---------------------------------------------------------------------*
2018 Note that this performs NO error checking.
2019 *---------------------------------------------------------------------*/
2020 #ifdef GEKKO
PSMTXScale(register Mtx m,register f32 xS,register f32 yS,register f32 zS)2021 void PSMTXScale(
2022 register Mtx m,
2023 register f32 xS,
2024 register f32 yS,
2025 register f32 zS
2026 )
2027 {
2028 register f32 c0 = 0.0F;
2029
2030 asm
2031 {
2032 stfs xS, 0(m)
2033 psq_st c0, 4(m), 0, 0
2034 psq_st c0, 12(m), 0, 0
2035 stfs yS, 20(m)
2036 psq_st c0, 24(m), 0, 0
2037 psq_st c0, 32(m), 0, 0
2038 stfs zS, 40(m)
2039 stfs c0, 44(m)
2040 }
2041 }
2042 #endif // GEKKO
2043
2044 /*---------------------------------------------------------------------*
2045
2046 Name: MTXScaleApply
2047
2048 Description: This function performs the operation equivalent to
2049 MTXScale + MTXConcat
2050
2051 Arguments: src matrix to be operated.
2052
2053 dst resultant matrix from concat.
2054
2055 xS x scale factor.
2056
2057 yS y scale factor.
2058
2059 zS z scale factor.
2060
2061 Return: none
2062
2063 *---------------------------------------------------------------------*/
2064 /*---------------------------------------------------------------------*
2065 C version
2066 *---------------------------------------------------------------------*/
C_MTXScaleApply(const Mtx src,Mtx dst,f32 xS,f32 yS,f32 zS)2067 void C_MTXScaleApply ( const Mtx src, Mtx dst, f32 xS, f32 yS, f32 zS )
2068 {
2069 ASSERTMSG( (src != 0), MTX_SCALEAPPLY_1 );
2070 ASSERTMSG( (dst != 0), MTX_SCALEAPPLY_2 );
2071
2072 dst[0][0] = src[0][0] * xS; dst[0][1] = src[0][1] * xS;
2073 dst[0][2] = src[0][2] * xS; dst[0][3] = src[0][3] * xS;
2074
2075 dst[1][0] = src[1][0] * yS; dst[1][1] = src[1][1] * yS;
2076 dst[1][2] = src[1][2] * yS; dst[1][3] = src[1][3] * yS;
2077
2078 dst[2][0] = src[2][0] * zS; dst[2][1] = src[2][1] * zS;
2079 dst[2][2] = src[2][2] * zS; dst[2][3] = src[2][3] * zS;
2080 }
2081
2082 /*---------------------------------------------------------------------*
2083 Paired-Single assembler version
2084 *---------------------------------------------------------------------*
2085 Note that this performs NO error checking.
2086 *---------------------------------------------------------------------*/
2087 #ifdef GEKKO
PSMTXScaleApply(const register Mtx src,register Mtx dst,register f32 xS,register f32 yS,register f32 zS)2088 asm void PSMTXScaleApply (
2089 const register Mtx src,
2090 register Mtx dst,
2091 register f32 xS,
2092 register f32 yS,
2093 register f32 zS )
2094 {
2095 nofralloc;
2096 frsp xS, xS; // to make sure xS = single precision
2097 psq_l fp4, 0(src), 0, 0;
2098 frsp yS, yS; // to make sure yS = single precision
2099 psq_l fp5, 8(src), 0, 0;
2100 frsp zS, zS; // to make sure zS = single precision
2101 ps_muls0 fp4, fp4, xS;
2102 psq_l fp6, 16(src), 0, 0;
2103 ps_muls0 fp5, fp5, xS;
2104 psq_l fp7, 24(src), 0, 0;
2105 ps_muls0 fp6, fp6, yS;
2106 psq_l fp8, 32(src), 0, 0;
2107 psq_st fp4, 0(dst), 0, 0;
2108 ps_muls0 fp7, fp7, yS;
2109 psq_l fp2, 40(src), 0, 0;
2110 psq_st fp5, 8(dst), 0, 0;
2111 ps_muls0 fp8, fp8, zS;
2112 psq_st fp6, 16(dst), 0, 0;
2113 ps_muls0 fp2, fp2, zS;
2114 psq_st fp7, 24(dst), 0, 0;
2115 psq_st fp8, 32(dst), 0, 0;
2116 psq_st fp2, 40(dst), 0, 0;
2117 blr;
2118 }
2119 #endif // GEKKO
2120
2121 /*---------------------------------------------------------------------*
2122
2123 Name: MTXQuat
2124
2125 Description: sets a rotation matrix from a quaternion.
2126
2127
2128 Arguments: m matrix to be set
2129
2130 q ptr to quaternion.
2131
2132 Return: none
2133
2134 *---------------------------------------------------------------------*/
2135 /*---------------------------------------------------------------------*
2136 C version
2137 *---------------------------------------------------------------------*/
C_MTXQuat(Mtx m,const Quaternion * q)2138 void C_MTXQuat ( Mtx m, const Quaternion *q )
2139 {
2140
2141 f32 s,xs,ys,zs,wx,wy,wz,xx,xy,xz,yy,yz,zz;
2142
2143
2144 ASSERTMSG( (m != 0), MTX_QUAT_1 );
2145 ASSERTMSG( (q != 0), MTX_QUAT_2 );
2146 ASSERTMSG( ( q->x || q->y || q->z || q->w ), MTX_QUAT_3 );
2147
2148
2149 s = 2.0f / ( (q->x * q->x) + (q->y * q->y) + (q->z * q->z) + (q->w * q->w) );
2150
2151 xs = q->x * s; ys = q->y * s; zs = q->z * s;
2152 wx = q->w * xs; wy = q->w * ys; wz = q->w * zs;
2153 xx = q->x * xs; xy = q->x * ys; xz = q->x * zs;
2154 yy = q->y * ys; yz = q->y * zs; zz = q->z * zs;
2155
2156
2157 m[0][0] = 1.0f - (yy + zz);
2158 m[0][1] = xy - wz;
2159 m[0][2] = xz + wy;
2160 m[0][3] = 0.0f;
2161
2162 m[1][0] = xy + wz;
2163 m[1][1] = 1.0f - (xx + zz);
2164 m[1][2] = yz - wx;
2165 m[1][3] = 0.0f;
2166
2167 m[2][0] = xz - wy;
2168 m[2][1] = yz + wx;
2169 m[2][2] = 1.0f - (xx + yy);
2170 m[2][3] = 0.0f;
2171
2172 }
2173
2174 /*---------------------------------------------------------------------*
2175 Paired-Single assembler version
2176 *---------------------------------------------------------------------*
2177 Note that this performs NO error checking.
2178 *---------------------------------------------------------------------*/
2179 #ifdef GEKKO
PSMTXQuat(register Mtx m,const register Quaternion * q)2180 void PSMTXQuat ( register Mtx m, const register Quaternion *q )
2181 {
2182 register f32 c_zero, c_one, c_two, scale;
2183 register f32 tmp0, tmp1, tmp2, tmp3, tmp4;
2184 register f32 tmp5, tmp6, tmp7, tmp8, tmp9;
2185
2186 c_one = 1.0F;
2187
2188 asm
2189 {
2190 // tmp0 = [qx][qy] : LOAD
2191 psq_l tmp0, 0(q), 0, 0
2192 // tmp1 = [qz][qw] : LOAD
2193 psq_l tmp1, 8(q), 0, 0
2194 // c_zero = [0.0F][0.0F]
2195 fsubs c_zero, c_one, c_one
2196 // c_two = [2.0F][2.0F]
2197 fadds c_two, c_one, c_one
2198 // tmp2 = [qx*qx][qy*qy]
2199 ps_mul tmp2, tmp0, tmp0
2200 // tmp5 = [qy][qx]
2201 ps_merge10 tmp5, tmp0, tmp0
2202 // tmp4 = [qx*qx+qz*qz][qy*qy+qw*qw]
2203 ps_madd tmp4, tmp1, tmp1, tmp2
2204 // tmp3 = [qz*qz][qw*qw]
2205 ps_mul tmp3, tmp1, tmp1
2206 // scale = [qx*qx+qy*qy+qz*qz+qw*qw][?]
2207 ps_sum0 scale, tmp4, tmp4, tmp4
2208 // tmp7 = [qy*qw][qx*qw]
2209 ps_muls1 tmp7, tmp5, tmp1
2210 // Newton-Rapson refinment (1/X) : E' = 2E-X*E*E
2211 // tmp9 = [E = Est.(1/X)]
2212 fres tmp9, scale
2213 // tmp4 = [qx*qx+qz*qz][qy*qy+qz*qz]
2214 ps_sum1 tmp4, tmp3, tmp4, tmp2
2215 // scale = [2-X*E]
2216 ps_nmsub scale, scale, tmp9, c_two
2217 // tmp6 = [qz*qw][?]
2218 ps_muls1 tmp6, tmp1, tmp1
2219 // scale = [E(2-scale*E) = E']
2220 ps_mul scale, tmp9, scale
2221 // tmp2 = [qx*qx+qy*qy]
2222 ps_sum0 tmp2, tmp2, tmp2, tmp2
2223 // scale = [s = 2E' = 2.0F/(qx*qx+qy*qy+qz*qz+qw*qw)]
2224 fmuls scale, scale, c_two
2225 // tmp8 = [qx*qy+qz*qw][?]
2226 ps_madd tmp8, tmp0, tmp5, tmp6
2227 // tmp6 = [qx*qy-qz*qw][?]
2228 ps_msub tmp6, tmp0, tmp5, tmp6
2229 // c_zero [m03] : STORE
2230 psq_st c_zero, 12(m), 1, 0
2231 // tmp2 = [1-s(qx*qx+qy*qy)] : [m22]
2232 ps_nmsub tmp2, tmp2, scale, c_one
2233 // tmp4 = [1-s(qx*qx+qz*qz)][1-s(qy*qy+qz*qz)] : [m11][m00]
2234 ps_nmsub tmp4, tmp4, scale, c_one
2235 // c_zero [m23] : STORE
2236 psq_st c_zero, 44(m), 1, 0
2237 // tmp8 = [s(qx*qy+qz*qw)][?] : [m10]
2238 ps_mul tmp8, tmp8, scale
2239 // tmp6 = [s(qx*qy-qz*qw)][?] : [m01]
2240 ps_mul tmp6, tmp6, scale
2241 // tmp2 [m22] : STORE
2242 psq_st tmp2, 40(m), 1, 0
2243 // tmp5 = [qx*qz+qy*qw][qy*qz+qx*qw]
2244 ps_madds0 tmp5, tmp0, tmp1, tmp7
2245 // tmp1 = [m10][m11]
2246 ps_merge00 tmp1, tmp8, tmp4
2247 // tmp7 = [qx*qz-qy*qw][qy*qz-qx*qw]
2248 ps_nmsub tmp7, tmp7, c_two, tmp5
2249 // tmp0 = [m00][m01]
2250 ps_merge10 tmp0, tmp4, tmp6
2251 // tmp1 [m10][m11] : STORE
2252 psq_st tmp1, 16(m), 0, 0
2253 // tmp5 = [s(qx*qz+qy*qw)][s(qy*qz+qx*qw)] : [m02][m21]
2254 ps_mul tmp5, tmp5, scale
2255 // tmp7 = [s(qx*qz-qy*qw)][s(qy*qz-qx*qw)] : [m20][m12]
2256 ps_mul tmp7, tmp7, scale
2257 // tmp0 [m00][m01] : STORE
2258 psq_st tmp0, 0(m), 0, 0
2259 // tmp5 [m02] : STORE
2260 psq_st tmp5, 8(m), 1, 0
2261 // tmp3 = [m12][m13]
2262 ps_merge10 tmp3, tmp7, c_zero
2263 // tmp9 = [m20][m21]
2264 ps_merge01 tmp9, tmp7, tmp5
2265 // tmp3 [m12][m13] : STORE
2266 psq_st tmp3, 24(m), 0, 0
2267 // tmp9 [m20][m21] : STORE
2268 psq_st tmp9, 32(m), 0, 0
2269
2270 }
2271 }
2272 #endif // GEKKO
2273
2274 /*---------------------------------------------------------------------*
2275
2276 Name: MTXReflect
2277
2278 Description: reflect a rotation matrix with respect to a plane.
2279
2280
2281 Arguments: m matrix to be set
2282
2283 p point on the planar reflector.
2284
2285 n normal of the planar reflector.
2286
2287 Return: none
2288
2289 *---------------------------------------------------------------------*/
2290 /*---------------------------------------------------------------------*
2291 C version
2292 *---------------------------------------------------------------------*/
C_MTXReflect(Mtx m,const Vec * p,const Vec * n)2293 void C_MTXReflect ( Mtx m, const Vec *p, const Vec *n )
2294 {
2295 f32 vxy, vxz, vyz, pdotn;
2296
2297 vxy = -2.0f * n->x * n->y;
2298 vxz = -2.0f * n->x * n->z;
2299 vyz = -2.0f * n->y * n->z;
2300 pdotn = 2.0f * C_VECDotProduct(p, n);
2301
2302 m[0][0] = 1.0f - 2.0f * n->x * n->x;
2303 m[0][1] = vxy;
2304 m[0][2] = vxz;
2305 m[0][3] = pdotn * n->x;
2306
2307 m[1][0] = vxy;
2308 m[1][1] = 1.0f - 2.0f * n->y * n->y;
2309 m[1][2] = vyz;
2310 m[1][3] = pdotn * n->y;
2311
2312 m[2][0] = vxz;
2313 m[2][1] = vyz;
2314 m[2][2] = 1.0f - 2.0f * n->z * n->z;
2315 m[2][3] = pdotn * n->z;
2316 }
2317
2318 /*---------------------------------------------------------------------*
2319 Paired-Single assembler version
2320 *---------------------------------------------------------------------*/
2321 #ifdef GEKKO
PSMTXReflect(register Mtx m,const register Vec * p,const register Vec * n)2322 void PSMTXReflect( register Mtx m, const register Vec *p, const register Vec *n )
2323 {
2324 register f32 c_one = 1.0F;
2325 register f32 vn_xy, vn_z1, n2vn_xy, n2vn_z1, pdotn;
2326 register f32 tmp0, tmp1, tmp2, tmp3;
2327 register f32 tmp4, tmp5, tmp6, tmp7;
2328
2329 asm
2330 {
2331 // vn_z1 = [nz][1.0F] : LOAD
2332 psq_l vn_z1, 8(n), 1, 0
2333 // vn_xy = [nx][ny] : LOAD
2334 psq_l vn_xy, 0(n), 0, 0
2335
2336 // tmp0 = [px][py] : LOAD
2337 psq_l tmp0, 0(p), 0, 0
2338 // n2vn_z1 = [-2nz][-2.0F]
2339 ps_nmadd n2vn_z1, vn_z1, c_one, vn_z1
2340 // tmp1 = [pz][1.0F] : LOAD
2341 psq_l tmp1, 8(p), 1, 0
2342 // n2vn_xy = [-2nx][-2ny]
2343 ps_nmadd n2vn_xy, vn_xy, c_one, vn_xy
2344
2345 // tmp4 = [-2nx*nz][-2ny*nz] : [m20][m21]
2346 ps_muls0 tmp4, vn_xy, n2vn_z1
2347 // pdotn = [-2(px*nx)][-2(py*ny)]
2348 ps_mul pdotn, n2vn_xy, tmp0
2349 // tmp2 = [-2nx*nx][-2nx*ny]
2350 ps_muls0 tmp2, vn_xy, n2vn_xy
2351 // pdotn = [-2(px*nx+py*ny)][?]
2352 ps_sum0 pdotn, pdotn, pdotn, pdotn
2353 // tmp3 = [-2nx*ny][-2ny*ny]
2354 ps_muls1 tmp3, vn_xy, n2vn_xy
2355 // tmp4 = [m20][m21] : STORE
2356 psq_st tmp4, 32(m), 0, 0
2357 // tmp2 = [1-2nx*nx][-2nx*ny] : [m00][m01]
2358 ps_sum0 tmp2, tmp2, tmp2, c_one
2359 // pdotn = [2(px*nx+py*ny+pz*nz)][?]
2360 ps_nmadd pdotn, n2vn_z1, tmp1, pdotn
2361 // tmp3 = [-2nx*ny][1-2ny*ny] : [m10][m11]
2362 ps_sum1 tmp3, c_one, tmp3, tmp3
2363 // tmp2 = [m00][m01] : STORE
2364 psq_st tmp2, 0(m), 0, 0
2365 // tmp5 = [pdotn*nx][pdotn*ny]
2366 ps_muls0 tmp5, vn_xy, pdotn
2367 // tmp6 = [-2nz][pdotn]
2368 ps_merge00 tmp6, n2vn_z1, pdotn
2369 // tmp3 = [m10][m11] : STORE
2370 psq_st tmp3, 16(m), 0, 0
2371
2372 // tmp7 = [-2nx*nz][pdotn*nx] : [m02][m03]
2373 ps_merge00 tmp7, tmp4, tmp5
2374 // tmp6 = [-2nz*nz][pdotn*nz]
2375 ps_muls0 tmp6, tmp6, vn_z1
2376 // tmp5 = [-2ny*nz][pdotn*ny] : [m12][m13]
2377 ps_merge11 tmp5, tmp4, tmp5
2378 // tmp7 = [m02][m03] : STORE
2379 psq_st tmp7, 8(m), 0, 0
2380 // tmp6 = [1-2nz*nz][pdotn*nz] : [m22][m23]
2381 ps_sum0 tmp6, tmp6, tmp6, c_one
2382 // tmp5 = [m12][m13] : STORE
2383 psq_st tmp5, 24(m), 0, 0
2384 // tmp6 = [m22][m23] : STORE
2385 psq_st tmp6, 40(m), 0, 0
2386 }
2387 }
2388 #endif // GEKKO
2389
2390 /*---------------------------------------------------------------------*
2391
2392
2393
2394
2395
2396 VIEW SECTION
2397
2398
2399
2400
2401
2402 *---------------------------------------------------------------------*/
2403
2404
2405
2406
2407
2408
2409 /*---------------------------------------------------------------------*
2410
2411 Name: MTXLookAt
2412
2413 Description: compute a matrix to transform points to camera coordinates.
2414
2415
2416 Arguments: m matrix to be set
2417
2418 camPos camera position.
2419
2420 camUp camera 'up' direction.
2421
2422 target camera aim point.
2423
2424
2425 Return: none
2426
2427 *---------------------------------------------------------------------*/
2428 /*---------------------------------------------------------------------*
2429 C version
2430 *---------------------------------------------------------------------*/
C_MTXLookAt(Mtx m,const Point3d * camPos,const Vec * camUp,const Point3d * target)2431 void C_MTXLookAt ( Mtx m, const Point3d *camPos, const Vec *camUp, const Point3d *target )
2432 {
2433
2434 Vec vLook,vRight,vUp;
2435
2436
2437 ASSERTMSG( (m != 0), MTX_LOOKAT_1 );
2438 ASSERTMSG( (camPos != 0), MTX_LOOKAT_2 );
2439 ASSERTMSG( (camUp != 0), MTX_LOOKAT_3 );
2440 ASSERTMSG( (target != 0), MTX_LOOKAT_4 );
2441
2442
2443 // compute unit target vector
2444 // use negative value to look down (-Z) axis
2445 vLook.x = camPos->x - target->x;
2446 vLook.y = camPos->y - target->y;
2447 vLook.z = camPos->z - target->z;
2448 VECNormalize( &vLook,&vLook );
2449
2450
2451 // vRight = camUp x vLook
2452 VECCrossProduct ( camUp, &vLook, &vRight );
2453 VECNormalize( &vRight,&vRight );
2454
2455
2456 // vUp = vLook x vRight
2457 VECCrossProduct( &vLook, &vRight, &vUp );
2458 // Don't need to normalize vUp since it should already be unit length
2459 // VECNormalize( &vUp, &vUp );
2460
2461
2462 m[0][0] = vRight.x;
2463 m[0][1] = vRight.y;
2464 m[0][2] = vRight.z;
2465 m[0][3] = -( camPos->x * vRight.x + camPos->y * vRight.y + camPos->z * vRight.z );
2466
2467 m[1][0] = vUp.x;
2468 m[1][1] = vUp.y;
2469 m[1][2] = vUp.z;
2470 m[1][3] = -( camPos->x * vUp.x + camPos->y * vUp.y + camPos->z * vUp.z );
2471
2472 m[2][0] = vLook.x;
2473 m[2][1] = vLook.y;
2474 m[2][2] = vLook.z;
2475 m[2][3] = -( camPos->x * vLook.x + camPos->y * vLook.y + camPos->z * vLook.z );
2476
2477
2478 }
2479
2480 /*---------------------------------------------------------------------*
2481
2482
2483
2484
2485
2486 TEXTURE PROJECTION SECTION
2487
2488
2489
2490
2491
2492 *---------------------------------------------------------------------*/
2493
2494
2495 /*---------------------------------------------------------------------*
2496
2497 Name: MTXLightFrustum
2498
2499 Description: Compute a 3x4 projection matrix for texture projection
2500
2501
2502 Arguments: m 3x4 matrix to be set
2503
2504 t top coord. of view volume at the near clipping plane
2505
2506 b bottom coord of view volume at the near clipping plane
2507
2508 l left coord. of view volume at near clipping plane
2509
2510 r right coord. of view volume at near clipping plane
2511
2512 n positive distance from camera to near clipping plane
2513
2514 scaleS scale in the S direction for projected coordinates
2515 (usually 0.5)
2516
2517 scaleT scale in the T direction for projected coordinates
2518 (usually 0.5)
2519
2520 transS translate in the S direction for projected coordinates
2521 (usually 0.5)
2522
2523 transT translate in the T direction for projected coordinates
2524 (usually 0.5)
2525
2526
2527 Return: none.
2528
2529 *---------------------------------------------------------------------*/
2530 /*---------------------------------------------------------------------*
2531 C version
2532 *---------------------------------------------------------------------*/
C_MTXLightFrustum(Mtx m,float t,float b,float l,float r,float n,float scaleS,float scaleT,float transS,float transT)2533 void C_MTXLightFrustum ( Mtx m, float t, float b, float l, float r, float n,
2534 float scaleS, float scaleT, float transS,
2535 float transT )
2536 {
2537 f32 tmp;
2538
2539
2540 ASSERTMSG( (m != 0), MTX_LIGHT_FRUSTUM_1 );
2541 ASSERTMSG( (t != b), MTX_LIGHT_FRUSTUM_2 );
2542 ASSERTMSG( (l != r), MTX_LIGHT_FRUSTUM_3 );
2543
2544
2545 // NOTE: Be careful about "l" vs. "1" below!!!
2546
2547 tmp = 1.0f / (r - l);
2548 m[0][0] = ((2*n) * tmp) * scaleS;
2549 m[0][1] = 0.0f;
2550 m[0][2] = (((r + l) * tmp) * scaleS) - transS;
2551 m[0][3] = 0.0f;
2552
2553 tmp = 1.0f / (t - b);
2554 m[1][0] = 0.0f;
2555 m[1][1] = ((2*n) * tmp) * scaleT;
2556 m[1][2] = (((t + b) * tmp) * scaleT) - transT;
2557 m[1][3] = 0.0f;
2558
2559 m[2][0] = 0.0f;
2560 m[2][1] = 0.0f;
2561 m[2][2] = -1.0f;
2562 m[2][3] = 0.0f;
2563 }
2564
2565 /*---------------------------------------------------------------------*
2566
2567 Name: MTXLightPerspective
2568
2569 Description: compute a 3x4 perspective projection matrix from
2570 field of view and aspect ratio for texture projection.
2571
2572
2573 Arguments: m 3x4 matrix to be set
2574
2575 fovy total field of view in in degrees in the YZ plane
2576
2577 aspect ratio of view window width:height (X / Y)
2578
2579 scaleS scale in the S direction for projected coordinates
2580 (usually 0.5)
2581
2582 scaleT scale in the T direction for projected coordinates
2583 (usually 0.5)
2584
2585 transS translate in the S direction for projected coordinates
2586 (usually 0.5)
2587
2588 transT translate in the T direction for projected coordinates
2589 (usually 0.5)
2590
2591
2592 Return: none
2593
2594 *---------------------------------------------------------------------*/
2595 /*---------------------------------------------------------------------*
2596 C version
2597 *---------------------------------------------------------------------*/
C_MTXLightPerspective(Mtx m,f32 fovY,f32 aspect,float scaleS,float scaleT,float transS,float transT)2598 void C_MTXLightPerspective ( Mtx m, f32 fovY, f32 aspect, float scaleS,
2599 float scaleT, float transS, float transT )
2600 {
2601 f32 angle;
2602 f32 cot;
2603
2604 ASSERTMSG( (m != 0), MTX_LIGHT_PERSPECTIVE_1 );
2605 ASSERTMSG( ( (fovY > 0.0) && ( fovY < 180.0) ), MTX_LIGHT_PERSPECTIVE_2 );
2606 ASSERTMSG( (aspect != 0), MTX_LIGHT_PERSPECTIVE_3 );
2607
2608 // find the cotangent of half the (YZ) field of view
2609 angle = fovY * 0.5f;
2610 angle = MTXDegToRad( angle );
2611
2612 cot = 1.0f / tanf(angle);
2613
2614 m[0][0] = (cot / aspect) * scaleS;
2615 m[0][1] = 0.0f;
2616 m[0][2] = -transS;
2617 m[0][3] = 0.0f;
2618
2619 m[1][0] = 0.0f;
2620 m[1][1] = cot * scaleT;
2621 m[1][2] = -transT;
2622 m[1][3] = 0.0f;
2623
2624 m[2][0] = 0.0f;
2625 m[2][1] = 0.0f;
2626 m[2][2] = -1.0f;
2627 m[2][3] = 0.0f;
2628 }
2629
2630 /*---------------------------------------------------------------------*
2631
2632 Name: MTXLightOrtho
2633
2634 Description: compute a 3x4 orthographic projection matrix.
2635
2636
2637 Arguments: m matrix to be set
2638
2639 t top coord. of parallel view volume
2640
2641 b bottom coord of parallel view volume
2642
2643 l left coord. of parallel view volume
2644
2645 r right coord. of parallel view volume
2646
2647 scaleS scale in the S direction for projected coordinates
2648 (usually 0.5)
2649
2650 scaleT scale in the T direction for projected coordinates
2651 (usually 0.5)
2652
2653 transS translate in the S direction for projected coordinates
2654 (usually 0.5)
2655
2656 transT translate in the T direction for projected coordinates
2657 (usually 0.5)
2658
2659
2660 Return: none
2661
2662 *---------------------------------------------------------------------*/
2663 /*---------------------------------------------------------------------*
2664 C version
2665 *---------------------------------------------------------------------*/
C_MTXLightOrtho(Mtx m,f32 t,f32 b,f32 l,f32 r,float scaleS,float scaleT,float transS,float transT)2666 void C_MTXLightOrtho ( Mtx m, f32 t, f32 b, f32 l, f32 r, float scaleS,
2667 float scaleT, float transS, float transT )
2668 {
2669 f32 tmp;
2670
2671
2672 ASSERTMSG( (m != 0), MTX_LIGHT_ORTHO_1 );
2673 ASSERTMSG( (t != b), MTX_LIGHT_ORTHO_2 );
2674 ASSERTMSG( (l != r), MTX_LIGHT_ORTHO_3 );
2675
2676
2677 // NOTE: Be careful about "l" vs. "1" below!!!
2678
2679 tmp = 1.0f / (r - l);
2680 m[0][0] = (2.0f * tmp * scaleS);
2681 m[0][1] = 0.0f;
2682 m[0][2] = 0.0f;
2683 m[0][3] = ((-(r + l) * tmp) * scaleS) + transS;
2684
2685 tmp = 1.0f / (t - b);
2686 m[1][0] = 0.0f;
2687 m[1][1] = (2.0f * tmp) * scaleT;
2688 m[1][2] = 0.0f;
2689 m[1][3] = ((-(t + b) * tmp)* scaleT) + transT;
2690
2691 m[2][0] = 0.0f;
2692 m[2][1] = 0.0f;
2693 m[2][2] = 0.0f;
2694 m[2][3] = 1.0f;
2695 }
2696
2697 /*===========================================================================*/
2698
2699 #if ( __MWERKS__ == 0x00004100 )
2700 #pragma defer_codegen reset
2701 #endif
2702
2703