/*---------------------------------------------------------------------------* Project: matrix vector Library File: mtx.c Copyright 1998 - 2006 Nintendo. All rights reserved. These coded instructions, statements, and computer programs contain proprietary information of Nintendo of America Inc. and/or Nintendo Company Ltd., and are protected by Federal copyright law. They may not be disclosed to third parties or copied or duplicated in any form, in whole or in part, without the prior written consent of Nintendo. $Log: mtx.c,v $ Revision 1.6 2008/05/23 08:21:52 nakano_yoshinobu Improved the performance of PSMTXInverse and PSInvXpose Revision 1.5 2008/05/23 01:50:36 nakano_yoshinobu Fixed overflow in PSMTXInverse. Changed ( E' = 2E - K*E*E ) to ( E' = ( 2 - K * E ) * E ). Revision 1.4 2007/01/11 00:45:26 aka Removed win32.h. Revision 1.3 2006/02/20 04:25:42 mitu changed include path from dolphin/ to revolution/. Revision 1.2 2005/12/30 06:01:51 hirose Temporary workaround for CW3.0 Alpha3 problem. 30 02/06/26 13:49 Hirose Fixed a register allocation problem with DEBUG build. 29 02/06/20 11:21 Hirose Added MTXConcatArray. 28 02/04/11 13:10 Hirose const type specifier support. (worked by Hiratsu@IRD) 27 02/02/18 9:27 Hirose Workaround for floating-point precision mismatch issue. 26 9/13/01 4:28p Hirose Removed unnecessary possibilities of zero division exception. 25 8/28/01 3:48p Hirose Removed one refinement process in the calculation of square root. 24 7/30/01 10:21p Hirose Changes for function definitions. 23 7/23/01 8:45p Hirose Now all model matrix functions and matrix-vector functions have PS version. 22 7/07/01 7:37p Hirose added PS versions of MTXTrans* / MTXScale* functions. 21 3/29/01 8:23p Hirose fixed assertion messages 20 3/29/01 3:06p Hirose added MTXInvXpose 19 3/16/01 11:43p Hirose added PSMTXInverse 18 2/22/01 11:58p Hirose Moved some sections into other files. Added some new Paired-Single functions. Etc. 17 7/31/00 2:07p Carl Removed redundant verify in MTXRotRad. Minor potential optimization in VECNormalize. 16 7/21/00 4:19p Carl Removed as many divides as possible. 15 7/21/00 2:58p Carl Removed unnecessary VECNormalize in MTXLookAt. 14 7/12/00 4:41p John Substitues MTXConcat and MTXMultVecArray with their paired-singles equivalent for Gekko nondebug builds. 13 7/07/00 7:09p Dante PC Compatibility 12 5/10/00 1:50p Hirose added radian-based rotation matrix functions moved paired-single matrix stuff into an another source file 11 4/13/00 11:26a Danm Restore hgh performance version of ROMulVecArray. 10 4/10/00 11:56a Danm Added 2 matrix skinning support. 9 3/27/00 6:38p Carl Fixed other comments. 8 3/27/00 6:20p Carl Changed comment regarding z scale in MTXFrustum. 7 3/22/00 2:01p John Added VECSquareDistance and VECDistance. 6 2/14/00 2:04p Mikepc detabbed code. 5 2/11/00 4:22p Tian Cleaned up reordered paired single code 4 1/27/00 5:21p Tian Documented paired single ops. 3 1/27/00 4:56p Tian Implemented fast paired single matrix ops : PSMTXReorder, PSMTXConcat, PSMTXROMultVecArray, PSMTXMultVecArray, PSMTXMultS16VecArray, PSMTXROMultS16VecArray 2 12/06/99 12:40p Alligator modify proj mtx to scale/translate z to (0, w) range for hardware 20 11/17/99 4:32p Ryan update to fin win32 compiler warnings 19 11/12/99 12:08p Mikepc changed #include to #include"mtxAssert.h" to reflect new location of mtxAssert.h file. 18 11/10/99 7:21p Hirose added singular case check for VECHalfAngle 17 11/10/99 4:40p Alligator added MTXReflect 16 10/06/99 12:28p Yasu Appended MTXMultVecSR and MTXMultVecArraySR 15 8/31/99 5:41p Shiki Revised to use sqrtf() instead of sqrt(). 14 8/31/99 5:18p Shiki Revised to use sinf()/cosf() instead of sin()/cos(). 13 7/28/99 3:38p Ryan 12 7/28/99 11:30a Ryan Added Texture Projection functions 11 7/02/99 12:53p Mikepc changed mtx row/col $NoKeywords: $ *---------------------------------------------------------------------------*/ #include #include #include "mtxAssert.h" #if ( __MWERKS__ == 0x00004100 ) #pragma defer_codegen on // This will be removed when compiler is fixed. #endif /*---------------------------------------------------------------------* Constants *---------------------------------------------------------------------*/ static f32 Unit01[] = { 0.0f, 1.0f }; /*---------------------------------------------------------------------* GENERAL SECTION *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* Name: MTXIdentity Description: sets a matrix to identity Arguments: m : matrix to be set Return: none *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* C version *---------------------------------------------------------------------*/ void C_MTXIdentity ( Mtx m ) { ASSERTMSG( (m != 0), MTX_IDENTITY_1 ); m[0][0] = 1.0f; m[0][1] = 0.0f; m[0][2] = 0.0f; m[0][3] = 0.0f; m[1][0] = 0.0f; m[1][1] = 1.0f; m[1][2] = 0.0f; m[1][3] = 0.0f; m[2][0] = 0.0f; m[2][1] = 0.0f; m[2][2] = 1.0f; m[2][3] = 0.0f; } /*---------------------------------------------------------------------* Paired-Single assembler version *---------------------------------------------------------------------* Note that this performs NO error checking. Actually there is not much performance advantage. *---------------------------------------------------------------------*/ #ifdef GEKKO void PSMTXIdentity ( register Mtx m // r3 ) { register f32 c_zero = 0.0F; // { 0.0F, 0.0F }; register f32 c_one = 1.0F; // { 1.0F, 1.0F }; register f32 c_01; register f32 c_10; asm { psq_st c_zero, 8(m), 0, 0 // m[0][2], m[0][3] ps_merge01 c_01, c_zero, c_one // { 0.1F, 1.0F } psq_st c_zero, 24(m), 0, 0 // m[1][2], m[1][3] ps_merge10 c_10, c_one, c_zero // fp2 = { 1.0F, 0.0F } psq_st c_zero, 32(m), 0, 0 // m[2][0], m[2][1] psq_st c_01, 16(m), 0, 0 // m[1][0], m[1][1] psq_st c_10, 0(m), 0, 0 // m[0][0], m[0][1] psq_st c_10, 40(m), 0, 0 // m[2][2], m[2][3] } } #endif // GEKKO /*---------------------------------------------------------------------* Name: MTXCopy Description: copies the contents of one matrix into another Arguments: src source matrix for copy dst destination matrix for copy Return: none *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* C version *---------------------------------------------------------------------*/ void C_MTXCopy ( const Mtx src, Mtx dst ) { ASSERTMSG( (src != 0) , MTX_COPY_1 ); ASSERTMSG( (dst != 0) , MTX_COPY_2 ); if( src == dst ) { return; } dst[0][0] = src[0][0]; dst[0][1] = src[0][1]; dst[0][2] = src[0][2]; dst[0][3] = src[0][3]; dst[1][0] = src[1][0]; dst[1][1] = src[1][1]; dst[1][2] = src[1][2]; dst[1][3] = src[1][3]; dst[2][0] = src[2][0]; dst[2][1] = src[2][1]; dst[2][2] = src[2][2]; dst[2][3] = src[2][3]; } /*---------------------------------------------------------------------* Paired-Single assembler version *---------------------------------------------------------------------* Note that this performs NO error checking. *---------------------------------------------------------------------*/ #ifdef GEKKO asm void PSMTXCopy ( const register Mtx src, // r3 register Mtx dst // r4 ) { nofralloc psq_l fp0, 0(src), 0, 0 psq_st fp0, 0(dst), 0, 0 psq_l fp1, 8(src), 0, 0 psq_st fp1, 8(dst), 0, 0 psq_l fp2, 16(src), 0, 0 psq_st fp2, 16(dst), 0, 0 psq_l fp3, 24(src), 0, 0 psq_st fp3, 24(dst), 0, 0 psq_l fp4, 32(src), 0, 0 psq_st fp4, 32(dst), 0, 0 psq_l fp5, 40(src), 0, 0 psq_st fp5, 40(dst), 0, 0 blr } #endif // GEKKO /*---------------------------------------------------------------------* Name: MTXConcat Description: concatenates two matrices. order of operation is A x B = AB. ok for any of ab == a == b. saves a MTXCopy operation if ab != to a or b. Arguments: a first matrix for concat. b second matrix for concat. ab resultant matrix from concat. Return: none *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* C version *---------------------------------------------------------------------*/ void C_MTXConcat ( const Mtx a, const Mtx b, Mtx ab ) { Mtx mTmp; MtxPtr m; ASSERTMSG( (a != 0), MTX_CONCAT_1 ); ASSERTMSG( (b != 0), MTX_CONCAT_2 ); ASSERTMSG( (ab != 0), MTX_CONCAT_3 ); if( (ab == a) || (ab == b) ) { m = mTmp; } else { m = ab; } // compute (a x b) -> m m[0][0] = a[0][0]*b[0][0] + a[0][1]*b[1][0] + a[0][2]*b[2][0]; m[0][1] = a[0][0]*b[0][1] + a[0][1]*b[1][1] + a[0][2]*b[2][1]; m[0][2] = a[0][0]*b[0][2] + a[0][1]*b[1][2] + a[0][2]*b[2][2]; m[0][3] = a[0][0]*b[0][3] + a[0][1]*b[1][3] + a[0][2]*b[2][3] + a[0][3]; m[1][0] = a[1][0]*b[0][0] + a[1][1]*b[1][0] + a[1][2]*b[2][0]; m[1][1] = a[1][0]*b[0][1] + a[1][1]*b[1][1] + a[1][2]*b[2][1]; m[1][2] = a[1][0]*b[0][2] + a[1][1]*b[1][2] + a[1][2]*b[2][2]; m[1][3] = a[1][0]*b[0][3] + a[1][1]*b[1][3] + a[1][2]*b[2][3] + a[1][3]; m[2][0] = a[2][0]*b[0][0] + a[2][1]*b[1][0] + a[2][2]*b[2][0]; m[2][1] = a[2][0]*b[0][1] + a[2][1]*b[1][1] + a[2][2]*b[2][1]; m[2][2] = a[2][0]*b[0][2] + a[2][1]*b[1][2] + a[2][2]*b[2][2]; m[2][3] = a[2][0]*b[0][3] + a[2][1]*b[1][3] + a[2][2]*b[2][3] + a[2][3]; // overwrite a or b if needed if(m == mTmp) { C_MTXCopy( mTmp, ab ); } } /*---------------------------------------------------------------------* Paired-Single assembler version *---------------------------------------------------------------------* Note that this performs NO error checking. *---------------------------------------------------------------------*/ #ifdef GEKKO asm void PSMTXConcat ( const register Mtx mA, // r3 const register Mtx mB, // r4 register Mtx mAB // r5 ) { nofralloc #define A00_A01 fp0 #define A02_A03 fp1 #define A10_A11 fp2 #define A12_A13 fp3 #define A20_A21 fp4 #define A22_A23 fp5 #define B00_B01 fp6 #define B02_B03 fp7 #define B10_B11 fp8 #define B12_B13 fp9 #define B20_B21 fp10 #define B22_B23 fp11 #define D00_D01 fp12 #define D02_D03 fp13 #define D10_D11 fp14 #define D12_D13 fp15 #define D20_D21 fp2 #define D22_D23 fp0 #define UNIT01 fp31 // don't save LR since we don't make any function calls // mflr r0 // stw r0, 4(r1) stwu r1, -64(r1) psq_l A00_A01, 0(mA), 0, 0 stfd fp14, 8(r1) psq_l B00_B01, 0(mB), 0, 0 addis r6, 0, Unit01@ha psq_l B02_B03, 8(mB), 0, 0 stfd fp15, 16(r1) addi r6, r6, Unit01@l stfd fp31, 40(r1) psq_l B10_B11, 16(mB), 0, 0 // D00_D01 = b00a00 , b01a00 ps_muls0 D00_D01, B00_B01, A00_A01 psq_l A10_A11, 16(mA), 0, 0 // D02_D03 = b02a00 , b03a00 ps_muls0 D02_D03, B02_B03, A00_A01 psq_l UNIT01, 0(r6), 0, 0 // D10_D11 = a10b00 , a10b01 ps_muls0 D10_D11, B00_B01, A10_A11 psq_l B12_B13, 24(mB), 0, 0 // D12_D13 = a10b02 , a10b03 ps_muls0 D12_D13, B02_B03, A10_A11 psq_l A02_A03, 8(mA), 0, 0 // fp12 = b10a01 + b00a00 , b11a01 + b01a00 ps_madds1 D00_D01, B10_B11, A00_A01, D00_D01 psq_l A12_A13, 24(mA), 0, 0 // D10_D11 = a10b00 + a11b10 , a10b01 + a11b11 ps_madds1 D10_D11, B10_B11, A10_A11, D10_D11 psq_l B20_B21, 32(mB), 0, 0 // D02_D03 = b12a01 + b02a00 , b13a01 + b03a00 ps_madds1 D02_D03, B12_B13, A00_A01, D02_D03 // YYY LAST TIME FP0 IS USED psq_l B22_B23, 40(mB), 0, 0 // D12_D13 = a10b02 + a11b12, a10b03+a11b13 ps_madds1 D12_D13, B12_B13, A10_A11, D12_D13 // YYY LAST TIME FP2 IS USED psq_l A20_A21, 32(mA), 0, 0 psq_l A22_A23, 40(mA), 0, 0 // D00_D01 = b20a02 + b10a01 + b00a00 , b21a02 + b11a01 + b01a00 ps_madds0 D00_D01, B20_B21, A02_A03, D00_D01 // m00, m01 computed // D02_D03 = b12a01 + b02a00 + b22a02 , b13a01 + b03a00 + b23a02 ps_madds0 D02_D03, B22_B23, A02_A03, D02_D03 // D10_D11 = a10b00 + a11b10 +a12b20, a10b01 + a11b11 + a12b21 ps_madds0 D10_D11, B20_B21, A12_A13, D10_D11 // m10, m11 computed // D12_D13 = a10b02 + a11b12 + a12b22, a10b03+a11b13 + a12b23 + a13 ps_madds0 D12_D13, B22_B23, A12_A13, D12_D13 // store m00m01 psq_st D00_D01, 0(mAB), 0, 0 // YYY LAST TIME FP12 IS USED // D20_D21 = a20b00, a20b01 ps_muls0 D20_D21, B00_B01, A20_A21 // YYY LAST TIME FP6 IS USED // get a03 from fp1 and add to D02_D03 ps_madds1 D02_D03, UNIT01, A02_A03, D02_D03 // m02, m03 computed // YYY LAST TIME FP1 IS USED // D22_D23 = a20b02, a20b03 ps_muls0 D22_D23, B02_B03, A20_A21 // YYY LAST TIME FP7 IS USED // store m10m11 psq_st D10_D11, 16(mAB), 0, 0 // get a13 from fp3 and add to D12_D13 ps_madds1 D12_D13, UNIT01, A12_A13, D12_D13 // m12, m13 computed // store m02m03 psq_st D02_D03, 8(mAB), 0, 0 // YYY LAST TIME D02_D03 IS USED // D20_D21 = a20b00 + a21b10, a20b01 + a21b11 ps_madds1 D20_D21, B10_B11, A20_A21, D20_D21 // YYY LAST TIME FP8 IS USED // D22_D23 = a20b02 + a21b12, a20b03 + a21b13 ps_madds1 D22_D23, B12_B13, A20_A21, D22_D23 // D20_D21 = a20b00 + a21b10 + a22b20, a20b01 + a21b11 + a22b21 ps_madds0 D20_D21, B20_B21, A22_A23, D20_D21 // Restore fp14 lfd fp14, 8(r1) // D10_D11 // store m12m13 psq_st D12_D13, 24(mAB), 0, 0 // D22_D23 = a20b02 + a21b12 + a22b22, a20b03 + a21b13 + a22b23 + a23 ps_madds0 D22_D23, B22_B23, A22_A23, D22_D23 // store m20m21 psq_st D20_D21, 32(mAB), 0, 0 // get a23 from fp5 and add to fp17 ps_madds1 D22_D23, UNIT01, A22_A23, D22_D23 // restore stack frame lfd fp15, 16(r1) // D12_D13 // store m22m23 psq_st D22_D23, 40(mAB), 0, 0 lfd fp31, 40(r1) addi r1, r1, 64 blr #undef A00_A01 #undef A02_A03 #undef A10_A11 #undef A12_A13 #undef A20_A21 #undef A22_A23 #undef B00_B01 #undef B02_B03 #undef B10_B11 #undef B12_B13 #undef B20_B21 #undef B22_B23 #undef D00_D01 #undef D02_D03 #undef D10_D11 #undef D12_D13 #undef D20_D21 #undef D22_D23 #undef UNIT01 /* / m[0][0] = a[0][0]*b[0][0] + a[0][1]*b[1][0] + a[0][2]*b[2][0]; fp12m[0][1] = a[0][0]*b[0][1] + a[0][1]*b[1][1] + a[0][2]*b[2][1]; / m[0][2] = a[0][0]*b[0][2] + a[0][1]*b[1][2] + a[0][2]*b[2][2]; fp13m[0][3] = a[0][0]*b[0][3] + a[0][1]*b[1][3] + a[0][2]*b[2][3] + a[0][3]; / m[1][0] = a[1][0]*b[0][0] + a[1][1]*b[1][0] + a[1][2]*b[2][0]; fp14m[1][1] = a[1][0]*b[0][1] + a[1][1]*b[1][1] + a[1][2]*b[2][1]; / m[1][2] = a[1][0]*b[0][2] + a[1][1]*b[1][2] + a[1][2]*b[2][2]; fp15m[1][3] = a[1][0]*b[0][3] + a[1][1]*b[1][3] + a[1][2]*b[2][3] + a[1][3]; / m[2][0] = a[2][0]*b[0][0] + a[2][1]*b[1][0] + a[2][2]*b[2][0]; fp16m[2][1] = a[2][0]*b[0][1] + a[2][1]*b[1][1] + a[2][2]*b[2][1]; / m[2][2] = a[2][0]*b[0][2] + a[2][1]*b[1][2] + a[2][2]*b[2][2]; fp17m[2][3] = a[2][0]*b[0][3] + a[2][1]*b[1][3] + a[2][2]*b[2][3] + a[2][3]; OPTIMIZATIONS : fp0 instead of fp17 fp2 instead of fp16 psq_l fp0, 0(mA), 0, 0 // a[0][0], a[0][1] psq_l fp1, 8(mA), 0, 0 // a[0][2], a[0][3] psq_l fp2, 16(mA), 0, 0 // a[1][0], a[1][1] psq_l fp3, 24(mA), 0, 0 // a[1][2], a[1][3] psq_l fp4, 32(mA), 0, 0 // a[2][0], a[2][1] psq_l fp5, 40(mA), 0, 0 // a[2][2], a[2][3] psq_l fp6, 0(mB), 0, 0 // b[0][0], b[0][1] psq_l fp7, 8(mB), 0, 0 // b[0][2], b[0][3] psq_l fp8, 16(mB), 0, 0 // b[1][0], b[1][1] psq_l fp9, 24(mB), 0, 0 // b[1][2], b[1][3] psq_l fp10, 32(mB), 0, 0 // b[2][0], b[2][1] psq_l fp11, 40(mB), 0, 0 // b[2][2], b[2][3] */ } #endif // GEKKO /*---------------------------------------------------------------------* Name: MTXConcatArray Description: concatenates a matrix to an array of matrices. order of operation is A x B(array) = AB(array). Arguments: a first matrix for concat. srcBase array base of second matrix for concat. dstBase array base of resultant matrix from concat. count number of matrices in srcBase, dstBase arrays. note: cannot check for array overflow Return: none *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* C version *---------------------------------------------------------------------*/ void C_MTXConcatArray ( const Mtx a, const Mtx* srcBase, Mtx* dstBase, u32 count ) { u32 i; ASSERTMSG( (a != 0), "MTXConcatArray(): NULL MtxPtr 'a' " ); ASSERTMSG( (srcBase != 0), "MTXConcatArray(): NULL MtxPtr 'srcBase' " ); ASSERTMSG( (dstBase != 0), "MTXConcatArray(): NULL MtxPtr 'dstBase' " ); ASSERTMSG( (count > 1), "MTXConcatArray(): count must be greater than 1." ); for ( i = 0 ; i < count ; i++ ) { C_MTXConcat(a, *srcBase, *dstBase); srcBase++; dstBase++; } } /*---------------------------------------------------------------------* Paired-Single assembler version *---------------------------------------------------------------------* Note that this performs NO error checking. *---------------------------------------------------------------------*/ #ifdef GEKKO // This code can be compiled only with register allocation optimization. #if ( defined(__MWERKS__) && defined(_DEBUG) ) #pragma global_optimizer on #pragma optimization_level 1 #endif void PSMTXConcatArray ( const register Mtx a, const register Mtx* srcBase, register Mtx* dstBase, register u32 count ) { register f32 va0, va1, va2, va3, va4, va5; register f32 vb0, vb1, vb2, vb3, vb4, vb5; register f32 vd0, vd1, vd2, vd3, vd4, vd5; register f32 u01; register f32* u01Ptr = Unit01; asm { // [a00][a01] psq_l va0, 0(a), 0, 0 // [a02][a03] psq_l va1, 8(a), 0, 0 // [a10][a11] psq_l va2, 16(a), 0, 0 // [a12][a13] psq_l va3, 24(a), 0, 0 // count-- subi count, count, 1 // [a20][a21] psq_l va4, 32(a), 0, 0 // [a22][a23] psq_l va5, 40(a), 0, 0 // Loop count mtctr count // [0][1] psq_l u01, 0(u01Ptr), 0, 0 //--------------------------------- // [b00][b01] psq_l vb0, 0(srcBase), 0, 0 // [b10][b11] psq_l vb2, 16(srcBase), 0, 0 // [a00*b00][a00*b01] ps_muls0 vd0, vb0, va0 // [a10*b00][a10*b01] ps_muls0 vd2, vb0, va2 // [a20*b00][a20*b01] ps_muls0 vd4, vb0, va4 // [b20][b21] psq_l vb4, 32(srcBase), 0, 0 // [a00*b00 + a01*b10][a00*b01 + a01*b11] ps_madds1 vd0, vb2, va0, vd0 // [a10*b00 + a11*b10][a10*b01 + a11*b11] ps_madds1 vd2, vb2, va2, vd2 // [a20*b00 + a21*b10][a20*b01 + a21*b11] ps_madds1 vd4, vb2, va4, vd4 // [b02][b03] psq_l vb1, 8(srcBase), 0, 0 // [a00*b00 + a01*b10 + a02*b20][a00*b01 + a01*b11 + a02*b21] ps_madds0 vd0, vb4, va1, vd0 // [a10*b00 + a11*b10 + a12*b20][a10*b01 + a11*b11 + a12*b21] ps_madds0 vd2, vb4, va3, vd2 // [a20*b00 + a21*b10 + a22*b20][a20*b01 + a21*b11 + a22*b21] ps_madds0 vd4, vb4, va5, vd4 // [b12][b13] psq_l vb3, 24(srcBase), 0, 0 // [a00*b00 + a01*b10 + a02*b20][a00*b01 + a01*b11 + a02*b21] psq_st vd0, 0(dstBase), 0, 0 // [a00*b02][a00*b03] ps_muls0 vd1, vb1, va0 // [a10*b02][a10*b03] ps_muls0 vd3, vb1, va2 // [a20*b02][a20*b03] ps_muls0 vd5, vb1, va4 // [b22][b23] psq_l vb5, 40(srcBase), 0, 0 // [a10*b00 + a11*b10 + a12*b20][a10*b01 + a11*b11 + a12*b21] psq_st vd2, 16(dstBase), 0, 0 // [a00*b02 + a01*b12][a00*b03 + a01*b13] ps_madds1 vd1, vb3, va0, vd1 // [a10*b02 + a11*b12][a10*b03 + a11*b13] ps_madds1 vd3, vb3, va2, vd3 // [a20*b02 + a21*b12][a20*b03 + a21*b13] ps_madds1 vd5, vb3, va4, vd5 _loop: // ++srcBase addi srcBase, srcBase, sizeof(Mtx) // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23] ps_madds0 vd1, vb5, va1, vd1 // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23] ps_madds0 vd3, vb5, va3, vd3 // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23] ps_madds0 vd5, vb5, va5, vd5 // [b00][b01] psq_l vb0, 0(srcBase), 0, 0 // [a20*b00 + a21*b10 + a22*b20][a20*b01 + a21*b11 + a22*b21] psq_st vd4, 32(dstBase), 0, 0 // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23 + a03] ps_madd vd1, u01, va1, vd1 // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23 + a13] ps_madd vd3, u01, va3, vd3 // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23 + a23] ps_madd vd5, u01, va5, vd5 // [b10][b11] psq_l vb2, 16(srcBase), 0, 0 // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23 + a03] psq_st vd1, 8(dstBase), 0, 0 // [a00*b00][a00*b01] ps_muls0 vd0, vb0, va0 // [a10*b00][a10*b01] ps_muls0 vd2, vb0, va2 // [a20*b00][a20*b01] ps_muls0 vd4, vb0, va4 // [b20][b21] psq_l vb4, 32(srcBase), 0, 0 // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23 + a13] psq_st vd3, 24(dstBase), 0, 0 // [a00*b00 + a01*b10][a00*b01 + a01*b11] ps_madds1 vd0, vb2, va0, vd0 // [a10*b00 + a11*b10][a10*b01 + a11*b11] ps_madds1 vd2, vb2, va2, vd2 // [a20*b00 + a21*b10][a20*b01 + a21*b11] ps_madds1 vd4, vb2, va4, vd4 // [b02][b03] psq_l vb1, 8(srcBase), 0, 0 // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23 + a23] psq_st vd5, 40(dstBase), 0, 0 // ++dstBase addi dstBase, dstBase, sizeof(Mtx) // [a00*b00 + a01*b10 + a02*b20][a00*b01 + a01*b11 + a02*b21] ps_madds0 vd0, vb4, va1, vd0 // [a10*b00 + a11*b10 + a12*b20][a10*b01 + a11*b11 + a12*b21] ps_madds0 vd2, vb4, va3, vd2 // [a20*b00 + a21*b10 + a22*b20][a20*b01 + a21*b11 + a22*b21] ps_madds0 vd4, vb4, va5, vd4 // [b12][b13] psq_l vb3, 24(srcBase), 0, 0 // [a00*b00 + a01*b10 + a02*b20][a00*b01 + a01*b11 + a02*b21] psq_st vd0, 0(dstBase), 0, 0 // [a00*b02][a00*b03] ps_muls0 vd1, vb1, va0 // [a10*b02][a10*b03] ps_muls0 vd3, vb1, va2 // [a20*b02][a20*b03] ps_muls0 vd5, vb1, va4 // [b22][b23] psq_l vb5, 40(srcBase), 0, 0 // [a10*b00 + a11*b10 + a12*b20][a10*b01 + a11*b11 + a12*b21] psq_st vd2, 16(dstBase), 0, 0 // [a00*b02 + a01*b12][a00*b03 + a01*b13] ps_madds1 vd1, vb3, va0, vd1 // [a10*b02 + a11*b12][a10*b03 + a11*b13] ps_madds1 vd3, vb3, va2, vd3 // [a20*b02 + a21*b12][a20*b03 + a21*b13] ps_madds1 vd5, vb3, va4, vd5 // LOOP bdnz _loop // [a20*b00 + a21*b10 + a22*b20][a20*b01 + a21*b11 + a22*b21] psq_st vd4, 32(dstBase), 0, 0 // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23] ps_madds0 vd1, vb5, va1, vd1 // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23] ps_madds0 vd3, vb5, va3, vd3 // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23] ps_madds0 vd5, vb5, va5, vd5 // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23 + a03] ps_madd vd1, u01, va1, vd1 // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23 + a13] ps_madd vd3, u01, va3, vd3 // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23 + a23] ps_madd vd5, u01, va5, vd5 // [a00*b02 + a01*b12 + a02*b22][a00*b03 + a01*b13 + a02*b23 + a03] psq_st vd1, 8(dstBase), 0, 0 // [a10*b02 + a11*b12 + a12*b22][a10*b03 + a11*b13 + a12*b23 + a13] psq_st vd3, 24(dstBase), 0, 0 // [a20*b02 + a21*b12 + a22*b22][a20*b03 + a21*b13 + a22*b23 + a23] psq_st vd5, 40(dstBase), 0, 0 //--------------------------------- } } #if ( defined(__MWERKS__) && defined(_DEBUG) ) #pragma optimization_level 0 #pragma global_optimizer reset #endif #endif /*---------------------------------------------------------------------* Name: MTXTranspose Description: computes the transpose of a matrix. As matrices are 3x4, fourth column (translation component) is lost and becomes (0,0,0). This function is intended for use in computing an inverse-transpose matrix to transform normals for lighting. In this case, lost translation component doesn't matter. Arguments: src source matrix. xPose destination (transposed) matrix. ok if src == xPose. Return: none *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* C version *---------------------------------------------------------------------*/ void C_MTXTranspose ( const Mtx src, Mtx xPose ) { Mtx mTmp; MtxPtr m; ASSERTMSG( (src != 0), MTX_TRANSPOSE_1 ); ASSERTMSG( (xPose != 0), MTX_TRANSPOSE_2 ); if(src == xPose) { m = mTmp; } else { m = xPose; } m[0][0] = src[0][0]; m[0][1] = src[1][0]; m[0][2] = src[2][0]; m[0][3] = 0.0f; m[1][0] = src[0][1]; m[1][1] = src[1][1]; m[1][2] = src[2][1]; m[1][3] = 0.0f; m[2][0] = src[0][2]; m[2][1] = src[1][2]; m[2][2] = src[2][2]; m[2][3] = 0.0f; // copy back if needed if( m == mTmp ) { C_MTXCopy( mTmp, xPose ); } } /*---------------------------------------------------------------------* Paired-Single assembler version *---------------------------------------------------------------------* Note that this performs NO error checking. *---------------------------------------------------------------------*/ #ifdef GEKKO void PSMTXTranspose ( const register Mtx src, register Mtx xPose ) { register f32 c_zero = 0.0F; register f32 row0a, row1a, row0b, row1b; register f32 trns0, trns1, trns2; asm { psq_l row0a, 0(src), 0, 0 // [0][0], [0][1] stfs c_zero, 44(xPose) // 0 -> [2][3] psq_l row1a, 16(src), 0, 0 // [1][0], [1][1] ps_merge00 trns0, row0a, row1a // [0][0], [1][0] psq_l row0b, 8(src), 1, 0 // [0][2], 1 ps_merge11 trns1, row0a, row1a // [0][1], [1][1] psq_l row1b, 24(src), 1, 0 // [1][2], 1 psq_st trns0, 0(xPose), 0, 0 // [0][0], [1][0] -> [0][0], [0][1] psq_l row0a, 32(src), 0, 0 // [2][0], [2][1] ps_merge00 trns2, row0b, row1b // [0][2], [1][2] psq_st trns1, 16(xPose), 0, 0 // [0][1], [1][1] -> [1][0], [1][1] ps_merge00 trns0, row0a, c_zero // [2][0], 0 psq_st trns2, 32(xPose), 0, 0 // [0][2], [1][2] -> [2][0], [2][1] ps_merge10 trns1, row0a, c_zero // [2][1], 0 psq_st trns0, 8(xPose), 0, 0 // [2][0], 0 -> [0][2], [0][3] lfs row0b, 40(src) // [2][2] psq_st trns1, 24(xPose), 0, 0 // [2][1], 0 -> [1][2], [1][3] stfs row0b, 40(xPose) // [2][2] -> [2][2] } } #endif // GEKKO /*---------------------------------------------------------------------* Name: MTXInverse Description: computes a fast inverse of a matrix. this algorithm works for matrices with a fourth row of (0,0,0,1). for a matrix M = | A C | where A is the upper 3x3 submatrix, | 0 1 | C is a 1x3 column vector INV(M) = | inv(A) (inv(A))*(-C) | | 0 1 | Arguments: src source matrix. inv destination (inverse) matrix. ok if src == inv. Return: 0 if src is not invertible. 1 on success. *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* C version *---------------------------------------------------------------------*/ u32 C_MTXInverse ( const Mtx src, Mtx inv ) { Mtx mTmp; MtxPtr m; f32 det; ASSERTMSG( (src != 0), MTX_INVERSE_1 ); ASSERTMSG( (inv != 0), MTX_INVERSE_2 ); if( src == inv ) { m = mTmp; } else { m = inv; } // compute the determinant of the upper 3x3 submatrix det = src[0][0]*src[1][1]*src[2][2] + src[0][1]*src[1][2]*src[2][0] + src[0][2]*src[1][0]*src[2][1] - src[2][0]*src[1][1]*src[0][2] - src[1][0]*src[0][1]*src[2][2] - src[0][0]*src[2][1]*src[1][2]; // check if matrix is singular if( det == 0.0f ) { return 0; } // compute the inverse of the upper submatrix: // find the transposed matrix of cofactors of the upper submatrix // and multiply by (1/det) det = 1.0f / det; m[0][0] = (src[1][1]*src[2][2] - src[2][1]*src[1][2]) * det; m[0][1] = -(src[0][1]*src[2][2] - src[2][1]*src[0][2]) * det; m[0][2] = (src[0][1]*src[1][2] - src[1][1]*src[0][2]) * det; m[1][0] = -(src[1][0]*src[2][2] - src[2][0]*src[1][2]) * det; m[1][1] = (src[0][0]*src[2][2] - src[2][0]*src[0][2]) * det; m[1][2] = -(src[0][0]*src[1][2] - src[1][0]*src[0][2]) * det; m[2][0] = (src[1][0]*src[2][1] - src[2][0]*src[1][1]) * det; m[2][1] = -(src[0][0]*src[2][1] - src[2][0]*src[0][1]) * det; m[2][2] = (src[0][0]*src[1][1] - src[1][0]*src[0][1]) * det; // compute (invA)*(-C) m[0][3] = -m[0][0]*src[0][3] - m[0][1]*src[1][3] - m[0][2]*src[2][3]; m[1][3] = -m[1][0]*src[0][3] - m[1][1]*src[1][3] - m[1][2]*src[2][3]; m[2][3] = -m[2][0]*src[0][3] - m[2][1]*src[1][3] - m[2][2]*src[2][3]; // copy back if needed if( m == mTmp ) { C_MTXCopy( mTmp,inv ); } return 1; } /*---------------------------------------------------------------------* Paired-Single assembler version *---------------------------------------------------------------------* Note that this performs NO error checking. Results may be a little bit different from the C version because it doesn't perform exactly same calculation. *---------------------------------------------------------------------*/ #ifdef GEKKO asm u32 PSMTXInverse ( const register Mtx src, register Mtx inv ) { nofralloc // fp0 [ 00 ][ 1.0F ] : Load psq_l fp0, 0( src ), 1, 0; // fp1 [ 01 ][ 02 ] : Load psq_l fp1, 4( src ), 0, 0; // fp2 [ 10 ][ 1.0F ] : Load psq_l fp2, 16( src ), 1, 0; // fp6 [ 02 ][ 00 ] ps_merge10 fp6, fp1, fp0; // fp3 [ 11 ][ 12 ] : Load psq_l fp3, 20( src ), 0, 0; // fp4 [ 20 ][ 1.0F ] : Load psq_l fp4, 32( src ), 1, 0; // fp7 [ 12 ][ 10 ] ps_merge10 fp7, fp3, fp2; // fp5 [ 21 ][ 22 ] : Load psq_l fp5, 36( src ), 0, 0; // fp11[ 11*02 ][ 00*12 ] ps_mul fp11, fp3, fp6; // fp8 [ 22 ][ 20 ] ps_merge10 fp8, fp5, fp4; // fp13[ 21*12 ][ 10*22 ] ps_mul fp13, fp5, fp7; // fp11[ 01*12 - 11*02 ][ 10*02 - 00*12 ] ps_msub fp11, fp1, fp7, fp11; // fp12[ 01*22 ][ 20*02 ] ps_mul fp12, fp1, fp8; // fp13[ 11*22 - 21*12 ][ 20*12 - 10*22 ] ps_msub fp13, fp3, fp8, fp13; // fp10[ 20*11 ][ N/A ] ps_mul fp10, fp3, fp4; // fp12[ 21*02 - 01*22 ][ 00*22 - 20*02 ] ps_msub fp12, fp5, fp6, fp12; // fp7 [ 00*(11*22-21*12) ][ N/A ] ps_mul fp7, fp0, fp13; // fp9 [ 00*21 ][ N/A ] ps_mul fp9, fp0, fp5; // fp8 [ 10*01 ][ N/A ] ps_mul fp8, fp1, fp2; // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) ][ N/A ] ps_madd fp7, fp2, fp12, fp7; // fp6 [ 0.0F ][ 0.0F ] ps_sub fp6, fp6, fp6; // fp10[ 10*21 - 20*11 ][ N/A ] ps_msub fp10, fp2, fp5, fp10; // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) + 20*(01*12-11*02) ][ N/A ] : det ps_madd fp7, fp4, fp11, fp7; // fp9 [ 20*01 - 00*21 ][ N/A ] ps_msub fp9, fp1, fp4, fp9; // fp8 [ 00*11 - 10*01 ][ N/A ] ps_msub fp8, fp0, fp3, fp8; // ( det == 0 ) ? ps_cmpo0 cr0, fp7, fp6; bne _regular; // return value (singular) addi r3, 0, 0; blr; _regular: // fp0 [ 1/det ][ N/A ] fres fp0, fp7; // Newton's approximation // Refinement : ( E = est. of 1/K ) -> ( E' = ( 2 - K * E ) * E ) ps_add fp6, fp0, fp0 ps_mul fp5, fp7, fp0 ps_nmsub fp0, fp0, fp5, fp6 // fp1 [ 03 ][ 03 ] : Load lfs fp1, 12(src); // fp13[ ( 11*22 - 21*12 ) * rdet ][ ( 20*12 - 10*22 ) * rdet ] : i[0][0], i[1][0] ps_muls0 fp13, fp13, fp0; // fp2 [ 13 ][ 13 ] : Load lfs fp2, 28(src); // fp12[ ( 21*02 - 01*22 ) * rdet ][ ( 00*22 - 20*02 ) * rdet ] : i[0][1], i[1][1] ps_muls0 fp12, fp12, fp0; // fp3 [ 23 ][ 23 ] : Load lfs fp3, 44(src); // fp11[ ( 01*12 - 11*02 ) * rdet ][ ( 10*02 - 00*12 ) * rdet ] : i[0][2], i[1][2] ps_muls0 fp11, fp11, fp0; // fp5 [ i00 ][ i01 ] ps_merge00 fp5, fp13, fp12; // fp4 [ i10 ][ i11 ] ps_merge11 fp4, fp13, fp12; // fp6 [ i00*03 ][ i10*03 ] ps_mul fp6, fp13, fp1; // [ i00 ][ i01 ] : Store fp5 -> free(fp5[ i00 ][ i01 ]) psq_st fp5, 0(inv), 0, 0; // [ i10 ][ i11 ] : Store fp4 -> free(fp4[ i10 ][ i11 ]) psq_st fp4, 16(inv), 0, 0; // fp10[ ( 10*21 - 20*11 ) * rdet ] : i[2][0] ps_muls0 fp10, fp10, fp0; // fp9 [ ( 20*01 - 00*21 ) * rdet ] : i[2][1] ps_muls0 fp9, fp9, fp0; // fp6 [ i00*03+i01*13 ][ i10*03+i11*13 ] ps_madd fp6, fp12, fp2, fp6; // [ i20 ] : Store fp10 psq_st fp10, 32(inv), 1, 0; // fp8 [ ( 00*11 - 10*01 ) * rdet ] : i[2][2] ps_muls0 fp8, fp8, fp0; // fp6 [ -i00*03-i01*13-i02*23 ][ -i10*03-i11*13-i12*23 ] : i[0][3], i[1][3] ps_nmadd fp6, fp11, fp3, fp6; // [ i21 ] : Store fp9 psq_st fp9, 36(inv), 1, 0; // fp7 [ i20*03 ][ N/A ] ps_mul fp7, fp10, fp1; // fp5 [ i02 ][ i03 ] ps_merge00 fp5, fp11, fp6; // [ i22 ] : Store fp8 psq_st fp8, 40(inv), 1, 0; // fp7 [ i20*03+i21*13 ][ N/A ] ps_madd fp7, fp9, fp2, fp7; // fp4 [ i12 ][ i13 ] ps_merge11 fp4, fp11, fp6; // [ i02 ][ i03 ] : Store fp5 psq_st fp5, 8(inv), 0, 0; // fp7 [ -i20*03-i21*13-i22*23 ][ N/A ] : i[2][3] ps_nmadd fp7, fp8, fp3, fp7; // [ i12 ][ i13 ] : Store fp4 psq_st fp4, 24(inv), 0, 0; // [ i23 ] : Store fp7 psq_st fp7, 44(inv), 1, 0; // return value (regular) addi r3, 0, 1; blr; } #endif // GEKKO /*---------------------------------------------------------------------* Name: MTXInvXpose Description: computes a fast inverse-transpose of a matrix. this algorithm works for matrices with a fourth row of (0,0,0,1). Commonly used for calculating normal transform matrices. This function is equivalent to the combination of two functions MTXInverse + MTXTranspose. Arguments: src source matrix. invx destination (inverse-transpose) matrix. ok if src == invx. Return: 0 if src is not invertible. 1 on success. *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* C version *---------------------------------------------------------------------*/ u32 C_MTXInvXpose ( const Mtx src, Mtx invX ) { Mtx mTmp; MtxPtr m; f32 det; ASSERTMSG( (src != 0), MTX_INVXPOSE_1 ); ASSERTMSG( (invX != 0), MTX_INVXPOSE_2 ); if( src == invX ) { m = mTmp; } else { m = invX; } // compute the determinant of the upper 3x3 submatrix det = src[0][0]*src[1][1]*src[2][2] + src[0][1]*src[1][2]*src[2][0] + src[0][2]*src[1][0]*src[2][1] - src[2][0]*src[1][1]*src[0][2] - src[1][0]*src[0][1]*src[2][2] - src[0][0]*src[2][1]*src[1][2]; // check if matrix is singular if( det == 0.0f ) { return 0; } // compute the inverse-transpose of the upper submatrix: // find the transposed matrix of cofactors of the upper submatrix // and multiply by (1/det) det = 1.0f / det; m[0][0] = (src[1][1]*src[2][2] - src[2][1]*src[1][2]) * det; m[0][1] = -(src[1][0]*src[2][2] - src[2][0]*src[1][2]) * det; m[0][2] = (src[1][0]*src[2][1] - src[2][0]*src[1][1]) * det; m[1][0] = -(src[0][1]*src[2][2] - src[2][1]*src[0][2]) * det; m[1][1] = (src[0][0]*src[2][2] - src[2][0]*src[0][2]) * det; m[1][2] = -(src[0][0]*src[2][1] - src[2][0]*src[0][1]) * det; m[2][0] = (src[0][1]*src[1][2] - src[1][1]*src[0][2]) * det; m[2][1] = -(src[0][0]*src[1][2] - src[1][0]*src[0][2]) * det; m[2][2] = (src[0][0]*src[1][1] - src[1][0]*src[0][1]) * det; // the fourth columns should be all zero m[0][3] = 0.0F; m[1][3] = 0.0F; m[2][3] = 0.0F; // copy back if needed if( m == mTmp ) { C_MTXCopy( mTmp,invX ); } return 1; } /*---------------------------------------------------------------------* Paired-Single assembler version *---------------------------------------------------------------------* Note that this performs NO error checking. Results may be a little bit different from the C version because it doesn't perform exactly same calculation. *---------------------------------------------------------------------*/ #ifdef GEKKO asm u32 PSMTXInvXpose ( const register Mtx src, register Mtx invX ) { nofralloc // fp0 [ 00 ][ 1.0F ] : Load psq_l fp0, 0( src ), 1, 0; // fp1 [ 01 ][ 02 ] : Load psq_l fp1, 4( src ), 0, 0; // fp2 [ 10 ][ 1.0F ] : Load psq_l fp2, 16( src ), 1, 0; // fp6 [ 02 ][ 00 ] ps_merge10 fp6, fp1, fp0; // fp3 [ 11 ][ 12 ] : Load psq_l fp3, 20( src ), 0, 0; // fp4 [ 20 ][ 1.0F ] : Load psq_l fp4, 32( src ), 1, 0; // fp7 [ 12 ][ 10 ] ps_merge10 fp7, fp3, fp2; // fp5 [ 21 ][ 22 ] : Load psq_l fp5, 36( src ), 0, 0; // fp11[ 11*02 ][ 00*12 ] ps_mul fp11, fp3, fp6; // fp8 [ 22 ][ 20 ] ps_merge10 fp8, fp5, fp4; // fp13[ 21*12 ][ 10*22 ] ps_mul fp13, fp5, fp7; // fp11[ 01*12 - 11*02 ][ 10*02 - 00*12 ] ps_msub fp11, fp1, fp7, fp11; // fp12[ 01*22 ][ 20*02 ] ps_mul fp12, fp1, fp8; // fp13[ 11*22 - 21*12 ][ 20*12 - 10*22 ] ps_msub fp13, fp3, fp8, fp13; // fp10[ 20*11 ][ N/A ] ps_mul fp10, fp3, fp4; // fp12[ 21*02 - 01*22 ][ 00*22 - 20*02 ] ps_msub fp12, fp5, fp6, fp12; // fp7 [ 00*(11*22-21*12) ][ N/A ] ps_mul fp7, fp0, fp13; // fp9 [ 00*21 ][ N/A ] ps_mul fp9, fp0, fp5; // fp8 [ 10*01 ][ N/A ] ps_mul fp8, fp1, fp2; // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) ][ N/A ] ps_madd fp7, fp2, fp12, fp7; // fp6 [ 0.0F ][ 0.0F ] ps_sub fp6, fp6, fp6; // fp10[ 10*21 - 20*11 ][ N/A ] ps_msub fp10, fp2, fp5, fp10; // fp7 [ 00*(11*22-21*12) + 10*(21*02-01*22) + 20*(01*12-11*02) ][ N/A ] : det ps_madd fp7, fp4, fp11, fp7; // fp9 [ 20*01 - 00*21 ][ N/A ] ps_msub fp9, fp1, fp4, fp9; // fp8 [ 00*11 - 10*01 ][ N/A ] ps_msub fp8, fp0, fp3, fp8; // ( det == 0 ) ? ps_cmpo0 cr0, fp7, fp6; //bne _regular; bne _regular; // return value (singular) addi r3, 0, 0; blr; _regular: // fp0 [ 1/det ][ N/A ] fres fp0, fp7; psq_st fp6, 12(invX),1, 0 // Newton's approximation // Refinement : ( E = est. of 1/K ) -> ( E' = ( 2 - K * E ) * E ) ps_add fp4, fp0, fp0 ps_mul fp5, fp7, fp0 psq_st fp6, 28(invX),1, 0 ps_nmsub fp0, fp0, fp5, fp4 psq_st fp6, 44(invX),1, 0 // fp13[ ( 11*22 - 21*12 ) * rdet ][ ( 20*12 - 10*22 ) * rdet ] : ix[0][0], ix[0][1] ps_muls0 fp13, fp13, fp0; // fp12[ ( 21*02 - 01*22 ) * rdet ][ ( 00*22 - 20*02 ) * rdet ] : ix[1][0], ix[1][1] ps_muls0 fp12, fp12, fp0; // [ ix00 ][ ix01 ] : Store fp13 psq_st fp13, 0( invX ), 0, 0; // fp11[ ( 01*12 - 11*02 ) * rdet ][ ( 10*02 - 00*12 ) * rdet ] : ix[2][0], ix[2][1] ps_muls0 fp11, fp11, fp0; // [ ix10 ][ ix11 ] : Store fp12 psq_st fp12, 16( invX ), 0, 0; // fp10[ ( 10*21 - 20*11 ) * rdet ] : i[0][2] ps_muls0 fp10, fp10, fp0; // [ ix20 ][ ix21 ] : Store fp11 psq_st fp11, 32( invX ), 0, 0; // fp9 [ ( 20*01 - 00*21 ) * rdet ] : i[1][2] ps_muls0 fp9, fp9, fp0; // [ ix02 ] : Store fp10 psq_st fp10, 8( invX ), 1, 0; // fp8 [ ( 00*11 - 10*01 ) * rdet ] : i[2][2] ps_muls0 fp8, fp8, fp0; // [ ix12 ] : Store fp9 psq_st fp9, 24( invX ), 1, 0; // [ ix22 ] : Store fp8 psq_st fp8, 40( invX ), 1, 0; // return value (regular) addi r3, 0, 1; blr; } #endif /*---------------------------------------------------------------------* MODEL SECTION *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* Name: MTXRotDeg Description: sets a rotation matrix about one of the X, Y or Z axes Arguments: m matrix to be set axis major axis about which to rotate. axis is passed in as a character. it must be one of 'X', 'x', 'Y', 'y', 'Z', 'z' deg rotation angle in degrees. note: counter-clockwise rotation is positive. Return: none *---------------------------------------------------------------------*/ // The function is defined as a macro in mtx.h /* void MTXRotDeg ( Mtx m, char axis, f32 deg ) { MTXRotRad( m, axis, MTXDegToRad( deg ) ); } */ /*---------------------------------------------------------------------* Name: MTXRotRad Description: sets a rotation matrix about one of the X, Y or Z axes Arguments: m matrix to be set axis major axis about which to rotate. axis is passed in as a character. it must be one of 'X', 'x', 'Y', 'y', 'Z', 'z' deg rotation angle in radians. note: counter-clockwise rotation is positive. Return: none *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* C version *---------------------------------------------------------------------*/ void C_MTXRotRad ( Mtx m, char axis, f32 rad ) { f32 sinA, cosA; ASSERTMSG( (m != 0), MTX_ROTRAD_1 ); // verification of "axis" will occur in MTXRotTrig sinA = sinf(rad); cosA = cosf(rad); C_MTXRotTrig( m, axis, sinA, cosA ); } /*---------------------------------------------------------------------* Paired-Single assembler version *---------------------------------------------------------------------* Note that this performs NO error checking. *---------------------------------------------------------------------*/ #ifdef GEKKO void PSMTXRotRad ( Mtx m, char axis, f32 rad ) { f32 sinA, cosA; sinA = sinf(rad); cosA = cosf(rad); PSMTXRotTrig( m, axis, sinA, cosA ); } #endif // GEKKO /*---------------------------------------------------------------------* Name: MTXRotTrig Description: sets a rotation matrix about one of the X, Y or Z axes from specified trig ratios Arguments: m matrix to be set axis major axis about which to rotate. axis is passed in as a character. It must be one of 'X', 'x', 'Y', 'y', 'Z', 'z' sinA sine of rotation angle. cosA cosine of rotation angle. note: counter-clockwise rotation is positive. Return: none *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* C version *---------------------------------------------------------------------*/ void C_MTXRotTrig ( Mtx m, char axis, f32 sinA, f32 cosA ) { ASSERTMSG( (m != 0), MTX_ROTTRIG_1 ); switch(axis) { case 'x': case 'X': m[0][0] = 1.0f; m[0][1] = 0.0f; m[0][2] = 0.0f; m[0][3] = 0.0f; m[1][0] = 0.0f; m[1][1] = cosA; m[1][2] = -sinA; m[1][3] = 0.0f; m[2][0] = 0.0f; m[2][1] = sinA; m[2][2] = cosA; m[2][3] = 0.0f; break; case 'y': case 'Y': m[0][0] = cosA; m[0][1] = 0.0f; m[0][2] = sinA; m[0][3] = 0.0f; m[1][0] = 0.0f; m[1][1] = 1.0f; m[1][2] = 0.0f; m[1][3] = 0.0f; m[2][0] = -sinA; m[2][1] = 0.0f; m[2][2] = cosA; m[2][3] = 0.0f; break; case 'z': case 'Z': m[0][0] = cosA; m[0][1] = -sinA; m[0][2] = 0.0f; m[0][3] = 0.0f; m[1][0] = sinA; m[1][1] = cosA; m[1][2] = 0.0f; m[1][3] = 0.0f; m[2][0] = 0.0f; m[2][1] = 0.0f; m[2][2] = 1.0f; m[2][3] = 0.0f; break; default: ASSERTMSG( 0, MTX_ROTTRIG_2 ); break; } } /*---------------------------------------------------------------------* Paired-Single assembler version *---------------------------------------------------------------------* Note that this performs NO error checking. *---------------------------------------------------------------------*/ #ifdef GEKKO void PSMTXRotTrig ( register Mtx m, register char axis, register f32 sinA, register f32 cosA ) { register f32 fc0, fc1, nsinA; register f32 fw0, fw1, fw2, fw3; asm { frsp sinA, sinA // to make sure sinA = single precision frsp cosA, cosA // to make sure cosA = single precision } fc0 = 0.0F; fc1 = 1.0F; asm { // always lower case ori axis, axis, 0x20 ps_neg nsinA, sinA // branches cmplwi axis, 'x' beq _case_x cmplwi axis, 'y' beq _case_y cmplwi axis, 'z' beq _case_z b _end _case_x: psq_st fc1, 0(m), 1, 0 psq_st fc0, 4(m), 0, 0 ps_merge00 fw0, sinA, cosA psq_st fc0, 12(m), 0, 0 ps_merge00 fw1, cosA, nsinA psq_st fc0, 28(m), 0, 0 psq_st fc0, 44(m), 1, 0 psq_st fw0, 36(m), 0, 0 psq_st fw1, 20(m), 0, 0 b _end; _case_y: ps_merge00 fw0, cosA, fc0 ps_merge00 fw1, fc0, fc1 psq_st fc0, 24(m), 0, 0 psq_st fw0, 0(m), 0, 0 ps_merge00 fw2, nsinA, fc0 ps_merge00 fw3, sinA, fc0 psq_st fw0, 40(m), 0, 0; psq_st fw1, 16(m), 0, 0; psq_st fw3, 8(m), 0, 0; psq_st fw2, 32(m), 0, 0; b _end; _case_z: psq_st fc0, 8(m), 0, 0 ps_merge00 fw0, sinA, cosA ps_merge00 fw2, cosA, nsinA psq_st fc0, 24(m), 0, 0 psq_st fc0, 32(m), 0, 0 ps_merge00 fw1, fc1, fc0 psq_st fw0, 16(m), 0, 0 psq_st fw2, 0(m), 0, 0 psq_st fw1, 40(m), 0, 0 _end: } } #endif // GEKKO /*---------------------------------------------------------------------* Name: MTXRotAxisDeg Description: sets a rotation matrix about an arbitrary axis Arguments: m matrix to be set axis ptr to a vector containing the x,y,z axis components. axis does not have to be a unit vector. deg rotation angle in degrees. note: counter-clockwise rotation is positive. Return: none *---------------------------------------------------------------------*/ // The function is defined as a macro in mtx.h /* void MTXRotAxisDeg ( Mtx m, Vec *axis, f32 deg ) { MTXRotAxisRad( m, axis, MTXDegToRad( deg ) ); } */ /*---------------------------------------------------------------------* Name: MTXRotAxisRad Description: sets a rotation matrix about an arbitrary axis Arguments: m matrix to be set axis ptr to a vector containing the x,y,z axis components. axis does not have to be a unit vector. deg rotation angle in radians. note: counter-clockwise rotation is positive. Return: none *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* C version *---------------------------------------------------------------------*/ void C_MTXRotAxisRad( Mtx m, const Vec *axis, f32 rad ) { Vec vN; f32 s, c; // sinTheta, cosTheta f32 t; // ( 1 - cosTheta ) f32 x, y, z; // x, y, z components of normalized axis f32 xSq, ySq, zSq; // x, y, z squared ASSERTMSG( (m != 0), MTX_ROTAXIS_1 ); ASSERTMSG( (axis != 0), MTX_ROTAXIS_2 ); s = sinf(rad); c = cosf(rad); t = 1.0f - c; C_VECNormalize( axis, &vN ); x = vN.x; y = vN.y; z = vN.z; xSq = x * x; ySq = y * y; zSq = z * z; m[0][0] = ( t * xSq ) + ( c ); m[0][1] = ( t * x * y ) - ( s * z ); m[0][2] = ( t * x * z ) + ( s * y ); m[0][3] = 0.0f; m[1][0] = ( t * x * y ) + ( s * z ); m[1][1] = ( t * ySq ) + ( c ); m[1][2] = ( t * y * z ) - ( s * x ); m[1][3] = 0.0f; m[2][0] = ( t * x * z ) - ( s * y ); m[2][1] = ( t * y * z ) + ( s * x ); m[2][2] = ( t * zSq ) + ( c ); m[2][3] = 0.0f; } /*---------------------------------------------------------------------* Paired-Single assembler version *---------------------------------------------------------------------* Note that this performs NO error checking. *---------------------------------------------------------------------*/ #ifdef GEKKO static void __PSMTXRotAxisRadInternal( register Mtx m, const register Vec *axis, register f32 sT, register f32 cT ) { register f32 tT, fc0; register f32 tmp0, tmp1, tmp2, tmp3, tmp4; register f32 tmp5, tmp6, tmp7, tmp8, tmp9; tmp9 = 0.5F; tmp8 = 3.0F; asm { // to make sure cT = (single precision float value) frsp cT, cT // tmp0 = [x][y] : LOAD psq_l tmp0, 0(axis), 0, 0 // to make sure sT = (single precision float value) frsp sT, sT // tmp1 = [z][z] : LOAD lfs tmp1, 8(axis) // tmp2 = [x*x][y*y] ps_mul tmp2, tmp0, tmp0 // tmp7 = [1.0F] fadds tmp7, tmp9, tmp9 // tmp3 = [x*x+z*z][y*y+z*z] ps_madd tmp3, tmp1, tmp1, tmp2 // fc0 = [0.0F] fsubs fc0, tmp9, tmp9 // tmp4 = [S = x*x+y*y+z*z][z] ps_sum0 tmp4, tmp3, tmp1, tmp2 // tT = 1.0F - cT fsubs tT, tmp7, cT // tmp5 = [1.0/sqrt(S)] :estimation[E] frsqrte tmp5, tmp4 // Newton-Rapson refinement step // E' = E/2(3.0 - E*E*S) fmuls tmp2, tmp5, tmp5 // E*E fmuls tmp3, tmp5, tmp9 // E/2 fnmsubs tmp2, tmp2, tmp4, tmp8 // (3-E*E*S) fmuls tmp5, tmp2, tmp3 // (E/2)(3-E*E*S) // cT = [c][c] ps_merge00 cT, cT, cT // tmp0 = [nx = x/sqrt(S)][ny = y/sqrt(S)] ps_muls0 tmp0, tmp0, tmp5 // tmp1 = [nz = z/sqrt(S)][nz = z/sqrt(S)] ps_muls0 tmp1, tmp1, tmp5 // tmp4 = [t*nx][t*ny] ps_muls0 tmp4, tmp0, tT // tmp9 = [s*nx][s*ny] ps_muls0 tmp9, tmp0, sT // tmp5 = [t*nz][t*nz] ps_muls0 tmp5, tmp1, tT // tmp3 = [t*nx*ny][t*ny*ny] ps_muls1 tmp3, tmp4, tmp0 // tmp2 = [t*nx*nx][t*ny*nx] ps_muls0 tmp2, tmp4, tmp0 // tmp4 = [t*nx*nz][t*ny*nz] ps_muls0 tmp4, tmp4, tmp1 // tmp6 = [t*nx*ny-s*nz][t*nx*ny-s*nz] fnmsubs tmp6, tmp1, sT, tmp3 // tmp7 = [t*nx*ny+s*nz][t*ny*ny+s*nz] fmadds tmp7, tmp1, sT, tmp3 // tmp0 = [-s*nx][-s*ny] ps_neg tmp0, tmp9 // tmp8 = [t*nx*nz+s*ny][0] == [m02][m03] ps_sum0 tmp8, tmp4, fc0, tmp9 // tmp2 = [t*nx*nx+c][t*nx*ny-s*nz] == [m00][m01] ps_sum0 tmp2, tmp2, tmp6, cT // tmp3 = [t*nx*ny+s*nz][t*ny*ny+c] == [m10][m11] ps_sum1 tmp3, cT, tmp7, tmp3 // tmp6 = [t*ny*nz-s*nx][0] == [m12][m13] ps_sum0 tmp6, tmp0, fc0 ,tmp4 // tmp8 [m02][m03] : STORE psq_st tmp8, 8(m), 0, 0 // tmp0 = [t*nx*nz-s*ny][t*ny*nz] ps_sum0 tmp0, tmp4, tmp4, tmp0 // tmp2 [m00][m01] : STORE psq_st tmp2, 0(m), 0, 0 // tmp5 = [t*nz*nz][t*nz*nz] ps_muls0 tmp5, tmp5, tmp1 // tmp3 [m10][m11] : STORE psq_st tmp3, 16(m), 0, 0 // tmp4 = [t*nx*nz-s*ny][t*ny*nz+s*nx] == [m20][m21] ps_sum1 tmp4, tmp9, tmp0, tmp4 // tmp6 [m12][m13] : STORE psq_st tmp6, 24(m), 0, 0 // tmp5 = [t*nz*nz+c][0] == [m22][m23] ps_sum0 tmp5, tmp5, fc0, cT // tmp4 [m20][m21] : STORE psq_st tmp4, 32(m), 0, 0 // tmp5 [m22][m23] : STORE psq_st tmp5, 40(m), 0, 0 } } void PSMTXRotAxisRad( Mtx m, const Vec *axis, f32 rad ) { f32 sinT, cosT; sinT = sinf(rad); cosT = cosf(rad); __PSMTXRotAxisRadInternal(m, axis, sinT, cosT); } #endif // GEKKO /*---------------------------------------------------------------------* Name: MTXTrans Description: sets a translation matrix. Arguments: m matrix to be set xT x component of translation. yT y component of translation. zT z component of translation. Return: none *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* C version *---------------------------------------------------------------------*/ void C_MTXTrans ( Mtx m, f32 xT, f32 yT, f32 zT ) { ASSERTMSG( (m != 0), MTX_TRANS_1 ); m[0][0] = 1.0f; m[0][1] = 0.0f; m[0][2] = 0.0f; m[0][3] = xT; m[1][0] = 0.0f; m[1][1] = 1.0f; m[1][2] = 0.0f; m[1][3] = yT; m[2][0] = 0.0f; m[2][1] = 0.0f; m[2][2] = 1.0f; m[2][3] = zT; } /*---------------------------------------------------------------------* Paired-Single assembler version *---------------------------------------------------------------------* Note that this performs NO error checking. *---------------------------------------------------------------------*/ #ifdef GEKKO void PSMTXTrans( register Mtx m, register f32 xT, register f32 yT, register f32 zT ) { register f32 c0 = 0.0F; register f32 c1 = 1.0F; asm { stfs xT, 12(m) stfs yT, 28(m) psq_st c0, 4(m), 0, 0 psq_st c0, 32(m), 0, 0 stfs c0, 16(m) stfs c1, 20(m) stfs c0, 24(m) stfs c1, 40(m) stfs zT, 44(m) stfs c1, 0(m) } } #endif // GEKKO /*---------------------------------------------------------------------* Name: MTXTransApply Description: This function performs the operation equivalent to MTXTrans + MTXConcat. Arguments: src matrix to be operated. dst resultant matrix from concat. xT x component of translation. yT y component of translation. zT z component of translation. Return: none *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* C version *---------------------------------------------------------------------*/ void C_MTXTransApply ( const Mtx src, Mtx dst, f32 xT, f32 yT, f32 zT ) { ASSERTMSG( (src != 0), MTX_TRANSAPPLY_1 ); ASSERTMSG( (dst != 0), MTX_TRANSAPPLY_1 ); if ( src != dst ) { dst[0][0] = src[0][0]; dst[0][1] = src[0][1]; dst[0][2] = src[0][2]; dst[1][0] = src[1][0]; dst[1][1] = src[1][1]; dst[1][2] = src[1][2]; dst[2][0] = src[2][0]; dst[2][1] = src[2][1]; dst[2][2] = src[2][2]; } dst[0][3] = src[0][3] + xT; dst[1][3] = src[1][3] + yT; dst[2][3] = src[2][3] + zT; } /*---------------------------------------------------------------------* Paired-Single assembler version *---------------------------------------------------------------------* Note that this performs NO error checking. *---------------------------------------------------------------------*/ #ifdef GEKKO asm void PSMTXTransApply( const register Mtx src, register Mtx dst, register f32 xT, register f32 yT, register f32 zT ) { nofralloc; psq_l fp4, 0(src), 0, 0; frsp xT, xT; // to make sure xT = single precision psq_l fp5, 8(src), 0, 0; frsp yT, yT; // to make sure yT = single precision psq_l fp7, 24(src), 0, 0; frsp zT, zT; // to make sure zT = single precision psq_l fp8, 40(src), 0, 0; psq_st fp4, 0(dst), 0, 0; ps_sum1 fp5, xT, fp5, fp5; psq_l fp6, 16(src), 0, 0; psq_st fp5, 8(dst), 0, 0; ps_sum1 fp7, yT, fp7, fp7; psq_l fp9, 32(src), 0, 0; psq_st fp6, 16(dst), 0, 0; ps_sum1 fp8, zT, fp8, fp8; psq_st fp7, 24(dst), 0, 0; psq_st fp9, 32(dst), 0, 0; psq_st fp8, 40(dst), 0, 0; blr; } #endif // GEKKO /*---------------------------------------------------------------------* Name: MTXScale Description: sets a scaling matrix. Arguments: m matrix to be set xS x scale factor. yS y scale factor. zS z scale factor. Return: none *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* C version *---------------------------------------------------------------------*/ void C_MTXScale ( Mtx m, f32 xS, f32 yS, f32 zS ) { ASSERTMSG( (m != 0), MTX_SCALE_1 ); m[0][0] = xS; m[0][1] = 0.0f; m[0][2] = 0.0f; m[0][3] = 0.0f; m[1][0] = 0.0f; m[1][1] = yS; m[1][2] = 0.0f; m[1][3] = 0.0f; m[2][0] = 0.0f; m[2][1] = 0.0f; m[2][2] = zS; m[2][3] = 0.0f; } /*---------------------------------------------------------------------* Paired-Single assembler version *---------------------------------------------------------------------* Note that this performs NO error checking. *---------------------------------------------------------------------*/ #ifdef GEKKO void PSMTXScale( register Mtx m, register f32 xS, register f32 yS, register f32 zS ) { register f32 c0 = 0.0F; asm { stfs xS, 0(m) psq_st c0, 4(m), 0, 0 psq_st c0, 12(m), 0, 0 stfs yS, 20(m) psq_st c0, 24(m), 0, 0 psq_st c0, 32(m), 0, 0 stfs zS, 40(m) stfs c0, 44(m) } } #endif // GEKKO /*---------------------------------------------------------------------* Name: MTXScaleApply Description: This function performs the operation equivalent to MTXScale + MTXConcat Arguments: src matrix to be operated. dst resultant matrix from concat. xS x scale factor. yS y scale factor. zS z scale factor. Return: none *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* C version *---------------------------------------------------------------------*/ void C_MTXScaleApply ( const Mtx src, Mtx dst, f32 xS, f32 yS, f32 zS ) { ASSERTMSG( (src != 0), MTX_SCALEAPPLY_1 ); ASSERTMSG( (dst != 0), MTX_SCALEAPPLY_2 ); dst[0][0] = src[0][0] * xS; dst[0][1] = src[0][1] * xS; dst[0][2] = src[0][2] * xS; dst[0][3] = src[0][3] * xS; dst[1][0] = src[1][0] * yS; dst[1][1] = src[1][1] * yS; dst[1][2] = src[1][2] * yS; dst[1][3] = src[1][3] * yS; dst[2][0] = src[2][0] * zS; dst[2][1] = src[2][1] * zS; dst[2][2] = src[2][2] * zS; dst[2][3] = src[2][3] * zS; } /*---------------------------------------------------------------------* Paired-Single assembler version *---------------------------------------------------------------------* Note that this performs NO error checking. *---------------------------------------------------------------------*/ #ifdef GEKKO asm void PSMTXScaleApply ( const register Mtx src, register Mtx dst, register f32 xS, register f32 yS, register f32 zS ) { nofralloc; frsp xS, xS; // to make sure xS = single precision psq_l fp4, 0(src), 0, 0; frsp yS, yS; // to make sure yS = single precision psq_l fp5, 8(src), 0, 0; frsp zS, zS; // to make sure zS = single precision ps_muls0 fp4, fp4, xS; psq_l fp6, 16(src), 0, 0; ps_muls0 fp5, fp5, xS; psq_l fp7, 24(src), 0, 0; ps_muls0 fp6, fp6, yS; psq_l fp8, 32(src), 0, 0; psq_st fp4, 0(dst), 0, 0; ps_muls0 fp7, fp7, yS; psq_l fp2, 40(src), 0, 0; psq_st fp5, 8(dst), 0, 0; ps_muls0 fp8, fp8, zS; psq_st fp6, 16(dst), 0, 0; ps_muls0 fp2, fp2, zS; psq_st fp7, 24(dst), 0, 0; psq_st fp8, 32(dst), 0, 0; psq_st fp2, 40(dst), 0, 0; blr; } #endif // GEKKO /*---------------------------------------------------------------------* Name: MTXQuat Description: sets a rotation matrix from a quaternion. Arguments: m matrix to be set q ptr to quaternion. Return: none *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* C version *---------------------------------------------------------------------*/ void C_MTXQuat ( Mtx m, const Quaternion *q ) { f32 s,xs,ys,zs,wx,wy,wz,xx,xy,xz,yy,yz,zz; ASSERTMSG( (m != 0), MTX_QUAT_1 ); ASSERTMSG( (q != 0), MTX_QUAT_2 ); ASSERTMSG( ( q->x || q->y || q->z || q->w ), MTX_QUAT_3 ); s = 2.0f / ( (q->x * q->x) + (q->y * q->y) + (q->z * q->z) + (q->w * q->w) ); xs = q->x * s; ys = q->y * s; zs = q->z * s; wx = q->w * xs; wy = q->w * ys; wz = q->w * zs; xx = q->x * xs; xy = q->x * ys; xz = q->x * zs; yy = q->y * ys; yz = q->y * zs; zz = q->z * zs; m[0][0] = 1.0f - (yy + zz); m[0][1] = xy - wz; m[0][2] = xz + wy; m[0][3] = 0.0f; m[1][0] = xy + wz; m[1][1] = 1.0f - (xx + zz); m[1][2] = yz - wx; m[1][3] = 0.0f; m[2][0] = xz - wy; m[2][1] = yz + wx; m[2][2] = 1.0f - (xx + yy); m[2][3] = 0.0f; } /*---------------------------------------------------------------------* Paired-Single assembler version *---------------------------------------------------------------------* Note that this performs NO error checking. *---------------------------------------------------------------------*/ #ifdef GEKKO void PSMTXQuat ( register Mtx m, const register Quaternion *q ) { register f32 c_zero, c_one, c_two, scale; register f32 tmp0, tmp1, tmp2, tmp3, tmp4; register f32 tmp5, tmp6, tmp7, tmp8, tmp9; c_one = 1.0F; asm { // tmp0 = [qx][qy] : LOAD psq_l tmp0, 0(q), 0, 0 // tmp1 = [qz][qw] : LOAD psq_l tmp1, 8(q), 0, 0 // c_zero = [0.0F][0.0F] fsubs c_zero, c_one, c_one // c_two = [2.0F][2.0F] fadds c_two, c_one, c_one // tmp2 = [qx*qx][qy*qy] ps_mul tmp2, tmp0, tmp0 // tmp5 = [qy][qx] ps_merge10 tmp5, tmp0, tmp0 // tmp4 = [qx*qx+qz*qz][qy*qy+qw*qw] ps_madd tmp4, tmp1, tmp1, tmp2 // tmp3 = [qz*qz][qw*qw] ps_mul tmp3, tmp1, tmp1 // scale = [qx*qx+qy*qy+qz*qz+qw*qw][?] ps_sum0 scale, tmp4, tmp4, tmp4 // tmp7 = [qy*qw][qx*qw] ps_muls1 tmp7, tmp5, tmp1 // Newton-Rapson refinment (1/X) : E' = 2E-X*E*E // tmp9 = [E = Est.(1/X)] fres tmp9, scale // tmp4 = [qx*qx+qz*qz][qy*qy+qz*qz] ps_sum1 tmp4, tmp3, tmp4, tmp2 // scale = [2-X*E] ps_nmsub scale, scale, tmp9, c_two // tmp6 = [qz*qw][?] ps_muls1 tmp6, tmp1, tmp1 // scale = [E(2-scale*E) = E'] ps_mul scale, tmp9, scale // tmp2 = [qx*qx+qy*qy] ps_sum0 tmp2, tmp2, tmp2, tmp2 // scale = [s = 2E' = 2.0F/(qx*qx+qy*qy+qz*qz+qw*qw)] fmuls scale, scale, c_two // tmp8 = [qx*qy+qz*qw][?] ps_madd tmp8, tmp0, tmp5, tmp6 // tmp6 = [qx*qy-qz*qw][?] ps_msub tmp6, tmp0, tmp5, tmp6 // c_zero [m03] : STORE psq_st c_zero, 12(m), 1, 0 // tmp2 = [1-s(qx*qx+qy*qy)] : [m22] ps_nmsub tmp2, tmp2, scale, c_one // tmp4 = [1-s(qx*qx+qz*qz)][1-s(qy*qy+qz*qz)] : [m11][m00] ps_nmsub tmp4, tmp4, scale, c_one // c_zero [m23] : STORE psq_st c_zero, 44(m), 1, 0 // tmp8 = [s(qx*qy+qz*qw)][?] : [m10] ps_mul tmp8, tmp8, scale // tmp6 = [s(qx*qy-qz*qw)][?] : [m01] ps_mul tmp6, tmp6, scale // tmp2 [m22] : STORE psq_st tmp2, 40(m), 1, 0 // tmp5 = [qx*qz+qy*qw][qy*qz+qx*qw] ps_madds0 tmp5, tmp0, tmp1, tmp7 // tmp1 = [m10][m11] ps_merge00 tmp1, tmp8, tmp4 // tmp7 = [qx*qz-qy*qw][qy*qz-qx*qw] ps_nmsub tmp7, tmp7, c_two, tmp5 // tmp0 = [m00][m01] ps_merge10 tmp0, tmp4, tmp6 // tmp1 [m10][m11] : STORE psq_st tmp1, 16(m), 0, 0 // tmp5 = [s(qx*qz+qy*qw)][s(qy*qz+qx*qw)] : [m02][m21] ps_mul tmp5, tmp5, scale // tmp7 = [s(qx*qz-qy*qw)][s(qy*qz-qx*qw)] : [m20][m12] ps_mul tmp7, tmp7, scale // tmp0 [m00][m01] : STORE psq_st tmp0, 0(m), 0, 0 // tmp5 [m02] : STORE psq_st tmp5, 8(m), 1, 0 // tmp3 = [m12][m13] ps_merge10 tmp3, tmp7, c_zero // tmp9 = [m20][m21] ps_merge01 tmp9, tmp7, tmp5 // tmp3 [m12][m13] : STORE psq_st tmp3, 24(m), 0, 0 // tmp9 [m20][m21] : STORE psq_st tmp9, 32(m), 0, 0 } } #endif // GEKKO /*---------------------------------------------------------------------* Name: MTXReflect Description: reflect a rotation matrix with respect to a plane. Arguments: m matrix to be set p point on the planar reflector. n normal of the planar reflector. Return: none *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* C version *---------------------------------------------------------------------*/ void C_MTXReflect ( Mtx m, const Vec *p, const Vec *n ) { f32 vxy, vxz, vyz, pdotn; vxy = -2.0f * n->x * n->y; vxz = -2.0f * n->x * n->z; vyz = -2.0f * n->y * n->z; pdotn = 2.0f * C_VECDotProduct(p, n); m[0][0] = 1.0f - 2.0f * n->x * n->x; m[0][1] = vxy; m[0][2] = vxz; m[0][3] = pdotn * n->x; m[1][0] = vxy; m[1][1] = 1.0f - 2.0f * n->y * n->y; m[1][2] = vyz; m[1][3] = pdotn * n->y; m[2][0] = vxz; m[2][1] = vyz; m[2][2] = 1.0f - 2.0f * n->z * n->z; m[2][3] = pdotn * n->z; } /*---------------------------------------------------------------------* Paired-Single assembler version *---------------------------------------------------------------------*/ #ifdef GEKKO void PSMTXReflect( register Mtx m, const register Vec *p, const register Vec *n ) { register f32 c_one = 1.0F; register f32 vn_xy, vn_z1, n2vn_xy, n2vn_z1, pdotn; register f32 tmp0, tmp1, tmp2, tmp3; register f32 tmp4, tmp5, tmp6, tmp7; asm { // vn_z1 = [nz][1.0F] : LOAD psq_l vn_z1, 8(n), 1, 0 // vn_xy = [nx][ny] : LOAD psq_l vn_xy, 0(n), 0, 0 // tmp0 = [px][py] : LOAD psq_l tmp0, 0(p), 0, 0 // n2vn_z1 = [-2nz][-2.0F] ps_nmadd n2vn_z1, vn_z1, c_one, vn_z1 // tmp1 = [pz][1.0F] : LOAD psq_l tmp1, 8(p), 1, 0 // n2vn_xy = [-2nx][-2ny] ps_nmadd n2vn_xy, vn_xy, c_one, vn_xy // tmp4 = [-2nx*nz][-2ny*nz] : [m20][m21] ps_muls0 tmp4, vn_xy, n2vn_z1 // pdotn = [-2(px*nx)][-2(py*ny)] ps_mul pdotn, n2vn_xy, tmp0 // tmp2 = [-2nx*nx][-2nx*ny] ps_muls0 tmp2, vn_xy, n2vn_xy // pdotn = [-2(px*nx+py*ny)][?] ps_sum0 pdotn, pdotn, pdotn, pdotn // tmp3 = [-2nx*ny][-2ny*ny] ps_muls1 tmp3, vn_xy, n2vn_xy // tmp4 = [m20][m21] : STORE psq_st tmp4, 32(m), 0, 0 // tmp2 = [1-2nx*nx][-2nx*ny] : [m00][m01] ps_sum0 tmp2, tmp2, tmp2, c_one // pdotn = [2(px*nx+py*ny+pz*nz)][?] ps_nmadd pdotn, n2vn_z1, tmp1, pdotn // tmp3 = [-2nx*ny][1-2ny*ny] : [m10][m11] ps_sum1 tmp3, c_one, tmp3, tmp3 // tmp2 = [m00][m01] : STORE psq_st tmp2, 0(m), 0, 0 // tmp5 = [pdotn*nx][pdotn*ny] ps_muls0 tmp5, vn_xy, pdotn // tmp6 = [-2nz][pdotn] ps_merge00 tmp6, n2vn_z1, pdotn // tmp3 = [m10][m11] : STORE psq_st tmp3, 16(m), 0, 0 // tmp7 = [-2nx*nz][pdotn*nx] : [m02][m03] ps_merge00 tmp7, tmp4, tmp5 // tmp6 = [-2nz*nz][pdotn*nz] ps_muls0 tmp6, tmp6, vn_z1 // tmp5 = [-2ny*nz][pdotn*ny] : [m12][m13] ps_merge11 tmp5, tmp4, tmp5 // tmp7 = [m02][m03] : STORE psq_st tmp7, 8(m), 0, 0 // tmp6 = [1-2nz*nz][pdotn*nz] : [m22][m23] ps_sum0 tmp6, tmp6, tmp6, c_one // tmp5 = [m12][m13] : STORE psq_st tmp5, 24(m), 0, 0 // tmp6 = [m22][m23] : STORE psq_st tmp6, 40(m), 0, 0 } } #endif // GEKKO /*---------------------------------------------------------------------* VIEW SECTION *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* Name: MTXLookAt Description: compute a matrix to transform points to camera coordinates. Arguments: m matrix to be set camPos camera position. camUp camera 'up' direction. target camera aim point. Return: none *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* C version *---------------------------------------------------------------------*/ void C_MTXLookAt ( Mtx m, const Point3d *camPos, const Vec *camUp, const Point3d *target ) { Vec vLook,vRight,vUp; ASSERTMSG( (m != 0), MTX_LOOKAT_1 ); ASSERTMSG( (camPos != 0), MTX_LOOKAT_2 ); ASSERTMSG( (camUp != 0), MTX_LOOKAT_3 ); ASSERTMSG( (target != 0), MTX_LOOKAT_4 ); // compute unit target vector // use negative value to look down (-Z) axis vLook.x = camPos->x - target->x; vLook.y = camPos->y - target->y; vLook.z = camPos->z - target->z; VECNormalize( &vLook,&vLook ); // vRight = camUp x vLook VECCrossProduct ( camUp, &vLook, &vRight ); VECNormalize( &vRight,&vRight ); // vUp = vLook x vRight VECCrossProduct( &vLook, &vRight, &vUp ); // Don't need to normalize vUp since it should already be unit length // VECNormalize( &vUp, &vUp ); m[0][0] = vRight.x; m[0][1] = vRight.y; m[0][2] = vRight.z; m[0][3] = -( camPos->x * vRight.x + camPos->y * vRight.y + camPos->z * vRight.z ); m[1][0] = vUp.x; m[1][1] = vUp.y; m[1][2] = vUp.z; m[1][3] = -( camPos->x * vUp.x + camPos->y * vUp.y + camPos->z * vUp.z ); m[2][0] = vLook.x; m[2][1] = vLook.y; m[2][2] = vLook.z; m[2][3] = -( camPos->x * vLook.x + camPos->y * vLook.y + camPos->z * vLook.z ); } /*---------------------------------------------------------------------* TEXTURE PROJECTION SECTION *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* Name: MTXLightFrustum Description: Compute a 3x4 projection matrix for texture projection Arguments: m 3x4 matrix to be set t top coord. of view volume at the near clipping plane b bottom coord of view volume at the near clipping plane l left coord. of view volume at near clipping plane r right coord. of view volume at near clipping plane n positive distance from camera to near clipping plane scaleS scale in the S direction for projected coordinates (usually 0.5) scaleT scale in the T direction for projected coordinates (usually 0.5) transS translate in the S direction for projected coordinates (usually 0.5) transT translate in the T direction for projected coordinates (usually 0.5) Return: none. *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* C version *---------------------------------------------------------------------*/ void C_MTXLightFrustum ( Mtx m, float t, float b, float l, float r, float n, float scaleS, float scaleT, float transS, float transT ) { f32 tmp; ASSERTMSG( (m != 0), MTX_LIGHT_FRUSTUM_1 ); ASSERTMSG( (t != b), MTX_LIGHT_FRUSTUM_2 ); ASSERTMSG( (l != r), MTX_LIGHT_FRUSTUM_3 ); // NOTE: Be careful about "l" vs. "1" below!!! tmp = 1.0f / (r - l); m[0][0] = ((2*n) * tmp) * scaleS; m[0][1] = 0.0f; m[0][2] = (((r + l) * tmp) * scaleS) - transS; m[0][3] = 0.0f; tmp = 1.0f / (t - b); m[1][0] = 0.0f; m[1][1] = ((2*n) * tmp) * scaleT; m[1][2] = (((t + b) * tmp) * scaleT) - transT; m[1][3] = 0.0f; m[2][0] = 0.0f; m[2][1] = 0.0f; m[2][2] = -1.0f; m[2][3] = 0.0f; } /*---------------------------------------------------------------------* Name: MTXLightPerspective Description: compute a 3x4 perspective projection matrix from field of view and aspect ratio for texture projection. Arguments: m 3x4 matrix to be set fovy total field of view in in degrees in the YZ plane aspect ratio of view window width:height (X / Y) scaleS scale in the S direction for projected coordinates (usually 0.5) scaleT scale in the T direction for projected coordinates (usually 0.5) transS translate in the S direction for projected coordinates (usually 0.5) transT translate in the T direction for projected coordinates (usually 0.5) Return: none *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* C version *---------------------------------------------------------------------*/ void C_MTXLightPerspective ( Mtx m, f32 fovY, f32 aspect, float scaleS, float scaleT, float transS, float transT ) { f32 angle; f32 cot; ASSERTMSG( (m != 0), MTX_LIGHT_PERSPECTIVE_1 ); ASSERTMSG( ( (fovY > 0.0) && ( fovY < 180.0) ), MTX_LIGHT_PERSPECTIVE_2 ); ASSERTMSG( (aspect != 0), MTX_LIGHT_PERSPECTIVE_3 ); // find the cotangent of half the (YZ) field of view angle = fovY * 0.5f; angle = MTXDegToRad( angle ); cot = 1.0f / tanf(angle); m[0][0] = (cot / aspect) * scaleS; m[0][1] = 0.0f; m[0][2] = -transS; m[0][3] = 0.0f; m[1][0] = 0.0f; m[1][1] = cot * scaleT; m[1][2] = -transT; m[1][3] = 0.0f; m[2][0] = 0.0f; m[2][1] = 0.0f; m[2][2] = -1.0f; m[2][3] = 0.0f; } /*---------------------------------------------------------------------* Name: MTXLightOrtho Description: compute a 3x4 orthographic projection matrix. Arguments: m matrix to be set t top coord. of parallel view volume b bottom coord of parallel view volume l left coord. of parallel view volume r right coord. of parallel view volume scaleS scale in the S direction for projected coordinates (usually 0.5) scaleT scale in the T direction for projected coordinates (usually 0.5) transS translate in the S direction for projected coordinates (usually 0.5) transT translate in the T direction for projected coordinates (usually 0.5) Return: none *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* C version *---------------------------------------------------------------------*/ void C_MTXLightOrtho ( Mtx m, f32 t, f32 b, f32 l, f32 r, float scaleS, float scaleT, float transS, float transT ) { f32 tmp; ASSERTMSG( (m != 0), MTX_LIGHT_ORTHO_1 ); ASSERTMSG( (t != b), MTX_LIGHT_ORTHO_2 ); ASSERTMSG( (l != r), MTX_LIGHT_ORTHO_3 ); // NOTE: Be careful about "l" vs. "1" below!!! tmp = 1.0f / (r - l); m[0][0] = (2.0f * tmp * scaleS); m[0][1] = 0.0f; m[0][2] = 0.0f; m[0][3] = ((-(r + l) * tmp) * scaleS) + transS; tmp = 1.0f / (t - b); m[1][0] = 0.0f; m[1][1] = (2.0f * tmp) * scaleT; m[1][2] = 0.0f; m[1][3] = ((-(t + b) * tmp)* scaleT) + transT; m[2][0] = 0.0f; m[2][1] = 0.0f; m[2][2] = 0.0f; m[2][3] = 1.0f; } /*===========================================================================*/ #if ( __MWERKS__ == 0x00004100 ) #pragma defer_codegen reset #endif