/*---------------------------------------------------------------------------*
  Project:  matrix vector Library
  File:     mtxvec.c

  Copyright 1998-2001 Nintendo.  All rights reserved.

  These coded instructions, statements, and computer programs contain
  proprietary information of Nintendo of America Inc. and/or Nintendo
  Company Ltd., and are protected by Federal copyright law.  They may
  not be disclosed to third parties or copied or duplicated in any form,
  in whole or in part, without the prior written consent of Nintendo.

  $Log: mtxvec.c,v $
  Revision 1.2  02/20/2006 04:25:42  mitu
  changed include path from dolphin/ to revolution/.

  Revision 1.1.1.1  2005/05/12 02:15:49  yasuh-to
  transitioned from the Dolphin source tree

    5     02/04/11 13:11 Hirose
    const type specifier support. (worked by Hiratsu@IRD)

    4     7/23/01 8:46p Hirose
    Added PSMultVecArraySR. Improved PSMultVecArray.

    3     7/07/01 7:40p Hirose
    added PSMTXMultVecSR made by Ohki-san@NTSC.

    2     2/23/01 1:49a Hirose
    Fixed a bug in PSMTXMultVec.

    1     2/22/01 11:56p Hirose
    This section is moved from mtx.c. Added PSMultVec.

  $NoKeywords: $
 *---------------------------------------------------------------------------*/

// NOTE(review): in this copy of the file the three angle-bracket includes
// below had lost their header names (they read as a bare "#include").
// The names used here are a reconstruction -- confirm them against the
// SDK include tree before building.
#ifdef WIN32
#include <win32/win32.h>
#endif

#include <math.h>
#include <revolution/mtx.h>
#include "mtxAssert.h"

/*---------------------------------------------------------------------*
    Name:           MTXMultVec

    Description:    multiplies a vector by a matrix.
                    m x src = dst.

    Arguments:      m         matrix.
                    src       source vector for multiply.
                    dst       resultant vector from multiply.

                    note:      ok if src == dst.

Return : none *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* C version *---------------------------------------------------------------------*/ void C_MTXMultVec ( const Mtx m, const Vec *src, Vec *dst ) { Vec vTmp; ASSERTMSG( (m != 0), MTX_MULTVEC_1 ); ASSERTMSG( (src != 0), MTX_MULTVEC_2 ); ASSERTMSG( (dst != 0), MTX_MULTVEC_3 ); // a Vec has a 4th implicit 'w' coordinate of 1 vTmp.x = m[0][0]*src->x + m[0][1]*src->y + m[0][2]*src->z + m[0][3]; vTmp.y = m[1][0]*src->x + m[1][1]*src->y + m[1][2]*src->z + m[1][3]; vTmp.z = m[2][0]*src->x + m[2][1]*src->y + m[2][2]*src->z + m[2][3]; // copy back dst->x = vTmp.x; dst->y = vTmp.y; dst->z = vTmp.z; } /*---------------------------------------------------------------------* Paired-Single assembler version *---------------------------------------------------------------------* Note that NO error checking is performed. *---------------------------------------------------------------------*/ #ifdef GEKKO asm void PSMTXMultVec ( const register Mtx m, const register Vec *src, register Vec *dst ) { nofralloc // load v[0], v[1] psq_l fp0, 0(src), 0, 0 // load m[0][0], m[0][1] psq_l fp2, 0(m), 0, 0 // load v[2], 1 psq_l fp1, 8(src), 1, 0 // m[0][0]*v[0], m[0][1]*v[1] ps_mul fp4, fp2, fp0 // load m[0][2], m[0][3] psq_l fp3, 8(m), 0, 0 // m[0][0]*v[0]+m[0][2]*v[2], m[0][1]*v[1]+m[0][3] ps_madd fp5, fp3, fp1, fp4 // load m[1][0], m[1][1] psq_l fp8, 16(m), 0, 0 // m[0][0]*v[0]+m[0][2]*v[2]+m[0][1]*v[1]+m[0][3], ??? ps_sum0 fp6, fp5, fp6, fp5 // load m[1][2], m[1][3] psq_l fp9, 24(m), 0, 0 // m[1][0]*v[0], m[1][1]*v[1] ps_mul fp10, fp8, fp0 // store dst[0] psq_st fp6, 0(dst), 1, 0 // m[1][0]*v[0]+m[1][2]*v[2], m[1][1]*v[1]+m[1][3] ps_madd fp11, fp9, fp1, fp10 // load m[2][0], m[2][1] psq_l fp2, 32(m), 0, 0 // m[1][0]*v[0]+m[1][2]*v[2]+m[2][1]*v[1]+m[1][3], ??? 
ps_sum0 fp12, fp11, fp12, fp11 // load m[2][2], m[2][3] psq_l fp3, 40(m), 0, 0 // m[0][0]*v[0], m[0][1]*v[1] ps_mul fp4, fp2, fp0 // store dst[1] psq_st fp12, 4(dst), 1, 0 // m[0][0]*v[0]+m[0][2]*v[2], m[0][1]*v[1]+m[0][3] ps_madd fp5, fp3, fp1, fp4 // m[0][0]*v[0]+m[0][2]*v[2]+m[0][1]*v[1]+m[0][3], ??? ps_sum0 fp6, fp5, fp6, fp5 // store dst[0] psq_st fp6, 8(dst), 1, 0 blr } #endif // GEKKO /*---------------------------------------------------------------------* Name: MTXMultVecArray Description: multiplies an array of vectors by a matrix. Arguments: m matrix. srcBase start of source vector array. dstBase start of resultant vector array. note: ok if srcBase == dstBase. count number of vectors in srcBase, dstBase arrays note: cannot check for array overflow Return : none *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* C version *---------------------------------------------------------------------*/ void C_MTXMultVecArray ( const Mtx m, const Vec *srcBase, Vec *dstBase, u32 count ) { u32 i; Vec vTmp; ASSERTMSG( (m != 0), MTX_MULTVECARRAY_1 ); ASSERTMSG( (srcBase != 0), MTX_MULTVECARRAY_2 ); ASSERTMSG( (dstBase != 0), MTX_MULTVECARRAY_3 ); ASSERTMSG( (count > 1), MTX_MULTVECARRAY_4 ); for(i=0; i< count; i++) { // Vec has a 4th implicit 'w' coordinate of 1 vTmp.x = m[0][0]*srcBase->x + m[0][1]*srcBase->y + m[0][2]*srcBase->z + m[0][3]; vTmp.y = m[1][0]*srcBase->x + m[1][1]*srcBase->y + m[1][2]*srcBase->z + m[1][3]; vTmp.z = m[2][0]*srcBase->x + m[2][1]*srcBase->y + m[2][2]*srcBase->z + m[2][3]; // copy back dstBase->x = vTmp.x; dstBase->y = vTmp.y; dstBase->z = vTmp.z; srcBase++; dstBase++; } } /*---------------------------------------------------------------------* Paired-Single assembler version *---------------------------------------------------------------------* Note that NO error checking is performed. The count should be greater than 1. 
*---------------------------------------------------------------------*/ #ifdef GEKKO asm void PSMTXMultVecArray ( const register Mtx m, const register Vec *srcBase, register Vec *dstBase, register u32 count ) { nofralloc // fp13 [m00][m01] : LOAD psq_l fp13, 0(m), 0, 0 // fp12 [m10][m11] : LOAD psq_l fp12, 16(m), 0, 0 // decrement loop count due to unrolling subi count, count, 1 // fp11 [m02][m03] : LOAD psq_l fp11, 8(m), 0, 0 // fp0 [m00][m10] ps_merge00 fp0, fp13, fp12 // base pointer adjustment subi dstBase, dstBase, 4 // fp10 [m12][m13] : LOAD psq_l fp10, 24(m), 0, 0 // fp1 [m01][m11] ps_merge11 fp1, fp13, fp12 // loop counter mtctr count // fp4 [m20][m21] : LOAD psq_l fp4, 32(m), 0, 0 // fp2 [m02][m12] ps_merge00 fp2, fp11, fp10 // fp5 [m22][m23] : LOAD psq_l fp5, 40(m), 0, 0 // fp3 [m03][m13] ps_merge11 fp3, fp11, fp10 // fp6 [v0][v1] : LOAD psq_l fp6, 0(srcBase), 0, 0 // fp7 [v2][1.0F] : LOAD psq_lu fp7, 8(srcBase), 1, 0 // fp8 [m00*v0+m03][m10*v0+m13] ps_madds0 fp8, fp0, fp6, fp3 // fp9 [m20*v0][m21*v1] ps_mul fp9, fp4, fp6 // fp8 [m00*v0+m01*v1+m03][m10*v0+m11*v1+m13] ps_madds1 fp8, fp1, fp6, fp8 // fp10 [m20*v0+m22*v2][m21*v1+m23*1.0F] ps_madd fp10, fp5, fp7, fp9 _mloop: //-------- Unrolled loop -------- // fp6 [v0][v1] : LOAD psq_lu fp6, 4(srcBase), 0, 0 // fp12 [m00*v0+m01*v1+m02*v2+m03][m10*v0+m11*v1+m12*v2+m13] ps_madds0 fp12, fp2, fp7, fp8 // fp7 [v2][1.0F] : LOAD psq_lu fp7, 8(srcBase), 1, 0 // fp13 [m20*v0+m21*v1+m22*v2+m23][?] ps_sum0 fp13, fp10, fp9, fp10 // fp8 [m00*v0+m03][m10*v0+m13] ps_madds0 fp8, fp0, fp6, fp3 // fp9 [m20*v0][m21*v1] ps_mul fp9, fp4, fp6 // fp12 [v0'][v1'] : STORE psq_stu fp12, 4(dstBase), 0, 0 // fp8 [m00*v0+m01*v1+m03][m10*v0+m11*v1+m13] ps_madds1 fp8, fp1, fp6, fp8 // fp13 [v2'][ ? 
] : STORE psq_stu fp13, 8(dstBase), 1, 0 // fp10 [m20*v0+m22*v2][m21*v1+m23*1.0F] ps_madd fp10, fp5, fp7, fp9 // LOOP bdnz _mloop // fp12 [m00*v0+m01*v1+m02*v2+m03][m10*v0+m11*v1+m12*v2+m13] ps_madds0 fp12, fp2, fp7, fp8 // fp13 [m20*v0+m21*v1+m22*v2+m23][?] ps_sum0 fp13, fp10, fp9, fp10 // fp12 [v0'][v1'] : STORE psq_stu fp12, 4(dstBase), 0, 0 // fp13 [v2'][ ? ] : STORE psq_stu fp13, 8(dstBase), 1, 0 blr } #endif // GEKKO /*---------------------------------------------------------------------* Name: MTXMultVecSR Description: multiplies a vector by a matrix 3x3 (Scaling and Rotation) component. m x src = dst. Arguments: m matrix. src source vector for multiply. dst resultant vector from multiply. note: ok if src == dst. Return : none *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* C version *---------------------------------------------------------------------*/ void C_MTXMultVecSR ( const Mtx m, const Vec *src, Vec *dst ) { Vec vTmp; ASSERTMSG( (m != 0), MTX_MULTVECSR_1 ); ASSERTMSG( (src != 0), MTX_MULTVECSR_2 ); ASSERTMSG( (dst != 0), MTX_MULTVECSR_3 ); // a Vec has a 4th implicit 'w' coordinate of 1 vTmp.x = m[0][0]*src->x + m[0][1]*src->y + m[0][2]*src->z; vTmp.y = m[1][0]*src->x + m[1][1]*src->y + m[1][2]*src->z; vTmp.z = m[2][0]*src->x + m[2][1]*src->y + m[2][2]*src->z; // copy back dst->x = vTmp.x; dst->y = vTmp.y; dst->z = vTmp.z; } /*---------------------------------------------------------------------* Paired-Single assembler version *---------------------------------------------------------------------* Note that this performs NO error checking. 
*---------------------------------------------------------------------*/ #ifdef GEKKO asm void PSMTXMultVecSR ( const register Mtx m, const register Vec *src, register Vec *dst ) { nofralloc psq_l fp0, 0(m), 0, 0 // m[0][0], m[0][1] GQR0 = 0 // fp6 - x y psq_l fp6, 0(src), 0, 0 psq_l fp2, 16(m), 0, 0 // m[1][0], m[1][1] // fp8 = m00x m01y // next X ps_mul fp8, fp0, fp6 psq_l fp4, 32(m), 0, 0 // m[2][0], m[2][1] // fp10 = m10x m11y // next Y ps_mul fp10, fp2, fp6 psq_l fp7, 8(src), 1, 0 // fp7 - z,1.0 // fp12 = m20x m21y // next Z ps_mul fp12, fp4, fp6 // YYY last FP6 usage psq_l fp3, 24(m), 0, 0 // m[1][2], m[1][3] ps_sum0 fp8, fp8, fp8, fp8 psq_l fp5, 40(m), 0, 0 // m[2][2], m[2][3] ps_sum0 fp10, fp10, fp10, fp10 psq_l fp1, 8(m), 0, 0 // m[0][2], m[0][3] ps_sum0 fp12, fp12, fp12, fp12 ps_madd fp9, fp1, fp7, fp8 psq_st fp9, 0(dst), 1, 0 // store X ps_madd fp11, fp3, fp7, fp10 psq_st fp11, 4(dst), 1, 0 // store Y ps_madd fp13, fp5, fp7, fp12 psq_st fp13, 8(dst), 1, 0 // sore Z blr } #endif // GEKKO /*---------------------------------------------------------------------* Name: MTXMultVecArraySR Description: multiplies an array of vectors by a matrix 3x3 (Scaling and Rotation) component. Arguments: m matrix. srcBase start of source vector array. dstBase start of resultant vector array. note: ok if srcBase == dstBase. 
count number of vectors in srcBase, dstBase arrays note: cannot check for array overflow Return : none *---------------------------------------------------------------------*/ /*---------------------------------------------------------------------* C version *---------------------------------------------------------------------*/ void C_MTXMultVecArraySR ( const Mtx m, const Vec *srcBase, Vec *dstBase, u32 count ) { u32 i; Vec vTmp; ASSERTMSG( (m != 0), MTX_MULTVECARRAYSR_1 ); ASSERTMSG( (srcBase != 0), MTX_MULTVECARRAYSR_2 ); ASSERTMSG( (dstBase != 0), MTX_MULTVECARRAYSR_3 ); ASSERTMSG( (count > 1), MTX_MULTVECARRAYSR_4 ); for ( i = 0; i < count; i ++ ) { // Vec has a 4th implicit 'w' coordinate of 1 vTmp.x = m[0][0]*srcBase->x + m[0][1]*srcBase->y + m[0][2]*srcBase->z; vTmp.y = m[1][0]*srcBase->x + m[1][1]*srcBase->y + m[1][2]*srcBase->z; vTmp.z = m[2][0]*srcBase->x + m[2][1]*srcBase->y + m[2][2]*srcBase->z; // copy back dstBase->x = vTmp.x; dstBase->y = vTmp.y; dstBase->z = vTmp.z; srcBase++; dstBase++; } } /*---------------------------------------------------------------------* Paired-Single assembler version *---------------------------------------------------------------------* Note that NO error checking is performed. 
*---------------------------------------------------------------------*/ #ifdef GEKKO asm void PSMTXMultVecArraySR ( const register Mtx m, const register Vec *srcBase, register Vec *dstBase, register u32 count ) { nofralloc // fp13 [m00][m01] : LOAD psq_l fp13, 0(m), 0, 0 // fp12 [m10][m11] : LOAD psq_l fp12, 16(m), 0, 0 // decrement loop count due to unrolling subi count, count, 1 // fp11 [m02][1.0F] : LOAD psq_l fp11, 8(m), 1, 0 // fp0 [m00][m10] ps_merge00 fp0, fp13, fp12 // base pointer adjustment subi dstBase, dstBase, 4 // fp10 [m12][1.0F] : LOAD psq_l fp10, 24(m), 1, 0 // fp1 [m01][m11] ps_merge11 fp1, fp13, fp12 // loop counter mtctr count // fp3 [m20][m21] : LOAD psq_l fp3, 32(m), 0, 0 // fp2 [m02][m12] ps_merge00 fp2, fp11, fp10 // fp4 [m22][1.0F] : LOAD psq_l fp4, 40(m), 1, 0 // fp6 [v0][v1] : LOAD psq_l fp6, 0(srcBase), 0, 0 // fp7 [v2][1.0F] : LOAD psq_lu fp7, 8(srcBase), 1, 0 // fp8 [m00*v0][m10*v0] ps_muls0 fp8, fp0, fp6 // fp9 [m20*v0][m21*v1] ps_mul fp9, fp3, fp6 // fp8 [m00*v0+m01*v1][m10*v0+m11*v1] ps_madds1 fp8, fp1, fp6, fp8 // fp10 [m20*v0+m22*v2][?] ps_madd fp10, fp4, fp7, fp9 _mloop: //-------- Unrolled loop -------- // fp6 [v0][v1] : LOAD psq_lu fp6, 4(srcBase), 0, 0 // fp12 [m00*v0+m01*v1+m02*v2][m10*v0+m11*v1+m12*v2] ps_madds0 fp12, fp2, fp7, fp8 // fp7 [v2][1.0F] : LOAD psq_lu fp7, 8(srcBase), 1, 0 // fp13 [m20*v0+m21*v1+m22*v2][?] ps_sum0 fp13, fp10, fp9, fp9 // fp8 [m00*v0][m10*v0] ps_muls0 fp8, fp0, fp6 // fp9 [m20*v0][m21*v1] ps_mul fp9, fp3, fp6 // fp12 [v0'][v1'] : STORE psq_stu fp12, 4(dstBase), 0, 0 // fp8 [m00*v0+m01*v1][m10*v0+m11*v1] ps_madds1 fp8, fp1, fp6, fp8 // fp13 [v2'][ ? ] : STORE psq_stu fp13, 8(dstBase), 1, 0 // fp10 [m20*v0+m22*v2][?] ps_madd fp10, fp4, fp7, fp9 // LOOP bdnz _mloop // fp12 [m00*v0+m01*v1+m02*v2][m10*v0+m11*v1+m12*v2] ps_madds0 fp12, fp2, fp7, fp8 // fp13 [m20*v0+m21*v1+m22*v2][?] ps_sum0 fp13, fp10, fp9, fp9 // fp12 [v0'][v1'] : STORE psq_stu fp12, 4(dstBase), 0, 0 // fp13 [v2'][ ? 
] : STORE psq_stu fp13, 8(dstBase), 1, 0 blr } #endif // GEKKO /*===========================================================================*/