/* mtx44.c - OpenGrok cross reference for /RvlSDK-3.2.2/build/libraries/mtx/src/mtx44.c */

/*---------------------------------------------------------------------------*
  Project: matrix vector Library
  File:    mtx44.c

  Copyright 1998-2006 Nintendo.  All rights reserved.

  These coded instructions, statements, and computer programs contain
  proprietary information of Nintendo of America Inc. and/or Nintendo
  Company Ltd., and are protected by Federal copyright law.  They may
  not be disclosed to third parties or copied or duplicated in any form,
  in whole or in part, without the prior written consent of Nintendo.


  $Log: mtx44.c,v $
  Revision 1.4  02/20/2006 04:25:42  mitu
  changed include path from dolphin/ to revolution/.

  Revision 1.3  01/11/2006 12:17:58  hirose
  Removed obsolete flags.

  Revision 1.2  12/30/2005 06:01:44  hirose
  Temporary workaround for CW3.0 Alpha3 problem.


    9     02/04/11 13:10 Hirose
    const type specifier support. (worked by Hiratsu@IRD)

    8     02/02/18 9:27 Hirose
    Workaround for floating-point precision mismatch issue.

    7     9/18/01 1:38p Hirose
    Fixed PSMTX44RotRad definition. Corrected wrong argument types.

    6     8/30/01 10:36p Hirose
    Added C_MTX44Inverse.

    5     7/30/01 11:00p Hirose
    Fixed MAC build errors regarding missing "#ifdef GEKKO".

    4     7/30/01 10:22p Hirose
    Added 4x4 model matrix section.

    3     7/24/01 6:03p Hirose

    2     7/09/01 11:18p Hirose
    added general 4x4 matrix functions. (original made by Ohki-san@NTSC.)

    1     2/22/01 11:54p Hirose
    This section is moved from mtx.c

  $NoKeywords: $
 *---------------------------------------------------------------------------*/

#include <math.h>
#include <revolution/mtx.h>
#include <revolution/mtx/mtx44ext.h>
#include "mtxAssert.h"
#include "mtx44extAssert.h"

// Small table of single-precision constants (0, 1, 0.5, 3).
// NOTE(review): nothing in the portion of the file reviewed here references
// this table -- presumably it is loaded by routines further down; confirm.
static f32 mtxUnit[] = {0.0f, 1.0f, 0.5f, 3.0f};

// Compiler-specific workaround: only active when __MWERKS__ is exactly 0x00004100.
#if ( __MWERKS__ == 0x00004100 )
#pragma defer_codegen on    // This will be removed when compiler is fixed.
#endif

/*---------------------------------------------------------------------*


                             PROJECTION SECTION


 *---------------------------------------------------------------------*/


/*---------------------------------------------------------------------*

Name:           MTXFrustum

Description:    compute a 4x4 perspective projection matrix from a
                specified view volume.


Arguments:      m:        	4x4 matrix to be set

                t:        	top coord. of view volume at the near clipping plane

                b:        	bottom coord of view volume at the near clipping plane

                l:        	left coord. of view volume at near clipping plane

                r:        	right coord. of view volume at near clipping plane

                n:        	positive distance from camera to near clipping plane

                f:        	positive distance from camera to far clipping plane


Return:         none

 *---------------------------------------------------------------------*/
/*---------------------------------------------------------------------*
    C version
 *---------------------------------------------------------------------*/
void C_MTXFrustum ( Mtx44 m, f32 t, f32 b, f32 l, f32 r, f32 n, f32 f )
{
    f32 invWidth;       // 1 / (r - l)
    f32 invHeight;      // 1 / (t - b)
    f32 invDepth;       // 1 / (f - n)

    // A degenerate view volume would make one of the reciprocals blow up.
    ASSERTMSG( (m != 0),  MTX_FRUSTUM_1     );
    ASSERTMSG( (t != b),  MTX_FRUSTUM_2     );
    ASSERTMSG( (l != r),  MTX_FRUSTUM_3     );
    ASSERTMSG( (n != f),  MTX_FRUSTUM_4     );

    invWidth  = 1.0f / (r - l);
    invHeight = 1.0f / (t - b);
    invDepth  = 1.0f / (f - n);

    // row 0: x scale and horizontal off-centre term
    m[0][0] = (2*n) * invWidth;
    m[0][1] = 0.0f;
    m[0][2] = (r + l) * invWidth;
    m[0][3] = 0.0f;

    // row 1: y scale and vertical off-centre term
    m[1][0] = 0.0f;
    m[1][1] = (2*n) * invHeight;
    m[1][2] = (t + b) * invHeight;
    m[1][3] = 0.0f;

    // row 2: scale z to (-w, 0) range
    m[2][0] = 0.0f;
    m[2][1] = 0.0f;
    m[2][2] = -n * invDepth;
    m[2][3] = -(f * n) * invDepth;

    // row 3: w = -z
    m[3][0] = 0.0f;
    m[3][1] = 0.0f;
    m[3][2] = -1.0f;
    m[3][3] = 0.0f;
}

/*---------------------------------------------------------------------*

Name:           MTXPerspective

Description:    compute a 4x4 perspective projection matrix from
                field of view and aspect ratio.


Arguments:      m:       	4x4 matrix to be set

                fovy:    	total field of view in degrees in the YZ plane

                aspect:  	ratio of view window width:height (X / Y)

                n:       	positive distance from camera to near clipping plane

                f:       	positive distance from camera to far clipping plane


Return:         none

 *---------------------------------------------------------------------*/
/*---------------------------------------------------------------------*
    C version
 *---------------------------------------------------------------------*/
void C_MTXPerspective ( Mtx44 m, f32 fovY, f32 aspect, f32 n, f32 f )
{
    f32 halfFovDeg;     // half of the vertical field of view, degrees
    f32 halfFovRad;     // same angle, radians
    f32 cotHalfFov;     // cotangent of the half angle
    f32 invDepth;       // 1 / (f - n)
    int row;
    int col;

    ASSERTMSG( (m != 0),                             MTX_PERSPECTIVE_1    );
    ASSERTMSG( ( (fovY > 0.0) && ( fovY < 180.0) ),  MTX_PERSPECTIVE_2    );
    ASSERTMSG( (aspect != 0),                        MTX_PERSPECTIVE_3    );

    // cotangent of half the (YZ) field of view
    halfFovDeg = fovY * 0.5f;
    halfFovRad = MTXDegToRad( halfFovDeg );
    cotHalfFov = 1.0f / tanf(halfFovRad);

    invDepth   = 1.0f / (f - n);

    // start from all zeros, then fill in the five non-zero entries
    for ( row = 0 ; row < 4 ; row++ )
    {
        for ( col = 0 ; col < 4 ; col++ )
        {
            m[row][col] = 0.0f;
        }
    }

    m[0][0] = cotHalfFov / aspect;
    m[1][1] = cotHalfFov;

    // scale z to (-w, 0) range
    m[2][2] = -n * invDepth;
    m[2][3] = -(f * n) * invDepth;

    m[3][2] = -1.0f;
}

/*---------------------------------------------------------------------*

Name:           MTXOrtho

Description:    compute a 4x4 orthographic projection matrix.


Arguments:      m:        	4x4 matrix to be set

                t:        	top coord. of parallel view volume

                b:        	bottom coord of parallel view volume

                l:        	left coord. of parallel view volume

                r:        	right coord. of parallel view volume

                n:        	positive distance from camera to near clipping plane

                f:        	positive distance from camera to far clipping plane


Return:         none

 *---------------------------------------------------------------------*/
/*---------------------------------------------------------------------*
    C version
 *---------------------------------------------------------------------*/
void C_MTXOrtho ( Mtx44 m, f32 t, f32 b, f32 l, f32 r, f32 n, f32 f )
{
    f32 invWidth;       // 1 / (r - l)
    f32 invHeight;      // 1 / (t - b)
    f32 invDepth;       // 1 / (f - n)

    // A zero-sized extent on any axis would make its reciprocal blow up.
    ASSERTMSG( (m != 0),  MTX_ORTHO_1  );
    ASSERTMSG( (t != b),  MTX_ORTHO_2  );
    ASSERTMSG( (l != r),  MTX_ORTHO_3  );
    ASSERTMSG( (n != f),  MTX_ORTHO_4  );

    invWidth  = 1.0f / (r - l);
    invHeight = 1.0f / (t - b);
    invDepth  = 1.0f / (f - n);

    // row 0: x scale and offset
    m[0][0] = 2.0f * invWidth;
    m[0][1] = 0.0f;
    m[0][2] = 0.0f;
    m[0][3] = -(r + l) * invWidth;

    // row 1: y scale and offset
    m[1][0] = 0.0f;
    m[1][1] = 2.0f * invHeight;
    m[1][2] = 0.0f;
    m[1][3] = -(t + b) * invHeight;

    // row 2: scale z to (-1, 0) range
    m[2][0] = 0.0f;
    m[2][1] = 0.0f;
    m[2][2] = -(1.0f) * invDepth;
    m[2][3] = -f * invDepth;

    // row 3: w stays 1 (no perspective divide effect)
    m[3][0] = 0.0f;
    m[3][1] = 0.0f;
    m[3][2] = 0.0f;
    m[3][3] = 1.0f;
}

/*---------------------------------------------------------------------*


                             GENERAL SECTION


 *---------------------------------------------------------------------*/
/* NOTE: Prototypes for these functions are defined in "mtx44ext.h".   */


/*---------------------------------------------------------------------*
Name:           MTX44Identity

Description:    sets a matrix to identity

Arguments:      m :  matrix to be set

Return:         none

 *---------------------------------------------------------------------*/
/*---------------------------------------------------------------------*
    C version
 *---------------------------------------------------------------------*/
void C_MTX44Identity( Mtx44 m )
{
    int row;
    int col;

    ASSERTMSG( (m != 0), MTX44_IDENTITY_1 );

    // ones on the diagonal, zeros everywhere else
    for ( row = 0 ; row < 4 ; row++ )
    {
        for ( col = 0 ; col < 4 ; col++ )
        {
            m[row][col] = ( row == col ) ? 1.0f : 0.0f;
        }
    }
}

/*---------------------------------------------------------------------*
    Paired-Single assembler version
 *---------------------------------------------------------------------*
            Note that this performs NO error checking.
            Actually there is not much performance advantage.
 *---------------------------------------------------------------------*/
#ifdef  GEKKO
// Writes the 4x4 identity into m: four single stores of 1.0 on the diagonal
// (byte offsets 0, 20, 40, 60) and six paired stores of 0.0 covering the
// twelve off-diagonal elements. Element [r][c] lives at byte offset 16*r + 4*c.
// NOTE(review): the paired stores write both halves of c0; this relies on both
// halves holding 0.0 after the initialization above -- confirm for the target.
void PSMTX44Identity( register Mtx44 m )
{
    register f32 c1 = 1.0F;
    register f32 c0 = 0.0F;

    asm
    {
        stfs        c1,  0(m);              // m00
        psq_st      c0,  4(m), 0, 0;        // m01,m02
        psq_st      c0, 12(m), 0, 0;        // m03,m10
        stfs        c1, 20(m);              // m11
        psq_st      c0, 24(m), 0, 0;        // m12,m13
        psq_st      c0, 32(m), 0, 0;        // m20,m21
        stfs        c1, 40(m);              // m22
        psq_st      c0, 44(m), 0, 0;        // m23,m30
        psq_st      c0, 52(m), 0, 0;        // m31,m32
        stfs        c1, 60(m);              // m33
    }
}
#endif // GEKKO

/*---------------------------------------------------------------------*
Name:           MTX44Copy

Description:    copies the contents of one matrix into another

Arguments:      src:        	source matrix for copy
                dst:        	destination matrix for copy


Return:         none
 *---------------------------------------------------------------------*/
/*---------------------------------------------------------------------*
    C version
 *---------------------------------------------------------------------*/
void C_MTX44Copy( const Mtx44 src, Mtx44 dst )
{
    int row;
    int col;

    ASSERTMSG( (src != 0) , MTX44_COPY_1 );
    ASSERTMSG( (dst != 0) , MTX44_COPY_2 );

    // copying a matrix onto itself is a no-op
    if( src != dst )
    {
        for ( row = 0 ; row < 4 ; row++ )
        {
            for ( col = 0 ; col < 4 ; col++ )
            {
                dst[row][col] = src[row][col];
            }
        }
    }
}

/*---------------------------------------------------------------------*
    Paired-Single assembler version
 *---------------------------------------------------------------------*
                Note that this performs NO error checking.
 *---------------------------------------------------------------------*/
#ifdef  GEKKO
// Copies the 16 floats of src to dst as eight load/store pairs of two floats
// each (byte offsets 0, 8, ..., 56), always through fp1. Unlike the C version
// there is no src == dst shortcut; each pair is loaded before it is stored.
asm void PSMTX44Copy( const register Mtx44 src, register Mtx44 dst )
{
    nofralloc;
    psq_l       fp1,  0(src), 0, 0;     // m00,m01
    psq_st      fp1,  0(dst), 0, 0;
    psq_l       fp1,  8(src), 0, 0;     // m02,m03
    psq_st      fp1,  8(dst), 0, 0;
    psq_l       fp1, 16(src), 0, 0;     // m10,m11
    psq_st      fp1, 16(dst), 0, 0;
    psq_l       fp1, 24(src), 0, 0;     // m12,m13
    psq_st      fp1, 24(dst), 0, 0;
    psq_l       fp1, 32(src), 0, 0;     // m20,m21
    psq_st      fp1, 32(dst), 0, 0;
    psq_l       fp1, 40(src), 0, 0;     // m22,m23
    psq_st      fp1, 40(dst), 0, 0;
    psq_l       fp1, 48(src), 0, 0;     // m30,m31
    psq_st      fp1, 48(dst), 0, 0;
    psq_l       fp1, 56(src), 0, 0;     // m32,m33
    psq_st      fp1, 56(dst), 0, 0;
    blr;
}
#endif  // GEKKO


/*---------------------------------------------------------------------*
Name:           MTX44Concat

Description:    concatenates two matrices.
                order of operation is A x B = AB.
                ok for any of ab == a == b.

                saves a MTX44Copy operation if ab is neither a nor b.

Arguments:      a:        	first matrix for concat.
                b:        	second matrix for concat.
                ab:       	resultant matrix from concat.

Return:         none
 *---------------------------------------------------------------------*/
/*---------------------------------------------------------------------*
    C version
 *---------------------------------------------------------------------*/
void C_MTX44Concat( const Mtx44 a, const Mtx44 b, Mtx44 ab )
{
    Mtx44       mTmp;
    Mtx44Ptr    m;
    int         row;
    int         col;

    ASSERTMSG( (a  != 0), MTX44_CONCAT_1 );
    ASSERTMSG( (b  != 0), MTX44_CONCAT_2 );
    ASSERTMSG( (ab != 0), MTX44_CONCAT_3 );

    // When the output aliases an input, build the product in a scratch
    // matrix so the input is not overwritten while it is still being read.
    m = ( (ab == a) || (ab == b) ) ? mTmp : ab;

    // compute (a x b) -> m, one element per iteration: row of a times column of b
    for ( row = 0 ; row < 4 ; row++ )
    {
        for ( col = 0 ; col < 4 ; col++ )
        {
            m[row][col] = a[row][0]*b[0][col] + a[row][1]*b[1][col] + a[row][2]*b[2][col] + a[row][3]*b[3][col];
        }
    }

    // overwrite a or b if needed
    if( m == mTmp )
    {
        C_MTX44Copy( mTmp, ab );
    }
}


/*---------------------------------------------------------------------*
    Paired-Single assembler version
 *---------------------------------------------------------------------*
                Note that this performs NO error checking.
 *---------------------------------------------------------------------*/
#ifdef  GEKKO
// Computes ab = a x b two result elements at a time. The first half of the
// routine produces the left two columns of every row (fp6, fp8, fp10, fp12),
// the second half the right two columns (fp7, fp9, fp11, fp13); rows of a are
// loaded twice, once per half.
// All loads from b are issued before the first store to ab, and every pair of
// a is loaded for the last time before the same byte offset of ab is stored,
// so the instruction order itself tolerates ab aliasing a and/or b.
asm void PSMTX44Concat(
    const register Mtx44 a,
    const register Mtx44 b,
          register Mtx44 ab
)
{
    nofralloc;
    psq_l       fp0 ,  0(a), 0, 0;          // a00,a01
    psq_l       fp2 ,  0(b), 0, 0;          // b00,b01
    ps_muls0    fp6 ,   fp2,  fp0;          // b00a00,b01a00
    psq_l       fp3 , 16(b), 0, 0;          // b10,b11
    psq_l       fp4 , 32(b), 0, 0;          // b20,b21
    ps_madds1   fp6 ,   fp3,  fp0,  fp6;    // b00a00+b10a01,b01a00+b11a01
    psq_l       fp1 ,  8(a), 0, 0;          // a02,a03
    psq_l       fp5 , 48(b), 0, 0;          // b30,b31

    // b00a00+b10a01+b20a02,b01a00+b11a01+b21a02
    ps_madds0   fp6 ,   fp4,  fp1,  fp6;
    psq_l       fp0 , 16(a), 0, 0;          // a10,a11

    // b00a00+b10a01+b20a02+b30a03,b01a00+b11a01+b21a02+b31a03
    ps_madds1   fp6 ,   fp5,  fp1,  fp6;
    psq_l       fp1 , 24(a), 0, 0;          // a12,a13
    ps_muls0    fp8 ,   fp2,  fp0;          // b00a10,b01a10
    ps_madds1   fp8 ,   fp3,  fp0,  fp8;    // b00a10+b10a11,b01a10+b11a11
    psq_l       fp0 , 32(a), 0, 0;          // a20,a21

    // b00a10+b10a11+b20a12,b01a10+b11a11+b21a12
    ps_madds0   fp8 ,   fp4,  fp1,  fp8;

    // b00a10+b10a11+b20a12+b30a13,b01a10+b11a11+b21a12+b31a13
    ps_madds1   fp8 ,   fp5,  fp1,  fp8;
    psq_l       fp1 , 40(a), 0, 0;          // a22,a23
    ps_muls0    fp10,   fp2,  fp0;          // b00a20,b01a20
    ps_madds1   fp10,   fp3,  fp0, fp10;    // b00a20+b10a21,b01a20+b11a21
    psq_l       fp0 , 48(a), 0, 0;          // a30,a31

    // b00a20+b10a21+b20a22,b01a20+b11a21+b21a22
    ps_madds0   fp10,   fp4,  fp1, fp10;

    // b00a20+b10a21+b20a22+b30a23,b01a20+b11a21+b21a22+b31a23
    ps_madds1   fp10,   fp5,  fp1, fp10;
    psq_l       fp1 , 56(a), 0, 0;          // a32,a33

    ps_muls0    fp12,   fp2,  fp0;          // b00a30,b01a30
    psq_l       fp2 ,  8(b), 0, 0;          // b02,b03
    ps_madds1   fp12,   fp3,  fp0, fp12;    // b00a30+b10a31,b01a30+b11a31
    psq_l       fp0 ,  0(a), 0, 0;          // a00,a01

    // b00a30+b10a31+b20a32,b01a30+b11a31+b21a32
    ps_madds0   fp12,   fp4,  fp1, fp12;
    psq_l       fp3 , 24(b), 0, 0;          // b12,b13

    // b00a30+b10a31+b20a32+b30a33,b01a30+b11a31+b21a32+b31a33
    ps_madds1   fp12,   fp5,  fp1, fp12;
    psq_l       fp1 ,  8(a), 0, 0;          // a02,a03

    // ---- second half: right two columns of the result ----
    ps_muls0    fp7 ,   fp2,  fp0;          // b02a00,b03a00
    psq_l       fp4 , 40(b), 0, 0;          // b22,b23
    ps_madds1   fp7 ,   fp3,  fp0, fp7;     // b02a00+b12a01,b03a00+b13a01
    psq_l       fp5 , 56(b), 0, 0;          // b32,b33 (last load from b)

    // b02a00+b12a01+b22a02,b03a00+b13a01+b23a02
    ps_madds0   fp7 ,   fp4,  fp1, fp7;

    psq_l       fp0 , 16(a), 0, 0;          // a10,a11

    // b02a00+b12a01+b22a02+b32a03,b03a00+b13a01+b23a02+b33a03
    ps_madds1   fp7 ,   fp5,  fp1, fp7;
    psq_l       fp1 , 24(a), 0, 0;          // a12,a13

    ps_muls0    fp9 ,   fp2,  fp0;          // b02a10,b03a10
    psq_st      fp6 , 0(ab), 0, 0;          // ab00,ab01 (first store to ab)
    ps_madds1   fp9 ,   fp3,  fp0, fp9;     // b02a10+b12a11,b03a10+b13a11
    psq_l       fp0 , 32(a), 0, 0;          // a20,a21

    // b02a10+b12a11+b22a12,b03a10+b13a11+b23a12
    ps_madds0   fp9,    fp4,  fp1, fp9;
    psq_st      fp8 ,16(ab), 0, 0;          // ab10,ab11

    // b02a10+b12a11+b22a12+b32a13,b03a10+b13a11+b23a12+b33a13
    ps_madds1   fp9 ,   fp5,  fp1, fp9;
    psq_l       fp1 , 40(a), 0, 0;          // a22,a23
    ps_muls0    fp11,   fp2,  fp0;          // b02a20,b03a20
    psq_st      fp10,32(ab), 0, 0;          // ab20,ab21
    ps_madds1   fp11,   fp3,  fp0, fp11;    // b02a20+b12a21,b03a20+b13a21
    psq_l       fp0 , 48(a), 0, 0;          // a30,a31

    // b02a20+b12a21+b22a22,b03a20+b13a21+b23a22
    ps_madds0   fp11,   fp4,  fp1, fp11;
    psq_st      fp12,48(ab), 0, 0;          // ab30,ab31

    // b02a20+b12a21+b22a22+b32a23,b03a20+b13a21+b23a22+b33a23
    ps_madds1   fp11,   fp5,  fp1, fp11;

    psq_l       fp1,  56(a), 0, 0;          // a32,a33
    ps_muls0    fp13,   fp2,  fp0;          // b02a30,b03a30
    psq_st      fp7 , 8(ab), 0, 0;          // ab02,ab03
    ps_madds1   fp13,   fp3,  fp0, fp13;    // b02a30+b12a31,b03a30+b13a31
    psq_st      fp9 ,24(ab), 0, 0;          // ab12,ab13

    // b02a30+b12a31+b22a32,b03a30+b13a31+b23a32
    ps_madds0   fp13,   fp4,  fp1, fp13;
    psq_st      fp11,40(ab), 0, 0;          // ab22,ab23

    // b02a30+b12a31+b22a32+b32a33,b03a30+b13a31+b23a32+b33a33
    ps_madds1   fp13,   fp5,  fp1, fp13;

    psq_st      fp13,56(ab), 0, 0;          // ab32,ab33
    blr

}
#endif  // GEKKO

/*---------------------------------------------------------------------*
Name:           MTX44Transpose

Description:    computes the transpose of a matrix.

Arguments:      src:       	source matrix.
                xPose:     	destination (transposed) matrix.
                          ok if src == xPose.

Return:         none
 *---------------------------------------------------------------------*/
/*---------------------------------------------------------------------*
    C version
 *---------------------------------------------------------------------*/
void C_MTX44Transpose ( const Mtx44 src, Mtx44 xPose )
{
    Mtx44       mTmp;
    Mtx44Ptr    m;
    int         row;
    int         col;

    ASSERTMSG( (src   != 0), MTX44_TRANSPOSE_1  );
    ASSERTMSG( (xPose != 0), MTX44_TRANSPOSE_2  );

    // An in-place transpose goes through a scratch matrix so that no source
    // element is overwritten before it has been read.
    m = ( src == xPose ) ? mTmp : xPose;

    // element (row, col) of the result is element (col, row) of the source
    for ( row = 0 ; row < 4 ; row++ )
    {
        for ( col = 0 ; col < 4 ; col++ )
        {
            m[row][col] = src[col][row];
        }
    }

    // copy back if needed
    if( m == mTmp )
    {
        MTX44Copy( mTmp, xPose );
    }
}
/*---------------------------------------------------------------------*
    Paired-Single assembler version
 *---------------------------------------------------------------------*
                Note that this performs NO error checking.
 *---------------------------------------------------------------------*/
#ifdef  GEKKO
// Transposes src into xPose by loading two source rows as pairs, merging the
// matching halves of both registers into one pair (ps_merge00 = first halves,
// ps_merge11 = second halves) and storing that pair into a destination row.
// In the comments below sRC is source element [R][C]; each store comment names
// the xPose elements being written.
// Every source pair is loaded before the same byte offset of xPose is stored.
asm void PSMTX44Transpose ( const register Mtx44 src, register Mtx44 xPose )
{
    nofralloc;
    psq_l       fp0,  0(src), 0, 0;     // fp0 <= s00,s01
    psq_l       fp1, 16(src), 0, 0;     // fp1 <= s10,s11

    ps_merge00  fp4, fp0, fp1;              // fp4 <= s00,s10
    psq_l       fp2,  8(src), 0, 0;     // fp2 <= s02,s03
    psq_st      fp4,  0(xPose), 0, 0;   // xPose00,xPose01

    ps_merge11  fp5, fp0, fp1;              // fp5 <= s01,s11
    psq_l       fp3, 24(src), 0, 0;     // fp3 <= s12,s13
    psq_st      fp5, 16(xPose), 0, 0;   // xPose10,xPose11

    ps_merge00  fp4, fp2, fp3;              // fp4 <= s02,s12
    psq_l       fp0, 32(src), 0, 0;     // fp0 <= s20,s21
    psq_st      fp4, 32(xPose), 0, 0;   // xPose20,xPose21

    ps_merge11  fp5, fp2, fp3;              // fp5 <= s03,s13
    psq_l       fp1, 48(src), 0, 0;     // fp1 <= s30,s31
    psq_st      fp5, 48(xPose), 0, 0;   // xPose30,xPose31

    ps_merge00  fp4, fp0, fp1;              // fp4 <= s20,s30
    psq_l       fp2, 40(src), 0, 0;     // fp2 <= s22,s23
    psq_st      fp4,  8(xPose), 0, 0;   // xPose02,xPose03

    ps_merge11  fp5, fp0, fp1;              // fp5 <= s21,s31
    psq_l       fp3, 56(src), 0, 0;     // fp3 <= s32,s33
    psq_st      fp5, 24(xPose), 0, 0;   // xPose12,xPose13

    ps_merge00  fp4, fp2, fp3;              // fp4 <= s22,s32
    psq_st      fp4, 40(xPose), 0, 0;   // xPose22,xPose23

    ps_merge11  fp5, fp2, fp3;              // fp5 <= s23,s33
    psq_st      fp5, 56(xPose), 0, 0;   // xPose32,xPose33

    blr;

}
#endif  // GEKKO


/*---------------------------------------------------------------------*
Name:           MTX44Inverse

Description:    computes a fast inverse of a matrix.
                uses Gauss-Jordan(with partial pivoting)

Arguments:      src:       	source matrix.
                inv:       	destination (inverse) matrix.
                          ok if src == inv.

Return:         0 if src is not invertible.
                1 on success.
 *---------------------------------------------------------------------*/
/*---------------------------------------------------------------------*
    C version only
 *---------------------------------------------------------------------*/
#define NUM         4
#define SWAPF(a,b)  do { f32 swapTmp = (a); (a) = (b); (b) = swapTmp; } while (0)

// Gauss-Jordan elimination with partial (row) pivoting.
// A working copy of src is reduced to the identity while the same row
// operations are applied to inv, which starts out as the identity.
// src is copied before inv is reset, so src == inv is fine.
// Returns 1 on success. Returns 0 as soon as a column has no non-zero pivot
// candidate; in that case inv has already been overwritten with a partial result.
u32 C_MTX44Inverse( const Mtx44 src, Mtx44 inv )
{
    Mtx44       work;
    s32         pivot, row, col;
    f32         factor;

    ASSERTMSG( (src != 0), MTX44_INVERSE_1 );
    ASSERTMSG( (inv != 0), MTX44_INVERSE_2 );

    MTX44Copy(src, work);
    MTX44Identity(inv);

    for ( pivot = 0 ; pivot < NUM ; pivot++ )
    {
        f32 largest  = 0.0f;
        s32 pivotRow = pivot;

        // pick the row at or below the diagonal whose entry in this
        // column has the largest magnitude
        for ( row = pivot ; row < NUM ; row++ )
        {
            f32 magnitude = fabsf(work[row][pivot]);

            if ( magnitude > largest )
            {
                largest  = magnitude;
                pivotRow = row;
            }
        }

        // nothing usable in this column: singular
        // (or not solvable with this algorithm)
        if ( largest == 0.0f )
        {
            return 0;
        }

        // bring the chosen row up to the diagonal position
        if ( pivotRow != pivot )
        {
            for ( col = 0 ; col < NUM ; col++ )
            {
                SWAPF(work[pivot][col], work[pivotRow][col]);
                SWAPF(inv[pivot][col],  inv[pivotRow][col]);
            }
        }

        // normalise the pivot row so the diagonal entry becomes 1
        factor = 1.0F / work[pivot][pivot];
        for ( col = 0 ; col < NUM ; col++ )
        {
            work[pivot][col] *= factor;
            inv[pivot][col]  *= factor;
        }

        // clear this column in every other row
        for ( row = 0 ; row < NUM ; row++ )
        {
            if ( row != pivot )
            {
                // latch the multiplier first: work[row][pivot] itself is
                // modified inside the loop below
                factor = work[row][pivot];
                for ( col = 0 ; col < NUM ; col++ )
                {
                    work[row][col] -= work[pivot][col] * factor;
                    inv[row][col]  -= inv[pivot][col] * factor;
                }
            }
        }
    }

    return 1;
}

#undef SWAPF
#undef NUM

/*---------------------------------------------------------------------*


                             MODEL SECTION


 *---------------------------------------------------------------------*/
/* NOTE: Prototypes for these functions are defined in "mtx44ext.h".   */

/*---------------------------------------------------------------------*
Name:           MTX44Trans

Description:    sets a translation matrix.

Arguments:       m:        	matrix to be set
                xT:        	x component of translation.
                yT:        	y component of translation.
                zT:        	z component of translation.

Return:         none
 *---------------------------------------------------------------------*/
/*---------------------------------------------------------------------*
    C version
 *---------------------------------------------------------------------*/
void C_MTX44Trans ( Mtx44 m, f32 xT, f32 yT, f32 zT )
{
    int row;
    int col;

    ASSERTMSG( (m != 0), MTX44_TRANS_1 );

    // first three columns: same as the identity matrix
    for ( row = 0 ; row < 4 ; row++ )
    {
        for ( col = 0 ; col < 3 ; col++ )
        {
            m[row][col] = ( row == col ) ? 1.0f : 0.0f;
        }
    }

    // last column: the translation, with 1 in the bottom-right corner
    m[0][3] = xT;
    m[1][3] = yT;
    m[2][3] = zT;
    m[3][3] = 1.0f;
}

/*---------------------------------------------------------------------*
    Paired-Single assembler version
 *---------------------------------------------------------------------*
                Note that this performs NO error checking.
 *---------------------------------------------------------------------*/
#ifdef  GEKKO
// Builds the translation matrix in m. The three translation components are
// written with plain single stores (m03, m13, m23); every other element is 0
// or 1 and is written with paired-single stores, where the third operand
// selects a two-float store (0) or a one-float store (1), as the per-line
// element comments show. c_01 holds the pair (0.0, 1.0) for the places where
// a zero is immediately followed by a one.
void PSMTX44Trans (
    register Mtx44 m,
    register f32 xT,
    register f32 yT,
    register f32 zT
)
{
    register f32 c_zero = 0.0F;
    register f32 c_one  = 1.0F;
    register f32 c_01;

    asm
    {
        stfs        xT,     12(m);              // m03
        stfs        yT,     28(m);              // m13
        ps_merge00  c_01,   c_zero, c_one;      // c_01 <- 0.0, 1.0
        stfs        zT,     44(m);              // m23
        psq_st      c_one,   0(m), 1, 0;        // m00
        psq_st      c_zero,  4(m), 0, 0;        // m01,m02
        psq_st      c_01,   16(m), 0, 0;        // m10,m11
        psq_st      c_zero, 24(m), 1, 0;        // m12
        psq_st      c_zero, 32(m), 0, 0;        // m20,m21
        psq_st      c_one,  40(m), 1, 0;        // m22
        psq_st      c_zero, 48(m), 0, 0;        // m30,m31
        psq_st      c_01,   56(m), 0, 0;        // m32,m33
    }
}
#endif  // GEKKO

/*---------------------------------------------------------------------*
Name:           MTX44TransApply

Description:    This function performs the operation equivalent to
                MTXTrans + MTXConcat.

Arguments:      src:       	matrix to be operated.
                dst:       	resultant matrix from concat.
                xT:        	x component of translation.
                yT:        	y component of translation.
                zT:        	z component of translation.

Return:         none
 *---------------------------------------------------------------------*/
/*---------------------------------------------------------------------*
    C version
 *---------------------------------------------------------------------*/
void C_MTX44TransApply ( const Mtx44 src, Mtx44 dst, f32 xT, f32 yT, f32 zT )
{
    int row;
    int col;

    // NOTE(review): both checks report MTX44_TRANSAPPLY_1; the dst check
    // presumably wants its own message id -- confirm against mtx44extAssert.h.
    ASSERTMSG( (src != 0), MTX44_TRANSAPPLY_1 );
    ASSERTMSG( (dst != 0), MTX44_TRANSAPPLY_1 );

    // Everything except the translation entries is carried over unchanged;
    // when operating in place there is nothing to carry over.
    if ( src != dst )
    {
        for ( row = 0 ; row < 4 ; row++ )
        {
            for ( col = 0 ; col < 3 ; col++ )
            {
                dst[row][col] = src[row][col];
            }
        }
        dst[3][3] = src[3][3];
    }

    // Only the last column of rows 0..2 is offset. Row 3 is never mixed in,
    // so this equals (translation x src) only when row 3 of src is (0, 0, 0, 1).
    dst[0][3] = src[0][3] + xT;
    dst[1][3] = src[1][3] + yT;
    dst[2][3] = src[2][3] + zT;
}

/*---------------------------------------------------------------------*
    Paired-Single assembler version
 *---------------------------------------------------------------------*
                Note that this performs NO error checking.
 *---------------------------------------------------------------------*/
#ifdef  GEKKO
// Copies src to dst pair by pair and adds xT, yT, zT to the last element of
// rows 0, 1 and 2 on the way. The pairs that end in a translation element
// (byte offsets 8, 24 and 40) go through ps_sum1; all other pairs are stored
// unchanged. Row 3 is copied as is.
// NOTE(review): ps_sum1 d, a, c, b is understood here as d = (c.first,
// a.first + b.second) -- confirm against the Gekko instruction reference.
asm void PSMTX44TransApply (
    const register Mtx44 src,
          register Mtx44 dst,
          register f32 xT,
          register f32 yT,
          register f32 zT
)
{
    nofralloc;
    psq_l       fp4, 0(src),     0, 0;          // fp4 <- src00,src01
    frsp        xT, xT;                         // to make sure xT = single precision
    psq_l       fp5, 8(src),     0, 0;          // fp5 <- src02,src03
    frsp        yT, yT;                         // to make sure yT = single precision
    psq_l       fp6, 16(src),    0, 0;          // fp6 <- src10,src11
    frsp        zT, zT;                         // to make sure zT = single precision
    psq_l       fp7, 24(src),    0, 0;          // fp7 <- src12,src13
    psq_st      fp4, 0(dst),     0, 0;          // dst00,dst01
    ps_sum1     fp5, xT, fp5, fp5;              // fp5 <- src02,src03+xT
    psq_l       fp4, 40(src),    0, 0;          // fp4 <- src22,src23
    psq_st      fp6, 16(dst),    0, 0;          // dst10,dst11
    ps_sum1     fp7, yT, fp7, fp7;              // fp7 <- src12,src13+yT
    psq_l       fp8, 32(src),    0, 0;          // fp8 <- src20,src21
    psq_st      fp5, 8(dst),     0, 0;          // dst02,dst03
    ps_sum1     fp4, zT, fp4, fp4;              // fp4 <- src22,src23+zT
    psq_st      fp7, 24(dst),    0, 0;          // dst12,dst13
    psq_st      fp8, 32(dst),    0, 0;          // dst20,dst21
    psq_l       fp5, 48(src),    0, 0;          // fp5 <- src30,src31
    psq_l       fp6, 56(src),    0, 0;          // fp6 <- src32,src33
    psq_st      fp4, 40(dst),    0, 0;          // dst22,dst23
    psq_st      fp5, 48(dst),    0, 0;          // dst30,dst31
    psq_st      fp6, 56(dst),    0, 0;          // dst32,dst33
    blr;

}
#endif  // GEKKO

/*---------------------------------------------------------------------*
Name:            MTX44Scale

Description:     sets a scaling matrix.

Arguments:       m:        	matrix to be set
                xS:        	x scale factor.
                yS:        	y scale factor.
                zS:        	z scale factor.

Return:         none
 *---------------------------------------------------------------------*/
/*---------------------------------------------------------------------*
    C version
 *---------------------------------------------------------------------*/
void C_MTX44Scale ( Mtx44 m, f32 xS, f32 yS, f32 zS )
{
    int row;
    int col;

    ASSERTMSG( (m != 0), MTX44_SCALE_1 );

    // clear everything off the diagonal
    for ( row = 0 ; row < 4 ; row++ )
    {
        for ( col = 0 ; col < 4 ; col++ )
        {
            if ( row != col )
            {
                m[row][col] = 0.0f;
            }
        }
    }

    // diagonal: the three scale factors, then 1
    m[0][0] = xS;
    m[1][1] = yS;
    m[2][2] = zS;
    m[3][3] = 1.0f;
}

/*---------------------------------------------------------------------*
    Paired-Single assembler version
 *---------------------------------------------------------------------*
                Note that this performs NO error checking.
 *---------------------------------------------------------------------*/
#ifdef  GEKKO
// Builds the scaling matrix in m: the three scale factors and the trailing
// 1.0 are written with single stores on the diagonal (byte offsets 0, 20, 40,
// 60); the twelve off-diagonal elements are zeroed with six paired stores.
void PSMTX44Scale (
    register Mtx44 m,
    register f32 xS,
    register f32 yS,
    register f32 zS
)
{
    register f32 c_zero = 0.0F;
    register f32 c_one  = 1.0F;

    asm
    {
        stfs        xS,      0(m);              // m00
        psq_st      c_zero,  4(m), 0, 0;        // m01,m02
        psq_st      c_zero, 12(m), 0, 0;        // m03,m10
        stfs        yS,     20(m);              // m11
        psq_st      c_zero, 24(m), 0, 0;        // m12,m13
        psq_st      c_zero, 32(m), 0, 0;        // m20,m21
        stfs        zS,     40(m);              // m22
        psq_st      c_zero, 44(m), 0, 0;        // m23,m30
        psq_st      c_zero, 52(m), 0, 0;        // m31,m32
        stfs        c_one,  60(m);              // m33
    }
}
#endif  // GEKKO

/*---------------------------------------------------------------------*
Name:           MTX44ScaleApply

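Description:    This function performs the operation equivalent to
                MTX44Scale + MTX44Concat: rows 0, 1 and 2 of src are
                multiplied by xS, yS and zS respectively and row 3 is
                copied unchanged.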

Arguments:      src:       	matrix to be operated.
                dst:       	resultant matrix from concat.
                xS:        	x scale factor.
                yS:        	y scale factor.
                zS:        	z scale factor.

Return:         none
*---------------------------------------------------------------------*/
/*---------------------------------------------------------------------*
    C version
 *---------------------------------------------------------------------*/
// Scales the first three rows of src by xS, yS and zS and copies the
// fourth row as-is, writing the result to dst.  Elements are processed
// one at a time in row-major order, each read immediately before the
// matching write.
void C_MTX44ScaleApply ( const Mtx44 src, Mtx44 dst, f32 xS, f32 yS, f32 zS )
{
    f32 rowScale[3];    // per-row multiplier for rows 0..2
    int row;
    int col;

    ASSERTMSG( (src != 0), MTX44_SCALEAPPLY_1 );
    ASSERTMSG( (dst != 0), MTX44_SCALEAPPLY_2 );

    rowScale[0] = xS;
    rowScale[1] = yS;
    rowScale[2] = zS;

    for ( row = 0; row < 3; row++ )
    {
        for ( col = 0; col < 4; col++ )
        {
            dst[row][col] = src[row][col] * rowScale[row];
        }
    }

    // the last row is a plain copy, not a multiplication by 1.0
    for ( col = 0; col < 4; col++ )
    {
        dst[3][col] = src[3][col];
    }
}

/*---------------------------------------------------------------------*
    Paired-Single assembler version
 *---------------------------------------------------------------------*
                Note that this performs NO error checking.
 *---------------------------------------------------------------------*/
#ifdef  GEKKO
// Paired-single version of C_MTX44ScaleApply.
//
//   src        : matrix to be scaled
//   dst        : receives the result
//   xS, yS, zS : multipliers applied to rows 0, 1 and 2 of src
//
// The matrix is handled as eight pairs of floats (byte offsets 0..56 in
// steps of 8).  Rows 0-2 are multiplied by their scale factor with
// ps_muls0; row 3 (fp10, fp11) is loaded and stored without modification.
// Loads, multiplies and stores are interleaved, but every pair is loaded
// from src before the same offset is stored to dst, so passing the same
// matrix for src and dst still works.
// Uses fp4-fp11 as scratch and has no stack frame (nofralloc).
asm void  PSMTX44ScaleApply (
    const register Mtx44 src,
          register Mtx44 dst,
          register f32 xS,
          register f32 yS,
          register f32 zS
)
{
    nofralloc;
    psq_l       fp4,     0(src), 0, 0;          // fp4 <- src00,src01
    frsp        xS, xS;                         // to make sure xS = single precision
    psq_l       fp5,     8(src), 0, 0;          // fp5 <- src02,src03
    frsp        yS, yS;                         // to make sure yS = single precision
    psq_l       fp6,    16(src), 0, 0;          // fp6 <- src10,src11
    ps_muls0    fp4,    fp4, xS;                // fp4 <- src00*xS,src01*xS
    psq_l       fp7,    24(src), 0, 0;          // fp7 <- src12,src13
    ps_muls0    fp5,    fp5, xS;                // fp5 <- src02*xS,src03*xS
    psq_l       fp8,    32(src), 0, 0;          // fp8 <- src20,src21
    frsp        zS, zS;                         // to make sure zS = single precision
    psq_st      fp4,     0(dst), 0, 0;          // dst00,dst01
    ps_muls0    fp6,    fp6, yS;                // fp6 <- src10*yS,src11*yS
    psq_l       fp9,    40(src), 0, 0;          // fp9 <- src22,src23
    psq_st      fp5,     8(dst), 0, 0;          // dst02,dst03
    ps_muls0    fp7,    fp7, yS;                // fp7 <- src12*yS,src13*yS
    psq_l       fp10,   48(src), 0, 0;          // fp10 <- src30,src31 (copied unscaled)
    psq_st      fp6,    16(dst), 0, 0;          // dst10,dst11
    ps_muls0    fp8,    fp8, zS;                // fp8 <- src20*zS,src21*zS
    psq_l       fp11,   56(src), 0, 0;          // fp11 <- src32,src33 (copied unscaled)
    psq_st      fp7,    24(dst), 0, 0;          // dst12,dst13
    ps_muls0    fp9,    fp9, zS;                // fp9 <- src22*zS,src23*zS
    psq_st      fp8,    32(dst), 0, 0;          // dst20,dst21
    psq_st      fp9,    40(dst), 0, 0;          // dst22,dst23
    psq_st      fp10,   48(dst), 0, 0;          // dst30,dst31
    psq_st      fp11,   56(dst), 0, 0;          // dst32,dst33
    blr;
}
#endif  // GEKKO

/*---------------------------------------------------------------------*
Name:           MTX44RotRad

Description:    sets a rotation matrix about one of the X, Y or Z axes

Arguments:      m:       	matrix to be set
                axis:    	major axis about which to rotate.
                        axis is passed in as a character.
                        it must be one of 'X', 'x', 'Y', 'y', 'Z', 'z'
                rad:     	rotation angle in radians.
                        note:  counter-clockwise rotation is positive.

Return:         none
 *---------------------------------------------------------------------*/
/*---------------------------------------------------------------------*
    C version
 *---------------------------------------------------------------------*/
// Builds a rotation matrix about a principal axis from an angle in
// radians by taking its sine and cosine and delegating to C_MTX44RotTrig.
void C_MTX44RotRad ( Mtx44 m, char axis, f32 rad )
{
    f32 c;      // cosine of rad
    f32 s;      // sine of rad

    ASSERTMSG( (m != 0), MTX44_ROTRAD_1 );

    // "axis" is not checked here; C_MTX44RotTrig validates it

    c = cosf(rad);
    s = sinf(rad);

    C_MTX44RotTrig( m, axis, s, c );
}

/*---------------------------------------------------------------------*
    Paired-Single assembler version
 *---------------------------------------------------------------------*
                Note that this performs NO error checking.
 *---------------------------------------------------------------------*/
#ifdef GEKKO
// Paired-single entry point: computes the sine and cosine of rad and
// hands them to PSMTX44RotTrig.  No argument checking is done here.
void PSMTX44RotRad ( Mtx44 m, char axis, f32 rad )
{
    f32 c;      // cosine of rad
    f32 s;      // sine of rad

    c = cosf(rad);
    s = sinf(rad);

    PSMTX44RotTrig( m, axis, s, c );
}
#endif // GEKKO

/*---------------------------------------------------------------------*
Name:           MTX44RotTrig

Description:    sets a rotation matrix about one of the X, Y or Z axes
                from specified trig ratios

Arguments:      m:       	matrix to be set
                axis:    	major axis about which to rotate.
                        axis is passed in as a character.
                        It must be one of 'X', 'x', 'Y', 'y', 'Z', 'z'
                sinA:    	sine of rotation angle.
                cosA:    	cosine of rotation angle.
                        note:  counter-clockwise rotation is positive.

Return:         none
 *---------------------------------------------------------------------*/
/*---------------------------------------------------------------------*
    C version
 *---------------------------------------------------------------------*/
// Builds a rotation matrix about the x, y or z axis from a precomputed
// sine/cosine pair.  The axis letter is case-insensitive.  For any other
// letter the assertion fires and m is left unmodified.
void C_MTX44RotTrig ( Mtx44 m, char axis, f32 sinA, f32 cosA )
{
    char lower;     // axis letter with bit 0x20 forced on (lower case)
    int  a;         // first axis of the rotation plane
    int  b;         // second axis of the rotation plane
    int  row;
    int  col;

    ASSERTMSG( (m != 0), MTX44_ROTTRIG_1 );

    lower = (char)( axis | 0x20 );

    // pick the plane (a,b) that the rotation acts in
    if ( lower == 'x' )
    {
        a = 1;  b = 2;
    }
    else if ( lower == 'y' )
    {
        a = 2;  b = 0;
    }
    else if ( lower == 'z' )
    {
        a = 0;  b = 1;
    }
    else
    {
        ASSERTMSG( 0, MTX44_ROTTRIG_2 );
        return;
    }

    // start from the identity matrix ...
    for ( row = 0; row < 4; row++ )
    {
        for ( col = 0; col < 4; col++ )
        {
            m[row][col] = ( row == col ) ? 1.0f : 0.0f;
        }
    }

    // ... then drop the 2x2 rotation into the chosen plane
    m[a][a] =  cosA;    m[a][b] = -sinA;
    m[b][a] =  sinA;    m[b][b] =  cosA;
}

/*---------------------------------------------------------------------*
    Paired-Single assembler version
 *---------------------------------------------------------------------*
                Note that this performs NO error checking.
 *---------------------------------------------------------------------*/
#ifdef  GEKKO
// Paired-single version of C_MTX44RotTrig.
//
//   m    : matrix to be set
//   axis : 'X'/'x', 'Y'/'y' or 'Z'/'z' (folded to lower case with ori 0x20)
//   sinA : sine of the rotation angle
//   cosA : cosine of the rotation angle
//
// Each case writes all 16 elements of m using byte offsets (4 bytes per
// element, row-major).  psq_st with "0, 0" stores a pair of elements;
// with "1, 0" it stores a single element.  If axis matches none of the
// three letters the code branches straight to _end and m is not written.
void PSMTX44RotTrig(
    register Mtx44  m,
    register char   axis,
    register f32    sinA,
    register f32    cosA
)
{
    register f32 ftmp0, ftmp1, ftmp2, ftmp3, ftmp4;
    register f32 c_zero, c_one;

    c_zero = 0.0F;
    c_one  = 1.0F;

    asm
    {
        frsp        sinA, sinA      // to make sure sinA = single precision

        // always lower case
        ori         axis, axis, 0x20

        frsp        cosA, cosA      // to make sure cosA = single precision

        // branches
        cmplwi      axis, 'x';                  // if 'x'
        beq         _case_x;
        cmplwi      axis, 'y';                  // if 'y'
        beq         _case_y;
        cmplwi      axis, 'z';                  // if 'z'
        beq         _case_z;
        b           _end;                       // unknown axis: m is left untouched

    _case_x:
        psq_st      c_one,   0(m), 1, 0;        // m00 <= 1.0
        psq_st      c_zero,  4(m), 0, 0;        // m01,m02 <= 0.0,0.0
        ps_neg      ftmp0, sinA;                // ftmp0 <= -sinA
        psq_st      c_zero, 12(m), 0, 0;        // m03,m10 <= 0.0,0.0
        ps_merge00  ftmp1, sinA, cosA;          // ftmp1 <= sinA,cosA
        psq_st      c_zero, 28(m), 0, 0;        // m13,m20 <= 0.0,0.0
        ps_merge00  ftmp0, cosA, ftmp0;         // ftmp0 <= cosA,-sinA
        psq_st      c_zero, 44(m), 0, 0;        // m23,m30 <= 0.0,0.0
        psq_st      c_zero, 52(m), 0, 0;        // m31,m32 <= 0.0,0.0
        psq_st      ftmp1,  36(m), 0, 0;        // m21,m22 <= sinA,cosA
        psq_st      ftmp0,  20(m), 0, 0;        // m11,m12 <= cosA,-sinA
        psq_st      c_one,  60(m), 1, 0;        // m33 <= 1.0
        b           _end;

    _case_y:
        ps_merge00  ftmp1, cosA, c_zero;        // ftmp1 <= cosA,0.0
        psq_st      c_zero, 48(m), 0, 0;        // m30,m31 <= 0.0,0.0
        ps_neg      ftmp0, sinA;                // ftmp0 <= -sinA
        psq_st      c_zero, 24(m), 0, 0;        // m12,m13 <= 0.0,0.0
        ps_merge00  ftmp3, c_zero, c_one;       // ftmp3 <= 0.0,1.0
        psq_st      ftmp1,   0(m), 0, 0;        // m00,m01 <= cosA,0.0
        ps_merge00  ftmp4, ftmp0, c_zero;       // ftmp4 <= -sinA,0.0
        ps_merge00  ftmp2, sinA,  c_zero;       // ftmp2 <= sinA,0.0
        psq_st      ftmp3,  16(m), 0, 0;        // m10,m11 <= 0.0,1.0
        psq_st      ftmp2,   8(m), 0, 0;        // m02,m03 <= sinA,0.0
        psq_st      ftmp4,  32(m), 0, 0;        // m20,m21 <= -sinA,0.0
        psq_st      ftmp1,  40(m), 0, 0;        // m22,m23 <= cosA,0.0
        psq_st      ftmp3,  56(m), 0, 0;        // m32,m33 <= 0.0,1.0
        b           _end;

    _case_z:
        psq_st      c_zero,  8(m), 0, 0;        // m02,m03 <= 0.0,0.0
        ps_neg      ftmp0, sinA;                // ftmp0 <= -sinA
        psq_st      c_zero, 24(m), 0, 0;        // m12,m13 <= 0.0,0.0
        ps_merge00  ftmp1, sinA, cosA;          // ftmp1 <= sinA,cosA
        psq_st      c_zero, 32(m), 0, 0;        // m20,m21 <= 0.0,0.0
        ps_merge00  ftmp2, c_one, c_zero;       // ftmp2 <= 1.0,0.0
        psq_st      c_zero, 48(m), 0, 0;        // m30,m31 <= 0.0,0.0
        ps_merge00  ftmp3, c_zero, c_one;       // ftmp3 <= 0.0,1.0
        psq_st      ftmp1,  16(m), 0, 0;        // m10,m11 <= sinA,cosA
        ps_merge00  ftmp4, cosA, ftmp0;         // ftmp4 <= cosA, -sinA
        psq_st      ftmp2,  40(m), 0, 0;        // m22,m23 <= 1.0,0.0
        psq_st      ftmp3,  56(m), 0, 0;        // m32,m33 <= 0.0,1.0
        psq_st      ftmp4,   0(m), 0, 0;        // m00,m01 <= cosA,-sinA

    _end:

    }
}
#endif  // GEKKO

/*---------------------------------------------------------------------*
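Name:           MTX44RotAxisRad

Description:    sets a rotation matrix about an arbitrary axis

Arguments:      m:          matrix to be set
                axis:       pointer to a vector containing the x,y,z axis
                            components; it is normalized internally.
                rad:        rotation angle in radians.

Return:         none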
 *---------------------------------------------------------------------*/
/*---------------------------------------------------------------------*
    C version
 *---------------------------------------------------------------------*/
// Builds the matrix for a rotation of rad radians about *axis.  The axis
// is normalized into a local copy first, so the caller's vector is not
// modified.  The fourth row and fourth column are set to (0, 0, 0, 1).
void C_MTX44RotAxisRad( Mtx44 m, const Vec *axis, f32 rad )
{
    Vec unit;                   // normalized copy of *axis
    f32 nx, ny, nz;             // components of the normalized axis
    f32 nxSq, nySq, nzSq;       // squares of those components
    f32 sinT, cosT;             // sine and cosine of rad
    f32 versT;                  // 1 - cosT


    ASSERTMSG( (m    != 0), MTX44_ROTAXIS_1  );
    ASSERTMSG( (axis != 0), MTX44_ROTAXIS_2  );

    C_VECNormalize( axis, &unit );

    nx = unit.x;
    ny = unit.y;
    nz = unit.z;

    nxSq = nx * nx;
    nySq = ny * ny;
    nzSq = nz * nz;

    sinT  = sinf(rad);
    cosT  = cosf(rad);
    versT = 1.0f - cosT;

    // no translation and no projection: last column and last row
    m[0][3] = 0.0f;
    m[1][3] = 0.0f;
    m[2][3] = 0.0f;

    m[3][0] = 0.0f;
    m[3][1] = 0.0f;
    m[3][2] = 0.0f;
    m[3][3] = 1.0f;

    // diagonal of the 3x3 rotation part
    m[0][0] = ( versT * nxSq ) + ( cosT );
    m[1][1] = ( versT * nySq ) + ( cosT );
    m[2][2] = ( versT * nzSq ) + ( cosT );

    // off-diagonal pairs: same product term, opposite sign on the sine term
    m[0][1] = ( versT * nx * ny ) - ( sinT * nz );
    m[1][0] = ( versT * nx * ny ) + ( sinT * nz );

    m[0][2] = ( versT * nx * nz ) + ( sinT * ny );
    m[2][0] = ( versT * nx * nz ) - ( sinT * ny );

    m[1][2] = ( versT * ny * nz ) - ( sinT * nx );
    m[2][1] = ( versT * ny * nz ) + ( sinT * nx );
}

/*---------------------------------------------------------------------*
    Paired-Single assembler version
 *---------------------------------------------------------------------*
                Note that this performs NO error checking.
 *---------------------------------------------------------------------*/
#ifdef GEKKO

// Paired-single worker for PSMTX44RotAxisRad: builds the matrix for a
// rotation about an arbitrary axis from a precomputed sine/cosine.
//
//   m    : matrix to be set (all 16 elements are written)
//   axis : rotation axis; read as three consecutive floats (x, y at
//          offset 0, z at offset 8) and normalized inside this routine
//   sT   : sine of the rotation angle
//   cT   : cosine of the rotation angle
//
// Outline:
//   1. S = x*x + y*y + z*z, then 1/sqrt(S) from the frsqrte estimate
//      refined by one Newton-Raphson step.
//   2. Normalized axis (nx, ny, nz) is combined with t = 1 - c, s and c
//      into the same nine terms the C version computes.
//   3. Row 3 is stored as (0, 0, 0, 1); m03, m13 and m23 are stored as 0.
//
// The constants 1.0 and 0.0 are derived from tmp9 (0.5 + 0.5, 0.5 - 0.5)
// rather than loaded separately.  tmp8 and tmp9 hold 3.0 and 0.5 only
// until the refinement step is done and are reused as scratch afterwards.
// Indented psq_st lines are stores interleaved into the computation.
//
// NOTE(review): several pair-valued comments sit on scalar instructions
// (fsubs, fmuls, fnmsubs, fmadds); they presume the scalar result is
// available in both halves of the destination register - confirm against
// the Gekko manual before changing the instruction mix.
static void __PSMTX44RotAxisRadInternal(
          register Mtx44  m,
    const register Vec   *axis,
          register f32    sT,
          register f32    cT )
{
    register f32    tT, fc0;
    register f32    tmp0, tmp1, tmp2, tmp3, tmp4;
    register f32    tmp5, tmp6, tmp7, tmp8, tmp9;

    tmp9 = 0.5F;    // Newton-Raphson constant; also the seed for 1.0 and 0.0
    tmp8 = 3.0F;    // Newton-Raphson constant

    asm
    {
        // to make sure cT = (single precision float value)
        frsp        cT, cT
        // tmp0 = [x][y] : LOAD
        psq_l       tmp0, 0(axis), 0, 0
        // to make sure sT = (single precision float value)
        frsp        sT, sT
        // tmp1 = [z][z] : LOAD
        lfs         tmp1, 8(axis)

        // tmp2 = [x*x][y*y]
        ps_mul      tmp2, tmp0, tmp0
        // tmp7 = [1.0F]  (0.5 + 0.5)
        fadds       tmp7, tmp9, tmp9
        // tmp3 = [x*x+z*z][y*y+z*z]
        ps_madd     tmp3, tmp1, tmp1, tmp2
        // fc0 = [0.0F]  (0.5 - 0.5)
        fsubs       fc0, tmp9, tmp9
        // tmp4 = [S = x*x+y*y+z*z][z]
        ps_sum0     tmp4, tmp3, tmp1, tmp2

        // tT = 1.0F - cT
        fsubs       tT, tmp7, cT

        // tmp5 = [1.0/sqrt(S)] :estimation[E]
        frsqrte     tmp5, tmp4
        // tmp7 = [0][1]
        ps_merge00  tmp7, fc0, tmp7
        // Newton-Raphson refinement step
        // E' = E/2(3.0 - E*E*S)
        fmuls       tmp2, tmp5, tmp5            // E*E
        fmuls       tmp3, tmp5, tmp9            // E/2
            // fc0 [m30=0][m31=0] : STORE
            psq_st      fc0, 48(m), 0, 0
        fnmsubs     tmp2, tmp2, tmp4, tmp8      // (3-E*E*S)
        fmuls       tmp5, tmp2, tmp3            // (E/2)(3-E*E*S)
            // tmp7 [m32=0][m33=1] : STORE
            psq_st      tmp7, 56(m), 0, 0
        // cT = [c][c]
        ps_merge00  cT, cT, cT

        // tmp0 = [nx = x/sqrt(S)][ny = y/sqrt(S)]
        ps_muls0    tmp0, tmp0, tmp5
        // tmp1 = [nz = z/sqrt(S)][nz = z/sqrt(S)]
        ps_muls0    tmp1, tmp1, tmp5
        // tmp4 = [t*nx][t*ny]
        ps_muls0    tmp4, tmp0, tT
        // tmp9 = [s*nx][s*ny]  (0.5 constant no longer needed)
        ps_muls0    tmp9, tmp0, sT
        // tmp5 = [t*nz][t*nz]
        ps_muls0    tmp5, tmp1, tT
        // tmp3 = [t*nx*ny][t*ny*ny]
        ps_muls1    tmp3, tmp4, tmp0
        // tmp2 = [t*nx*nx][t*ny*nx]
        ps_muls0    tmp2, tmp4, tmp0
        // tmp4 = [t*nx*nz][t*ny*nz]
        ps_muls0    tmp4, tmp4, tmp1

        // tmp6 = [t*nx*ny-s*nz][t*nx*ny-s*nz]
        fnmsubs     tmp6, tmp1, sT, tmp3
        // tmp7 = [t*nx*ny+s*nz][t*ny*ny+s*nz]
        fmadds      tmp7, tmp1, sT, tmp3

        // tmp0 = [-s*nx][-s*ny]
        ps_neg      tmp0, tmp9
        // tmp8 = [t*nx*nz+s*ny][0] == [m02][m03]  (3.0 constant no longer needed)
        ps_sum0     tmp8, tmp4, fc0, tmp9
        // tmp2 = [t*nx*nx+c][t*nx*ny-s*nz] == [m00][m01]
        ps_sum0     tmp2, tmp2, tmp6, cT
        // tmp3 = [t*nx*ny+s*nz][t*ny*ny+c] == [m10][m11]
        ps_sum1     tmp3, cT, tmp7, tmp3
        // tmp6 = [t*ny*nz-s*nx][0] == [m12][m13]
        ps_sum0     tmp6, tmp0, fc0 ,tmp4

            // tmp8 [m02][m03] : STORE
            psq_st      tmp8, 8(m), 0, 0
        // tmp0 = [t*nx*nz-s*ny][t*ny*nz]
        ps_sum0     tmp0, tmp4, tmp4, tmp0
            // tmp2 [m00][m01] : STORE
            psq_st      tmp2, 0(m), 0, 0
        // tmp5 = [t*nz*nz][t*nz*nz]
        ps_muls0    tmp5, tmp5, tmp1
            // tmp3 [m10][m11] : STORE
            psq_st      tmp3, 16(m), 0, 0
        // tmp4 = [t*nx*nz-s*ny][t*ny*nz+s*nx] == [m20][m21]
        ps_sum1     tmp4, tmp9, tmp0, tmp4
            // tmp6 [m12][m13] : STORE
            psq_st      tmp6, 24(m), 0, 0
        // tmp5 = [t*nz*nz+c][0]   == [m22][m23]
        ps_sum0     tmp5, tmp5, fc0, cT
            // tmp4 [m20][m21] : STORE
            psq_st      tmp4, 32(m), 0, 0
            // tmp5 [m22][m23] : STORE
            psq_st      tmp5, 40(m), 0, 0
    }
}

// Paired-single entry point for rotation about an arbitrary axis:
// evaluates the sine and cosine of rad and passes them, with m and axis,
// to __PSMTX44RotAxisRadInternal.  No argument checking is done here.
void PSMTX44RotAxisRad(
    Mtx44           m,
    const Vec      *axis,
    f32             rad )
{
    f32     c;      // cosine of rad
    f32     s;      // sine of rad

    c = cosf(rad);
    s = sinf(rad);

    __PSMTX44RotAxisRadInternal(m, axis, s, c);
}

#endif // GEKKO

#if ( __MWERKS__ == 0x00004100 )
#pragma defer_codegen reset
#endif

/*===========================================================================*/