/*---------------------------------------------------------------------------*
  Project:  matrix vector Library
  File:     mtx44_asm.s

  Copyright (C) Nintendo.  All rights reserved.

  These coded instructions, statements, and computer programs contain
  proprietary information of Nintendo of America Inc. and/or Nintendo
  Company Ltd., and are protected by Federal copyright law.  They may
  not be disclosed to third parties or copied or duplicated in any form,
  in whole or in part, without the prior written consent of Nintendo.
 *---------------------------------------------------------------------------*/

// Conventions used throughout this file:
//   - Register aliases are created with #define at the top of each routine
//     and removed with #undef at its end, so every alias is routine-local.
//   - Matrix elements are addressed as 4-byte floats in row-major order:
//     element [r][c] of a 4x4 matrix lives at byte offset (r*4 + c)*4.
//   - psq_l / psq_st move a pair of adjacent floats when the third operand
//     is 0, and a single float when it is 1.

    .data
    .align 2
CONST_0_0F: .float 0.0
CONST_0_5F: .float 0.5
CONST_1_0F: .float 1.0
CONST_3_0F: .float 3.0

    .text

////////////////////////////////////////////////////////////////////////////////
//void ASM_MTX44Identity(Mtx44 m)
//
// Fills the 16 floats at m with 1.0 on the diagonal (byte offsets 0, 20, 40,
// 60) and 0.0 everywhere else.
// Uses: r4, r5, fp1, fp2.
// NOTE(review): the paired stores of c0 rely on lfs leaving 0.0 in both
// slots of the register -- confirm against the CPU manual.
#define m   r3
#define c1  fp1
#define c0  fp2

    .global ASM_MTX44Identity
ASM_MTX44Identity:
    .type ASM_MTX44Identity, @function

    // c1 = 1.0F;
    lis     r4, CONST_1_0F@h
    ori     r4, r4, CONST_1_0F@l
    lfs     c1, 0(r4)

    // c0 = 0.0F;
    lis     r5, CONST_0_0F@h
    ori     r5, r5, CONST_0_0F@l
    lfs     c0, 0(r5)

    stfs    c1,  0(m)               // m00 <= 1.0
    psq_st  c0,  4(m), 0, 0         // m01,m02 <= 0.0,0.0
    psq_st  c0, 12(m), 0, 0         // m03,m10 <= 0.0,0.0
    stfs    c1, 20(m)               // m11 <= 1.0
    psq_st  c0, 24(m), 0, 0         // m12,m13 <= 0.0,0.0
    psq_st  c0, 32(m), 0, 0         // m20,m21 <= 0.0,0.0
    stfs    c1, 40(m)               // m22 <= 1.0
    psq_st  c0, 44(m), 0, 0         // m23,m30 <= 0.0,0.0
    psq_st  c0, 52(m), 0, 0         // m31,m32 <= 0.0,0.0
    stfs    c1, 60(m)               // m33 <= 1.0

    blr
    .size ASM_MTX44Identity,$-ASM_MTX44Identity

#undef m
#undef c1
#undef c0

////////////////////////////////////////////////////////////////////////////////
//void ASM_MTX44Copy(const Mtx44 src, Mtx44 dst)
//
// Copies 64 bytes (16 floats) from src to dst, one float pair at a time.
// Each pair is loaded immediately before it is stored at the same offset.
// Uses: fp1.
#define src r3
#define dst r4

    .global ASM_MTX44Copy
ASM_MTX44Copy:
    .type ASM_MTX44Copy, @function

    psq_l   fp1,  0(src), 0, 0
    psq_st  fp1,  0(dst), 0, 0
    psq_l   fp1,  8(src), 0, 0
    psq_st  fp1,  8(dst), 0, 0
    psq_l   fp1, 16(src), 0, 0
    psq_st  fp1, 16(dst), 0, 0
    psq_l   fp1, 24(src), 0, 0
    psq_st  fp1, 24(dst), 0, 0
    psq_l   fp1, 32(src), 0, 0
    psq_st  fp1, 32(dst), 0, 0
    psq_l   fp1, 40(src), 0, 0
    psq_st  fp1, 40(dst), 0, 0
    psq_l   fp1, 48(src), 0, 0
    psq_st  fp1, 48(dst), 0, 0
    psq_l   fp1, 56(src), 0, 0
    psq_st  fp1, 56(dst), 0, 0

    blr
    .size ASM_MTX44Copy,$-ASM_MTX44Copy

#undef src
#undef dst

////////////////////////////////////////////////////////////////////////////////
//void ASM_MTX44Transpose(const Mtx44 src, Mtx44 xPose)
//
// Writes the transpose of src to xPose.  The matrix is handled as four 2x2
// tiles: two row pairs are loaded, ps_merge00 / ps_merge11 pick out the two
// columns, and each column pair is stored into the mirrored tile.
// Every 8-byte slot of src is loaded before the store that targets the same
// offset in xPose is issued (see instruction order below).
// Uses: fp0-fp5.
#define src   r3
#define xPose r4

    .global ASM_MTX44Transpose
ASM_MTX44Transpose:
    .type ASM_MTX44Transpose, @function

    psq_l       fp0,  0(src), 0, 0      // fp0 <= s00,s01
    psq_l       fp1, 16(src), 0, 0      // fp1 <= s10,s11
    ps_merge00  fp4, fp0, fp1           // fp4 <= t00,t10
    psq_l       fp2,  8(src), 0, 0      // fp2 <= s02,s03
    psq_st      fp4,  0(xPose), 0, 0
    ps_merge11  fp5, fp0, fp1           // fp5 <= t01,t11
    psq_l       fp3, 24(src), 0, 0      // fp3 <= s12,s13
    psq_st      fp5, 16(xPose), 0, 0
    ps_merge00  fp4, fp2, fp3           // fp4 <= t02,t12
    psq_l       fp0, 32(src), 0, 0      // fp0 <= s20,s21
    psq_st      fp4, 32(xPose), 0, 0
    ps_merge11  fp5, fp2, fp3           // fp5 <= t03,t13
    psq_l       fp1, 48(src), 0, 0      // fp1 <= s30,s31
    psq_st      fp5, 48(xPose), 0, 0
    ps_merge00  fp4, fp0, fp1           // fp4 <= t20,t30
    psq_l       fp2, 40(src), 0, 0      // fp2 <= s22,s23
    psq_st      fp4,  8(xPose), 0, 0
    ps_merge11  fp5, fp0, fp1           // fp5 <= t21,t31
    psq_l       fp3, 56(src), 0, 0      // fp3 <= s32,s33
    psq_st      fp5, 24(xPose), 0, 0
    ps_merge00  fp4, fp2, fp3           // fp4 <= s22,s32
    psq_st      fp4, 40(xPose), 0, 0
    ps_merge11  fp5, fp2, fp3           // fp5 <= s23,s33
    psq_st      fp5, 56(xPose), 0, 0

    blr
    .size ASM_MTX44Transpose,$-ASM_MTX44Transpose

#undef src
#undef xPose

////////////////////////////////////////////////////////////////////////////////
// void ASM_MTX44Concat(const Mtx44 a, const Mtx44 b, Mtx44 ab)
//
// Computes ab = a * b (ab[i][j] = sum over k of a[i][k] * b[k][j]).
// The work is done in two passes: first the left two columns of every
// result row (accumulated in fp6, fp8, fp10, fp12), then the right two
// columns (fp7, fp9, fp11, fp13).  Stores of the first-pass results are
// interleaved with the second pass.
// All loads from b complete before the first store to ab, and each pair of
// a is reloaded before the store that targets the same offset in ab.
// Uses: fp0-fp13.
#define a   r3
#define b   r4
#define ab  r5

    .global ASM_MTX44Concat
ASM_MTX44Concat:
    .type ASM_MTX44Concat, @function

    // ---- pass 1: result columns 0 and 1 ----
    psq_l       fp0 ,  0(a), 0, 0       // a00,a01
    psq_l       fp2 ,  0(b), 0, 0       // b00,b01
    ps_muls0    fp6 , fp2, fp0          // b00a00,b01a00
    psq_l       fp3 , 16(b), 0, 0       // b10,b11
    psq_l       fp4 , 32(b), 0, 0       // b20,b21
    ps_madds1   fp6 , fp3, fp0, fp6     // b00a00+b10a01,b01a00+b11a01
    psq_l       fp1 ,  8(a), 0, 0       // a02,a03
    psq_l       fp5 , 48(b), 0, 0       // b30,b31
    // b00a00+b10a01+b20a02,b01a00+b11a01+b21a02
    ps_madds0   fp6 , fp4, fp1, fp6
    psq_l       fp0 , 16(a), 0, 0       // a10,a11
    // b00a00+b10a01+b20a02+b30a03,b01a00+b11a01+b21a02+b31a03
    ps_madds1   fp6 , fp5, fp1, fp6
    psq_l       fp1 , 24(a), 0, 0       // a12,a13
    ps_muls0    fp8 , fp2, fp0          // b00a10,b01a10
    ps_madds1   fp8 , fp3, fp0, fp8     // b00a10+b10a11,b01a10+b11a11
    psq_l       fp0 , 32(a), 0, 0       // a20,a21
    // b00a10+b10a11+b20a12,b01a10+b11a11+b21a12
    ps_madds0   fp8 , fp4, fp1, fp8
    // b00a10+b10a11+b20a12+b30a13,b01a10+b11a11+b21a12+b31a13
    ps_madds1   fp8 , fp5, fp1, fp8
    psq_l       fp1 , 40(a), 0, 0       // a22,a23
    ps_muls0    fp10, fp2, fp0          // b00a20,b01a20
    ps_madds1   fp10, fp3, fp0, fp10    // b00a20+b10a21,b01a20+b11a21
    psq_l       fp0 , 48(a), 0, 0       // a30,a31
    // b00a20+b10a21+b20a22,b01a20+b11a21+b21a22
    ps_madds0   fp10, fp4, fp1, fp10
    // b00a20+b10a21+b20a22+b30a23,b01a20+b11a21+b21a22+b31a23
    ps_madds1   fp10, fp5, fp1, fp10
    psq_l       fp1 , 56(a), 0, 0       // a32,a33
    ps_muls0    fp12, fp2, fp0          // b00a30,b01a30

    // ---- pass 2: result columns 2 and 3 (b column pairs reloaded) ----
    psq_l       fp2 ,  8(b), 0, 0       // b02,b03
    ps_madds1   fp12, fp3, fp0, fp12    // b00a30+b10a31,b01a30+b11a31
    psq_l       fp0 ,  0(a), 0, 0       // a00,a01
    // b00a30+b10a31+b20a32,b01a30+b11a31+b21a32
    ps_madds0   fp12, fp4, fp1, fp12
    psq_l       fp3 , 24(b), 0, 0       // b12,b13
    // b00a30+b10a31+b20a32+b30a33,b01a30+b11a31+b21a32+b31a33
    ps_madds1   fp12, fp5, fp1, fp12
    psq_l       fp1 ,  8(a), 0, 0       // a02,a03
    ps_muls0    fp7 , fp2, fp0          // b02a00,b03a00
    psq_l       fp4 , 40(b), 0, 0       // b22,b23
    ps_madds1   fp7 , fp3, fp0, fp7     // b02a00+b12a01,b03a00+b13a01
    psq_l       fp5 , 56(b), 0, 0       // b32,b33
    // b02a00+b12a01+b22a02,b03a00+b13a01+b23a02
    ps_madds0   fp7 , fp4, fp1, fp7
    psq_l       fp0 , 16(a), 0, 0       // a10,a11
    // b02a00+b12a01+b22a02+b32a03,b03a00+b13a01+b23a02+b33a03
    ps_madds1   fp7 , fp5, fp1, fp7
    psq_l       fp1 , 24(a), 0, 0       // a12,a13
    ps_muls0    fp9 , fp2, fp0          // b02a10,b03a10
    psq_st      fp6 ,  0(ab), 0, 0      // ab00,ab01
    ps_madds1   fp9 , fp3, fp0, fp9     // b02a10+b12a11,b03a10+b13a11
    psq_l       fp0 , 32(a), 0, 0       // a20,a21
    // b02a10+b12a11+b22a12,b03a10+b13a11+b23a12
    ps_madds0   fp9 , fp4, fp1, fp9
    psq_st      fp8 , 16(ab), 0, 0      // ab10,ab11
    // b02a10+b12a11+b22a12+b32a13,b03a10+b13a11+b23a12+b33a13
    ps_madds1   fp9 , fp5, fp1, fp9
    psq_l       fp1 , 40(a), 0, 0       // a22,a23
    ps_muls0    fp11, fp2, fp0          // b02a20,b03a20
    psq_st      fp10, 32(ab), 0, 0      // ab20,ab21
    ps_madds1   fp11, fp3, fp0, fp11    // b02a20+b12a21,b03a20+b13a21
    psq_l       fp0 , 48(a), 0, 0       // a30,a31
    // b02a20+b12a21+b22a22,b03a20+b13a21+b23a22
    ps_madds0   fp11, fp4, fp1, fp11
    psq_st      fp12, 48(ab), 0, 0      // ab30,ab31
    // b02a20+b12a21+b22a22+b32a23,b03a20+b13a21+b23a22+b33a23
    ps_madds1   fp11, fp5, fp1, fp11
    psq_l       fp1 , 56(a), 0, 0       // a32,a33
    ps_muls0    fp13, fp2, fp0          // b02a30,b03a30
    psq_st      fp7 ,  8(ab), 0, 0      // ab02,ab03
    ps_madds1   fp13, fp3, fp0, fp13    // b02a30+b12a31,b03a30+b13a31
    psq_st      fp9 , 24(ab), 0, 0      // ab12,ab13
    // b02a30+b12a31+b22a32,b03a30+b13a31+b23a32
    ps_madds0   fp13, fp4, fp1, fp13
    psq_st      fp11, 40(ab), 0, 0      // ab22,ab23
    // b02a30+b12a31+b22a32+b32a33,b03a30+b13a31+b23a32+b33a33
    ps_madds1   fp13, fp5, fp1, fp13
    psq_st      fp13, 56(ab), 0, 0      // ab32,ab33

    blr
    .size ASM_MTX44Concat,$-ASM_MTX44Concat

#undef a
#undef b
#undef ab

////////////////////////////////////////////////////////////////////////////////
// void _ASM_MTX44RotAxisRadInternal(Mtx44 m, const Vec *axis, f32 sT, f32 cT)
//
// Builds a 4x4 rotation matrix about the axis vector (three floats read at
// byte offsets 0, 4 and 8 of axis) from the sine (sT) and cosine (cT) passed
// in by the caller.  The axis is normalized here using a reciprocal square
// root estimate (frsqrte) followed by one refinement step.  The last row is
// written as 0,0,0,1 and the last column of rows 0-2 as 0.
// No check is made for a zero-length axis.
// The instruction order interleaves stores with arithmetic; keep it as is.
// Uses: r5, fp0-fp13.
    .global _ASM_MTX44RotAxisRadInternal

#define m       r3
#define axis    r4
#define sT      fp1
#define cT      fp2

_ASM_MTX44RotAxisRadInternal:
    .type _ASM_MTX44RotAxisRadInternal, @function

#define tT      fp3
#define fc0     fp4
#define tmp0    fp5
#define tmp1    fp6
#define tmp2    fp7
#define tmp3    fp8
#define tmp4    fp9
#define tmp5    fp10
#define tmp6    fp11
#define tmp7    fp12
#define tmp8    fp13
#define tmp9    fp0

    // tmp9 = 0.5F;
    lis         r5, CONST_0_5F@h
    ori         r5, r5, CONST_0_5F@l
    lfs         tmp9, 0(r5)

    // tmp8 = 3.0F;
    lis         r5, CONST_3_0F@h
    ori         r5, r5, CONST_3_0F@l
    lfs         tmp8, 0(r5)

    // to make sure cT = (single precision float value)
    frsp        cT, cT
    // tmp0 = [x][y] : LOAD
    psq_l       tmp0, 0(axis), 0, 0
    // to make sure sT = (single precision float value)
    frsp        sT, sT
    // tmp1 = [z][z] : LOAD
    lfs         tmp1, 8(axis)
    // tmp2 = [x*x][y*y]
    ps_mul      tmp2, tmp0, tmp0
    // tmp7 = [1.0F]  (0.5 + 0.5)
    fadds       tmp7, tmp9, tmp9
    // tmp3 = [x*x+z*z][y*y+z*z]
    ps_madd     tmp3, tmp1, tmp1, tmp2
    // fc0 = [0.0F]  (0.5 - 0.5)
    fsubs       fc0, tmp9, tmp9
    // tmp4 = [S = x*x+y*y+z*z][z]
    ps_sum0     tmp4, tmp3, tmp1, tmp2
    // tT = 1.0F - cT
    fsubs       tT, tmp7, cT
    // tmp5 = [1.0/sqrt(S)] :estimation[E]
    frsqrte     tmp5, tmp4
    // tmp7 = [0][1]
    ps_merge00  tmp7, fc0, tmp7

    // Newton-Raphson refinement step
    // E' = E/2(3.0 - E*E*S)
    fmuls       tmp2, tmp5, tmp5            // E*E
    fmuls       tmp3, tmp5, tmp9            // E/2
    // fc0 [m30=0][m31=0] : STORE
    psq_st      fc0, 48(m), 0, 0
    fnmsubs     tmp2, tmp2, tmp4, tmp8      // (3-E*E*S)
    fmuls       tmp5, tmp2, tmp3            // (E/2)(3-E*E*S)
    // tmp7 [m32=0][m33=1] : STORE
    psq_st      tmp7, 56(m), 0, 0

    // cT = [c][c]
    ps_merge00  cT, cT, cT
    // tmp0 = [nx = x/sqrt(S)][ny = y/sqrt(S)]
    ps_muls0    tmp0, tmp0, tmp5
    // tmp1 = [nz = z/sqrt(S)][nz = z/sqrt(S)]
    ps_muls0    tmp1, tmp1, tmp5
    // tmp4 = [t*nx][t*ny]
    ps_muls0    tmp4, tmp0, tT
    // tmp9 = [s*nx][s*ny]
    ps_muls0    tmp9, tmp0, sT
    // tmp5 = [t*nz][t*nz]
    ps_muls0    tmp5, tmp1, tT
    // tmp3 = [t*nx*ny][t*ny*ny]
    ps_muls1    tmp3, tmp4, tmp0
    // tmp2 = [t*nx*nx][t*ny*nx]
    ps_muls0    tmp2, tmp4, tmp0
    // tmp4 = [t*nx*nz][t*ny*nz]
    ps_muls0    tmp4, tmp4, tmp1
    // tmp6 = [t*nx*ny-s*nz][t*nx*ny-s*nz]
    fnmsubs     tmp6, tmp1, sT, tmp3
    // tmp7 = [t*nx*ny+s*nz][t*ny*ny+s*nz]
    fmadds      tmp7, tmp1, sT, tmp3
    // tmp0 = [-s*nx][-s*ny]
    ps_neg      tmp0, tmp9
    // tmp8 = [t*nx*nz+s*ny][0] == [m02][m03]
    ps_sum0     tmp8, tmp4, fc0, tmp9
    // tmp2 = [t*nx*nx+c][t*nx*ny-s*nz] == [m00][m01]
    ps_sum0     tmp2, tmp2, tmp6, cT
    // tmp3 = [t*nx*ny+s*nz][t*ny*ny+c] == [m10][m11]
    ps_sum1     tmp3, cT, tmp7, tmp3
    // tmp6 = [t*ny*nz-s*nx][0] == [m12][m13]
    ps_sum0     tmp6, tmp0, fc0, tmp4
    // tmp8 [m02][m03] : STORE
    psq_st      tmp8, 8(m), 0, 0
    // tmp0 = [t*nx*nz-s*ny][t*ny*nz]
    ps_sum0     tmp0, tmp4, tmp4, tmp0
    // tmp2 [m00][m01] : STORE
    psq_st      tmp2, 0(m), 0, 0
    // tmp5 = [t*nz*nz][t*nz*nz]
    ps_muls0    tmp5, tmp5, tmp1
    // tmp3 [m10][m11] : STORE
    psq_st      tmp3, 16(m), 0, 0
    // tmp4 = [t*nx*nz-s*ny][t*ny*nz+s*nx] == [m20][m21]
    ps_sum1     tmp4, tmp9, tmp0, tmp4
    // tmp6 [m12][m13] : STORE
    psq_st      tmp6, 24(m), 0, 0
    // tmp5 = [t*nz*nz+c][0] == [m22][m23]
    ps_sum0     tmp5, tmp5, fc0, cT
    // tmp4 [m20][m21] : STORE
    psq_st      tmp4, 32(m), 0, 0
    // tmp5 [m22][m23] : STORE
    psq_st      tmp5, 40(m), 0, 0

    blr
    .size _ASM_MTX44RotAxisRadInternal,$-_ASM_MTX44RotAxisRadInternal

#undef m
#undef axis
#undef sT
#undef cT
#undef tT
#undef fc0
#undef tmp0
#undef tmp1
#undef tmp2
#undef tmp3
#undef tmp4
#undef tmp5
#undef tmp6
#undef tmp7
#undef tmp8
#undef tmp9

////////////////////////////////////////////////////////////////////////////////
// void ASM_MTX44ScaleApply(const Mtx44 src, Mtx44 dst, f32 xS, f32 yS, f32 zS)
//
// Writes to dst the matrix src with row 0 multiplied by xS, row 1 by yS and
// row 2 by zS.  Row 3 is copied unchanged.
// Every pair of src is loaded before the store to the same offset of dst.
// Uses: fp1-fp11 (xS, yS, zS are rounded to single precision in place).
    .global ASM_MTX44ScaleApply

#define src r3
#define dst r4
#define xS  fp1
#define yS  fp2
#define zS  fp3

ASM_MTX44ScaleApply:
    .type ASM_MTX44ScaleApply, @function

    psq_l       fp4,   0(src), 0, 0     // fp4 <- src00,src01
    frsp        xS, xS                  // to make sure xS = single precision
    psq_l       fp5,   8(src), 0, 0     // fp5 <- src02,src03
    frsp        yS, yS                  // to make sure yS = single precision
    psq_l       fp6,  16(src), 0, 0     // fp6 <- src10,src11
    ps_muls0    fp4, fp4, xS            // fp4 <- src00*xS,src01*xS
    psq_l       fp7,  24(src), 0, 0     // fp7 <- src12,src13
    ps_muls0    fp5, fp5, xS            // fp5 <- src02*xS,src03*xS
    psq_l       fp8,  32(src), 0, 0     // fp8 <- src20,src21
    frsp        zS, zS                  // to make sure zS = single precision
    psq_st      fp4,   0(dst), 0, 0     // dst00,dst01
    ps_muls0    fp6, fp6, yS            // fp6 <- src10*yS,src11*yS
    psq_l       fp9,  40(src), 0, 0     // fp9 <- src22,src23
    psq_st      fp5,   8(dst), 0, 0     // dst02,dst03
    ps_muls0    fp7, fp7, yS            // fp7 <- src12*yS,src13*yS
    psq_l       fp10, 48(src), 0, 0     // fp10 <- src30,src31
    psq_st      fp6,  16(dst), 0, 0     // dst10,dst11
    ps_muls0    fp8, fp8, zS            // fp8 <- src20*zS,src21*zS
    psq_l       fp11, 56(src), 0, 0     // fp11 <- src32,src33
    psq_st      fp7,  24(dst), 0, 0     // dst12,dst13
    ps_muls0    fp9, fp9, zS            // fp9 <- src22*zS,src23*zS
    psq_st      fp8,  32(dst), 0, 0     // dst20,dst21
    psq_st      fp9,  40(dst), 0, 0     // dst22,dst23
    psq_st      fp10, 48(dst), 0, 0     // dst30,dst31
    psq_st      fp11, 56(dst), 0, 0     // dst32,dst33

    blr
    .size ASM_MTX44ScaleApply,$-ASM_MTX44ScaleApply

#undef src
#undef dst
#undef xS
#undef yS
#undef zS

////////////////////////////////////////////////////////////////////////////////
//void ASM_MTX44Trans(Mtx44 m, f32 xT, f32 yT, f32 zT)
//
// Fills m with a translation matrix: 1.0 on the diagonal, xT/yT/zT in the
// last column of rows 0-2 (byte offsets 12, 28, 44) and 0.0 elsewhere.
// Stores with a third operand of 1 write a single float, so the neighbouring
// translation element written by stfs is not overwritten.
// Uses: r4, r5, fp4-fp6.
    .global ASM_MTX44Trans

#define m       r3
#define xT      fp1
#define yT      fp2
#define zT      fp3
#define c_one   fp4
#define c_zero  fp5
#define c_01    fp6

ASM_MTX44Trans:
    .type ASM_MTX44Trans, @function

    // c_one = 1.0F;
    lis         r4, CONST_1_0F@h
    ori         r4, r4, CONST_1_0F@l
    lfs         c_one, 0(r4)

    // c_zero = 0.0F;
    lis         r5, CONST_0_0F@h
    ori         r5, r5, CONST_0_0F@l
    lfs         c_zero, 0(r5)

    stfs        xT, 12(m)                   // m03
    stfs        yT, 28(m)                   // m13
    ps_merge00  c_01, c_zero, c_one         // c_01 <- 0.0, 1.0
    stfs        zT, 44(m)                   // m23
    psq_st      c_one,   0(m), 1, 0         // m00
    psq_st      c_zero,  4(m), 0, 0         // m01,m02
    psq_st      c_01,   16(m), 0, 0         // m10,m11
    psq_st      c_zero, 24(m), 1, 0         // m12
    psq_st      c_zero, 32(m), 0, 0         // m20,m21
    psq_st      c_one,  40(m), 1, 0         // m22
    psq_st      c_zero, 48(m), 0, 0         // m30,m31
    psq_st      c_01,   56(m), 0, 0         // m32,m33

    blr
    .size ASM_MTX44Trans,$-ASM_MTX44Trans

#undef m
#undef xT
#undef yT
#undef zT
#undef c_zero
#undef c_one
#undef c_01

////////////////////////////////////////////////////////////////////////////////
//void ASM_MTX44TransApply(const Mtx44 src, Mtx44 dst, f32 xT, f32 yT, f32 zT)
//
// Copies src to dst while adding xT, yT and zT to the last-column elements
// of rows 0, 1 and 2 respectively (src03, src13, src23).  All other elements
// are copied unchanged.
// Every pair of src is loaded before the store to the same offset of dst.
// Uses: fp1-fp8 (xT, yT, zT are rounded to single precision in place).
    .global ASM_MTX44TransApply

#define src r3
#define dst r4
#define xT  fp1
#define yT  fp2
#define zT  fp3

ASM_MTX44TransApply:
    .type ASM_MTX44TransApply, @function

    psq_l       fp4,  0(src), 0, 0      // fp4 <- src00,src01
    frsp        xT, xT                  // to make sure xT = single precision
    psq_l       fp5,  8(src), 0, 0      // fp5 <- src02,src03
    frsp        yT, yT                  // to make sure yT = single precision
    psq_l       fp6, 16(src), 0, 0      // fp6 <- src10,src11
    frsp        zT, zT                  // to make sure zT = single precision
    psq_l       fp7, 24(src), 0, 0      // fp7 <- src12,src13
    psq_st      fp4,  0(dst), 0, 0      // dst00,dst01
    ps_sum1     fp5, xT, fp5, fp5       // fp5 <- src02,src03+xT
    psq_l       fp4, 40(src), 0, 0      // fp4 <- src22,src23
    psq_st      fp6, 16(dst), 0, 0      // dst10,dst11
    ps_sum1     fp7, yT, fp7, fp7       // fp7 <- src12,src13+yT
    psq_l       fp8, 32(src), 0, 0      // fp8 <- src20,src21
    psq_st      fp5,  8(dst), 0, 0      // dst02,dst03
    ps_sum1     fp4, zT, fp4, fp4       // fp4 <- src22,src23+zT
    psq_st      fp7, 24(dst), 0, 0      // dst12,dst13
    psq_st      fp8, 32(dst), 0, 0      // dst20,dst21
    psq_l       fp5, 48(src), 0, 0      // fp5 <- src30,src31
    psq_l       fp6, 56(src), 0, 0      // fp6 <- src32,src33
    psq_st      fp4, 40(dst), 0, 0      // dst22,dst23
    psq_st      fp5, 48(dst), 0, 0      // dst30,dst31
    psq_st      fp6, 56(dst), 0, 0      // dst32,dst33

    blr
    .size ASM_MTX44TransApply,$-ASM_MTX44TransApply

// Remove exactly the aliases this routine defined.
#undef src
#undef dst
#undef xT
#undef yT
#undef zT

////////////////////////////////////////////////////////////////////////////////
//void ASM_MTX44Scale(Mtx44 m, f32 xS, f32 yS, f32 zS)
//
// Fills m with a scale matrix: xS, yS, zS, 1.0 on the diagonal (byte offsets
// 0, 20, 40, 60) and 0.0 elsewhere.
// Uses: r4, r5, fp4, fp5.
// NOTE(review): the paired stores of c_zero rely on lfs leaving 0.0 in both
// slots of the register -- confirm against the CPU manual.
    .global ASM_MTX44Scale

#define m       r3
#define xS      fp1
#define yS      fp2
#define zS      fp3
#define c_one   fp4
#define c_zero  fp5

ASM_MTX44Scale:
    .type ASM_MTX44Scale, @function

    // c_one = 1.0F;
    lis         r4, CONST_1_0F@h
    ori         r4, r4, CONST_1_0F@l
    lfs         c_one, 0(r4)

    // c_zero = 0.0F;
    lis         r5, CONST_0_0F@h
    ori         r5, r5, CONST_0_0F@l
    lfs         c_zero, 0(r5)

    stfs        xS,      0(m)               // m00
    psq_st      c_zero,  4(m), 0, 0         // m01,m02
    psq_st      c_zero, 12(m), 0, 0         // m03,m10
    stfs        yS,     20(m)               // m11
    psq_st      c_zero, 24(m), 0, 0         // m12,m13
    psq_st      c_zero, 32(m), 0, 0         // m20,m21
    stfs        zS,     40(m)               // m22
    psq_st      c_zero, 44(m), 0, 0         // m23,m30
    psq_st      c_zero, 52(m), 0, 0         // m31,m32
    stfs        c_one,  60(m)               // m33

    blr
    .size ASM_MTX44Scale,$-ASM_MTX44Scale

#undef m
#undef xS
#undef yS
#undef zS
#undef c_zero
#undef c_one

////////////////////////////////////////////////////////////////////////////////
//void ASM_MTX44RotTrig(Mtx44 m, char axis, f32 sinA, f32 cosA)
//
// Fills m with a rotation matrix about the x, y or z axis, built from the
// sine and cosine supplied by the caller.  axis is OR-ed with 0x20 before
// being compared with 'x', 'y' and 'z', so upper-case letters are accepted.
// Any other axis value branches straight to the return and leaves m
// untouched.
// Uses: r5, r6, fp3-fp9, CR0; r4 (axis) is modified.
    .global ASM_MTX44RotTrig

#define m       r3
#define axis    r4
#define sinA    fp1
#define cosA    fp2
#define ftmp0   fp3
#define ftmp1   fp4
#define ftmp2   fp5
#define ftmp3   fp6
#define ftmp4   fp7
#define c_one   fp8
#define c_zero  fp9

ASM_MTX44RotTrig:
    .type ASM_MTX44RotTrig, @function

    // c_one = 1.0F;
    lis         r5, CONST_1_0F@h
    ori         r5, r5, CONST_1_0F@l
    lfs         c_one, 0(r5)

    // c_zero = 0.0F;
    lis         r6, CONST_0_0F@h
    ori         r6, r6, CONST_0_0F@l
    lfs         c_zero, 0(r6)

    frsp        sinA, sinA              // to make sure sinA = single precision
    // always lower case
    ori         axis, axis, 0x20
    frsp        cosA, cosA              // to make sure cosA = single precision

    // branches
    cmplwi      axis, 'x'               // if 'x'
    beq         _case_x
    cmplwi      axis, 'y'               // if 'y'
    beq         _case_y
    cmplwi      axis, 'z'               // if 'z'
    beq         _case_z
    b           _end

_case_x:
    psq_st      c_one,   0(m), 1, 0     // m00 <= 1.0
    psq_st      c_zero,  4(m), 0, 0     // m01,m02 <= 0.0,0.0
    ps_neg      ftmp0, sinA             // ftmp0 <= -sinA
    psq_st      c_zero, 12(m), 0, 0     // m03,m10 <= 0.0,0.0
    ps_merge00  ftmp1, sinA, cosA       // ftmp1 <= sinA,cosA
    psq_st      c_zero, 28(m), 0, 0     // m13,m20 <= 0.0,0.0
    ps_merge00  ftmp0, cosA, ftmp0      // ftmp0 <= cosA,-sinA
    psq_st      c_zero, 44(m), 0, 0     // m23,m30 <= 0.0,0.0
    psq_st      c_zero, 52(m), 0, 0     // m31,m32 <= 0.0,0.0
    psq_st      ftmp1,  36(m), 0, 0     // m21,m22 <= sinA,cosA
    psq_st      ftmp0,  20(m), 0, 0     // m11,m12 <= cosA,-sinA
    psq_st      c_one,  60(m), 1, 0     // m33 <= 1.0
    b           _end

_case_y:
    ps_merge00  ftmp1, cosA, c_zero     // ftmp1 <= cosA,0.0
    psq_st      c_zero, 48(m), 0, 0     // m30,m31 <= 0.0,0.0
    ps_neg      ftmp0, sinA             // ftmp0 <= -sinA
    psq_st      c_zero, 24(m), 0, 0     // m12,m13 <= 0.0,0.0
    ps_merge00  ftmp3, c_zero, c_one    // ftmp3 <= 0.0,1.0
    psq_st      ftmp1,   0(m), 0, 0     // m00,m01 <= cosA,0.0
    ps_merge00  ftmp4, ftmp0, c_zero    // ftmp4 <= -sinA,0.0
    ps_merge00  ftmp2, sinA, c_zero     // ftmp2 <= sinA,0.0
    psq_st      ftmp3,  16(m), 0, 0     // m10,m11 <= 0.0,1.0
    psq_st      ftmp2,   8(m), 0, 0     // m02,m03 <= sinA,0.0
    psq_st      ftmp4,  32(m), 0, 0     // m20,m21 <= -sinA,0.0
    psq_st      ftmp1,  40(m), 0, 0     // m22,m23 <= cosA,0.0
    psq_st      ftmp3,  56(m), 0, 0     // m32,m33 <= 0.0,1.0
    b           _end

_case_z:
    psq_st      c_zero,  8(m), 0, 0     // m02,m03 <= 0.0,0.0
    ps_neg      ftmp0, sinA             // ftmp0 <= -sinA
    psq_st      c_zero, 24(m), 0, 0     // m12,m13 <= 0.0,0.0
    ps_merge00  ftmp1, sinA, cosA       // ftmp1 <= sinA,cosA
    psq_st      c_zero, 32(m), 0, 0     // m20,m21 <= 0.0,0.0
    ps_merge00  ftmp2, c_one, c_zero    // ftmp2 <= 1.0,0.0
    psq_st      c_zero, 48(m), 0, 0     // m30,m31 <= 0.0,0.0
    ps_merge00  ftmp3, c_zero, c_one    // ftmp3 <= 0.0,1.0
    psq_st      ftmp1,  16(m), 0, 0     // m10,m11 <= sinA,cosA
    ps_merge00  ftmp4, cosA, ftmp0      // ftmp4 <= cosA,-sinA
    psq_st      ftmp2,  40(m), 0, 0     // m22,m23 <= 1.0,0.0
    psq_st      ftmp3,  56(m), 0, 0     // m32,m33 <= 0.0,1.0
    psq_st      ftmp4,   0(m), 0, 0     // m00,m01 <= cosA,-sinA

_end:
    blr
    .size ASM_MTX44RotTrig,$-ASM_MTX44RotTrig

#undef m
#undef axis
#undef sinA
#undef cosA
#undef ftmp0
#undef ftmp1
#undef ftmp2
#undef ftmp3
#undef ftmp4
#undef c_one
#undef c_zero

////////////////////////////////////////////////////////////////////////////////
//void ASM_MTX34To44( MTX_CONST Mtx src, Mtx44 dst )
//
// Copies the first 48 bytes (12 floats, three rows of four) of src to dst
// and then writes 0,0,0,1 as the fourth row of dst (byte offsets 48-63).
// Each pair is loaded immediately before it is stored at the same offset.
// Uses: r5, r6, fp1-fp4.
    .global ASM_MTX34To44

#define src     r3
#define dst     r4
#define c_00    fp1
#define c_11    fp2
#define c_01    fp3
#define tmp     fp4

ASM_MTX34To44:
    .type ASM_MTX34To44, @function

    // c_11 = 1.0F;
    lis         r5, CONST_1_0F@h
    ori         r5, r5, CONST_1_0F@l
    lfs         c_11, 0(r5)

    // c_00 = 0.0F;
    lis         r6, CONST_0_0F@h
    ori         r6, r6, CONST_0_0F@l
    lfs         c_00, 0(r6)

    psq_l       tmp,   0(src), 0, 0
    psq_st      tmp,   0(dst), 0, 0
    psq_l       tmp,   8(src), 0, 0
    psq_st      tmp,   8(dst), 0, 0
    psq_l       tmp,  16(src), 0, 0
    psq_st      tmp,  16(dst), 0, 0
    ps_merge00  c_01, c_00, c_11        // c_01 <= 0.0,1.0
    psq_l       tmp,  24(src), 0, 0
    psq_st      tmp,  24(dst), 0, 0
    psq_l       tmp,  32(src), 0, 0
    psq_st      tmp,  32(dst), 0, 0
    psq_l       tmp,  40(src), 0, 0
    psq_st      tmp,  40(dst), 0, 0
    psq_st      c_00, 48(dst), 0, 0     // dst30,dst31 <= 0.0,0.0
    psq_st      c_01, 56(dst), 0, 0     // dst32,dst33 <= 0.0,1.0

    blr
    .size ASM_MTX34To44,$-ASM_MTX34To44

#undef src
#undef dst
#undef c_00
#undef c_11
#undef c_01
#undef tmp