/*---------------------------------------------------------------------------*
  Project:  matrix vector Library
  File:     mtx44Vec_asm.s

  Copyright (C) Nintendo.  All rights reserved.

  These coded instructions, statements, and computer programs contain
  proprietary information of Nintendo of America Inc. and/or Nintendo
  Company Ltd., and are protected by Federal copyright law.  They may
  not be disclosed to third parties or copied or duplicated in any form,
  in whole or in part, without the prior written consent of Nintendo.
 *---------------------------------------------------------------------------*/

        .data

/***
Unit01:
        .float  0.0
        .float  1.0
***/

        .text

////////////////////////////////////////////////////////////////////////////////
// void ASM_MTX44MultVec(const Mtx44 m, const Vec *src, Vec *dst)
//
// Multiplies the 4x4 matrix m by the point (src.x, src.y, src.z, 1.0) and
// stores the first three components divided by the fourth (w) into dst.
//
// In:    r3 = m    matrix base; rows are read at byte offsets 0, 16, 32, 48
//        r4 = src  three consecutive floats (x, y, z)
//        r5 = dst  three consecutive floats, written as x,y (pair) then z
// Clobb: fp0-fp9, fp12, fp13
//
// Notes:
//  - All quantized loads and stores use GQR index 0; this code assumes GQR0
//    selects plain 32-bit floats with no scaling (TODO confirm with caller).
//  - src is completely read before dst is written.
//  - There is no check for w == 0.
//  - Loads are interleaved with arithmetic on purpose; keep the ordering
//    when modifying this routine.
#define m   r3
#define src r4
#define dst r5

        .global ASM_MTX44MultVec
ASM_MTX44MultVec:
        .type ASM_MTX44MultVec, @function

        // ---- w = row 3 of m dotted with (x, y, z, 1) ----------------------
        psq_l       fp0, 0(src), 0, 0           // fp0  = (x, y)
        psq_l       fp2, 48(m), 0, 0            // fp2  = (m30, m31)
        psq_l       fp1, 8(src), 1, 0           // fp1  = (z, 1.0); W=1 fills ps1 with 1.0
        ps_mul      fp4, fp0, fp2               // fp4  = (m30*x, m31*y)
        psq_l       fp3, 56(m), 0, 0            // fp3  = (m32, m33)
        ps_madd     fp5, fp1, fp3, fp4          // fp5  = (m32*z + m30*x, m33 + m31*y)
        ps_merge11  fp12, fp1, fp1              // fp12 = (1.0, 1.0)
        ps_sum0     fp13, fp5, fp5, fp5         // fp13.ps0 = w
        psq_l       fp4, 0(m), 0, 0             // fp4  = (m00, m01)
        ps_merge00  fp13, fp13, fp13            // fp13 = (w, w)
        psq_l       fp5, 8(m), 0, 0             // fp5  = (m02, m03)
        ps_div      fp13, fp12, fp13            // fp13 = (1/w, 1/w)

        // ---- remaining matrix rows ----------------------------------------
        psq_l       fp6, 16(m), 0, 0            // fp6  = (m10, m11)
        psq_l       fp7, 24(m), 0, 0            // fp7  = (m12, m13)
        psq_l       fp8, 32(m), 0, 0            // fp8  = (m20, m21)
        psq_l       fp9, 40(m), 0, 0            // fp9  = (m22, m23)

        // ---- rows 0..2 dotted with (x, y, z, 1) ---------------------------
        ps_mul      fp4, fp0, fp4               // (m00*x, m01*y)
        ps_madd     fp2, fp1, fp5, fp4          // (m02*z + m00*x, m03 + m01*y)
        ps_mul      fp6, fp0, fp6               // (m10*x, m11*y)
        ps_madd     fp3, fp1, fp7, fp6          // (m12*z + m10*x, m13 + m11*y)
        ps_mul      fp8, fp0, fp8               // (m20*x, m21*y)
        ps_sum0     fp2, fp2, fp2, fp2          // fp2.ps0 = row0 . v
        ps_madd     fp9, fp1, fp9, fp8          // (m22*z + m20*x, m23 + m21*y)
        ps_sum1     fp2, fp3, fp2, fp3          // fp2 = (row0 . v, row1 . v)
        ps_sum0     fp3, fp9, fp9, fp9          // fp3.ps0 = row2 . v

        // ---- perspective divide and store ---------------------------------
        ps_mul      fp2, fp2, fp13              // (x', y') scaled by 1/w
        psq_st      fp2, 0(dst), 0, 0           // dst.x, dst.y
        ps_mul      fp3, fp3, fp13              // z' scaled by 1/w
        psq_st      fp3, 8(dst), 1, 0           // dst.z (single float)
        blr

        .size ASM_MTX44MultVec,$-ASM_MTX44MultVec

#undef m
#undef src
#undef dst

////////////////////////////////////////////////////////////////////////////////
// void ASM_MTX44MultVecArray (const Mtx44 m, const Vec *srcBase, Vec *dstBase, u32 count)
//
// Applies the full 4x4 transform with perspective divide to each of the
// count vectors at srcBase and writes the results to dstBase.  Both arrays
// are walked with a 12-byte stride (three floats per element).
//
// In:    r3 = m        matrix base; rows are read at byte offsets 0, 16, 32, 48
//        r4 = srcBase  first source vector
//        r5 = dstBase  first destination vector
//        r6 = count    number of vectors; any value is accepted
//                        count == 0 -> returns immediately, nothing is touched
//                        count == 1 -> only the tail code runs
// Clobb: fp0-fp13, r4, r5, r6, ctr, cr0
// Stack: 24-byte frame.  fp14 is stored both as a double at 8(rsp) and as a
//        paired-single at 16(rsp) and reloaded from both slots before return
//        (presumably so that both halves of the register are recovered).
//
// The loop is software pipelined: while element i is being finished and
// stored, element i+1 is loaded and its partial products are started.  The
// loop therefore runs count-1 times and the code after it finishes the last
// element.  Without the count guards below, count == 0 or count == 1 would
// start the bdnz loop with CTR at 0xFFFFFFFF or 0 and iterate about 2^32
// times.
//
// Quantized loads/stores use GQR index 0; assumed to select plain floats
// (TODO confirm with caller).  There is no check for w == 0.
#define m       r3
#define srcBase r4
#define dstBase r5
#define count   r6

        .global ASM_MTX44MultVecArray

#define rsp sp

ASM_MTX44MultVecArray:
        .type ASM_MTX44MultVecArray, @function
        cmplwi      count, 0
        beqlr                                   // empty array: return before a frame exists

        stwu        rsp, -24(rsp)
        addi        count, count, -1            // loop trips = count - 1
        psq_l       fp6, 48(m), 0, 0            // fp6  <- m30, m31
        mtctr       count
        psq_l       fp8, 0(srcBase), 0, 0       // fp8  <- src.x, src.y
        addi        dstBase, dstBase, -4        // bias so that 4(dstBase) is the next float
        stfd        fp14, 8(rsp)                // save fp14 (double image)
        psq_l       fp7, 56(m), 0, 0            // fp7  <- m32, m33
        psq_lu      fp9, 8(srcBase), 1, 0       // fp9  <- src.z, 1.0 ; srcBase += 8
        ps_mul      fp13, fp6, fp8
        psq_l       fp0, 0(m), 0, 0             // fp0  <- m00, m01
        psq_st      fp14, 16(rsp), 0, 0         // save fp14 (paired-single image)
        ps_madd     fp13, fp7, fp9, fp13
        psq_l       fp2, 16(m), 0, 0            // fp2  <- m10, m11
        ps_merge11  fp14, fp9, fp9              // fp14 <- 1.0, 1.0
        ps_mul      fp10, fp0, fp8
        psq_l       fp4, 32(m), 0, 0            // fp4  <- m20, m21
        ps_mul      fp11, fp2, fp8
        psq_l       fp1, 8(m), 0, 0             // fp1  <- m02, m03
        ps_mul      fp12, fp4, fp8
        psq_l       fp3, 24(m), 0, 0            // fp3  <- m12, m13
        ps_sum0     fp13, fp13, fp13, fp13      // fp13.ps0 <- w
        psq_l       fp5, 40(m), 0, 0            // fp5  <- m22, m23

        cmplwi      count, 0                    // count now holds (original count - 1)
        beq         _ASM_MTX44MultVecArray_last // single element: skip the pipelined loop

_ASM_MTX44MultVecArray_loop:
        ps_madd     fp10, fp1, fp9, fp10
        ps_madd     fp11, fp3, fp9, fp11
        ps_madd     fp12, fp5, fp9, fp12
        ps_sum0     fp10, fp10, fp10, fp10      // fp10.ps0 <- x
        ps_sum0     fp11, fp11, fp11, fp11      // fp11.ps0 <- y
        ps_sum0     fp12, fp12, fp12, fp12      // fp12.ps0 <- z
        ps_div      fp13, fp14, fp13            // fp13.ps0 <- 1/w
        psq_lu      fp8, 4(srcBase), 0, 0       // next src.x, src.y ; srcBase += 4
        psq_lu      fp9, 8(srcBase), 1, 0       // next src.z, 1.0   ; srcBase += 8
        ps_mul      fp10, fp10, fp13
        psq_stu     fp10, 4(dstBase), 1, 0      // dst.x ; dstBase += 4
        ps_mul      fp11, fp11, fp13
        psq_stu     fp11, 4(dstBase), 1, 0      // dst.y ; dstBase += 4
        ps_mul      fp12, fp12, fp13
        psq_stu     fp12, 4(dstBase), 1, 0      // dst.z ; dstBase += 4
        ps_mul      fp13, fp6, fp8              // start the next element
        ps_mul      fp10, fp0, fp8
        ps_mul      fp11, fp2, fp8
        ps_madd     fp13, fp7, fp9, fp13
        ps_mul      fp12, fp4, fp8
        ps_sum0     fp13, fp13, fp13, fp13      // fp13.ps0 <- next w
        bdnz+       _ASM_MTX44MultVecArray_loop

_ASM_MTX44MultVecArray_last:
        // Finish the element whose partial products are already in fp10-fp13.
        ps_madd     fp10, fp1, fp9, fp10
        ps_madd     fp11, fp3, fp9, fp11
        ps_madd     fp12, fp5, fp9, fp12
        ps_sum0     fp10, fp10, fp10, fp10      // fp10.ps0 <- x
        ps_sum0     fp11, fp11, fp11, fp11      // fp11.ps0 <- y
        ps_sum0     fp12, fp12, fp12, fp12      // fp12.ps0 <- z
        ps_div      fp13, fp14, fp13
        ps_mul      fp10, fp10, fp13
        psq_st      fp10, 4(dstBase), 1, 0
        ps_mul      fp11, fp11, fp13
        psq_st      fp11, 8(dstBase), 1, 0
        ps_mul      fp12, fp12, fp13
        psq_st      fp12, 12(dstBase), 1, 0

        psq_l       fp14, 16(rsp), 0, 0         // restore fp14 (paired-single image)
        lfd         fp14, 8(rsp)                // restore fp14 (double image)
        addi        rsp, rsp, 24
        blr

        .size ASM_MTX44MultVecArray,$-ASM_MTX44MultVecArray

#undef m
#undef srcBase
#undef dstBase
#undef count
#undef rsp

////////////////////////////////////////////////////////////////////////////////
// void ASM_MTX44MultVecSR(const Mtx44 m, const Vec *src, Vec *dst)
//
// Multiplies src by the upper-left 3x3 part of m only: the fourth column and
// the fourth row of m never contribute to the stored values, and no divide
// by w is performed.
//
// In:    r3 = m, r4 = src, r5 = dst
// Clobb: fp0-fp13
//
// src is completely read before dst is written.  Quantized loads/stores use
// GQR index 0, assumed to select plain floats (the original source notes
// "GQR0 = 0").
#define m   r3
#define src r4
#define dst r5

        .global ASM_MTX44MultVecSR
ASM_MTX44MultVecSR:
        .type ASM_MTX44MultVecSR, @function
        psq_l       fp0, 0(m), 0, 0             // fp0  = (m00, m01)
        psq_l       fp6, 0(src), 0, 0           // fp6  = (x, y)
        psq_l       fp2, 16(m), 0, 0            // fp2  = (m10, m11)
        ps_mul      fp8, fp0, fp6               // fp8  = (m00*x, m01*y)   -> X
        psq_l       fp4, 32(m), 0, 0            // fp4  = (m20, m21)
        ps_mul      fp10, fp2, fp6              // fp10 = (m10*x, m11*y)   -> Y
        psq_l       fp7, 8(src), 1, 0           // fp7  = (z, 1.0)
        ps_mul      fp12, fp4, fp6              // fp12 = (m20*x, m21*y)   -> Z ; last use of fp6
        psq_l       fp3, 24(m), 0, 0            // fp3  = (m12, m13)
        ps_sum0     fp8, fp8, fp8, fp8          // fp8.ps0  = m00*x + m01*y
        psq_l       fp5, 40(m), 0, 0            // fp5  = (m22, m23)
        ps_sum0     fp10, fp10, fp10, fp10      // fp10.ps0 = m10*x + m11*y
        psq_l       fp1, 8(m), 0, 0             // fp1  = (m02, m03)
        ps_sum0     fp12, fp12, fp12, fp12      // fp12.ps0 = m20*x + m21*y
        ps_madd     fp9, fp1, fp7, fp8          // ps0 = m02*z + fp8.ps0 ; ps1 is not stored
        psq_st      fp9, 0(dst), 1, 0           // store X
        ps_madd     fp11, fp3, fp7, fp10
        psq_st      fp11, 4(dst), 1, 0          // store Y
        ps_madd     fp13, fp5, fp7, fp12
        psq_st      fp13, 8(dst), 1, 0          // store Z
        blr

        .size ASM_MTX44MultVecSR,$-ASM_MTX44MultVecSR

#undef m
#undef src
#undef dst

////////////////////////////////////////////////////////////////////////////////
// void ASM_MTX44MultVecArraySR(const Mtx44 m, const Vec *srcBase, Vec *dstBase, u32 count)
//
// Array form of ASM_MTX44MultVecSR: multiplies each of the count vectors at
// srcBase by the upper-left 3x3 part of m and writes the results to dstBase
// (12-byte stride on both arrays).
//
// In:    r3 = m, r4 = srcBase, r5 = dstBase
//        r6 = count    number of vectors; any value is accepted
//                        count == 0 -> returns immediately, nothing is touched
//                        count == 1 -> only the tail code runs
// Clobb: fp0-fp13, r4, r5, r6, ctr, cr0
//
// Software pipelined like ASM_MTX44MultVecArray: the loop runs count-1 times
// and the code after it finishes the last element.  Without the count guards
// below, count == 0 or count == 1 would make the bdnz loop iterate about
// 2^32 times.
//
// The third column is loaded with W=1, so fp1/fp3/fp5 hold (m_2, 1.0); the
// ps1 lane of the ps_madd results is never stored, and the m_1*y term is
// added back in by ps_sum0 from the saved product register.
#define m       r3
#define srcBase r4
#define dstBase r5
#define count   r6

        .global ASM_MTX44MultVecArraySR
ASM_MTX44MultVecArraySR:
        .type ASM_MTX44MultVecArraySR, @function
        cmplwi      count, 0
        beqlr                                   // empty array: nothing to do

        psq_l       fp0, 0(m), 0, 0             // fp0 <- m00, m01
        addi        count, count, -1            // loop trips = count - 1
        psq_l       fp6, 0(srcBase), 0, 0       // fp6 <- src.x, src.y
        ps_mul      fp8, fp0, fp6
        psq_l       fp2, 16(m), 0, 0            // fp2 <- m10, m11
        ps_mul      fp9, fp2, fp6
        psq_l       fp4, 32(m), 0, 0            // fp4 <- m20, m21
        psq_lu      fp7, 8(srcBase), 1, 0       // fp7 <- src.z, 1.0 ; srcBase += 8
        ps_mul      fp10, fp4, fp6
        psq_l       fp1, 8(m), 1, 0             // fp1 <- m02, 1.0
        mtctr       count
        psq_l       fp3, 24(m), 1, 0            // fp3 <- m12, 1.0
        addi        dstBase, dstBase, -4        // bias so that 4(dstBase) is the next float
        psq_l       fp5, 40(m), 1, 0            // fp5 <- m22, 1.0

        cmplwi      count, 0                    // count now holds (original count - 1)
        beq         _ASM_MTX44MultVecArraySR_last   // single element: skip the pipelined loop

_ASM_MTX44MultVecArraySR_loop:
        ps_madd     fp11, fp1, fp7, fp8         // ps0 = m02*z + m00*x
        psq_lu      fp6, 4(srcBase), 0, 0       // next src.x, src.y ; srcBase += 4
        ps_madd     fp12, fp3, fp7, fp9         // ps0 = m12*z + m10*x
        ps_madd     fp13, fp5, fp7, fp10        // ps0 = m22*z + m20*x
        psq_lu      fp7, 8(srcBase), 1, 0       // next src.z, 1.0   ; srcBase += 8
        ps_sum0     fp11, fp11, fp8, fp8        // ps0 += m01*y
        psq_stu     fp11, 4(dstBase), 1, 0      // dst.x ; dstBase += 4
        ps_sum0     fp12, fp12, fp9, fp9        // ps0 += m11*y
        psq_stu     fp12, 4(dstBase), 1, 0      // dst.y ; dstBase += 4
        ps_sum0     fp13, fp13, fp10, fp10      // ps0 += m21*y
        psq_stu     fp13, 4(dstBase), 1, 0      // dst.z ; dstBase += 4
        ps_mul      fp8, fp0, fp6               // start the next element
        ps_mul      fp9, fp2, fp6
        ps_mul      fp10, fp4, fp6
        bdnz+       _ASM_MTX44MultVecArraySR_loop

_ASM_MTX44MultVecArraySR_last:
        // Finish the element whose partial products are already in fp8-fp10.
        ps_madd     fp11, fp1, fp7, fp8
        ps_madd     fp12, fp3, fp7, fp9
        ps_madd     fp13, fp5, fp7, fp10
        ps_sum0     fp11, fp11, fp8, fp8
        psq_stu     fp11, 4(dstBase), 1, 0
        ps_sum0     fp12, fp12, fp9, fp9
        psq_stu     fp12, 4(dstBase), 1, 0
        ps_sum0     fp13, fp13, fp10, fp10
        psq_stu     fp13, 4(dstBase), 1, 0
        blr

        .size ASM_MTX44MultVecArraySR,$-ASM_MTX44MultVecArraySR

#undef m
#undef srcBase
#undef dstBase
#undef count