/*---------------------------------------------------------------------------*
  Project:  matrix vector Library
  File:     mtx44Vec_asm.s

  Copyright 1998-2011 Nintendo.  All rights reserved.

  These coded instructions, statements, and computer programs contain
  proprietary information of Nintendo of America Inc. and/or Nintendo
  Company Ltd., and are protected by Federal copyright law.  They may
  not be disclosed to third parties or copied or duplicated in any form,
  in whole or in part, without the prior written consent of Nintendo.

 *---------------------------------------------------------------------------*/

    .data
/***
Unit01: .float 0.0
        .float 1.0

***/

    .text


////////////////////////////////////////////////////////////////////////////////
// void ASM_MTX44MultVec(const Mtx44 m, const Vec *src, Vec *dst)
//
// Multiplies the point (src.x, src.y, src.z, 1.0) by the 4x4 matrix m and
// stores the first three components of the product, each divided by the
// fourth component (w), to dst.
//
//   In:    r3 = m, r4 = src, r5 = dst
//   Out:   dst.x, dst.y, dst.z are written
//   Uses:  fp0-fp9, fp12, fp13; leaf routine, no stack frame
//
// Both loads from src are issued before the first store to dst.
// w is not tested against zero before the divide.
// Every quantized load/store selects GQR0 (I field = 0).
// NOTE(review): presumes GQR0 is set up for plain unscaled floats - confirm.
#define m   r3
#define src r4
#define dst r5
    .global ASM_MTX44MultVec
ASM_MTX44MultVec:
    .type ASM_MTX44MultVec, @function

    psq_l       fp0,  0(src), 0, 0      // fp0  <- src.x, src.y
    psq_l       fp2, 48(m),   0, 0      // fp2  <- m30, m31
    psq_l       fp1,  8(src), 1, 0      // fp1  <- src.z, 1.0
    ps_mul      fp4, fp0, fp2           // fp4  <- m30*x, m31*y
    psq_l       fp3, 56(m),   0, 0      // fp3  <- m32, m33
    ps_madd     fp5, fp1, fp3, fp4      // fp5  <- m32*z + m30*x, m33 + m31*y
    ps_merge11  fp12, fp1, fp1          // fp12 <- 1.0, 1.0
    ps_sum0     fp13, fp5, fp5, fp5     // fp13 <- w, --
    psq_l       fp4,  0(m),   0, 0      // fp4  <- m00, m01
    ps_merge00  fp13, fp13, fp13        // fp13 <- w, w
    psq_l       fp5,  8(m),   0, 0      // fp5  <- m02, m03
    ps_div      fp13, fp12, fp13        // fp13 <- 1/w, 1/w
    psq_l       fp6, 16(m),   0, 0      // fp6  <- m10, m11
    psq_l       fp7, 24(m),   0, 0      // fp7  <- m12, m13
    psq_l       fp8, 32(m),   0, 0      // fp8  <- m20, m21
    psq_l       fp9, 40(m),   0, 0      // fp9  <- m22, m23
    ps_mul      fp4, fp0, fp4           // fp4  <- m00*x, m01*y
    ps_madd     fp2, fp1, fp5, fp4      // fp2  <- row 0 partial sums
    ps_mul      fp6, fp0, fp6           // fp6  <- m10*x, m11*y
    ps_madd     fp3, fp1, fp7, fp6      // fp3  <- row 1 partial sums
    ps_mul      fp8, fp0, fp8           // fp8  <- m20*x, m21*y
    ps_sum0     fp2, fp2, fp2, fp2      // fp2  <- dst.x, --
    ps_madd     fp9, fp1, fp9, fp8      // fp9  <- row 2 partial sums
    ps_sum1     fp2, fp3, fp2, fp3      // fp2  <- dst.x, dst.y
    ps_sum0     fp3, fp9, fp9, fp9      // fp3  <- dst.z, --
    ps_mul      fp2, fp2, fp13          // x and y scaled by 1/w
    psq_st      fp2, 0(dst), 0, 0       // store dst.x, dst.y
    ps_mul      fp3, fp3, fp13          // z scaled by 1/w
    psq_st      fp3, 8(dst), 1, 0       // store dst.z only
    blr
    .size ASM_MTX44MultVec,$-ASM_MTX44MultVec

#undef m
#undef src
#undef dst



////////////////////////////////////////////////////////////////////////////////
// void ASM_MTX44MultVecArray (const Mtx44 m, const Vec *srcBase, Vec *dstBase, u32 count)
//
// Applies the same transform as ASM_MTX44MultVec (4x4 multiply of
// (x, y, z, 1.0) followed by a divide by w) to count consecutive vectors
// read from srcBase, writing count results starting at dstBase.
//
//   In:    r3 = m, r4 = srcBase, r5 = dstBase, r6 = count
//   Out:   count vectors written starting at dstBase
//   Uses:  fp0-fp13, r4, r5, r6, CTR, cr0
//          fp14 is saved on entry and restored on exit
//   Stack: 24-byte frame; fp14 is kept both as a double at 8(rsp) and as a
//          pair of singles at 16(rsp)
//
// The loop is software pipelined: each pass finishes vector i while loading
// and starting vector i+1, and the code after the loop finishes the last
// vector.  The loop therefore runs count-1 times.
//
// count == 0 returns at once without touching memory or the stack.
// count == 1 skips the loop and runs only the final stage.  Without these two
// checks CTR would wrap around and the loop would run roughly 2^32 times.
//
// w is not tested against zero before the divide.
// NOTE(review): presumes GQR0 is set up for plain unscaled floats - confirm.
#define m       r3
#define srcBase r4
#define dstBase r5
#define count   r6
    .global ASM_MTX44MultVecArray
#define rsp sp
ASM_MTX44MultVecArray:
    .type ASM_MTX44MultVecArray, @function

    cmplwi      count, 1                // cr0 <- count compared with 1 (unsigned)
    bltlr                               // count == 0: nothing to do

    stwu        rsp, -24(rsp)
    addi        count, count, -1
    psq_l       fp6, 48(m), 0, 0        // fp6  <- m30, m31
    mtctr       count                   // CTR  <- count - 1 loop passes
    psq_l       fp8, 0(srcBase), 0, 0   // fp8  <- src.x, src.y
    addi        dstBase, dstBase, -4    // bias for the update-form stores
    stfd        fp14, 8(rsp)            // save fp14 (double image)
    psq_l       fp7, 56(m), 0, 0        // fp7  <- m32, m33
    psq_lu      fp9, 8(srcBase), 1, 0   // fp9  <- src.z, 1.0
    ps_mul      fp13, fp6, fp8          // fp13 <- m30*x, m31*y
    psq_l       fp0, 0(m), 0, 0         // fp0  <- m00, m01
    psq_st      fp14, 16(rsp), 0, 0     // save fp14 (paired-single image)
    ps_madd     fp13, fp7, fp9, fp13    // fp13 <- row 3 partial sums
    psq_l       fp2, 16(m), 0, 0        // fp2  <- m10, m11
    ps_merge11  fp14, fp9, fp9          // fp14 <- 1.0F, 1.0F
    ps_mul      fp10, fp0, fp8          // fp10 <- m00*x, m01*y
    psq_l       fp4, 32(m), 0, 0        // fp4  <- m20, m21
    ps_mul      fp11, fp2, fp8          // fp11 <- m10*x, m11*y
    psq_l       fp1, 8(m), 0, 0         // fp1  <- m02, m03
    ps_mul      fp12, fp4, fp8          // fp12 <- m20*x, m21*y
    psq_l       fp3, 24(m), 0, 0        // fp3  <- m12, m13
    ps_sum0     fp13, fp13, fp13, fp13  // fp13 <- w
    psq_l       fp5, 40(m), 0, 0        // fp5  <- m22, m23

    // cr0 still holds the compare made on entry; nothing above writes it.
    beq         _ASM_MTX44MultVecArray_last     // count == 1: no loop passes

_ASM_MTX44MultVecArray_loop:
    ps_madd     fp10, fp1, fp9, fp10
    ps_madd     fp11, fp3, fp9, fp11
    ps_madd     fp12, fp5, fp9, fp12
    ps_sum0     fp10, fp10, fp10, fp10  // fp10 <- x
    ps_sum0     fp11, fp11, fp11, fp11  // fp11 <- y
    ps_sum0     fp12, fp12, fp12, fp12  // fp12 <- z
    ps_div      fp13, fp14, fp13        // fp13 <- 1/w

    psq_lu      fp8, 4(srcBase), 0, 0   // fp8  <- next src.x, src.y
    psq_lu      fp9, 8(srcBase), 1, 0   // fp9  <- next src.z, 1.0

    ps_mul      fp10, fp10, fp13
    psq_stu     fp10, 4(dstBase), 1, 0  // store dst.x
    ps_mul      fp11, fp11, fp13
    psq_stu     fp11, 4(dstBase), 1, 0  // store dst.y
    ps_mul      fp12, fp12, fp13
    psq_stu     fp12, 4(dstBase), 1, 0  // store dst.z

    ps_mul      fp13, fp6, fp8          // start row 3 of the next vector

    ps_mul      fp10, fp0, fp8          // start row 0 of the next vector
    ps_mul      fp11, fp2, fp8          // start row 1 of the next vector
    ps_madd     fp13, fp7, fp9, fp13
    ps_mul      fp12, fp4, fp8          // start row 2 of the next vector
    ps_sum0     fp13, fp13, fp13, fp13  // fp13 <- next w

    bdnz+       _ASM_MTX44MultVecArray_loop

_ASM_MTX44MultVecArray_last:
    // Finish the vector whose first stage is already in fp10-fp13.
    ps_madd     fp10, fp1, fp9, fp10
    ps_madd     fp11, fp3, fp9, fp11
    ps_madd     fp12, fp5, fp9, fp12
    ps_sum0     fp10, fp10, fp10, fp10  // fp10 <- x
    ps_sum0     fp11, fp11, fp11, fp11  // fp11 <- y
    ps_sum0     fp12, fp12, fp12, fp12  // fp12 <- z
    ps_div      fp13, fp14, fp13        // fp13 <- 1/w

    ps_mul      fp10, fp10, fp13
    psq_st      fp10, 4(dstBase), 1, 0  // store dst.x
    ps_mul      fp11, fp11, fp13
    psq_st      fp11, 8(dstBase), 1, 0  // store dst.y
    ps_mul      fp12, fp12, fp13
    psq_st      fp12, 12(dstBase), 1, 0 // store dst.z

    psq_l       fp14, 16(rsp), 0, 0     // restore fp14 (paired-single image)
    lfd         fp14, 8(rsp)            // restore fp14 (double image)
    addi        rsp, rsp, 24
    blr
    .size ASM_MTX44MultVecArray,$-ASM_MTX44MultVecArray
#undef m
#undef srcBase
#undef dstBase
#undef count
#undef rsp



////////////////////////////////////////////////////////////////////////////////
// void ASM_MTX44MultVecSR(const Mtx44 m, const Vec *src, Vec *dst)
//
// Multiplies src by the upper-left 3x3 part of m and stores the result to
// dst.  Only ps0 of each result is stored, so m[i][3] and row 3 of m have no
// effect on the output.
//
//   In:    r3 = m, r4 = src, r5 = dst
//   Out:   dst.x, dst.y, dst.z are written
//   Uses:  fp0-fp13; leaf routine, no stack frame
//
// Both loads from src are issued before the first store to dst.
// NOTE(review): presumes GQR0 is set up for plain unscaled floats - confirm.
#define m   r3
#define src r4
#define dst r5
    .global ASM_MTX44MultVecSR
ASM_MTX44MultVecSR:
    .type ASM_MTX44MultVecSR, @function

    psq_l       fp0, 0(m), 0, 0         // m[0][0], m[0][1] GQR0 = 0

    // fp6 - x y
    psq_l       fp6, 0(src), 0, 0

    psq_l       fp2, 16(m), 0, 0        // m[1][0], m[1][1]


    // fp8 = m00x m01y // next X
    ps_mul      fp8, fp0, fp6
    psq_l       fp4, 32(m), 0, 0        // m[2][0], m[2][1]

    // fp10 = m10x m11y // next Y
    ps_mul      fp10, fp2, fp6
    psq_l       fp7, 8(src), 1, 0       // fp7 - z,1.0

    // fp12 = m20x m21y // next Z
    ps_mul      fp12, fp4, fp6          // last use of fp6
    psq_l       fp3, 24(m), 0, 0        // m[1][2], m[1][3]

    ps_sum0     fp8, fp8, fp8, fp8      // ps0 <- m00x + m01y
    psq_l       fp5, 40(m), 0, 0        // m[2][2], m[2][3]

    ps_sum0     fp10, fp10, fp10, fp10  // ps0 <- m10x + m11y
    psq_l       fp1, 8(m), 0, 0         // m[0][2], m[0][3]

    ps_sum0     fp12, fp12, fp12, fp12  // ps0 <- m20x + m21y
    ps_madd     fp9, fp1, fp7, fp8      // ps0 <- m02z + m00x + m01y
    psq_st      fp9, 0(dst), 1, 0       // store X

    ps_madd     fp11, fp3, fp7, fp10    // ps0 <- m12z + m10x + m11y
    psq_st      fp11, 4(dst), 1, 0      // store Y

    ps_madd     fp13, fp5, fp7, fp12    // ps0 <- m22z + m20x + m21y
    psq_st      fp13, 8(dst), 1, 0      // store Z

    blr
    .size ASM_MTX44MultVecSR,$-ASM_MTX44MultVecSR

#undef m
#undef src
#undef dst



////////////////////////////////////////////////////////////////////////////////
// void ASM_MTX44MultVecArraySR(const Mtx44 m, const Vec *srcBase, Vec *dstBase, u32 count)
//
// Applies the same 3x3 transform as ASM_MTX44MultVecSR to count consecutive
// vectors read from srcBase, writing count results starting at dstBase.
//
//   In:    r3 = m, r4 = srcBase, r5 = dstBase, r6 = count
//   Out:   count vectors written starting at dstBase
//   Uses:  fp0-fp13, r4, r5, r6, CTR, cr0; leaf routine, no stack frame
//
// The loop is software pipelined: each pass finishes vector i while loading
// and starting vector i+1, and the code after the loop finishes the last
// vector.  The loop therefore runs count-1 times.
//
// count == 0 returns at once without touching memory.
// count == 1 skips the loop and runs only the final stage.  Without these two
// checks CTR would wrap around and the loop would run roughly 2^32 times.
//
// NOTE(review): presumes GQR0 is set up for plain unscaled floats - confirm.
#define m       r3
#define srcBase r4
#define dstBase r5
#define count   r6
    .global ASM_MTX44MultVecArraySR
ASM_MTX44MultVecArraySR:
    .type ASM_MTX44MultVecArraySR, @function

    cmplwi      count, 1                // cr0 <- count compared with 1 (unsigned)
    bltlr                               // count == 0: nothing to do

    psq_l       fp0, 0(m), 0, 0         // fp0  <- m00, m01
    addi        count, count, -1
    psq_l       fp6, 0(srcBase), 0, 0   // fp6  <- src.x, src.y
    ps_mul      fp8, fp0, fp6           // fp8  <- m00*x, m01*y
    psq_l       fp2, 16(m), 0, 0        // fp2  <- m10, m11
    ps_mul      fp9, fp2, fp6           // fp9  <- m10*x, m11*y
    psq_l       fp4, 32(m), 0, 0        // fp4  <- m20, m21
    psq_lu      fp7, 8(srcBase), 1, 0   // fp7  <- src.z, 1.0
    ps_mul      fp10, fp4, fp6          // fp10 <- m20*x, m21*y
    psq_l       fp1, 8(m), 1, 0         // fp1  <- m02, 1.0
    mtctr       count                   // CTR  <- count - 1 loop passes
    psq_l       fp3, 24(m), 1, 0        // fp3  <- m12, 1.0
    addi        dstBase, dstBase, -4    // bias for the update-form stores
    psq_l       fp5, 40(m), 1, 0        // fp5  <- m22, 1.0

    // cr0 still holds the compare made on entry; nothing above writes it.
    beq         _ASM_MTX44MultVecArraySR_last   // count == 1: no loop passes

_ASM_MTX44MultVecArraySR_loop:
    ps_madd     fp11, fp1, fp7, fp8     // ps0 <- m02*z + m00*x
    psq_lu      fp6, 4(srcBase), 0, 0   // fp6 <- next src.x, src.y
    ps_madd     fp12, fp3, fp7, fp9     // ps0 <- m12*z + m10*x
    ps_madd     fp13, fp5, fp7, fp10    // ps0 <- m22*z + m20*x
    psq_lu      fp7, 8(srcBase), 1, 0   // fp7 <- next src.z, 1.0
    ps_sum0     fp11, fp11, fp8, fp8    // ps0 <- ps0 + m01*y
    psq_stu     fp11, 4(dstBase), 1, 0  // store dst.x
    ps_sum0     fp12, fp12, fp9, fp9    // ps0 <- ps0 + m11*y
    psq_stu     fp12, 4(dstBase), 1, 0  // store dst.y
    ps_sum0     fp13, fp13, fp10, fp10  // ps0 <- ps0 + m21*y
    psq_stu     fp13, 4(dstBase), 1, 0  // store dst.z
    ps_mul      fp8, fp0, fp6           // start row 0 of the next vector
    ps_mul      fp9, fp2, fp6           // start row 1 of the next vector
    ps_mul      fp10, fp4, fp6          // start row 2 of the next vector
    bdnz+       _ASM_MTX44MultVecArraySR_loop

_ASM_MTX44MultVecArraySR_last:
    // Finish the vector whose first stage is already in fp7-fp10.
    ps_madd     fp11, fp1, fp7, fp8
    ps_madd     fp12, fp3, fp7, fp9
    ps_madd     fp13, fp5, fp7, fp10
    ps_sum0     fp11, fp11, fp8, fp8
    psq_stu     fp11, 4(dstBase), 1, 0  // store dst.x
    ps_sum0     fp12, fp12, fp9, fp9
    psq_stu     fp12, 4(dstBase), 1, 0  // store dst.y
    ps_sum0     fp13, fp13, fp10, fp10
    psq_stu     fp13, 4(dstBase), 1, 0  // store dst.z
    blr
    .size ASM_MTX44MultVecArraySR,$-ASM_MTX44MultVecArraySR
#undef m
#undef srcBase
#undef dstBase
#undef count

