1 /*---------------------------------------------------------------------------*
2   Project: matrix vector Library
3   File:    mtx44vec.c
4 
5   Copyright 1998-2001 Nintendo.  All rights reserved.
6 
7   These coded instructions, statements, and computer programs contain
8   proprietary information of Nintendo of America Inc. and/or Nintendo
9   Company Ltd., and are protected by Federal copyright law.  They may
10   not be disclosed to third parties or copied or duplicated in any form,
11   in whole or in part, without the prior written consent of Nintendo.
12 
13 
14   $Log: mtx44vec.c,v $
15   Revision 1.2  02/20/2006 04:25:42  mitu
16   changed include path from dolphin/ to revolution/.
17 
18   Revision 1.1.1.1  2005/05/12 02:15:49  yasuh-to
19   transitioned from the Dolphin source tree
20 
21 
22     2     02/04/11 13:11 Hirose
23     const type specifier support. (worked by Hiratsu@IRD)
24 
25     1     7/30/01 10:51p Hirose
26     Initial check in.
27 
28   $NoKeywords: $
29  *---------------------------------------------------------------------------*/
30 #ifdef WIN32
31 #include <win32/win32.h>
32 #endif
33 
34 #include <math.h>
35 #include <revolution/mtx.h>
36 #include <revolution/mtx/mtx44ext.h>
37 #include "mtx44extAssert.h"
38 
39 
40 
41 /*---------------------------------------------------------------------*
42 
43 
44 
45                              MODEL  SECTION
46 
47 
48 
49  *---------------------------------------------------------------------*/
50 /* NOTE: Prototypes for these functions are defined in "mtx44ext.h".   */
51 
52 /*---------------------------------------------------------------------*
53 Name:           MTX44MultVec
54 
55 Description:    multiplies a vector (implicit w = 1) by a 4x4 matrix.
56                 m x src = dst, with x, y, z divided by the resulting w.
57 
58 Arguments:      m         matrix.
59                 src       source vector for multiply.
60                 dst       resultant vector from multiply.
61                 note:      ok if src == dst.
62 
63 Return   :         none
64  *---------------------------------------------------------------------*/
65 /*---------------------------------------------------------------------*
66     C version
67  *---------------------------------------------------------------------*/
/*
 * C_MTX44MultVec
 *
 * Transforms *src by the 4x4 matrix m, using 1 as the missing fourth
 * input component, and writes x, y, z scaled by the reciprocal of the
 * transformed w to *dst.
 *
 * src and dst may be the same Vec: the source is copied and all four
 * sums are formed before anything is written to dst.
 * No check is made for a transformed w of zero.
 */
void C_MTX44MultVec ( const Mtx44 m, const Vec *src, Vec *dst )
{
    Vec in;         // snapshot of the source vector
    Vec h;          // transformed x, y, z before the divide
    f32 hw;         // transformed w
    f32 invW;       // 1 / hw

    ASSERTMSG( (m   != 0), MTX44_MULTVEC_1 );
    ASSERTMSG( (src != 0), MTX44_MULTVEC_2 );
    ASSERTMSG( (dst != 0), MTX44_MULTVEC_3 );

    in = *src;

    // rows 0..2 give x, y, z; row 3 gives w. Column 3 is multiplied by
    // the implicit 1 and therefore appears as a plain addend.
    h.x = m[0][0]*in.x + m[0][1]*in.y + m[0][2]*in.z + m[0][3];
    h.y = m[1][0]*in.x + m[1][1]*in.y + m[1][2]*in.z + m[1][3];
    h.z = m[2][0]*in.x + m[2][1]*in.y + m[2][2]*in.z + m[2][3];
    hw  = m[3][0]*in.x + m[3][1]*in.y + m[3][2]*in.z + m[3][3];

    // one divide, three multiplies
    invW = 1.0f/hw;

    dst->x = h.x * invW;
    dst->y = h.y * invW;
    dst->z = h.z * invW;
}
89 
90 /*---------------------------------------------------------------------*
91     Paired-Single assembler version
92  *---------------------------------------------------------------------*
93                 Note that NO error checking is performed.
94  *---------------------------------------------------------------------*/
95 #ifdef  GEKKO
/*
 * PSMTX44MultVec - paired-single counterpart of C_MTX44MultVec.
 *
 * Transforms *src by the 4x4 matrix m, with 1.0 supplied as the fourth
 * input component by the single-value load of src.z, then multiplies
 * x, y and z by 1/w and stores them to *dst.
 *
 *   m    matrix, read as eight float pairs at byte offsets 0..56.
 *   src  source vector (x, y at offset 0; z at offset 8).
 *   dst  destination vector (x, y stored as a pair, z stored alone).
 *
 * Both loads from src and all loads from m are issued before the first
 * store to dst. No argument checking and no test for w == 0 is done.
 * Every quantized load/store names GQR 0; presumably that register is
 * configured for unscaled 32-bit floats - TODO confirm.
 *
 * In the comments below "a, b" lists the ps0, ps1 lanes of a register.
 */
PSMTX44MultVec(const register Mtx44 m,const register Vec * src,register Vec * dst)96 asm void PSMTX44MultVec (
97     const register Mtx44 m,
98     const register Vec *src,
99           register Vec *dst
100 )
101 {
102     nofralloc;
    // --- w row (row 3) first, so the divide can overlap the other rows
103     psq_l       fp0, 0(src),    0, 0;       // fp0 <- src.x, src.y
104     psq_l       fp2, 48(m),     0, 0;       // fp2 <- m30, m31
105     psq_l       fp1, 8(src),    1, 0;       // fp1 <- src.z, 1.0
106     ps_mul      fp4, fp0, fp2;              // fp4 <- m30*x, m31*y
107     psq_l       fp3, 56(m),     0, 0;       // fp3 <- m32, m33
108     ps_madd     fp5, fp1, fp3, fp4;         // fp5 <- m32*z + m30*x, m33 + m31*y
109     ps_merge11  fp12, fp1, fp1;             // fp12 <- 1.0, 1.0
110     ps_sum0     fp13, fp5, fp5, fp5;        // fp13 <- w, --
111     psq_l       fp4, 0(m),      0, 0;       // fp4 <- m00, m01
112     ps_merge00  fp13, fp13, fp13;           // fp13 <- w, w
113     psq_l       fp5, 8(m),      0, 0;       // fp5 <- m02, m03
114     ps_div      fp13, fp12, fp13;           // fp13 <- 1/w, 1/w
115     psq_l       fp6, 16(m),     0, 0;       // fp6 <- m10, m11
116     psq_l       fp7, 24(m),     0, 0;       // fp7 <- m12, m13
117     psq_l       fp8, 32(m),     0, 0;       // fp8 <- m20, m21
118     psq_l       fp9, 40(m),     0, 0;       // fp9 <- m22, m23
    // --- rows 0..2: pairwise products, then horizontal adds
119     ps_mul      fp4, fp0, fp4;              // fp4 <- m00*x, m01*y
120     ps_madd     fp2, fp1, fp5, fp4;         // fp2 <- m02*z + m00*x, m03 + m01*y
121     ps_mul      fp6, fp0, fp6;              // fp6 <- m10*x, m11*y
122     ps_madd     fp3, fp1, fp7, fp6;         // fp3 <- m12*z + m10*x, m13 + m11*y
123     ps_mul      fp8, fp0, fp8;              // fp8 <- m20*x, m21*y
124     ps_sum0     fp2, fp2, fp2, fp2;         // fp2 <- x (unscaled), --
125     ps_madd     fp9, fp1, fp9, fp8;         // fp9 <- m22*z + m20*x, m23 + m21*y
126     ps_sum1     fp2, fp3, fp2, fp3;         // fp2 <- x, y (both unscaled)
127     ps_sum0     fp3, fp9, fp9, fp9;         // fp3 <- z (unscaled), --
128     ps_mul      fp2, fp2, fp13;             // fp2 <- x/w, y/w
129     psq_st      fp2, 0(dst),    0, 0;       // store dst.x, dst.y
130     ps_mul      fp3, fp3, fp13;             // fp3 <- z/w, --
131     psq_st      fp3, 8(dst),    1, 0;       // store dst.z only (single-value store)
132     blr;
133 }
134 #endif      //  GEKKO
135 
136 /*---------------------------------------------------------------------*
137 Name:           MTX44MultVecArray
138 
139 Description:    multiplies an array of vectors by a 4x4 matrix.
140                 each result's x, y, z are divided by its resulting w.
141 
142 Arguments:      m         matrix.
143                 srcBase   start of source vector array.
144                 dstBase   start of resultant vector array.
145                 note:     ok if srcBase == dstBase.
146                 count     number of vectors in srcBase, dstBase arrays
147                           note:      cannot check for array overflow
148 
149 Return   :         none
150  *---------------------------------------------------------------------*/
151 /*---------------------------------------------------------------------*
152     C version
153  *---------------------------------------------------------------------*/
/*
 * C_MTX44MultVecArray
 *
 * Applies the same transform as the single-vector C routine to count
 * consecutive vectors: each source Vec is multiplied by the 4x4 matrix m
 * with an implicit fourth component of 1, and x, y, z are scaled by the
 * reciprocal of the transformed w.
 *
 * srcBase and dstBase may be the same array: each element is copied and
 * fully transformed before its result is written. Nothing is done when
 * count is 0. Array bounds cannot be checked here.
 */
void C_MTX44MultVecArray ( const Mtx44 m, const Vec *srcBase, Vec *dstBase, u32 count )
{
    const Vec *from;        // read cursor
    Vec       *to;          // write cursor
    Vec        in;          // snapshot of the current source vector
    Vec        h;           // transformed x, y, z before the divide
    f32        hw;          // transformed w
    f32        invW;        // 1 / hw
    u32        remaining;

    ASSERTMSG( (m       != 0),    MTX44_MULTVECARRAY_1    );
    ASSERTMSG( (srcBase != 0),    MTX44_MULTVECARRAY_2    );
    ASSERTMSG( (dstBase != 0),    MTX44_MULTVECARRAY_3    );

    from = srcBase;
    to   = dstBase;

    for ( remaining = count; remaining != 0; remaining-- )
    {
        in = *from;

        // column 3 is multiplied by the implicit 1, hence the bare addend
        h.x = m[0][0]*in.x + m[0][1]*in.y + m[0][2]*in.z + m[0][3];
        h.y = m[1][0]*in.x + m[1][1]*in.y + m[1][2]*in.z + m[1][3];
        h.z = m[2][0]*in.x + m[2][1]*in.y + m[2][2]*in.z + m[2][3];
        hw  = m[3][0]*in.x + m[3][1]*in.y + m[3][2]*in.z + m[3][3];
        invW = 1.0f/hw;

        to->x = h.x * invW;
        to->y = h.y * invW;
        to->z = h.z * invW;

        from++;
        to++;
    }
}
182 
183 /*---------------------------------------------------------------------*
184     Paired-Single assembler version
185  *---------------------------------------------------------------------*
186                 Note that NO error checking is performed.
187  *---------------------------------------------------------------------*/
188 #ifdef  GEKKO
/*
 * PSMTX44MultVecArray - paired-single counterpart of C_MTX44MultVecArray.
 *
 * For each of the vectors starting at srcBase: multiplies by the 4x4
 * matrix m with 1.0 as the fourth input component, scales x, y, z by
 * 1/w, and stores the three results to the matching slot of dstBase.
 *
 *   m        matrix, held in fp0-fp7 for the whole routine.
 *   srcBase  source array; advanced with update-form loads.
 *   dstBase  destination array; pre-biased by -4 and advanced with
 *            update-form single-value stores.
 *   count    number of vectors.
 *
 * Layout of the code: the prologue loads the matrix and starts the
 * products for vector 0; every loop pass finishes the vector already in
 * flight, loads the next source vector, stores the finished result and
 * starts the products for the next one; the tail after the loop
 * finishes and stores the last vector without loading anything further.
 *
 * fp14 is saved in a 16-byte stack frame on entry and restored on exit
 * because it is used to hold the constant pair (1.0, 1.0).
 *
 * NOTE(review): CTR is loaded with count - 1 and the loop body runs once
 * before the first bdnz, so the loop plus tail always handle at least
 * two vectors. count == 0 or count == 1 does not appear to be handled
 * (CTR would wrap); the disabled lines at the top look like a former
 * count == 1 dispatch to PSMTX44MultVec. Confirm the caller contract.
 *
 * No argument checking and no test for w == 0 is done. In the comments
 * below "a, b" lists the ps0, ps1 lanes of a register.
 */
PSMTX44MultVecArray(const register Mtx44 m,const register Vec * srcBase,register Vec * dstBase,register u32 count)189 asm void PSMTX44MultVecArray (
190     const register Mtx44 m,
191     const register Vec *srcBase,
192           register Vec *dstBase,
193           register u32 count
194 )
195 {
196     nofralloc;
    // disabled: special case that would branch to PSMTX44MultVec when count == 1
197 //  cmpwi       count, 1;
198 //  bne         @array
199 //  b           PSMTX44MultVec
200 //@array:
201     stwu        rsp, -16(rsp);              // open a 16-byte frame (room to save fp14)
202     addi        count, count, -1;           // loop runs count-1 times; tail does the last vector
203     psq_l       fp6, 48(m),         0, 0;   // fp6 <- m30, m31
204     mtctr       count;
205     psq_l       fp8, 0(srcBase),    0, 0;   // fp8 <- src.x, src.y
206     addi        dstBase, dstBase, -4;       // bias so the first psq_stu 4(dstBase) hits dst[0].x
207     psq_l       fp7, 56(m),         0, 0;   // fp7 <- m32, m33
208     psq_lu      fp9, 8(srcBase),    1, 0;   // fp9 <- src.z, 1.0 ; srcBase += 8
209     ps_mul      fp13, fp6, fp8;             // fp13 <- m30*x, m31*y
210     psq_l       fp0, 0(m),          0, 0;   // fp0 <- m00, m01
211     stfd        fp14, 8(rsp);               // save fp14 before it is overwritten
212     ps_madd     fp13, fp7, fp9, fp13;       // fp13 <- m32*z + m30*x, m33 + m31*y
213     psq_l       fp2, 16(m),         0, 0;   // fp2 <- m10, m11
214     ps_merge11  fp14, fp9, fp9;             // fp14 <- 1.0, 1.0
215     ps_mul      fp10, fp0, fp8;             // fp10 <- m00*x, m01*y
216     psq_l       fp4, 32(m),         0, 0;   // fp4 <- m20, m21
217     ps_mul      fp11, fp2, fp8;             // fp11 <- m10*x, m11*y
218     psq_l       fp1, 8(m),          0, 0;   // fp1 <- m02, m03
219     ps_mul      fp12, fp4, fp8;             // fp12 <- m20*x, m21*y
220     psq_l       fp3, 24(m),         0, 0;   // fp3 <- m12, m13
221     ps_sum0     fp13, fp13, fp13, fp13;     // fp13 <- w
222     psq_l       fp5, 40(m),         0, 0;   // fp5 <- m22, m23
223
224 @loop:
    // finish the vector in flight: add the z and column-3 terms, then sum lanes
225     ps_madd     fp10, fp1, fp9, fp10;       // fp10 <- m02*z + m00*x, m03 + m01*y
226     ps_madd     fp11, fp3, fp9, fp11;       // fp11 <- m12*z + m10*x, m13 + m11*y
227     ps_madd     fp12, fp5, fp9, fp12;       // fp12 <- m22*z + m20*x, m23 + m21*y
228     ps_sum0     fp10, fp10, fp10, fp10;     // fp10 <- x
229     ps_sum0     fp11, fp11, fp11, fp11;     // fp11 <- y
230     ps_sum0     fp12, fp12, fp12, fp12;     // fp12 <- z
231     ps_div      fp13, fp14, fp13;           // fp13.ps0 <- 1/w
232
    // fetch the next source vector before the current result is stored
233     psq_lu      fp8, 4(srcBase), 0, 0;      // fp8 <- next src.x, src.y ; srcBase += 4
234     psq_lu      fp9, 8(srcBase), 1, 0;      // fp9 <- next src.z, 1.0   ; srcBase += 8
235
    // scale by 1/w and store x, y, z one float at a time (ps0 only)
236     ps_mul      fp10, fp10, fp13;
237     psq_stu     fp10, 4(dstBase), 1, 0;     // store dst.x ; dstBase += 4
238     ps_mul      fp11, fp11, fp13;
239     psq_stu     fp11, 4(dstBase), 1, 0;     // store dst.y ; dstBase += 4
240     ps_mul      fp12, fp12, fp13;
241     psq_stu     fp12, 4(dstBase), 1, 0;     // store dst.z ; dstBase += 4
242
    // start the next vector: x/y products for all four rows, and its w
243     ps_mul      fp13, fp6, fp8;             // fp13 <- m30*x, m31*y
244
245     ps_mul      fp10, fp0, fp8;             // fp10 <- m00*x, m01*y
246     ps_mul      fp11, fp2, fp8;             // fp11 <- m10*x, m11*y
247     ps_madd     fp13, fp7, fp9, fp13;       // fp13 <- m32*z + m30*x, m33 + m31*y
248     ps_mul      fp12, fp4, fp8;             // fp12 <- m20*x, m21*y
249     ps_sum0     fp13, fp13, fp13, fp13;     // fp13 <- w
250
251     bdnz+       @loop
252
    // tail: finish and store the last vector; no further source load
253     ps_madd     fp10, fp1, fp9, fp10;
254     ps_madd     fp11, fp3, fp9, fp11;
255     ps_madd     fp12, fp5, fp9, fp12;
256     ps_sum0     fp10, fp10, fp10, fp10;     // fp10 <- x
257     ps_sum0     fp11, fp11, fp11, fp11;     // fp11 <- y
258     ps_sum0     fp12, fp12, fp12, fp12;     // fp12 <- z
259     ps_div      fp13, fp14, fp13;           // fp13.ps0 <- 1/w
260
261     ps_mul      fp10, fp10, fp13;
262     psq_st      fp10, 4(dstBase), 1, 0;     // store dst.x (non-update form, fixed offsets)
263     ps_mul      fp11, fp11, fp13;
264     psq_st      fp11, 8(dstBase), 1, 0;     // store dst.y
265     ps_mul      fp12, fp12, fp13;
266     psq_st      fp12, 12(dstBase), 1, 0;    // store dst.z
267
268     lfd     fp14,  8(rsp);                  // restore fp14
269     addi    rsp, rsp, 16;                   // release the frame
270     blr;
271 }
272 #endif  // GEKKO
273 
274 
275 /*---------------------------------------------------------------------*
276 Name:         MTX44MultVecSR
277 
278 Description:  multiplies a vector by the upper-left 3x3 (Scaling and
279               Rotation) part of a matrix; translation is not applied.
280 
281               m x src = dst.
282 
283 Arguments:    m         matrix.
284               src       source vector for multiply.
285               dst       resultant vector from multiply.
286               note:   ok if src == dst.
287 
288 Return   :       none
289  *---------------------------------------------------------------------*/
/*
 * C_MTX44MultVecSR
 *
 * Multiplies *src by the upper-left 3x3 part of m and writes the result
 * to *dst. Column 3 (translation) and row 3 of m are not read, and no
 * divide by w takes place.
 *
 * src and dst may be the same Vec: the source is copied and all three
 * sums are formed before anything is written to dst.
 */
void C_MTX44MultVecSR ( const Mtx44 m, const Vec *src, Vec *dst )
{
    Vec in;         // snapshot of the source vector
    Vec out;        // rotated / scaled result

    ASSERTMSG( (m   != 0), MTX44_MULTVECSR_1 );
    ASSERTMSG( (src != 0), MTX44_MULTVECSR_2 );
    ASSERTMSG( (dst != 0), MTX44_MULTVECSR_3 );

    in = *src;

    // 3x3 product only: no m[r][3] term is added
    out.x = m[0][0]*in.x + m[0][1]*in.y + m[0][2]*in.z;
    out.y = m[1][0]*in.x + m[1][1]*in.y + m[1][2]*in.z;
    out.z = m[2][0]*in.x + m[2][1]*in.y + m[2][2]*in.z;

    dst->x = out.x;
    dst->y = out.y;
    dst->z = out.z;
}
308 
309 /*---------------------------------------------------------------------*
310     Paired-Single assembler version
311  *---------------------------------------------------------------------*
312                 Note that this performs NO error checking.
313  *---------------------------------------------------------------------*/
314 #ifdef  GEKKO
/*
 * PSMTX44MultVecSR - paired-single counterpart of C_MTX44MultVecSR.
 *
 * Multiplies *src by the upper-left 3x3 part of m and stores x, y, z to
 * *dst, one float per store.
 *
 *   m    matrix; only rows 0..2 are loaded (byte offsets 0..40).
 *   src  source vector (x, y at offset 0; z at offset 8).
 *   dst  destination vector.
 *
 * The column-3 values m[r][3] are loaded into the ps1 lane alongside
 * m[r][2], but every store writes ps0 only, so they never reach dst.
 * Both loads from src are issued before the first store to dst.
 * No argument checking is done. Every quantized load/store names GQR 0;
 * presumably that register is configured for unscaled 32-bit floats -
 * TODO confirm.
 */
PSMTX44MultVecSR(const register Mtx44 m,const register Vec * src,register Vec * dst)315 asm void PSMTX44MultVecSR(
316     const register Mtx44 m,
317     const register Vec *src,
318           register Vec *dst
319 )
320 {
321
322     nofralloc
323     psq_l   fp0, 0(m), 0, 0    // fp0 <- m[0][0], m[0][1]  (via GQR 0)
324
325     // fp6 <- src.x, src.y
326     psq_l   fp6, 0(src), 0, 0
327
328     psq_l   fp2, 16(m), 0, 0   // fp2 <- m[1][0], m[1][1]
329
330
331     // fp8 <- m00*x, m01*y  (partial products for dst.x)
332     ps_mul  fp8, fp0, fp6
333     psq_l   fp4, 32(m), 0, 0   // fp4 <- m[2][0], m[2][1]
334
335     // fp10 <- m10*x, m11*y  (partial products for dst.y)
336     ps_mul  fp10, fp2, fp6
337     psq_l   fp7, 8(src), 1, 0   // fp7 <- src.z, 1.0
338
339     // fp12 <- m20*x, m21*y  (partial products for dst.z)
340     ps_mul  fp12, fp4, fp6  // last use of fp6
341     psq_l   fp3, 24(m), 0, 0   // fp3 <- m[1][2], m[1][3]
342
343     ps_sum0 fp8, fp8, fp8, fp8 // fp8.ps0 <- m00*x + m01*y
344     psq_l   fp5, 40(m), 0, 0   // fp5 <- m[2][2], m[2][3]
345
346     ps_sum0 fp10, fp10, fp10, fp10 // fp10.ps0 <- m10*x + m11*y
347     psq_l   fp1,  8(m), 0, 0    // fp1 <- m[0][2], m[0][3]
348
349     ps_sum0 fp12, fp12, fp12, fp12 // fp12.ps0 <- m20*x + m21*y
350     ps_madd fp9, fp1, fp7, fp8 // fp9.ps0 <- m02*z + (m00*x + m01*y)
351     psq_st  fp9,  0(dst), 1, 0      // store X (ps0 only)
352
353     ps_madd fp11, fp3, fp7, fp10 // fp11.ps0 <- m12*z + (m10*x + m11*y)
354     psq_st  fp11, 4(dst), 1, 0      // store Y (ps0 only)
355
356     ps_madd fp13, fp5, fp7, fp12 // fp13.ps0 <- m22*z + (m20*x + m21*y)
357     psq_st  fp13, 8(dst), 1, 0      // store Z (ps0 only)
358
359     blr
360
361 }
362 #endif  // GEKKO
363 
364 /*---------------------------------------------------------------------*
365 Name:           MTX44MultVecArraySR
366 
367 Description:    multiplies an array of vectors by the upper-left 3x3
368                 (Scaling and Rotation) part of a matrix; no translation.
369 
370 Arguments:      m         matrix.
371                 srcBase   start of source vector array.
372                 dstBase   start of resultant vector array.
373                 note:    ok if srcBase == dstBase.
374 
375                 count     number of vectors in srcBase, dstBase arrays
376                 note:    cannot check for array overflow
377 
378 Return   :         none
379  *---------------------------------------------------------------------*/
/*
 * C_MTX44MultVecArraySR
 *
 * Multiplies count consecutive vectors by the upper-left 3x3 part of m.
 * Column 3 (translation) and row 3 of m are not read, and no divide by w
 * takes place.
 *
 * srcBase and dstBase may be the same array: each element is copied and
 * fully transformed before its result is written. Nothing is done when
 * count is 0. Array bounds cannot be checked here.
 */
void C_MTX44MultVecArraySR ( const Mtx44 m, const Vec *srcBase, Vec *dstBase, u32 count )
{
    const Vec *from;        // read cursor
    Vec       *to;          // write cursor
    Vec        in;          // snapshot of the current source vector
    Vec        out;         // rotated / scaled result
    u32        remaining;

    ASSERTMSG( (m       != 0), MTX44_MULTVECARRAYSR_1 );
    ASSERTMSG( (srcBase != 0), MTX44_MULTVECARRAYSR_2 );
    ASSERTMSG( (dstBase != 0), MTX44_MULTVECARRAYSR_3 );

    from = srcBase;
    to   = dstBase;

    for ( remaining = count; remaining != 0; remaining-- )
    {
        in = *from;

        // 3x3 product only: no m[r][3] term is added
        out.x = m[0][0]*in.x + m[0][1]*in.y + m[0][2]*in.z;
        out.y = m[1][0]*in.x + m[1][1]*in.y + m[1][2]*in.z;
        out.z = m[2][0]*in.x + m[2][1]*in.y + m[2][2]*in.z;

        to->x = out.x;
        to->y = out.y;
        to->z = out.z;

        from++;
        to++;
    }
}
405 
406 /*---------------------------------------------------------------------*
407     Paired-Single assembler version
408  *---------------------------------------------------------------------*
409                 Note that this performs NO error checking.
410  *---------------------------------------------------------------------*/
411 #ifdef  GEKKO
/*
 * PSMTX44MultVecArraySR - paired-single counterpart of
 * C_MTX44MultVecArraySR.
 *
 * Multiplies each vector starting at srcBase by the upper-left 3x3 part
 * of m and stores x, y, z to the matching slot of dstBase.
 *
 *   m        matrix; rows 0..2 only. The third column of each row is
 *            fetched with a single-value load, so ps1 holds 1.0 rather
 *            than m[r][3].
 *   srcBase  source array; advanced with update-form loads.
 *   dstBase  destination array; pre-biased by -4 and advanced with
 *            update-form single-value stores.
 *   count    number of vectors.
 *
 * Layout of the code: the prologue starts the x/y products for vector
 * 0; every loop pass finishes the vector in flight, loads the next
 * source vector, stores the finished result and starts the products for
 * the next one; the tail after the loop finishes and stores the last
 * vector without loading anything further.
 *
 * NOTE(review): CTR is loaded with count - 1 and the loop body runs once
 * before the first bdnz, so the loop plus tail always handle at least
 * two vectors. count == 0 or count == 1 does not appear to be handled
 * (CTR would wrap). Confirm the caller contract.
 *
 * No argument checking is done. In the comments below "a, b" lists the
 * ps0, ps1 lanes of a register.
 */
PSMTX44MultVecArraySR(const register Mtx44 m,const register Vec * srcBase,register Vec * dstBase,register u32 count)412 asm void PSMTX44MultVecArraySR(
413     const register Mtx44 m,
414     const register Vec *srcBase,
415           register Vec *dstBase,
416           register u32 count
417 )
418 {
419     nofralloc;
420     psq_l       fp0,  0(m),         0, 0;           // fp0 <- m00, m01
421     addi        count, count, -1;                   // loop runs count-1 times; tail does the last vector
422     psq_l       fp6,  0(srcBase),   0, 0;           // fp6 <- src.x, src.y
423     ps_mul      fp8,  fp0, fp6;                     // fp8 <- m00*x, m01*y
424     psq_l       fp2,  16(m),        0, 0;           // fp2 <- m10, m11
425     ps_mul      fp9,  fp2, fp6;                     // fp9 <- m10*x, m11*y
426     psq_l       fp4,  32(m),        0, 0;           // fp4 <- m20, m21
427     psq_lu      fp7,  8(srcBase),   1, 0;           // fp7 <- src.z, 1.0 ; srcBase += 8
428     ps_mul      fp10, fp4, fp6;                     // fp10 <- m20*x, m21*y
429     psq_l       fp1,  8(m),         1, 0;           // fp1 <- m02, 1.0
430     mtctr       count;
431     psq_l       fp3,  24(m),        1, 0;           // fp3 <- m12, 1.0
432     addi        dstBase, dstBase, -4;               // bias so the first psq_stu 4(dstBase) hits dst[0].x
433     psq_l       fp5,  40(m),        1, 0;           // fp5 <- m22, 1.0
434
435 @loop:
    // ps0 of each madd gets m?2*z + m?0*x; the following ps_sum0 adds
    // the m?1*y term still held in ps1 of fp8/fp9/fp10.
436     ps_madd     fp11, fp1, fp7, fp8;                // fp11.ps0 <- m02*z + m00*x
437     psq_lu      fp6,  4(srcBase),   0, 0;           // fp6 <- next src.x, src.y ; srcBase += 4
438     ps_madd     fp12, fp3, fp7, fp9;                // fp12.ps0 <- m12*z + m10*x
439     ps_madd     fp13, fp5, fp7, fp10;               // fp13.ps0 <- m22*z + m20*x (last use of current z)
440     psq_lu      fp7,  8(srcBase),   1, 0;           // fp7 <- next src.z, 1.0 ; srcBase += 8
441     ps_sum0     fp11, fp11, fp8, fp8;               // fp11.ps0 <- dst.x
442     psq_stu     fp11, 4(dstBase),   1, 0;           // store dst.x ; dstBase += 4
443     ps_sum0     fp12, fp12, fp9, fp9;               // fp12.ps0 <- dst.y
444     psq_stu     fp12, 4(dstBase),   1, 0;           // store dst.y ; dstBase += 4
445     ps_sum0     fp13, fp13, fp10, fp10;             // fp13.ps0 <- dst.z
446     psq_stu     fp13, 4(dstBase),   1, 0;           // store dst.z ; dstBase += 4
    // start the x/y products for the vector just loaded
447     ps_mul      fp8,  fp0, fp6;                     // fp8 <- m00*x, m01*y
448     ps_mul      fp9,  fp2, fp6;                     // fp9 <- m10*x, m11*y
449     ps_mul      fp10,  fp4, fp6;                    // fp10 <- m20*x, m21*y
450     bdnz+       @loop
451
    // tail: finish and store the last vector; no further source load
452     ps_madd     fp11, fp1, fp7, fp8;
453     ps_madd     fp12, fp3, fp7, fp9;
454     ps_madd     fp13, fp5, fp7, fp10;
455     ps_sum0     fp11, fp11, fp8, fp8;               // fp11.ps0 <- dst.x
456     psq_stu     fp11, 4(dstBase),   1, 0;
457     ps_sum0     fp12, fp12, fp9, fp9;               // fp12.ps0 <- dst.y
458     psq_stu     fp12, 4(dstBase),   1, 0;
459     ps_sum0     fp13, fp13, fp10, fp10;             // fp13.ps0 <- dst.z
460     psq_stu     fp13, 4(dstBase),   1, 0;
461     blr;
462 }
463 #endif  // GEKKO
464 
465 
466 /*===========================================================================*/
467