1 /*---------------------------------------------------------------------------*
2   Project: Matrix vector Library
3   File:    mtx44vec.c
4 
5   Copyright 1998 - 2001 Nintendo.  All rights reserved.
6 
7   These coded instructions, statements, and computer programs contain
8   proprietary information of Nintendo of America Inc. and/or Nintendo
9   Company Ltd., and are protected by Federal copyright law.     They may
10   not be disclosed to third parties or copied or duplicated in any form,
11   in whole or in part, without the prior written consent of Nintendo.
12 
13 
14   $Log: mtx44vec.c,v $
15   Revision 1.4  2007/08/30 10:41:22  hirose
16   Updated PSMTX44MultVecArray to make it Broadway EABI compliant.
17 
18   Revision 1.3  2007/01/11 00:45:26  aka
19   Removed win32.h.
20 
21   Revision 1.2  2006/02/20 04:25:42  mitu
22   Changed include path from dolphin/ to revolution/.
23 
24   Revision 1.1.1.1  2005/05/12 02:15:49  yasuh-to
25   Ported from dolphin sheath tree.
26 
27 
28     2    2002/04/11 13:11 Hirose
29     const type specifier support. (by Hiratsu@IRD)
30 
31     1     2001/07/30 10:51p Hirose
32     Initial check in.
33 
34   $NoKeywords: $
35  *---------------------------------------------------------------------------*/
36 
37 #include <math.h>
38 #include <revolution/mtx.h>
39 #include <revolution/mtx/mtx44ext.h>
40 #include "mtx44extAssert.h"
41 
42 
43 
44 /*---------------------------------------------------------------------*
45 
46 
47 
48                              MODEL SECTION
49 
50 
51 
52  *---------------------------------------------------------------------*/
53 /* NOTE: Prototypes for these functions are defined in "mtx44ext.h".   */
54 
55 /*---------------------------------------------------------------------*
56 Name:           MTX44MultVec
57 
58 Description:    Multiplies a vector by a matrix.
59                 m x src = dst.
60 
61 Arguments:      m        Matrix.
62                 src     Source vector for multiply.
63                 dst     Resultant vector from multiply.
64                 Note:      OK if src == dst.
65 
66 Return   :         None.
67  *---------------------------------------------------------------------*/
68 /*---------------------------------------------------------------------*
69     C version
70  *---------------------------------------------------------------------*/
/*
 * C reference implementation of MTX44MultVec.
 *
 * src is treated as the homogeneous point (x, y, z, 1).  It is multiplied
 * by all four rows of m; the first three results are scaled by the
 * reciprocal of the fourth (w) and written to dst.
 *
 * The source components are copied to locals before dst is written, so
 * src == dst is allowed.  A resulting w of zero is not checked for.
 */
void C_MTX44MultVec ( const Mtx44 m, const Vec *src, Vec *dst )
{
    f32 sx, sy, sz;     /* snapshot of the source vector           */
    f32 px, py, pz;     /* product rows 0..2 before the w divide   */
    f32 w, invW;

    ASSERTMSG( (m   != 0), MTX44_MULTVEC_1 );
    ASSERTMSG( (src != 0), MTX44_MULTVEC_2 );
    ASSERTMSG( (dst != 0), MTX44_MULTVEC_3 );

    sx = src->x;
    sy = src->y;
    sz = src->z;

    // Column 3 of each row is multiplied by the implicit 1, i.e. just added.
    px = m[0][0]*sx + m[0][1]*sy + m[0][2]*sz + m[0][3];
    py = m[1][0]*sx + m[1][1]*sy + m[1][2]*sz + m[1][3];
    pz = m[2][0]*sx + m[2][1]*sy + m[2][2]*sz + m[2][3];
    w  = m[3][0]*sx + m[3][1]*sy + m[3][2]*sz + m[3][3];

    // One divide, three multiplies.
    invW = 1.0f/w;

    dst->x = px * invW;
    dst->y = py * invW;
    dst->z = pz * invW;
}
92 
93 /*---------------------------------------------------------------------*
94     Paired-Single assembler version
95  *---------------------------------------------------------------------*
96                 Note that NO error checking is performed.
97  *---------------------------------------------------------------------*/
98 #ifdef  GEKKO
// Paired-single implementation of MTX44MultVec.
// Computes the four row dot products of m with (src.x, src.y, src.z, 1.0)
// and stores x/w, y/w, z/w to dst.  The w row (byte offsets 48 and 56 of m)
// is evaluated first, presumably so the ps_div can overlap with the loads
// and multiplies for rows 0..2.
// Both loads from src are issued before either store to dst, so src == dst
// is fine.  No argument checking is done and w == 0 is not guarded against.
// In the comments below x, y, z are the source components and "--" marks a
// lane whose value is not used.
PSMTX44MultVec(const register Mtx44 m,const register Vec * src,register Vec * dst)99 asm void PSMTX44MultVec (
100     const register Mtx44 m,
101     const register Vec *src,
102           register Vec *dst
103 )
104 {
105     nofralloc;
106     psq_l       fp0, 0(src),    0, 0;       // fp0  <- x, y
107     psq_l       fp2, 48(m),     0, 0;       // fp2  <- m30, m31
108     psq_l       fp1, 8(src),    1, 0;       // fp1  <- z, 1.0
109     ps_mul      fp4, fp0, fp2;              // fp4  <- m30*x, m31*y
110     psq_l       fp3, 56(m),     0, 0;       // fp3  <- m32, m33
111     ps_madd     fp5, fp1, fp3, fp4;         // fp5  <- m32*z + m30*x, m33 + m31*y
112     ps_merge11  fp12, fp1, fp1;             // fp12 <- 1.0, 1.0
113     ps_sum0     fp13, fp5, fp5, fp5;        // fp13 <- w, --
114     psq_l       fp4, 0(m),      0, 0;       // fp4  <- m00, m01
115     ps_merge00  fp13, fp13, fp13;           // fp13 <- w, w
116     psq_l       fp5, 8(m),      0, 0;       // fp5  <- m02, m03
117     ps_div      fp13, fp12, fp13;           // fp13 <- 1/w, 1/w
118     psq_l       fp6, 16(m),     0, 0;       // fp6  <- m10, m11
119     psq_l       fp7, 24(m),     0, 0;       // fp7  <- m12, m13
120     psq_l       fp8, 32(m),     0, 0;       // fp8  <- m20, m21
121     psq_l       fp9, 40(m),     0, 0;       // fp9  <- m22, m23
122     ps_mul      fp4, fp0, fp4;              // fp4  <- m00*x, m01*y
123     ps_madd     fp2, fp1, fp5, fp4;         // fp2  <- m02*z + m00*x, m03 + m01*y
124     ps_mul      fp6, fp0, fp6;              // fp6  <- m10*x, m11*y
125     ps_madd     fp3, fp1, fp7, fp6;         // fp3  <- m12*z + m10*x, m13 + m11*y
126     ps_mul      fp8, fp0, fp8;              // fp8  <- m20*x, m21*y
127     ps_sum0     fp2, fp2, fp2, fp2;         // fp2  <- row0 sum, --
128     ps_madd     fp9, fp1, fp9, fp8;         // fp9  <- m22*z + m20*x, m23 + m21*y
129     ps_sum1     fp2, fp3, fp2, fp3;         // fp2  <- row0 sum, row1 sum
130     ps_sum0     fp3, fp9, fp9, fp9;         // fp3  <- row2 sum, --
131     ps_mul      fp2, fp2, fp13;             // fp2  <- dst.x, dst.y (scaled by 1/w)
132     psq_st      fp2, 0(dst),    0, 0;       // store dst.x, dst.y
133     ps_mul      fp3, fp3, fp13;             // fp3  <- dst.z, --
134     psq_st      fp3, 8(dst),    1, 0;       // store dst.z only
135     blr;
136 }
137 #endif      //  GEKKO
138 
139 /*---------------------------------------------------------------------*
140 Name:           MTX44MultVecArray
141 
142 Description:    Multiplies an array of vectors by a matrix.
143 
144 
145 Arguments:      m        Matrix.
146                 srcBase  Start of source vector array.
147                 dstBase  Start of resultant vector array.
148                 Note:     OK if srcBase == dstBase.
149                 count    Number of vectors in srcBase, dstBase arrays
150                           Note:      Cannot check for array overflow
151 
152 Return   :         None.
153  *---------------------------------------------------------------------*/
154 /*---------------------------------------------------------------------*
155     C version
156  *---------------------------------------------------------------------*/
/*
 * C reference implementation of MTX44MultVecArray.
 *
 * Applies the same transform as the single-vector routine to each of the
 * count elements of srcBase, writing element i of the result to
 * dstBase[i]: the point (x, y, z, 1) is multiplied by the 4x4 matrix m and
 * the first three components are scaled by the reciprocal of the fourth.
 *
 * Each element is fully read before its destination is written, so
 * srcBase == dstBase is allowed.  count == 0 does nothing.  Array bounds
 * and a zero w cannot be / are not checked.
 */
void C_MTX44MultVecArray ( const Mtx44 m, const Vec *srcBase, Vec *dstBase, u32 count )
{
    u32 idx;

    ASSERTMSG( (m       != 0),    MTX44_MULTVECARRAY_1    );
    ASSERTMSG( (srcBase != 0),    MTX44_MULTVECARRAY_2    );
    ASSERTMSG( (dstBase != 0),    MTX44_MULTVECARRAY_3    );

    for ( idx = 0; idx < count; ++idx )
    {
        const Vec *in  = srcBase + idx;
        Vec       *out = dstBase + idx;

        // Snapshot the input so that in == out is harmless.
        f32 sx = in->x;
        f32 sy = in->y;
        f32 sz = in->z;

        // Column 3 of each row is multiplied by the implicit 1, i.e. just added.
        f32 px = m[0][0]*sx + m[0][1]*sy + m[0][2]*sz + m[0][3];
        f32 py = m[1][0]*sx + m[1][1]*sy + m[1][2]*sz + m[1][3];
        f32 pz = m[2][0]*sx + m[2][1]*sy + m[2][2]*sz + m[2][3];
        f32 w  = m[3][0]*sx + m[3][1]*sy + m[3][2]*sz + m[3][3];

        // One divide, three multiplies.
        f32 invW = 1.0f/w;

        out->x = px * invW;
        out->y = py * invW;
        out->z = pz * invW;
    }
}
185 
186 /*---------------------------------------------------------------------*
187     Paired-Single assembler version
188  *---------------------------------------------------------------------*
189                 Note that NO error checking is performed.
190  *---------------------------------------------------------------------*/
191 #ifdef  GEKKO
// Paired-single implementation of MTX44MultVecArray.
// The loop is software pipelined: the code before @loop loads the matrix
// and starts vector 0; each pass through @loop finishes the current vector
// while loading and starting the next one; the code after the loop
// finishes the last vector.  The next source vector is always loaded before
// the current result is stored.
// fp14 is used to hold (1.0, 1.0); it is saved to and restored from a
// 24-byte stack frame, both as a double (stfd/lfd) and as a paired single
// (psq_st/psq_l).
// NOTE(review): CTR is loaded with count - 1 and bdnz sits at the bottom
// of the loop, so the loop body always runs at least once.  This looks
// like it requires count >= 2 (a count of 0 or 1 would wrap CTR) --
// confirm against callers.  The commented-out lines at the top appear to
// be a disabled special case that forwarded count == 1 to PSMTX44MultVec.
// In the comments below x, y, z are the current source components.
PSMTX44MultVecArray(const register Mtx44 m,const register Vec * srcBase,register Vec * dstBase,register u32 count)192 asm void PSMTX44MultVecArray (
193     const register Mtx44 m,
194     const register Vec *srcBase,
195           register Vec *dstBase,
196           register u32 count
197 )
198 {
199     nofralloc;
200 //  cmpwi       count, 1;
201 //  bne         @array
202 //  b           PSMTX44MultVec
203 //@array:
204     stwu        rsp, -24(rsp);              // open a 24-byte stack frame
205     addi        count, count, -1;           // the last vector is finished after the loop
206     psq_l       fp6, 48(m),         0, 0;   // fp6 <- m30, m31
207     mtctr       count;                      // CTR <- count - 1
208     psq_l       fp8, 0(srcBase),    0, 0;   // fp8 <- src.x, src.y
209     addi        dstBase, dstBase, -4;       // pre-bias for the psq_stu ..., 4(dstBase) stores
210     stfd        fp14, 8(rsp);               // save fp14 (double image)
211     psq_l       fp7, 56(m),         0, 0;   // fp7 <- m32, m33
212     psq_lu      fp9, 8(srcBase),    1, 0;   // fp9 <- src.z, 1.0; srcBase += 8
213     ps_mul      fp13, fp6, fp8;             // fp13 <- m30*x, m31*y
214     psq_l       fp0, 0(m),          0, 0;   // fp0 <- m00, m01
215     psq_st      fp14, 16(rsp),      0, 0;   // save fp14 (paired-single image)
216     ps_madd     fp13, fp7, fp9, fp13;       // fp13 <- m32*z + m30*x, m33 + m31*y
217     psq_l       fp2, 16(m),         0, 0;   // fp2 <- m10, m11
218     ps_merge11  fp14, fp9, fp9;             // fp14 <- 1.0F, 1.0F
219     ps_mul      fp10, fp0, fp8;             // fp10 <- m00*x, m01*y
220     psq_l       fp4, 32(m),         0, 0;   // fp4 <- m20, m21
221     ps_mul      fp11, fp2, fp8;             // fp11 <- m10*x, m11*y
222     psq_l       fp1, 8(m),          0, 0;   // fp1 <- m02, m03
223     ps_mul      fp12, fp4, fp8;             // fp12 <- m20*x, m21*y
224     psq_l       fp3, 24(m),         0, 0;   // fp3 <- m12, m13
225     ps_sum0     fp13, fp13, fp13, fp13;     // fp13 <- w
226     psq_l       fp5, 40(m),         0, 0;   // fp5 <- m22, m23
227
228 @loop:
229     ps_madd     fp10, fp1, fp9, fp10;       // fp10 <- m02*z + m00*x, m03 + m01*y
230     ps_madd     fp11, fp3, fp9, fp11;       // fp11 <- m12*z + m10*x, m13 + m11*y
231     ps_madd     fp12, fp5, fp9, fp12;       // fp12 <- m22*z + m20*x, m23 + m21*y
232     ps_sum0     fp10, fp10, fp10, fp10;     // fp10 <- x
233     ps_sum0     fp11, fp11, fp11, fp11;     // fp11 <- y
234     ps_sum0     fp12, fp12, fp12, fp12;     // fp12 <- z
235     ps_div      fp13, fp14, fp13;           // fp13 <- 1/w (only lane 0 is used)
236
237     psq_lu      fp8, 4(srcBase), 0, 0;      // fp8 <- next src.x, src.y; srcBase += 4
238     psq_lu      fp9, 8(srcBase), 1, 0;      // fp9 <- next src.z, 1.0;   srcBase += 8
239
240     ps_mul      fp10, fp10, fp13;           // fp10 <- x/w
241     psq_stu     fp10, 4(dstBase), 1, 0;     // store dst.x; dstBase += 4
242     ps_mul      fp11, fp11, fp13;           // fp11 <- y/w
243     psq_stu     fp11, 4(dstBase), 1, 0;     // store dst.y; dstBase += 4
244     ps_mul      fp12, fp12, fp13;           // fp12 <- z/w
245     psq_stu     fp12, 4(dstBase), 1, 0;     // store dst.z; dstBase += 4
246
247     ps_mul      fp13, fp6, fp8;             // start the next vector: w row ...
248
249     ps_mul      fp10, fp0, fp8;             // ... row 0
250     ps_mul      fp11, fp2, fp8;             // ... row 1
251     ps_madd     fp13, fp7, fp9, fp13;
252     ps_mul      fp12, fp4, fp8;             // ... row 2
253     ps_sum0     fp13, fp13, fp13, fp13;     // fp13 <- next w
254
255     bdnz+       @loop
256
257     ps_madd     fp10, fp1, fp9, fp10;       // finish the last vector (same as loop head)
258     ps_madd     fp11, fp3, fp9, fp11;
259     ps_madd     fp12, fp5, fp9, fp12;
260     ps_sum0     fp10, fp10, fp10, fp10;     // fp10 <- x
261     ps_sum0     fp11, fp11, fp11, fp11;     // fp11 <- y
262     ps_sum0     fp12, fp12, fp12, fp12;     // fp12 <- z
263     ps_div      fp13, fp14, fp13;           // fp13 <- 1/w
264
265     ps_mul      fp10, fp10, fp13;
266     psq_st      fp10, 4(dstBase), 1, 0;     // last dst.x (dstBase is still biased by -4)
267     ps_mul      fp11, fp11, fp13;
268     psq_st      fp11, 8(dstBase), 1, 0;     // last dst.y
269     ps_mul      fp12, fp12, fp13;
270     psq_st      fp12, 12(dstBase), 1, 0;    // last dst.z
271
272     psq_l       fp14, 16(rsp), 0, 0;        // restore fp14 (paired-single image)
273     lfd         fp14,  8(rsp);              // restore fp14 (double image)
274     addi        rsp, rsp, 24;               // close the stack frame
275     blr;
276 }
277 #endif  // GEKKO
278 
279 
280 /*---------------------------------------------------------------------*
281 Name:         MTX44MultVecSR
282 
283 Description:  Multiplies a vector by a matrix 3x3 (Scaling and Rotation)
284               component.
285 
286               m x src = dst.
287 
288 Arguments:    m        Matrix.
289               src     Source vector for multiply.
290               dst     Resultant vector from multiply.
291               Note:   OK if src == dst.
292 
293 Return   :       None.
294  *---------------------------------------------------------------------*/
/*
 * C reference implementation of MTX44MultVecSR.
 *
 * Multiplies src by the upper-left 3x3 block of m only.  Column 3
 * (m[i][3]) and row 3 of the matrix are never read, and there is no
 * divide by w.
 *
 * The source components are copied to locals before dst is written, so
 * src == dst is allowed.
 */
void C_MTX44MultVecSR ( const Mtx44 m, const Vec *src, Vec *dst )
{
    f32 sx, sy, sz;     /* snapshot of the source vector */
    f32 px, py, pz;     /* transformed components        */

    ASSERTMSG( (m   != 0), MTX44_MULTVECSR_1 );
    ASSERTMSG( (src != 0), MTX44_MULTVECSR_2 );
    ASSERTMSG( (dst != 0), MTX44_MULTVECSR_3 );

    sx = src->x;
    sy = src->y;
    sz = src->z;

    // 3x3 product: no translation term is added.
    px = m[0][0]*sx + m[0][1]*sy + m[0][2]*sz;
    py = m[1][0]*sx + m[1][1]*sy + m[1][2]*sz;
    pz = m[2][0]*sx + m[2][1]*sy + m[2][2]*sz;

    dst->x = px;
    dst->y = py;
    dst->z = pz;
}
313 
314 /*---------------------------------------------------------------------*
315     Paired-Single assembler version
316  *---------------------------------------------------------------------*
317                 Note that this performs NO error checking.
318  *---------------------------------------------------------------------*/
319 #ifdef  GEKKO
// Paired-single implementation of MTX44MultVecSR.
// Multiplies src by the upper-left 3x3 block of m and stores the three
// results to dst one float at a time.  The loads at 8(m), 24(m) and 40(m)
// also bring in m03, m13 and m23, but those only reach lane 1 of the
// ps_madd results, which is never stored.
// Both loads from src are issued before the first store to dst, so
// src == dst is fine.  No argument checking is done.
PSMTX44MultVecSR(const register Mtx44 m,const register Vec * src,register Vec * dst)320 asm void PSMTX44MultVecSR(
321     const register Mtx44 m,
322     const register Vec *src,
323           register Vec *dst
324 )
325 {
326
327     nofralloc
328     psq_l   fp0, 0(m), 0, 0    // fp0 <- m[0][0], m[0][1] (last operand selects GQR0)
329
330     // fp6 <- src.x, src.y
331     psq_l   fp6, 0(src), 0, 0
332
333     psq_l   fp2, 16(m), 0, 0   // fp2 <- m[1][0], m[1][1]
334
335
336     // fp8 <- m00*x, m01*y : partial products for dst.x
337     ps_mul  fp8, fp0, fp6
338     psq_l   fp4, 32(m), 0, 0   // fp4 <- m[2][0], m[2][1]
339
340     // fp10 <- m10*x, m11*y : partial products for dst.y
341     ps_mul  fp10, fp2, fp6
342     psq_l   fp7, 8(src), 1, 0   // fp7 <- src.z, 1.0
343
344     // fp12 <- m20*x, m21*y : partial products for dst.z
345     ps_mul  fp12, fp4, fp6  // last use of fp6
346     psq_l   fp3, 24(m), 0, 0   // fp3 <- m[1][2], m[1][3]
347
348     ps_sum0 fp8, fp8, fp8, fp8          // fp8.ps0 <- m00*x + m01*y
349     psq_l   fp5, 40(m), 0, 0   // fp5 <- m[2][2], m[2][3]
350
351     ps_sum0 fp10, fp10, fp10, fp10      // fp10.ps0 <- m10*x + m11*y
352     psq_l   fp1,  8(m), 0, 0    // fp1 <- m[0][2], m[0][3]
353
354     ps_sum0 fp12, fp12, fp12, fp12      // fp12.ps0 <- m20*x + m21*y
355     ps_madd fp9, fp1, fp7, fp8          // fp9.ps0 <- m02*z + (m00*x + m01*y)
356     psq_st  fp9,  0(dst), 1, 0      // Store X (lane 0 only)
357
358     ps_madd fp11, fp3, fp7, fp10        // fp11.ps0 <- m12*z + (m10*x + m11*y)
359     psq_st  fp11, 4(dst), 1, 0      // Store Y (lane 0 only)
360
361     ps_madd fp13, fp5, fp7, fp12        // fp13.ps0 <- m22*z + (m20*x + m21*y)
362     psq_st  fp13, 8(dst), 1, 0      // Store Z (lane 0 only)
363
364     blr
365
366 }
367 #endif  // GEKKO
368 
369 /*---------------------------------------------------------------------*
370 Name:           MTX44MultVecArraySR
371 
372 Description:    Multiplies an array of vectors by a matrix 3x3
373                 (Scaling and Rotation) component.
374 
375 Arguments:      m        Matrix.
376                 srcBase  Start of source vector array.
377                 dstBase  Start of resultant vector array.
378                 Note:    OK if srcBase == dstBase.
379 
380                 count    Number of vectors in srcBase, dstBase arrays
381                 Note:    Cannot check for array overflow
382 
383 Return   :         None.
384  *---------------------------------------------------------------------*/
/*
 * C reference implementation of MTX44MultVecArraySR.
 *
 * Multiplies each of the count elements of srcBase by the upper-left 3x3
 * block of m and writes element i of the result to dstBase[i].  Column 3
 * (m[i][3]) and row 3 of the matrix are never read, and there is no
 * divide by w.
 *
 * Each element is fully read before its destination is written, so
 * srcBase == dstBase is allowed.  count == 0 does nothing.  Array bounds
 * cannot be checked.
 */
void C_MTX44MultVecArraySR ( const Mtx44 m, const Vec *srcBase, Vec *dstBase, u32 count )
{
    u32 idx;

    ASSERTMSG( (m       != 0), MTX44_MULTVECARRAYSR_1 );
    ASSERTMSG( (srcBase != 0), MTX44_MULTVECARRAYSR_2 );
    ASSERTMSG( (dstBase != 0), MTX44_MULTVECARRAYSR_3 );

    for ( idx = 0; idx < count; ++idx )
    {
        const Vec *in  = srcBase + idx;
        Vec       *out = dstBase + idx;

        // Snapshot the input so that in == out is harmless.
        f32 sx = in->x;
        f32 sy = in->y;
        f32 sz = in->z;

        // 3x3 product: no translation term is added.
        f32 px = m[0][0]*sx + m[0][1]*sy + m[0][2]*sz;
        f32 py = m[1][0]*sx + m[1][1]*sy + m[1][2]*sz;
        f32 pz = m[2][0]*sx + m[2][1]*sy + m[2][2]*sz;

        out->x = px;
        out->y = py;
        out->z = pz;
    }
}
410 
411 /*---------------------------------------------------------------------*
412     Paired-Single assembler version
413  *---------------------------------------------------------------------*
414                 Note that this performs NO error checking.
415  *---------------------------------------------------------------------*/
416 #ifdef  GEKKO
// Paired-single implementation of MTX44MultVecArraySR.
// Multiplies each source vector by the upper-left 3x3 block of m.  The
// third matrix column is loaded with the single-float form of psq_l, so
// lane 1 of fp1/fp3/fp5 holds 1.0 and m03/m13/m23 are never read.
// The loop is software pipelined: the x/y partial products of the next
// vector are formed at the bottom of the loop (and once before it), and the
// last vector is finished after the loop.  The next source vector is
// always loaded before the current result is stored.
// NOTE(review): CTR is loaded with count - 1 and bdnz sits at the bottom
// of the loop, so the loop body always runs at least once.  This looks
// like it requires count >= 2 (a count of 0 or 1 would wrap CTR) --
// confirm against callers.
// In the comments below x, y, z are the current source components.
PSMTX44MultVecArraySR(const register Mtx44 m,const register Vec * srcBase,register Vec * dstBase,register u32 count)417 asm void PSMTX44MultVecArraySR(
418     const register Mtx44 m,
419     const register Vec *srcBase,
420           register Vec *dstBase,
421           register u32 count
422 )
423 {
424     nofralloc;
425     psq_l       fp0,  0(m),         0, 0;           // fp0 <- m00, m01
426     addi        count, count, -1;                   // the last vector is finished after the loop
427     psq_l       fp6,  0(srcBase),   0, 0;           // fp6 <- src.x, src.y
428     ps_mul      fp8,  fp0, fp6;                     // fp8  <- m00*x, m01*y
429     psq_l       fp2,  16(m),        0, 0;           // fp2 <- m10, m11
430     ps_mul      fp9,  fp2, fp6;                     // fp9  <- m10*x, m11*y
431     psq_l       fp4,  32(m),        0, 0;           // fp4 <- m20, m21
432     psq_lu      fp7,  8(srcBase),   1, 0;           // fp7 <- src.z, 1.0; srcBase += 8
433     ps_mul      fp10, fp4, fp6;                     // fp10 <- m20*x, m21*y
434     psq_l       fp1,  8(m),         1, 0;           // fp1 <- m02, 1.0
435     mtctr       count;                              // CTR <- count - 1
436     psq_l       fp3,  24(m),        1, 0;           // fp3 <- m12, 1.0
437     addi        dstBase, dstBase, -4;               // pre-bias for the psq_stu ..., 4(dstBase) stores
438     psq_l       fp5,  40(m),        1, 0;           // fp5 <- m22, 1.0
439
440 @loop:
441     ps_madd     fp11, fp1, fp7, fp8;                // fp11.ps0 <- m02*z + m00*x
442     psq_lu      fp6,  4(srcBase),   0, 0;           // fp6 <- next src.x, src.y; srcBase += 4
443     ps_madd     fp12, fp3, fp7, fp9;                // fp12.ps0 <- m12*z + m10*x
444     ps_madd     fp13, fp5, fp7, fp10;               // fp13.ps0 <- m22*z + m20*x
445     psq_lu      fp7,  8(srcBase),   1, 0;           // fp7 <- next src.z, 1.0; srcBase += 8
446     ps_sum0     fp11, fp11, fp8, fp8;               // fp11.ps0 += m01*y  -> dst.x
447     psq_stu     fp11, 4(dstBase),   1, 0;           // store dst.x; dstBase += 4
448     ps_sum0     fp12, fp12, fp9, fp9;               // fp12.ps0 += m11*y  -> dst.y
449     psq_stu     fp12, 4(dstBase),   1, 0;           // store dst.y; dstBase += 4
450     ps_sum0     fp13, fp13, fp10, fp10;             // fp13.ps0 += m21*y  -> dst.z
451     psq_stu     fp13, 4(dstBase),   1, 0;           // store dst.z; dstBase += 4
452     ps_mul      fp8,  fp0, fp6;                     // partial products for the next vector
453     ps_mul      fp9,  fp2, fp6;
454     ps_mul      fp10,  fp4, fp6;
455     bdnz+       @loop
456
457     ps_madd     fp11, fp1, fp7, fp8;                // finish the last vector (same as loop head)
458     ps_madd     fp12, fp3, fp7, fp9;
459     ps_madd     fp13, fp5, fp7, fp10;
460     ps_sum0     fp11, fp11, fp8, fp8;               // -> last dst.x
461     psq_stu     fp11, 4(dstBase),   1, 0;
462     ps_sum0     fp12, fp12, fp9, fp9;               // -> last dst.y
463     psq_stu     fp12, 4(dstBase),   1, 0;
464     ps_sum0     fp13, fp13, fp10, fp10;             // -> last dst.z
465     psq_stu     fp13, 4(dstBase),   1, 0;
466     blr;
467 }
468 #endif  // GEKKO
469 
470 
471 /*===========================================================================*/
472