1 /*---------------------------------------------------------------------------*
2   Project: matrix vector Library
3   File:    mtx44vec.c
4 
5   Copyright 1998-2011 Nintendo.  All rights reserved.
6 
7   These coded instructions, statements, and computer programs contain
8   proprietary information of Nintendo of America Inc. and/or Nintendo
9   Company Ltd., and are protected by Federal copyright law.     They may
10   not be disclosed to third parties or copied or duplicated in any form,
11   in whole or in part, without the prior written consent of Nintendo.
12 
13  *---------------------------------------------------------------------------*/
14 
15 #include <math.h>
16 #include <stdio.h>
17 #include <cafe/mtx.h>
18 #include <cafe/mtx/mtx44.h>
19 #include "mtxAssert.h"
20 #include "mtx44Assert.h"
21 
22 
23 
/*---------------------------------------------------------------------*

                             MODEL SECTION

 *---------------------------------------------------------------------*/
/* NOTE: Prototypes for these functions are declared in "mtx44ext.h".  */
30 
31 /*---------------------------------------------------------------------*
32 Name:           MTX44MultVec
33 
34 Description:    multiplies a vector by a matrix.
35                 m x src = dst.
36 
37 Arguments:      m         matrix.
38                 src       source vector for multiply.
39                 dst       resultant vector from multiply.
40                 note:      ok if src == dst.
41 
42 Return:         none
43  *---------------------------------------------------------------------*/
44 /*---------------------------------------------------------------------*
45     C version
46  *---------------------------------------------------------------------*/
C_MTX44MultVec(MTX_CONST Mtx44 m,const Vec * src,Vec * dst)47 void C_MTX44MultVec ( MTX_CONST Mtx44 m, const Vec *src, Vec *dst )
48 {
49     Vec vTmp;
50     f32 w;
51 
52     ASSERTMSG( (m   != 0), MTX44_MULTVEC_1 );
53     ASSERTMSG( (src != 0), MTX44_MULTVEC_2 );
54     ASSERTMSG( (dst != 0), MTX44_MULTVEC_3 );
55 
56     // a Vec has a 4th implicit 'w' coordinate of 1
57     vTmp.x = m[0][0]*src->x + m[0][1]*src->y + m[0][2]*src->z + m[0][3];
58     vTmp.y = m[1][0]*src->x + m[1][1]*src->y + m[1][2]*src->z + m[1][3];
59     vTmp.z = m[2][0]*src->x + m[2][1]*src->y + m[2][2]*src->z + m[2][3];
60     w      = m[3][0]*src->x + m[3][1]*src->y + m[3][2]*src->z + m[3][3];
61     w = 1.0f/w;
62 
63     // copy back
64     dst->x = vTmp.x * w;
65     dst->y = vTmp.y * w;
66     dst->z = vTmp.z * w;
67 }
68 
69 #if !defined(WIN32) && !defined(WIN64)
70 /*---------------------------------------------------------------------*
71     Paired-Single intrinsics version
72  *---------------------------------------------------------------------*
73                 Note that NO error checking is performed.
74  *---------------------------------------------------------------------*/
PSMTX44MultVec(MTX_CONST Mtx44 m,const Vec * src,Vec * dst)75 void PSMTX44MultVec ( MTX_CONST Mtx44 m, const Vec *src, Vec *dst )
76 {
77     f32x2 fp0, fp1, fp2, fp3, fp4, fp5, fp6, fp7, fp8, fp9, fp12, fp13; //fp10, fp11,
78 
79     //psq_l       fp0, 0(src),    0, 0;       // fp0 <-src.x, src.y
80     //fp0[0] = src->x;
81     //fp0[1] = src->y;
82     fp0 = __PSQ_LX(src, 0, 0, 0);
83 
84     //psq_l       fp2, 48(m),     0, 0;
85     //fp2[0] = m[3][0];
86     //fp2[1] = m[3][1];
87     fp2 = __PSQ_LX(m, 48, 0, 0);
88 
89     //psq_l       fp1, 8(src),    1, 0;       // fp1 <-src.z, 1.0
90     //fp1[0] = src->z;
91     //fp1[1] = 1.0F;
92     fp1 = __PSQ_LX(src, 8, 1, 0);
93 
94     //ps_mul      fp4, fp0, fp2;
95     fp4 = __PS_MUL(fp0, fp2);
96 
97     //psq_l       fp3, 56(m),     0, 0;
98     //fp3[0] = m[3][2];
99     //fp3[1] = m[3][3];
100     fp3 = __PSQ_LX(m, 56, 0, 0);
101 
102     //ps_madd     fp5, fp1, fp3, fp4;
103     fp5 = __PS_MADD(fp1, fp3, fp4);
104 
105     //ps_merge11  fp12, fp1, fp1;             // fp12 = 1.0, 1.0
106     fp12 = __PS_MERGE11(fp1, fp1);
107 
108     //ps_sum0     fp13, fp5, fp5, fp5;        // fp3 <-  w
109     fp13 = __PS_SUM0(fp5, fp5, fp5);
110 
111     //psq_l       fp4, 0(m),      0, 0;
112     //fp4[0] = m[0][0];
113     //fp4[1] = m[0][1];
114     fp4 = __PSQ_LX(m, 0, 0, 0);
115 
116     //ps_merge00  fp13, fp13, fp13;
117     fp13 = __PS_MERGE00(fp13, fp13);
118 
119     //psq_l       fp5, 8(m),      0, 0;
120     //fp5[0] = m[0][2];
121     //fp5[1] = m[0][3];
122     fp5 = __PSQ_LX(m, 8, 0, 0);
123 
124     //ps_div      fp13, fp12, fp13;           // fp13 <- 1/w
125     fp13 = __PS_DIV(fp12, fp13);
126 
127     //psq_l       fp6, 16(m),     0, 0;
128     //fp6[0] = m[1][0];
129     //fp6[1] = m[1][1];
130     fp6 = __PSQ_LX(m, 16, 0, 0);
131 
132     //psq_l       fp7, 24(m),     0, 0;
133     //fp7[0] = m[1][2];
134     //fp7[1] = m[1][3];
135     fp7 = __PSQ_LX(m, 24, 0, 0);
136 
137     //psq_l       fp8, 32(m),     0, 0;
138     //fp8[0] = m[2][0];
139     //fp8[1] = m[2][1];
140     fp8 = __PSQ_LX(m, 32, 0, 0);
141 
142     //psq_l       fp9, 40(m),     0, 0;
143     //fp9[0] = m[2][2];
144     //fp9[1] = m[2][3];
145     fp9 = __PSQ_LX(m, 40, 0, 0);
146 
147     //ps_mul      fp4, fp0, fp4;
148     fp4 = __PS_MUL(fp0, fp4);
149 
150     //ps_madd     fp2, fp1, fp5, fp4;
151     fp2 = __PS_MADD(fp1, fp5, fp4);
152 
153     //ps_mul      fp6, fp0, fp6;
154     fp6 = __PS_MUL(fp0, fp6);
155 
156     //ps_madd     fp3, fp1, fp7, fp6;
157     fp3 = __PS_MADD(fp1, fp7, fp6);
158 
159     //ps_mul      fp8, fp0, fp8;
160     fp8 = __PS_MUL(fp0, fp8);
161 
162     //ps_sum0     fp2, fp2, fp2, fp2;         // fp2 <- dst.x, --
163     fp2 = __PS_SUM0(fp2, fp2, fp2);
164 
165     //ps_madd     fp9, fp1, fp9, fp8;
166     fp9 = __PS_MADD(fp1, fp9, fp8);
167 
168     //ps_sum1     fp2, fp3, fp2, fp3;         // fp2 <- dst.x, dst.y
169     fp2 = __PS_SUM1(fp3, fp2, fp3);
170 
171     //ps_sum0     fp3, fp9, fp9, fp9;
172     fp3 = __PS_SUM0(fp9, fp9, fp9);
173 
174     //ps_mul      fp2, fp2, fp13;
175     fp2 = __PS_MUL(fp2, fp13);
176 
177     //psq_st      fp2, 0(dst),    0, 0;
178     //dst->x = fp2[0];
179     //dst->y = fp2[1];
180     __PSQ_STX(dst, 0, fp2, 0, 0);
181 
182     //ps_mul      fp3, fp3, fp13;
183     fp3 = __PS_MUL(fp3, fp13);
184 
185     //psq_st      fp3, 8(dst),    1, 0;
186     //dst->z = fp3[0];
187     __PSQ_STX(dst, 8, fp3, 1, 0);
188 }
189 #endif
190 
191 /*---------------------------------------------------------------------*
192 Name:           MTX44MultVecArray
193 
194 Description:    multiplies an array of vectors by a matrix.
195 
196 
197 Arguments:      m         matrix.
198                 srcBase   start of source vector array.
199                 dstBase   start of resultant vector array.
200                 note:     ok if srcBase == dstBase.
201                 count     number of vectors in srcBase, dstBase arrays
202                           note:      cannot check for array overflow
203 
204 Return:         none
205  *---------------------------------------------------------------------*/
206 /*---------------------------------------------------------------------*
207     C version
208  *---------------------------------------------------------------------*/
C_MTX44MultVecArray(MTX_CONST Mtx44 m,const Vec * srcBase,Vec * dstBase,u32 count)209 void C_MTX44MultVecArray ( MTX_CONST Mtx44 m, const Vec *srcBase, Vec *dstBase, u32 count )
210 {
211     u32 i;
212     Vec vTmp;
213     f32 w;
214 
215     ASSERTMSG( (m       != 0),    MTX44_MULTVECARRAY_1    );
216     ASSERTMSG( (srcBase != 0),    MTX44_MULTVECARRAY_2    );
217     ASSERTMSG( (dstBase != 0),    MTX44_MULTVECARRAY_3    );
218 
219     for(i=0; i< count; i++)
220     {
221         // Vec has a 4th implicit 'w' coordinate of 1
222         vTmp.x = m[0][0]*srcBase->x + m[0][1]*srcBase->y + m[0][2]*srcBase->z + m[0][3];
223         vTmp.y = m[1][0]*srcBase->x + m[1][1]*srcBase->y + m[1][2]*srcBase->z + m[1][3];
224         vTmp.z = m[2][0]*srcBase->x + m[2][1]*srcBase->y + m[2][2]*srcBase->z + m[2][3];
225         w      = m[3][0]*srcBase->x + m[3][1]*srcBase->y + m[3][2]*srcBase->z + m[3][3];
226         w = 1.0f/w;
227 
228         // copy back
229         dstBase->x = vTmp.x * w;
230         dstBase->y = vTmp.y * w;
231         dstBase->z = vTmp.z * w;
232 
233         srcBase++;
234         dstBase++;
235     }
236 }
237 
238 #if !defined(WIN32) && !defined(WIN64)
239 /*---------------------------------------------------------------------*
240     Paired-Single intrinsics version
241  *---------------------------------------------------------------------*
242                 Note that NO error checking is performed.
243  *---------------------------------------------------------------------*/
PSMTX44MultVecArray(MTX_CONST Mtx44 m,const Vec * srcBase,Vec * dstBase,u32 count)244 void PSMTX44MultVecArray ( MTX_CONST Mtx44 m, const Vec *srcBase, Vec *dstBase, u32 count )
245 {
246     u32 i;
247 
248     for(i=0; i< count; i++)
249     {
250         PSMTX44MultVec(m, srcBase, dstBase);
251 
252         srcBase++;
253         dstBase++;
254     }
255 }
256 #endif
257 
258 
259 /*---------------------------------------------------------------------*
260 Name:         MTX44MultVecSR
261 
262 Description:  multiplies a vector by a matrix 3x3 (Scaling and Rotation)
263               component.
264 
265               m x src = dst.
266 
267 Arguments:    m       matrix.
268               src     source vector for multiply.
269               dst     resultant vector from multiply.
270               note:   ok if src == dst.
271 
272 Return:       none
273  *---------------------------------------------------------------------*/
C_MTX44MultVecSR(MTX_CONST Mtx44 m,const Vec * src,Vec * dst)274 void C_MTX44MultVecSR ( MTX_CONST Mtx44 m, const Vec *src, Vec *dst )
275 {
276     Vec vTmp;
277 
278     ASSERTMSG( (m   != 0), MTX44_MULTVECSR_1 );
279     ASSERTMSG( (src != 0), MTX44_MULTVECSR_2 );
280     ASSERTMSG( (dst != 0), MTX44_MULTVECSR_3 );
281 
282     // a Vec has a 4th implicit 'w' coordinate of 1
283     vTmp.x = m[0][0]*src->x + m[0][1]*src->y + m[0][2]*src->z;
284     vTmp.y = m[1][0]*src->x + m[1][1]*src->y + m[1][2]*src->z;
285     vTmp.z = m[2][0]*src->x + m[2][1]*src->y + m[2][2]*src->z;
286 
287     // copy back
288     dst->x = vTmp.x;
289     dst->y = vTmp.y;
290     dst->z = vTmp.z;
291 }
292 
293 #if !defined(WIN32) && !defined(WIN64)
294 /*---------------------------------------------------------------------*
295     Paired-Single intrinsics version
296  *---------------------------------------------------------------------*
297                 Note that this performs NO error checking.
298  *---------------------------------------------------------------------*/
PSMTX44MultVecSR(MTX_CONST Mtx44 m,const Vec * src,Vec * dst)299 void PSMTX44MultVecSR ( MTX_CONST Mtx44 m, const Vec *src, Vec *dst )
300 {
301     f32x2 fp0, fp1, fp2, fp3, fp4, fp5, fp6, fp7, fp8, fp9, fp10, fp11, fp12, fp13;
302 
303     //psq_l   fp0, 0(m), 0, 0    // m[0][0], m[0][1] GQR0 = 0
304     //fp0[0] = m[0][0];
305     //fp0[1] = m[0][1];
306     fp0 = __PSQ_LX(m, 0, 0, 0);
307 
308     // fp6 - x y
309     //psq_l   fp6, 0(src), 0, 0
310     //fp6[0] = src->x;
311     //fp6[1] = src->y;
312     fp6 = __PSQ_LX(src, 0, 0, 0);
313 
314     //psq_l   fp2, 16(m), 0, 0   // m[1][0], m[1][1]
315     //fp2[0] = m[1][0];
316     //fp2[1] = m[1][1];
317     fp2 = __PSQ_LX(m, 16, 0, 0);
318 
319     // fp8 = m00x m01y // next X
320     //ps_mul  fp8, fp0, fp6
321     fp8 = __PS_MUL(fp0, fp6);
322 
323     //psq_l   fp4, 32(m), 0, 0   // m[2][0], m[2][1]
324     //fp4[0] = m[2][0];
325     //fp4[1] = m[2][1];
326     fp4 = __PSQ_LX(m, 32, 0, 0);
327 
328     // fp10 = m10x m11y // next Y
329     //ps_mul  fp10, fp2, fp6
330     fp10 = __PS_MUL(fp2, fp6);
331 
332     //psq_l   fp7, 8(src), 1, 0   // fp7 - z,1.0
333     //fp7[0] = src->z;
334     //fp7[1] = 1.0F;
335     fp7 = __PSQ_LX(src, 8, 1, 0);
336 
337     // fp12 = m20x m21y // next Z
338     //ps_mul  fp12, fp4, fp6
339     fp12 = __PS_MUL(fp4, fp6);
340 
341     //psq_l   fp3, 24(m), 0, 0   // m[1][2], m[1][3]
342     //fp3[0] = m[1][2];
343     //fp3[1] = m[1][3];
344     fp3 = __PSQ_LX(m, 24, 0, 0);
345 
346     //ps_sum0 fp8, fp8, fp8, fp8
347     fp8 = __PS_SUM0(fp8, fp8, fp8);
348 
349     //psq_l   fp5, 40(m), 0, 0   // m[2][2], m[2][3]
350     //fp5[0] = m[2][2];
351     //fp5[1] = m[2][3];
352     fp5 = __PSQ_LX(m, 40, 0, 0);
353 
354     //ps_sum0 fp10, fp10, fp10, fp10
355     fp10 = __PS_SUM0(fp10, fp10, fp10);
356 
357     //psq_l   fp1,  8(m), 0, 0    // m[0][2], m[0][3]
358     //fp1[0] = m[0][2];
359     //fp1[1] = m[0][3];
360     fp1 = __PSQ_LX(m, 8, 0, 0);
361 
362     //ps_sum0 fp12, fp12, fp12, fp12
363     fp12 = __PS_SUM0(fp12, fp12, fp12);
364 
365     //ps_madd fp9, fp1, fp7, fp8
366     fp9 = __PS_MADD(fp1, fp7, fp8);
367 
368     //psq_st  fp9,  0(dst), 1, 0      // store X
369     //dst->x = fp9[0];
370     __PSQ_STX(dst, 0, fp9, 1, 0);
371 
372     //ps_madd fp11, fp3, fp7, fp10
373     fp11 = __PS_MADD(fp3, fp7, fp10);
374 
375     //psq_st  fp11, 4(dst), 1, 0      // store Y
376     //dst->y = fp11[0];
377     __PSQ_STX(dst, 4, fp11, 1, 0);
378 
379     //ps_madd fp13, fp5, fp7, fp12
380     fp13 = __PS_MADD(fp5, fp7, fp12);
381 
382     //psq_st  fp13, 8(dst), 1, 0      //  sore Z
383     //dst->z = fp13[0];
384     __PSQ_STX(dst, 8, fp13, 1, 0);
385 }
386 #endif
387 
388 /*---------------------------------------------------------------------*
389 Name:           MTX44MultVecArraySR
390 
391 Description:    multiplies an array of vectors by a matrix 3x3
392                 (Scaling and Rotation) component.
393 
394 Arguments:      m        matrix.
395                 srcBase  start of source vector array.
396                 dstBase  start of resultant vector array.
397                 note:    ok if srcBase == dstBase.
398 
399                 count    number of vectors in srcBase, dstBase arrays
400                 note:    cannot check for array overflow
401 
402 Return:         none
403  *---------------------------------------------------------------------*/
C_MTX44MultVecArraySR(MTX_CONST Mtx44 m,const Vec * srcBase,Vec * dstBase,u32 count)404 void C_MTX44MultVecArraySR ( MTX_CONST Mtx44 m, const Vec *srcBase, Vec *dstBase, u32 count )
405 {
406     u32 i;
407     Vec vTmp;
408 
409     ASSERTMSG( (m       != 0), MTX44_MULTVECARRAYSR_1 );
410     ASSERTMSG( (srcBase != 0), MTX44_MULTVECARRAYSR_2 );
411     ASSERTMSG( (dstBase != 0), MTX44_MULTVECARRAYSR_3 );
412 
413     for ( i = 0; i < count; i ++ )
414     {
415         // Vec has a 4th implicit 'w' coordinate of 1
416         vTmp.x = m[0][0]*srcBase->x + m[0][1]*srcBase->y + m[0][2]*srcBase->z;
417         vTmp.y = m[1][0]*srcBase->x + m[1][1]*srcBase->y + m[1][2]*srcBase->z;
418         vTmp.z = m[2][0]*srcBase->x + m[2][1]*srcBase->y + m[2][2]*srcBase->z;
419 
420         // copy back
421         dstBase->x = vTmp.x;
422         dstBase->y = vTmp.y;
423         dstBase->z = vTmp.z;
424 
425         srcBase++;
426         dstBase++;
427     }
428 }
429 
430 #if !defined(WIN32) && !defined(WIN64)
431 /*---------------------------------------------------------------------*
432     Paired-Single intrinsics version
433  *---------------------------------------------------------------------*
434                 Note that this performs NO error checking.
435  *---------------------------------------------------------------------*/
PSMTX44MultVecArraySR(MTX_CONST Mtx44 m,const Vec * srcBase,Vec * dstBase,u32 count)436 void PSMTX44MultVecArraySR ( MTX_CONST Mtx44 m, const Vec *srcBase, Vec *dstBase, u32 count )
437 {
438     u32 i;
439 
440     for ( i = 0; i < count; i ++ )
441     {
442         PSMTX44MultVecSR(m, srcBase, dstBase);
443         srcBase++;
444         dstBase++;
445     }
446 }
447 #endif
448 
449 
450 /*===========================================================================*/
451