1 /*---------------------------------------------------------------------------*
2   Project: Matrix vector Library
3   File:    mtxvec.c
4 
5   Copyright 1998 - 2001 Nintendo.  All rights reserved.
6 
7   These coded instructions, statements, and computer programs contain
8   proprietary information of Nintendo of America Inc. and/or Nintendo
9   Company Ltd., and are protected by Federal copyright law.  They may
10   not be disclosed to third parties or copied or duplicated in any form,
11   in whole or in part, without the prior written consent of Nintendo.
12 
13 
14   $Log: mtxvec.c,v $
15   Revision 1.3  2007/01/11 00:45:26  aka
16   Removed win32.h.
17 
18   Revision 1.2  2006/02/20 04:25:42  mitu
19   Changed include path from dolphin/ to revolution/.
20 
21   Revision 1.1.1.1  2005/05/12 02:15:49  yasuh-to
22   Ported from dolphin source tree.
23 
24 
25     5    2002/04/11 13:11 Hirose
26     const type specifier support. (by Hiratsu@IRD)
27 
28     4    2001/07/23 8:46p Hirose
29     Added PSMultVecArraySR. Improved PSMultVecArray.
30 
31     3     2001/07/07 7:40p Hirose
32     Added PSMTXMultVecSR made by Ohki-san@NTSC.
33 
34     2     2001/02/23 1:49a Hirose
35     Fixed a bug in PSMTXMultVec.
36 
37     1    2001/02/22 11:56p Hirose
38     This section is moved from mtx.c. Added PSMultVec.
39 
40   $NoKeywords: $
41  *---------------------------------------------------------------------------*/
42 
43 #include <math.h>
44 #include <revolution/mtx.h>
45 #include "mtxAssert.h"
46 
47 /*---------------------------------------------------------------------*
48 
49 Name:           MTXMultVec
50 
51 Description:    Multiplies a vector by a matrix.
52                 m x src = dst.
53 
54 
55 Arguments:      m         Matrix.
56                 src     Source vector for multiplication.
57                 dst     Resultant vector from multiplication.
58 
59                 Note:      OK if src == dst.
60 
61 
62 Return:         None
63 
64 *---------------------------------------------------------------------*/
65 /*---------------------------------------------------------------------*
66     C version
67  *---------------------------------------------------------------------*/
/*---------------------------------------------------------------------*
    C_MTXMultVec

    Computes dst = m x src, treating src as the column (x, y, z, 1):
    each result component is the dot product of one matrix row's
    first three entries with src, plus that row's 4th entry.

    m       Matrix; rows 0..2, columns 0..3 are read.
    src     Source vector.
    dst     Receives the result.  May be the same object as src.

    All three arguments are checked against 0 with ASSERTMSG.
 *---------------------------------------------------------------------*/
void C_MTXMultVec ( const Mtx m, const Vec *src, Vec *dst )
{
    Vec vTmp;

    ASSERTMSG( (m   != 0), MTX_MULTVEC_1 );
    ASSERTMSG( (src != 0), MTX_MULTVEC_2 );
    ASSERTMSG( (dst != 0), MTX_MULTVEC_3 );

    // A Vec has a 4th implicit 'w' coordinate of 1, so column 3 of the
    // matrix is added unscaled.
    // The result is built in a temporary because dst may alias src; all
    // three rows must see the unmodified source components.
    vTmp.x = m[0][0]*src->x + m[0][1]*src->y + m[0][2]*src->z + m[0][3];
    vTmp.y = m[1][0]*src->x + m[1][1]*src->y + m[1][2]*src->z + m[1][3];
    vTmp.z = m[2][0]*src->x + m[2][1]*src->y + m[2][2]*src->z + m[2][3];

    // Copy back
    dst->x = vTmp.x;
    dst->y = vTmp.y;
    dst->z = vTmp.z;
}
86 
87 /*---------------------------------------------------------------------*
88     Paired-Single assembler version
89  *---------------------------------------------------------------------*
90                 Note that NO error checking is performed.
91  *---------------------------------------------------------------------*/
#ifdef GEKKO
/*---------------------------------------------------------------------*
    PSMTXMultVec

    Paired-single version of MTXMultVec: dst = m x src with an implicit
    w of 1.  No argument checking is done.

    Each row is computed the same way:
        ps_mul   : [mR0*v0]          [mR1*v1]
        ps_madd  : [mR0*v0 + mR2*v2] [mR1*v1 + mR3*1]
        ps_sum0  : ps0 = sum of the two lanes above
    and only ps0 is stored (psq_st with W = 1).

    ps_sum0 fD,fA,fC,fB yields fD.ps0 = fA.ps0 + fB.ps1 and
    fD.ps1 = fC.ps1.  The fC operand here is the destination register
    itself, so ps1 of the result is whatever was there before; it is
    never stored.

    Both halves of src are loaded before the first store, so src and
    dst may be the same object.
 *---------------------------------------------------------------------*/
asm void PSMTXMultVec
(
    const register Mtx m,
    const register Vec *src,
          register Vec *dst
)
{
    nofralloc

    // fp0 = v[0], v[1]
    psq_l       fp0, 0(src), 0, 0
    // fp2 = m[0][0], m[0][1]
    psq_l       fp2, 0(m), 0, 0
    // fp1 = v[2], 1  (W = 1 loads a single float; ps1 becomes 1.0)
    psq_l       fp1, 8(src), 1, 0
        // m[0][0]*v[0], m[0][1]*v[1]
        ps_mul      fp4, fp2, fp0
    // fp3 = m[0][2], m[0][3]
    psq_l       fp3, 8(m), 0, 0
        // m[0][0]*v[0]+m[0][2]*v[2], m[0][1]*v[1]+m[0][3]
        ps_madd     fp5, fp3, fp1, fp4
    // fp8 = m[1][0], m[1][1]
    psq_l       fp8, 16(m), 0, 0
        // m[0][0]*v[0]+m[0][2]*v[2]+m[0][1]*v[1]+m[0][3], (don't care)
        ps_sum0     fp6, fp5, fp6, fp5
    // fp9 = m[1][2], m[1][3]
    psq_l       fp9, 24(m), 0, 0
        // m[1][0]*v[0], m[1][1]*v[1]
        ps_mul      fp10, fp8, fp0
    // Store dst[0]
    psq_st      fp6, 0(dst), 1, 0
        // m[1][0]*v[0]+m[1][2]*v[2], m[1][1]*v[1]+m[1][3]
        ps_madd     fp11, fp9, fp1, fp10
    // fp2 = m[2][0], m[2][1]
    psq_l       fp2, 32(m), 0, 0
        // m[1][0]*v[0]+m[1][2]*v[2]+m[1][1]*v[1]+m[1][3], (don't care)
        ps_sum0     fp12, fp11, fp12, fp11
    // fp3 = m[2][2], m[2][3]
    psq_l       fp3, 40(m), 0, 0
        // m[2][0]*v[0], m[2][1]*v[1]
        ps_mul      fp4, fp2, fp0
    // Store dst[1]
    psq_st      fp12, 4(dst), 1, 0
        // m[2][0]*v[0]+m[2][2]*v[2], m[2][1]*v[1]+m[2][3]
        ps_madd     fp5, fp3, fp1, fp4
        // m[2][0]*v[0]+m[2][2]*v[2]+m[2][1]*v[1]+m[2][3], (don't care)
        ps_sum0     fp6, fp5, fp6, fp5
    // Store dst[2]
    psq_st      fp6, 8(dst), 1, 0

    blr
}
#endif // GEKKO
146 
147 /*---------------------------------------------------------------------*
148 
149 Name:           MTXMultVecArray
150 
151 Description:    Multiplies an array of vectors by a matrix.
152 
153 
154 Arguments:      m         Matrix.
155                 srcBase   Start of source vector array.
156                 dstBase   Start of resultant vector array.
157 
158                 Note:     OK if srcBase == dstBase.
159 
160                 count     Number of vectors in srcBase, dstBase arrays
161                           Note:      Cannot check for array overflow
162 
163 Return:         None
164 
165 *---------------------------------------------------------------------*/
166 /*---------------------------------------------------------------------*
167     C version
168  *---------------------------------------------------------------------*/
/*---------------------------------------------------------------------*
    C_MTXMultVecArray

    Applies dst = m x src (implicit w of 1) to count consecutive
    vectors, reading from srcBase and writing to dstBase.

    m        Matrix; rows 0..2, columns 0..3 are read.
    srcBase  First source vector.
    dstBase  First destination vector.  May equal srcBase.
    count    Number of vectors to transform.  Asserted to be > 1,
             matching the requirement documented for the paired-single
             version; the loop below itself works for any count.

    The array bounds cannot be checked here; the caller must supply at
    least count elements in both arrays.
 *---------------------------------------------------------------------*/
void C_MTXMultVecArray ( const Mtx m, const Vec *srcBase, Vec *dstBase, u32 count )
{
    u32 i;
    Vec vTmp;

    ASSERTMSG( (m       != 0), MTX_MULTVECARRAY_1 );
    ASSERTMSG( (srcBase != 0), MTX_MULTVECARRAY_2 );
    ASSERTMSG( (dstBase != 0), MTX_MULTVECARRAY_3 );
    ASSERTMSG( (count > 1),    MTX_MULTVECARRAY_4 );

    for(i=0; i< count; i++)
    {
        // Vec has a 4th implicit 'w' coordinate of 1.
        // Build the result in a temporary: dstBase may alias srcBase.
        vTmp.x = m[0][0]*srcBase->x + m[0][1]*srcBase->y + m[0][2]*srcBase->z + m[0][3];
        vTmp.y = m[1][0]*srcBase->x + m[1][1]*srcBase->y + m[1][2]*srcBase->z + m[1][3];
        vTmp.z = m[2][0]*srcBase->x + m[2][1]*srcBase->y + m[2][2]*srcBase->z + m[2][3];

        // Copy back
        dstBase->x = vTmp.x;
        dstBase->y = vTmp.y;
        dstBase->z = vTmp.z;

        // Advance both cursors to the next vector
        srcBase++;
        dstBase++;
    }
}
195 
196 /*---------------------------------------------------------------------*
197     Paired-Single assembler version
198  *---------------------------------------------------------------------*
199                 Note that NO error checking is performed.
200 
201                 The count should be greater than 1.
202  *---------------------------------------------------------------------*/
#ifdef GEKKO
/*---------------------------------------------------------------------*
    PSMTXMultVecArray

    Paired-single version of MTXMultVecArray (implicit w of 1).
    No argument checking is done.

    The loop is software-pipelined: the multiply-adds for the first
    vector are started before the loop, each pass finishes and stores
    the current vector while starting the next one, and the code after
    the loop finishes and stores the last vector.

    count must be greater than 1.  CTR is loaded with count-1 and the
    loop body always runs once before bdnz decrements and tests CTR,
    so a count of 1 would enter the loop with CTR equal to 0.

    Rows 0 and 1 are computed together, one per lane, from the merged
    column registers fp0..fp3; row 2 is computed across the lanes of
    one register and reduced with ps_sum0.

    dstBase is pre-decremented by 4 so that the update-form stores
    (+4, then +8) land on x/y and z of each successive 12-byte vector.
    srcBase advances the same way through the update-form loads.
 *---------------------------------------------------------------------*/
asm void PSMTXMultVecArray (
    const register Mtx    m,
    const register Vec   *srcBase,
          register Vec   *dstBase,
          register u32    count )
{
    nofralloc

    // fp13 [m00][m01] : LOAD
    psq_l       fp13,  0(m), 0, 0
    // fp12 [m10][m11] : LOAD
    psq_l       fp12, 16(m), 0, 0
        // Decrement loop count due to unrolling
        subi        count, count, 1
    // fp11 [m02][m03] : LOAD
    psq_l       fp11,  8(m), 0, 0
    // fp0 [m00][m10]
    ps_merge00  fp0, fp13, fp12
        // Base pointer adjustment
        subi        dstBase, dstBase, 4
    // fp10 [m12][m13] : LOAD
    psq_l       fp10, 24(m), 0, 0
    // fp1 [m01][m11]
    ps_merge11  fp1, fp13, fp12
        // Loop counter
        mtctr       count
    // fp4 [m20][m21] : LOAD
    psq_l       fp4,  32(m), 0, 0
    // fp2 [m02][m12]
    ps_merge00  fp2, fp11, fp10
    // fp5 [m22][m23] : LOAD
    psq_l       fp5,  40(m), 0, 0
    // fp3 [m03][m13]
    ps_merge11  fp3, fp11, fp10

    //-------- Prologue: start the first vector --------

    // fp6 [v0][v1]   : LOAD
    psq_l       fp6,  0(srcBase), 0, 0
    // fp7 [v2][1.0F] : LOAD (srcBase += 8)
    psq_lu      fp7,  8(srcBase), 1, 0
    // fp8 [m00*v0+m03][m10*v0+m13]
    ps_madds0   fp8, fp0, fp6, fp3
    // fp9 [m20*v0][m21*v1]
    ps_mul      fp9, fp4, fp6
    // fp8 [m00*v0+m01*v1+m03][m10*v0+m11*v1+m13]
    ps_madds1   fp8, fp1, fp6, fp8
    // fp10 [m20*v0+m22*v2][m21*v1+m23*1.0F]
    ps_madd     fp10, fp5, fp7, fp9

_mloop:
    //-------- Unrolled loop --------
    // Extra-indented instructions work on the NEXT vector; the others
    // finish and store the CURRENT one.

        // fp6 [v0][v1]   : LOAD (srcBase += 4)
        psq_lu      fp6,  4(srcBase), 0, 0
    // fp12 [m00*v0+m01*v1+m02*v2+m03][m10*v0+m11*v1+m12*v2+m13]
    ps_madds0   fp12, fp2, fp7, fp8
        // fp7 [v2][1.0F] : LOAD (srcBase += 8)
        psq_lu      fp7,  8(srcBase), 1, 0
    // fp13 [m20*v0+m21*v1+m22*v2+m23][?]
    ps_sum0     fp13, fp10, fp9, fp10
        // fp8 [m00*v0+m03][m10*v0+m13]
        ps_madds0   fp8, fp0, fp6, fp3
        // fp9 [m20*v0][m21*v1]
        ps_mul      fp9, fp4, fp6
    // fp12 [v0'][v1'] : STORE (dstBase += 4)
    psq_stu     fp12,  4(dstBase), 0, 0
        // fp8 [m00*v0+m01*v1+m03][m10*v0+m11*v1+m13]
        ps_madds1   fp8, fp1, fp6, fp8
    // fp13 [v2'][ ?] : STORE ps0 only (dstBase += 8)
    psq_stu     fp13,  8(dstBase), 1, 0
        // fp10 [m20*v0+m22*v2][m21*v1+m23*1.0F]
        ps_madd     fp10, fp5, fp7, fp9

    // LOOP
    bdnz        _mloop

    //-------- Epilogue: finish and store the last vector --------

    // fp12 [m00*v0+m01*v1+m02*v2+m03][m10*v0+m11*v1+m12*v2+m13]
    ps_madds0   fp12, fp2, fp7, fp8
    // fp13 [m20*v0+m21*v1+m22*v2+m23][?]
    ps_sum0     fp13, fp10, fp9, fp10
    // fp12 [v0'][v1'] : STORE
    psq_stu     fp12,  4(dstBase), 0, 0
    // fp13 [v2'][ ?] : STORE ps0 only
    psq_stu     fp13,  8(dstBase), 1, 0

    blr
}
#endif // GEKKO
292 
293 
/*---------------------------------------------------------------------*

Name:         MTXMultVecSR

Description:  Multiplies a vector by the 3x3 (scaling and rotation)
              part of a matrix.  The 4th column of the matrix
              (translation) is not used.

              m(3x3) x src = dst.

Arguments:    m       Matrix.
              src     Source vector for multiply.
              dst     Resultant vector from multiply.

              Note:   OK if src == dst.

Return:       None

*---------------------------------------------------------------------*/
312 /*---------------------------------------------------------------------*
313     C version
314  *---------------------------------------------------------------------*/
/*---------------------------------------------------------------------*
    C_MTXMultVecSR

    Computes dst = M3 x src, where M3 is rows 0..2, columns 0..2 of m.
    Column 3 of the matrix is never read, so no translation is applied.

    m       Matrix; only its 3x3 part is read.
    src     Source vector.
    dst     Receives the result.  May be the same object as src.

    All three arguments are checked against 0 with ASSERTMSG.
 *---------------------------------------------------------------------*/
void C_MTXMultVecSR ( const Mtx m, const Vec *src, Vec *dst )
{
    Vec vTmp;

    ASSERTMSG( (m   != 0), MTX_MULTVECSR_1 );
    ASSERTMSG( (src != 0), MTX_MULTVECSR_2 );
    ASSERTMSG( (dst != 0), MTX_MULTVECSR_3 );

    // Only the 3x3 part of the matrix contributes; m[r][3] is not added.
    // The result is built in a temporary because dst may alias src.
    vTmp.x = m[0][0]*src->x + m[0][1]*src->y + m[0][2]*src->z;
    vTmp.y = m[1][0]*src->x + m[1][1]*src->y + m[1][2]*src->z;
    vTmp.z = m[2][0]*src->x + m[2][1]*src->y + m[2][2]*src->z;

    // Copy back
    dst->x = vTmp.x;
    dst->y = vTmp.y;
    dst->z = vTmp.z;
}
333 
334 /*---------------------------------------------------------------------*
335     Paired-Single assembler version
336  *---------------------------------------------------------------------*
337                 Note that this performs NO error checking.
338  *---------------------------------------------------------------------*/
#ifdef  GEKKO
/*---------------------------------------------------------------------*
    PSMTXMultVecSR

    Paired-single version of MTXMultVecSR: dst = (3x3 part of m) x src.
    No argument checking is done.

    For each row R:
        ps_mul   : [mR0*x]          [mR1*y]
        ps_sum0  : ps0 = mR0*x + mR1*y
        ps_madd  : ps0 = mR2*z + (mR0*x + mR1*y)
    Only ps0 is stored (psq_st with W = 1).  ps1 of the ps_madd result
    does pick up mR3*1.0, but that lane is never written to dst, so
    the translation column has no effect on the output.

    Both halves of src are loaded before the first store, so src and
    dst may be the same object.
 *---------------------------------------------------------------------*/
asm void PSMTXMultVecSR
(
    const register Mtx m,
    const register Vec *src,
          register Vec *dst
)
{
    nofralloc
    psq_l   fp0, 0(m), 0, 0    // m[0][0], m[0][1] GQR0 = 0

    // fp6 - x y
    psq_l   fp6, 0(src), 0, 0

    psq_l   fp2, 16(m), 0, 0   // m[1][0], m[1][1]


    // fp8 = m00x m01y // partial products for X
    ps_mul  fp8, fp0, fp6
    psq_l   fp4, 32(m), 0, 0   // m[2][0], m[2][1]

    // fp10 = m10x m11y // partial products for Y
    ps_mul  fp10, fp2, fp6
    psq_l   fp7, 8(src), 1, 0   // fp7 - z,1.0

    // fp12 = m20x m21y // partial products for Z
    ps_mul  fp12, fp4, fp6  // last use of fp6
    psq_l   fp3, 24(m), 0, 0   // m[1][2], m[1][3]

    // fp8.ps0 = m00x + m01y
    ps_sum0 fp8, fp8, fp8, fp8
    psq_l   fp5, 40(m), 0, 0   // m[2][2], m[2][3]

    // fp10.ps0 = m10x + m11y
    ps_sum0 fp10, fp10, fp10, fp10
    psq_l   fp1,  8(m), 0, 0    // m[0][2], m[0][3]

    // fp12.ps0 = m20x + m21y
    ps_sum0 fp12, fp12, fp12, fp12

    // fp9.ps0 = m02z + m00x + m01y
    ps_madd fp9, fp1, fp7, fp8
    psq_st  fp9,  0(dst), 1, 0      // Store X

    // fp11.ps0 = m12z + m10x + m11y
    ps_madd fp11, fp3, fp7, fp10
    psq_st  fp11, 4(dst), 1, 0      // Store Y

    // fp13.ps0 = m22z + m20x + m21y
    ps_madd fp13, fp5, fp7, fp12
    psq_st  fp13, 8(dst), 1, 0      // Store Z

    blr

}
#endif  // GEKKO
388 
/*---------------------------------------------------------------------*

Name:           MTXMultVecArraySR

Description:    Multiplies an array of vectors by the 3x3 (scaling and
                rotation) part of a matrix.  The 4th column of the
                matrix (translation) is not used.

Arguments:      m        Matrix.
                srcBase  Start of source vector array.
                dstBase  Start of resultant vector array.

                Note:    OK if srcBase == dstBase.

                count    Number of vectors in srcBase, dstBase arrays.
                         Note:    Cannot check for array overflow.

Return:         None

*---------------------------------------------------------------------*/
408 /*---------------------------------------------------------------------*
409     C version
410  *---------------------------------------------------------------------*/
/*---------------------------------------------------------------------*
    C_MTXMultVecArraySR

    Applies dst = (3x3 part of m) x src to count consecutive vectors,
    reading from srcBase and writing to dstBase.  Column 3 of the
    matrix is never read, so no translation is applied.

    m        Matrix; only its 3x3 part is read.
    srcBase  First source vector.
    dstBase  First destination vector.  May equal srcBase.
    count    Number of vectors to transform.  Asserted to be > 1; the
             loop below itself works for any count.

    The array bounds cannot be checked here; the caller must supply at
    least count elements in both arrays.
 *---------------------------------------------------------------------*/
void C_MTXMultVecArraySR ( const Mtx m, const Vec *srcBase, Vec *dstBase, u32 count )
{
    u32 i;
    Vec vTmp;

    ASSERTMSG( (m       != 0), MTX_MULTVECARRAYSR_1 );
    ASSERTMSG( (srcBase != 0), MTX_MULTVECARRAYSR_2 );
    ASSERTMSG( (dstBase != 0), MTX_MULTVECARRAYSR_3 );
    ASSERTMSG( (count > 1),    MTX_MULTVECARRAYSR_4 );

    for ( i = 0; i < count; i ++ )
    {
        // Only the 3x3 part of the matrix contributes; m[r][3] is not added.
        // Build the result in a temporary: dstBase may alias srcBase.
        vTmp.x = m[0][0]*srcBase->x + m[0][1]*srcBase->y + m[0][2]*srcBase->z;
        vTmp.y = m[1][0]*srcBase->x + m[1][1]*srcBase->y + m[1][2]*srcBase->z;
        vTmp.z = m[2][0]*srcBase->x + m[2][1]*srcBase->y + m[2][2]*srcBase->z;

        // Copy back
        dstBase->x = vTmp.x;
        dstBase->y = vTmp.y;
        dstBase->z = vTmp.z;

        // Advance both cursors to the next vector
        srcBase++;
        dstBase++;
    }
}
437 
/*---------------------------------------------------------------------*
    Paired-Single assembler version
 *---------------------------------------------------------------------*
                Note that NO error checking is performed.

                The count should be greater than 1.
 *---------------------------------------------------------------------*/
#ifdef GEKKO
/*---------------------------------------------------------------------*
    PSMTXMultVecArraySR

    Paired-single version of MTXMultVecArraySR: applies the 3x3 part of
    m to count consecutive vectors.  No argument checking is done.

    The loop is software-pipelined: the products for the first vector
    are started before the loop, each pass finishes and stores the
    current vector while starting the next one, and the code after the
    loop finishes and stores the last vector.

    count must be greater than 1.  CTR is loaded with count-1 and the
    loop body always runs once before bdnz decrements and tests CTR,
    so a count of 1 would enter the loop with CTR equal to 0.

    The matrix's 4th column is never loaded: the loads at offsets 8,
    24 and 40 use W = 1, which reads one float and sets ps1 to 1.0.

    dstBase is pre-decremented by 4 so that the update-form stores
    (+4, then +8) land on x/y and z of each successive 12-byte vector.
    srcBase advances the same way through the update-form loads.
 *---------------------------------------------------------------------*/
asm void PSMTXMultVecArraySR (
    const register Mtx    m,
    const register Vec   *srcBase,
          register Vec   *dstBase,
          register u32    count )
{
    nofralloc

    // fp13 [m00][m01] : LOAD
    psq_l       fp13,  0(m), 0, 0
    // fp12 [m10][m11] : LOAD
    psq_l       fp12, 16(m), 0, 0
        // Decrement loop count due to unrolling
        subi        count, count, 1
    // fp11 [m02][1.0F] : LOAD
    psq_l       fp11,  8(m), 1, 0
    // fp0 [m00][m10]
    ps_merge00  fp0, fp13, fp12
        // Base pointer adjustment
        subi        dstBase, dstBase, 4
    // fp10 [m12][1.0F] : LOAD
    psq_l       fp10, 24(m), 1, 0
    // fp1 [m01][m11]
    ps_merge11  fp1, fp13, fp12
        // Loop counter
        mtctr       count
    // fp3 [m20][m21] : LOAD
    psq_l       fp3,  32(m), 0, 0
    // fp2 [m02][m12]
    ps_merge00  fp2, fp11, fp10
    // fp4 [m22][1.0F] : LOAD
    psq_l       fp4,  40(m), 1, 0


    //-------- Prologue: start the first vector --------

    // fp6 [v0][v1]   : LOAD
    psq_l       fp6,  0(srcBase), 0, 0
    // fp7 [v2][1.0F] : LOAD (srcBase += 8)
    psq_lu      fp7,  8(srcBase), 1, 0
    // fp8 [m00*v0][m10*v0]
    ps_muls0    fp8, fp0, fp6
    // fp9 [m20*v0][m21*v1]
    ps_mul      fp9, fp3, fp6
    // fp8 [m00*v0+m01*v1][m10*v0+m11*v1]
    ps_madds1   fp8, fp1, fp6, fp8
    // fp10 [m20*v0+m22*v2][?]  (ps1 is not used below)
    ps_madd     fp10, fp4, fp7, fp9

_mloop:
    //-------- Unrolled loop --------
    // Extra-indented instructions work on the NEXT vector; the others
    // finish and store the CURRENT one.

        // fp6 [v0][v1]   : LOAD (srcBase += 4)
        psq_lu      fp6,  4(srcBase), 0, 0
    // fp12 [m00*v0+m01*v1+m02*v2][m10*v0+m11*v1+m12*v2]
    ps_madds0   fp12, fp2, fp7, fp8
        // fp7 [v2][1.0F] : LOAD (srcBase += 8)
        psq_lu      fp7,  8(srcBase), 1, 0
    // fp13 [m20*v0+m21*v1+m22*v2][?]  (fp10.ps0 + fp9.ps1)
    ps_sum0     fp13, fp10, fp9, fp9
        // fp8 [m00*v0][m10*v0]
        ps_muls0    fp8, fp0, fp6
        // fp9 [m20*v0][m21*v1]
        ps_mul      fp9, fp3, fp6
    // fp12 [v0'][v1'] : STORE (dstBase += 4)
    psq_stu     fp12,  4(dstBase), 0, 0
        // fp8 [m00*v0+m01*v1][m10*v0+m11*v1]
        ps_madds1   fp8, fp1, fp6, fp8
    // fp13 [v2'][ ?] : STORE ps0 only (dstBase += 8)
    psq_stu     fp13,  8(dstBase), 1, 0
        // fp10 [m20*v0+m22*v2][?]
        ps_madd     fp10, fp4, fp7, fp9

    // LOOP
    bdnz        _mloop


    //-------- Epilogue: finish and store the last vector --------

    // fp12 [m00*v0+m01*v1+m02*v2][m10*v0+m11*v1+m12*v2]
    ps_madds0   fp12, fp2, fp7, fp8
    // fp13 [m20*v0+m21*v1+m22*v2][?]  (fp10.ps0 + fp9.ps1)
    ps_sum0     fp13, fp10, fp9, fp9
    // fp12 [v0'][v1'] : STORE
    psq_stu     fp12,  4(dstBase), 0, 0
    // fp13 [v2'][ ?] : STORE ps0 only
    psq_stu     fp13,  8(dstBase), 1, 0

    blr
}
#endif // GEKKO
531 
532 
533 /*===========================================================================*/
534