1 /*---------------------------------------------------------------------------*
2 Project: Matrix vector Library
3 File: mtxvec.c
4
5 Copyright 1998 - 2001 Nintendo. All rights reserved.
6
7 These coded instructions, statements, and computer programs contain
8 proprietary information of Nintendo of America Inc. and/or Nintendo
9 Company Ltd., and are protected by Federal copyright law. They may
10 not be disclosed to third parties or copied or duplicated in any form,
11 in whole or in part, without the prior written consent of Nintendo.
12
13
14 $Log: mtxvec.c,v $
15 Revision 1.3 2007/01/11 00:45:26 aka
16 Removed win32.h.
17
18 Revision 1.2 2006/02/20 04:25:42 mitu
19 Changed include path from dolphin/ to revolution/.
20
21 Revision 1.1.1.1 2005/05/12 02:15:49 yasuh-to
22 Ported from dolphin source tree.
23
24
25 5 2002/04/11 13:11 Hirose
26 const type specifier support. (by Hiratsu@IRD)
27
28 4 2001/07/23 8:46p Hirose
29 Added PSMultVecArraySR. Improved PSMultVecArray.
30
31 3 2001/07/07 7:40p Hirose
32 Added PSMTXMultVecSR made by Ohki-san@NTSC.
33
34 2 2001/02/23 1:49a Hirose
35 Fixed a bug in PSMTXMultVec.
36
37 1 2001/02/22 11:56p Hirose
38 This section is moved from mtx.c. Added PSMultVec.
39
40 $NoKeywords: $
41 *---------------------------------------------------------------------------*/
42
43 #include <math.h>
44 #include <revolution/mtx.h>
45 #include "mtxAssert.h"
46
47 /*---------------------------------------------------------------------*
48
49 Name: MTXMultVec
50
51 Description: Multiplies a vector by a matrix.
52 m x src = dst.
53
54
55 Arguments: m Matrix.
56 src Source vector for multiplication.
57 dst Resultant vector from multiplication.
58
59 Note: OK if src == dst.
60
61
62 Return: None
63
64 *---------------------------------------------------------------------*/
65 /*---------------------------------------------------------------------*
66 C version
67 *---------------------------------------------------------------------*/
/*
 * C_MTXMultVec
 *
 * Transforms a single point by a matrix:
 *     dst = m * (src->x, src->y, src->z, 1)
 * Column 3 of each matrix row is added unscaled, i.e. the vector is
 * treated as having an implicit fourth component of 1.
 *
 *   m    matrix; rows 0..2 and columns 0..3 are read
 *   src  vector to transform
 *   dst  receives the transformed vector
 *
 * src and dst may be the same object: the input is copied to a local
 * and the result is fully computed before anything is written to dst.
 * Each pointer argument is passed through ASSERTMSG with a "!= 0" test.
 */
void C_MTXMultVec ( const Mtx m, const Vec *src, Vec *dst )
{
    Vec in;
    Vec out;

    ASSERTMSG( (m != 0), MTX_MULTVEC_1 );
    ASSERTMSG( (src != 0), MTX_MULTVEC_2 );
    ASSERTMSG( (dst != 0), MTX_MULTVEC_3 );

    // Snapshot the input
    in.x = src->x;
    in.y = src->y;
    in.z = src->z;

    // One dot product per matrix row, plus that row's column-3 term
    out.x = m[0][0]*in.x + m[0][1]*in.y + m[0][2]*in.z + m[0][3];
    out.y = m[1][0]*in.x + m[1][1]*in.y + m[1][2]*in.z + m[1][3];
    out.z = m[2][0]*in.x + m[2][1]*in.y + m[2][2]*in.z + m[2][3];

    // Publish the result only after all three components are known
    dst->x = out.x;
    dst->y = out.y;
    dst->z = out.z;
}
86
87 /*---------------------------------------------------------------------*
88 Paired-Single assembler version
89 *---------------------------------------------------------------------*
90 Note that NO error checking is performed.
91 *---------------------------------------------------------------------*/
92 #ifdef GEKKO
/*
 * PSMTXMultVec - paired-single implementation of
 *     dst = m * (src.x, src.y, src.z, 1)
 *
 *   m    matrix, read as float pairs at byte offsets 0, 8, 16, 24, 32, 40
 *   src  source vector; x,y are loaded as one pair, z is loaded on its own
 *        with the second slot of the register set to 1
 *   dst  result; x, y and z are stored one float at a time at byte
 *        offsets 0, 4 and 8
 *
 * Both loads from src are issued before the first store to dst, so src
 * and dst may refer to the same vector.  No argument checking is done.
 *
 * Each row is evaluated as: pairwise multiply, pairwise multiply-add,
 * then ps_sum0 to add the two slots of the running pair.  Loads for the
 * next row are interleaved with the arithmetic of the current row.
 *
 * NOTE(review): every psq_l/psq_st below names GQR 0; presumably that
 * register is configured for unscaled floats - confirm against the
 * platform setup.
 */
PSMTXMultVec(const register Mtx m,const register Vec * src,register Vec * dst)93 asm void PSMTXMultVec
94 (
95 const register Mtx m,
96 const register Vec *src,
97 register Vec *dst
98 )
99 {
// NOTE(review): nofralloc presumably suppresses the compiler-generated
// stack frame, which is why the routine ends with its own blr - confirm
// against the assembler documentation.
100 nofralloc
101
102 // fp0 = [v[0]][v[1]]
103 psq_l fp0, 0(src), 0, 0
104 // fp2 = [m[0][0]][m[0][1]]
105 psq_l fp2, 0(m), 0, 0
106 // fp1 = [v[2]][1] (single load; second slot becomes 1, the implicit w)
107 psq_l fp1, 8(src), 1, 0
108 // fp4 = [m[0][0]*v[0]][m[0][1]*v[1]]
109 ps_mul fp4, fp2, fp0
110 // fp3 = [m[0][2]][m[0][3]]
111 psq_l fp3, 8(m), 0, 0
112 // fp5 = [m[0][0]*v[0]+m[0][2]*v[2]][m[0][1]*v[1]+m[0][3]]
113 ps_madd fp5, fp3, fp1, fp4
114 // fp8 = [m[1][0]][m[1][1]]
115 psq_l fp8, 16(m), 0, 0
116 // fp6 slot 0 = sum of both fp5 slots = row 0 result.
// Slot 1 is taken from fp6's previous contents and is never stored.
117 ps_sum0 fp6, fp5, fp6, fp5
118 // fp9 = [m[1][2]][m[1][3]]
119 psq_l fp9, 24(m), 0, 0
120 // fp10 = [m[1][0]*v[0]][m[1][1]*v[1]]
121 ps_mul fp10, fp8, fp0
122 // Store dst[0] (x); only slot 0 of fp6 is written
123 psq_st fp6, 0(dst), 1, 0
124 // fp11 = [m[1][0]*v[0]+m[1][2]*v[2]][m[1][1]*v[1]+m[1][3]]
125 ps_madd fp11, fp9, fp1, fp10
126 // fp2 = [m[2][0]][m[2][1]] (fp2 is reused; row 0 no longer needs it)
127 psq_l fp2, 32(m), 0, 0
128 // fp12 slot 0 = m[1][0]*v[0]+m[1][2]*v[2]+m[1][1]*v[1]+m[1][3] = row 1 result.
// Slot 1 is taken from fp12's previous contents and is never stored.
129 ps_sum0 fp12, fp11, fp12, fp11
130 // fp3 = [m[2][2]][m[2][3]] (fp3 is reused)
131 psq_l fp3, 40(m), 0, 0
132 // fp4 = [m[2][0]*v[0]][m[2][1]*v[1]]
133 ps_mul fp4, fp2, fp0
134 // Store dst[1] (y)
135 psq_st fp12, 4(dst), 1, 0
136 // fp5 = [m[2][0]*v[0]+m[2][2]*v[2]][m[2][1]*v[1]+m[2][3]]
137 ps_madd fp5, fp3, fp1, fp4
138 // fp6 slot 0 = m[2][0]*v[0]+m[2][2]*v[2]+m[2][1]*v[1]+m[2][3] = row 2 result
139 ps_sum0 fp6, fp5, fp6, fp5
140 // Store dst[2] (z)
141 psq_st fp6, 8(dst), 1, 0
142
143 blr
144 }
145 #endif // GEKKO
146
147 /*---------------------------------------------------------------------*
148
149 Name: MTXMultVecArray
150
151 Description: Multiplies an array of vectors by a matrix.
152
153
154 Arguments: m Matrix.
155 srcBase Start of source vector array.
156 dstBase Start of resultant vector array.
157
158 Note: OK if srcBase == dstBase.
159
160 count Number of vectors in srcBase, dstBase arrays
161 Note: Cannot check for array overflow
162
163 Return: None
164
165 *---------------------------------------------------------------------*/
166 /*---------------------------------------------------------------------*
167 C version
168 *---------------------------------------------------------------------*/
/*
 * C_MTXMultVecArray
 *
 * Applies the transform
 *     dst = m * (src.x, src.y, src.z, 1)
 * to each of the first 'count' vectors of srcBase, writing the results
 * to the matching elements of dstBase.
 *
 *   m        matrix; rows 0..2 and columns 0..3 are read
 *   srcBase  first element of the source array
 *   dstBase  first element of the destination array
 *   count    number of elements to process
 *
 * srcBase and dstBase may be the same array: every element is copied
 * to a local and fully transformed before its destination is written.
 * Array bounds cannot be checked here.
 *
 * The loop itself works for any count, but the assertion demands
 * count > 1, matching the restriction stated for the paired-single
 * version of this routine.
 */
void C_MTXMultVecArray ( const Mtx m, const Vec *srcBase, Vec *dstBase, u32 count )
{
    u32 idx;
    Vec in;
    Vec out;

    ASSERTMSG( (m != 0), MTX_MULTVECARRAY_1 );
    ASSERTMSG( (srcBase != 0), MTX_MULTVECARRAY_2 );
    ASSERTMSG( (dstBase != 0), MTX_MULTVECARRAY_3 );
    ASSERTMSG( (count > 1), MTX_MULTVECARRAY_4 );

    for ( idx = 0; idx < count; ++idx )
    {
        // Snapshot the current source element
        in.x = srcBase[idx].x;
        in.y = srcBase[idx].y;
        in.z = srcBase[idx].z;

        // Row dot products plus the column-3 term (implicit w of 1)
        out.x = m[0][0]*in.x + m[0][1]*in.y + m[0][2]*in.z + m[0][3];
        out.y = m[1][0]*in.x + m[1][1]*in.y + m[1][2]*in.z + m[1][3];
        out.z = m[2][0]*in.x + m[2][1]*in.y + m[2][2]*in.z + m[2][3];

        // Write the finished element
        dstBase[idx].x = out.x;
        dstBase[idx].y = out.y;
        dstBase[idx].z = out.z;
    }
}
195
196 /*---------------------------------------------------------------------*
197 Paired-Single assembler version
198 *---------------------------------------------------------------------*
199 Note that NO error checking is performed.
200
201 The count should be greater than 1.
202 *---------------------------------------------------------------------*/
203 #ifdef GEKKO
/*
 * PSMTXMultVecArray - paired-single implementation of
 *     dst[i] = m * (src[i].x, src[i].y, src[i].z, 1)   for i in [0, count)
 *
 *   m        matrix, read as float pairs at byte offsets 0..40
 *   srcBase  first source vector; advanced with load-with-update forms
 *   dstBase  first destination vector; advanced with store-with-update forms
 *   count    number of vectors; must be at least 2 (see below)
 *
 * Structure:
 *   setup    - load the matrix and regroup rows 0 and 1 column-wise, so
 *              that one register produces the x and y results together;
 *              row 2 is kept row-wise
 *   prologue - load vector 0 and start its arithmetic
 *   loop     - runs count-1 times; each pass loads vector i+1, finishes
 *              and stores vector i, and starts the arithmetic of i+1
 *   epilogue - finishes and stores the last vector
 *
 * Why count must be >= 2: the counter is loaded with count-1 and the loop
 * body is entered unconditionally, and bdnz decrements the counter before
 * testing it.  A count of 0 or 1 is therefore not handled.
 *
 * Within the loop, the loads of vector i+1 precede the stores of vector
 * i, so srcBase and dstBase may point at the same array.
 * No argument checking is done.
 */
PSMTXMultVecArray(const register Mtx m,const register Vec * srcBase,register Vec * dstBase,register u32 count)204 asm void PSMTXMultVecArray (
205 const register Mtx m,
206 const register Vec *srcBase,
207 register Vec *dstBase,
208 register u32 count )
209 {
210 nofralloc
211
212 // fp13 [m00][m01] : LOAD
213 psq_l fp13, 0(m), 0, 0
214 // fp12 [m10][m11] : LOAD
215 psq_l fp12, 16(m), 0, 0
216 // Decrement loop count due to unrolling (the last vector is finished in the epilogue)
217 subi count, count, 1
218 // fp11 [m02][m03] : LOAD
219 psq_l fp11, 8(m), 0, 0
220 // fp0 [m00][m10]
221 ps_merge00 fp0, fp13, fp12
222 // Base pointer adjustment: the first store-with-update uses displacement 4,
// so bias dstBase by -4 to make that store land on element 0
223 subi dstBase, dstBase, 4
224 // fp10 [m12][m13] : LOAD
225 psq_l fp10, 24(m), 0, 0
226 // fp1 [m01][m11]
227 ps_merge11 fp1, fp13, fp12
228 // Loop counter
229 mtctr count
230 // fp4 [m20][m21] : LOAD
231 psq_l fp4, 32(m), 0, 0
232 // fp2 [m02][m12]
233 ps_merge00 fp2, fp11, fp10
234 // fp5 [m22][m23] : LOAD
235 psq_l fp5, 40(m), 0, 0
236 // fp3 [m03][m13]
237 ps_merge11 fp3, fp11, fp10
238
//-------- Prologue: vector 0 --------
239 // fp6 [v0][v1] : LOAD
240 psq_l fp6, 0(srcBase), 0, 0
241 // fp7 [v2][1.0F] : LOAD (update form: srcBase now addresses this vector's z)
242 psq_lu fp7, 8(srcBase), 1, 0
243 // fp8 [m00*v0+m03][m10*v0+m13]
244 ps_madds0 fp8, fp0, fp6, fp3
245 // fp9 [m20*v0][m21*v1]
246 ps_mul fp9, fp4, fp6
247 // fp8 [m00*v0+m01*v1+m03][m10*v0+m11*v1+m13]
248 ps_madds1 fp8, fp1, fp6, fp8
249 // fp10 [m20*v0+m22*v2][m21*v1+m23*1.0F] (fp10 is reused; the merges above are done with it)
250 ps_madd fp10, fp5, fp7, fp9
251
252 _mloop:
253 //-------- Unrolled loop --------
254
255 // fp6 [v0][v1] : LOAD next vector (z of the previous vector + 4 bytes)
256 psq_lu fp6, 4(srcBase), 0, 0
257 // fp12 [m00*v0+m01*v1+m02*v2+m03][m10*v0+m11*v1+m12*v2+m13] for the current vector
258 ps_madds0 fp12, fp2, fp7, fp8
259 // fp7 [v2][1.0F] : LOAD next vector
260 psq_lu fp7, 8(srcBase), 1, 0
261 // fp13 [m20*v0+m21*v1+m22*v2+m23][?] for the current vector
262 ps_sum0 fp13, fp10, fp9, fp10
263 // fp8 [m00*v0+m03][m10*v0+m13] for the next vector
264 ps_madds0 fp8, fp0, fp6, fp3
265 // fp9 [m20*v0][m21*v1] for the next vector
266 ps_mul fp9, fp4, fp6
267 // fp12 [v0'][v1'] : STORE x,y of the current result
268 psq_stu fp12, 4(dstBase), 0, 0
269 // fp8 [m00*v0+m01*v1+m03][m10*v0+m11*v1+m13]
270 ps_madds1 fp8, fp1, fp6, fp8
271 // fp13 [v2'][ ?] : STORE z of the current result (only slot 0 is written)
272 psq_stu fp13, 8(dstBase), 1, 0
273 // fp10 [m20*v0+m22*v2][m21*v1+m23*1.0F]
274 ps_madd fp10, fp5, fp7, fp9
275
276 // LOOP
277 bdnz _mloop
278
279
//-------- Epilogue: finish and store the last vector --------
280 // fp12 [m00*v0+m01*v1+m02*v2+m03][m10*v0+m11*v1+m12*v2+m13]
281 ps_madds0 fp12, fp2, fp7, fp8
282 // fp13 [m20*v0+m21*v1+m22*v2+m23][?]
283 ps_sum0 fp13, fp10, fp9, fp10
284 // fp12 [v0'][v1'] : STORE
285 psq_stu fp12, 4(dstBase), 0, 0
286 // fp13 [v2'][ ?] : STORE
287 psq_stu fp13, 8(dstBase), 1, 0
288
289 blr
290 }
291 #endif // GEKKO
292
293
294 /*---------------------------------------------------------------------*
295
296 Name: MTXMultVecSR
297
298 Description: Multiplies a vector by a matrix 3x3 (Scaling and Rotation)
299 component.
300
301 m x src = dst.
302
303 Arguments: m Matrix.
304 src Source vector for multiply.
305 dst Resultant vector from multiply.
306
307 Note: OK if src == dst.
308
309 Return: None
310
311 *---------------------------------------------------------------------*/
312 /*---------------------------------------------------------------------*
313 C version
314 *---------------------------------------------------------------------*/
/*
 * C_MTXMultVecSR
 *
 * Transforms a vector by the upper-left 3x3 (scale/rotation) part of a
 * matrix:
 *     dst = m[0..2][0..2] * (src->x, src->y, src->z)
 * Column 3 of the matrix is not read, so no translation is applied.
 *
 *   m    matrix; rows 0..2 and columns 0..2 are read
 *   src  vector to transform
 *   dst  receives the transformed vector
 *
 * src and dst may be the same object: the input is copied to a local
 * and the result is fully computed before anything is written to dst.
 * Each pointer argument is passed through ASSERTMSG with a "!= 0" test.
 */
void C_MTXMultVecSR ( const Mtx m, const Vec *src, Vec *dst )
{
    Vec in;
    Vec out;

    ASSERTMSG( (m != 0), MTX_MULTVECSR_1 );
    ASSERTMSG( (src != 0), MTX_MULTVECSR_2 );
    ASSERTMSG( (dst != 0), MTX_MULTVECSR_3 );

    // Snapshot the input
    in.x = src->x;
    in.y = src->y;
    in.z = src->z;

    // Row dot products over the 3x3 part only (no column-3 term)
    out.x = m[0][0]*in.x + m[0][1]*in.y + m[0][2]*in.z;
    out.y = m[1][0]*in.x + m[1][1]*in.y + m[1][2]*in.z;
    out.z = m[2][0]*in.x + m[2][1]*in.y + m[2][2]*in.z;

    // Publish the result only after all three components are known
    dst->x = out.x;
    dst->y = out.y;
    dst->z = out.z;
}
333
334 /*---------------------------------------------------------------------*
335 Paired-Single assembler version
336 *---------------------------------------------------------------------*
337 Note that this performs NO error checking.
338 *---------------------------------------------------------------------*/
339 #ifdef GEKKO
/*
 * PSMTXMultVecSR - paired-single implementation of
 *     dst = m[0..2][0..2] * (src.x, src.y, src.z)
 *
 *   m    matrix, read as float pairs at byte offsets 0..40
 *   src  source vector; x,y are loaded as one pair, z is loaded on its own
 *        with the second slot of the register set to 1.0
 *   dst  result; x, y and z are stored one float at a time at byte
 *        offsets 0, 4 and 8
 *
 * Per row: ps_mul forms [m_r0*x][m_r1*y], ps_sum0 adds the two slots into
 * slot 0, and ps_madd adds m_r2*z to slot 0.  The ps_madd also combines
 * m[r][3] with the 1.0 in slot 1, but the stores write slot 0 only, so
 * column 3 of the matrix never reaches dst.
 *
 * Both loads from src are issued before the first store to dst, so src
 * and dst may refer to the same vector.  No argument checking is done.
 *
 * NOTE(review): every psq_l/psq_st below names GQR 0; presumably that
 * register is configured for unscaled floats - confirm against the
 * platform setup.
 */
PSMTXMultVecSR(const register Mtx m,const register Vec * src,register Vec * dst)340 asm void PSMTXMultVecSR
341 (
342 const register Mtx m,
343 const register Vec *src,
344 register Vec *dst
345 )
346 {
347 nofralloc
348 psq_l fp0, 0(m), 0, 0 // m[0][0], m[0][1] (loaded through GQR 0)
349
350 // fp6 = [x][y]
351 psq_l fp6, 0(src), 0, 0
352
353 psq_l fp2, 16(m), 0, 0 // m[1][0], m[1][1]
354
355
356 // fp8 = [m00*x][m01*y] - partial products for the X result
357 ps_mul fp8, fp0, fp6
358 psq_l fp4, 32(m), 0, 0 // m[2][0], m[2][1]
359
360 // fp10 = [m10*x][m11*y] - partial products for the Y result
361 ps_mul fp10, fp2, fp6
362 psq_l fp7, 8(src), 1, 0 // fp7 = [z][1.0]
363
364 // fp12 = [m20*x][m21*y] - partial products for the Z result
365 ps_mul fp12, fp4, fp6 // last use of fp6
366 psq_l fp3, 24(m), 0, 0 // m[1][2], m[1][3]
367
// fp8 slot 0 = m00*x + m01*y
368 ps_sum0 fp8, fp8, fp8, fp8
369 psq_l fp5, 40(m), 0, 0 // m[2][2], m[2][3]
370
// fp10 slot 0 = m10*x + m11*y
371 ps_sum0 fp10, fp10, fp10, fp10
372 psq_l fp1, 8(m), 0, 0 // m[0][2], m[0][3]
373
// fp12 slot 0 = m20*x + m21*y
374 ps_sum0 fp12, fp12, fp12, fp12
// fp9 slot 0 = m02*z + m00*x + m01*y (slot 1 is not stored)
375 ps_madd fp9, fp1, fp7, fp8
376 psq_st fp9, 0(dst), 1, 0 // Store X (slot 0 only)
377
// fp11 slot 0 = m12*z + m10*x + m11*y (slot 1 is not stored)
378 ps_madd fp11, fp3, fp7, fp10
379 psq_st fp11, 4(dst), 1, 0 // Store Y (slot 0 only)
380
// fp13 slot 0 = m22*z + m20*x + m21*y (slot 1 is not stored)
381 ps_madd fp13, fp5, fp7, fp12
382 psq_st fp13, 8(dst), 1, 0 // Store Z (slot 0 only)
383
384 blr
385
386 }
387 #endif // GEKKO
388
389 /*---------------------------------------------------------------------*
390
391 Name: MTXMultVecArraySR
392
393 Description: Multiplies an array of vectors by a matrix 3x3
394 (Scaling and Rotation) component.
395
396 Arguments: m Matrix.
397 srcBase Start of source vector array.
398 dstBase Start of resultant vector array.
399
400 Note: OK if srcBase == dstBase.
401
402 count Number of vectors in srcBase, dstBase arrays
403 Note: Cannot check for array overflow
404
405 Return: None
406
407 *---------------------------------------------------------------------*/
408 /*---------------------------------------------------------------------*
409 C version
410 *---------------------------------------------------------------------*/
/*
 * C_MTXMultVecArraySR
 *
 * Applies the upper-left 3x3 (scale/rotation) part of a matrix to each
 * of the first 'count' vectors of srcBase, writing the results to the
 * matching elements of dstBase:
 *     dst = m[0..2][0..2] * (src.x, src.y, src.z)
 * Column 3 of the matrix is not read, so no translation is applied.
 *
 *   m        matrix; rows 0..2 and columns 0..2 are read
 *   srcBase  first element of the source array
 *   dstBase  first element of the destination array
 *   count    number of elements to process
 *
 * srcBase and dstBase may be the same array: every element is copied
 * to a local and fully transformed before its destination is written.
 * Array bounds cannot be checked here.
 *
 * The loop itself works for any count, but the assertion demands
 * count > 1.
 */
void C_MTXMultVecArraySR ( const Mtx m, const Vec *srcBase, Vec *dstBase, u32 count )
{
    u32 idx;
    Vec in;
    Vec out;

    ASSERTMSG( (m != 0), MTX_MULTVECARRAYSR_1 );
    ASSERTMSG( (srcBase != 0), MTX_MULTVECARRAYSR_2 );
    ASSERTMSG( (dstBase != 0), MTX_MULTVECARRAYSR_3 );
    ASSERTMSG( (count > 1), MTX_MULTVECARRAYSR_4 );

    for ( idx = 0; idx < count; ++idx )
    {
        // Snapshot the current source element
        in.x = srcBase[idx].x;
        in.y = srcBase[idx].y;
        in.z = srcBase[idx].z;

        // Row dot products over the 3x3 part only (no column-3 term)
        out.x = m[0][0]*in.x + m[0][1]*in.y + m[0][2]*in.z;
        out.y = m[1][0]*in.x + m[1][1]*in.y + m[1][2]*in.z;
        out.z = m[2][0]*in.x + m[2][1]*in.y + m[2][2]*in.z;

        // Write the finished element
        dstBase[idx].x = out.x;
        dstBase[idx].y = out.y;
        dstBase[idx].z = out.z;
    }
}
437
438 /*---------------------------------------------------------------------*
439 Paired-Single assembler version
440 *---------------------------------------------------------------------*
441 Note that NO error checking is performed.
442 *---------------------------------------------------------------------*/
443 #ifdef GEKKO
/*
 * PSMTXMultVecArraySR - paired-single implementation of
 *     dst[i] = m[0..2][0..2] * (src[i].x, src[i].y, src[i].z)
 * for i in [0, count).
 *
 *   m        matrix; columns 0..1 of each row are loaded as a pair, column 2
 *            is loaded on its own (second slot set to 1.0), so column 3 is
 *            never fetched
 *   srcBase  first source vector; advanced with load-with-update forms
 *   dstBase  first destination vector; advanced with store-with-update forms
 *   count    number of vectors; must be at least 2 (see below)
 *
 * Structure:
 *   setup    - load the matrix and regroup rows 0 and 1 column-wise, so
 *              that one register produces the x and y results together;
 *              row 2 is kept row-wise
 *   prologue - load vector 0 and start its arithmetic
 *   loop     - runs count-1 times; each pass loads vector i+1, finishes
 *              and stores vector i, and starts the arithmetic of i+1
 *   epilogue - finishes and stores the last vector
 *
 * Why count must be >= 2: the counter is loaded with count-1 and the loop
 * body is entered unconditionally, and bdnz decrements the counter before
 * testing it.  A count of 0 or 1 is therefore not handled.
 *
 * Within the loop, the loads of vector i+1 precede the stores of vector
 * i, so srcBase and dstBase may point at the same array.
 * No argument checking is done.
 */
PSMTXMultVecArraySR(const register Mtx m,const register Vec * srcBase,register Vec * dstBase,register u32 count)444 asm void PSMTXMultVecArraySR (
445 const register Mtx m,
446 const register Vec *srcBase,
447 register Vec *dstBase,
448 register u32 count )
449 {
450 nofralloc
451
452 // fp13 [m00][m01] : LOAD
453 psq_l fp13, 0(m), 0, 0
454 // fp12 [m10][m11] : LOAD
455 psq_l fp12, 16(m), 0, 0
456 // Decrement loop count due to unrolling (the last vector is finished in the epilogue)
457 subi count, count, 1
458 // fp11 [m02][1.0F] : LOAD (single load, m03 is not fetched)
459 psq_l fp11, 8(m), 1, 0
460 // fp0 [m00][m10]
461 ps_merge00 fp0, fp13, fp12
462 // Base pointer adjustment: the first store-with-update uses displacement 4,
// so bias dstBase by -4 to make that store land on element 0
463 subi dstBase, dstBase, 4
464 // fp10 [m12][1.0F] : LOAD (single load, m13 is not fetched)
465 psq_l fp10, 24(m), 1, 0
466 // fp1 [m01][m11]
467 ps_merge11 fp1, fp13, fp12
468 // Loop counter
469 mtctr count
470 // fp3 [m20][m21] : LOAD
471 psq_l fp3, 32(m), 0, 0
472 // fp2 [m02][m12]
473 ps_merge00 fp2, fp11, fp10
474 // fp4 [m22][1.0F] : LOAD (single load, m23 is not fetched)
475 psq_l fp4, 40(m), 1, 0
476
477
//-------- Prologue: vector 0 --------
478 // fp6 [v0][v1] : LOAD
479 psq_l fp6, 0(srcBase), 0, 0
480 // fp7 [v2][1.0F] : LOAD (update form: srcBase now addresses this vector's z)
481 psq_lu fp7, 8(srcBase), 1, 0
482 // fp8 [m00*v0][m10*v0]
483 ps_muls0 fp8, fp0, fp6
484 // fp9 [m20*v0][m21*v1]
485 ps_mul fp9, fp3, fp6
486 // fp8 [m00*v0+m01*v1][m10*v0+m11*v1]
487 ps_madds1 fp8, fp1, fp6, fp8
488 // fp10 [m20*v0+m22*v2][unused] (fp10 is reused; the merge above is done with it)
489 ps_madd fp10, fp4, fp7, fp9
490
491 _mloop:
492 //-------- Unrolled loop --------
493
494 // fp6 [v0][v1] : LOAD next vector (z of the previous vector + 4 bytes)
495 psq_lu fp6, 4(srcBase), 0, 0
496 // fp12 [m00*v0+m01*v1+m02*v2][m10*v0+m11*v1+m12*v2] for the current vector
497 ps_madds0 fp12, fp2, fp7, fp8
498 // fp7 [v2][1.0F] : LOAD next vector
499 psq_lu fp7, 8(srcBase), 1, 0
500 // fp13 [m20*v0+m21*v1+m22*v2][?] : slot 0 of fp10 plus slot 1 of fp9
501 ps_sum0 fp13, fp10, fp9, fp9
502 // fp8 [m00*v0][m10*v0] for the next vector
503 ps_muls0 fp8, fp0, fp6
504 // fp9 [m20*v0][m21*v1] for the next vector
505 ps_mul fp9, fp3, fp6
506 // fp12 [v0'][v1'] : STORE x,y of the current result
507 psq_stu fp12, 4(dstBase), 0, 0
508 // fp8 [m00*v0+m01*v1][m10*v0+m11*v1]
509 ps_madds1 fp8, fp1, fp6, fp8
510 // fp13 [v2'][ ?] : STORE z of the current result (only slot 0 is written)
511 psq_stu fp13, 8(dstBase), 1, 0
512 // fp10 [m20*v0+m22*v2][unused]
513 ps_madd fp10, fp4, fp7, fp9
514
515 // LOOP
516 bdnz _mloop
517
518
//-------- Epilogue: finish and store the last vector --------
519 // fp12 [m00*v0+m01*v1+m02*v2][m10*v0+m11*v1+m12*v2]
520 ps_madds0 fp12, fp2, fp7, fp8
521 // fp13 [m20*v0+m21*v1+m22*v2][?]
522 ps_sum0 fp13, fp10, fp9, fp9
523 // fp12 [v0'][v1'] : STORE
524 psq_stu fp12, 4(dstBase), 0, 0
525 // fp13 [v2'][ ?] : STORE
526 psq_stu fp13, 8(dstBase), 1, 0
527
528 blr
529 }
530 #endif // GEKKO
531
532
533 /*===========================================================================*/
534