1 /*---------------------------------------------------------------------------*
2 Project: matrix vector Library
3 File: mtx44vec.c
4
5 Copyright 1998-2001 Nintendo. All rights reserved.
6
7 These coded instructions, statements, and computer programs contain
8 proprietary information of Nintendo of America Inc. and/or Nintendo
9 Company Ltd., and are protected by Federal copyright law. They may
10 not be disclosed to third parties or copied or duplicated in any form,
11 in whole or in part, without the prior written consent of Nintendo.
12
13
14 $Log: mtx44vec.c,v $
15 Revision 1.2 02/20/2006 04:25:42 mitu
16 changed include path from dolphin/ to revolution/.
17
18 Revision 1.1.1.1 2005/05/12 02:15:49 yasuh-to
19 transitioned from the Dolphin source tree
20
21
22 2 02/04/11 13:11 Hirose
23 const type specifier support. (worked by Hiratsu@IRD)
24
25 1 7/30/01 10:51p Hirose
26 Initial check in.
27
28 $NoKeywords: $
29 *---------------------------------------------------------------------------*/
30 #ifdef WIN32
31 #include <win32/win32.h>
32 #endif
33
34 #include <math.h>
35 #include <revolution/mtx.h>
36 #include <revolution/mtx/mtx44ext.h>
37 #include "mtx44extAssert.h"
38
39
40
41 /*---------------------------------------------------------------------*
42
43
44
45 MODEL SECTION
46
47
48
49 *---------------------------------------------------------------------*/
50 /* NOTE: Prototypes for these functions are defined in "mtx44ext.h". */
51
52 /*---------------------------------------------------------------------*
53 Name: MTX44MultVec
54
55 Description: multiplies a vector by a matrix.
56 m x src = dst.
57
58 Arguments: m matrix.
59 src source vector for multiply.
60 dst resultant vector from multiply.
61 note: ok if src == dst.
62
63 Return : none
64 *---------------------------------------------------------------------*/
65 /*---------------------------------------------------------------------*
66 C version
67 *---------------------------------------------------------------------*/
void C_MTX44MultVec ( const Mtx44 m, const Vec *src, Vec *dst )
{
    Vec in;     // private copy of *src, so that src == dst is safe
    Vec out;    // transformed x, y, z before the divide by w
    f32 rcpW;   // holds w first, then its reciprocal

    ASSERTMSG( (m != 0), MTX44_MULTVEC_1 );
    ASSERTMSG( (src != 0), MTX44_MULTVEC_2 );
    ASSERTMSG( (dst != 0), MTX44_MULTVEC_3 );

    in = *src;

    // The source is treated as (x, y, z, 1): column 3 of every row is
    // added unscaled.  Row 3 produces w, which divides the result.
    rcpW = m[3][0]*in.x + m[3][1]*in.y + m[3][2]*in.z + m[3][3];
    rcpW = 1.0f / rcpW;

    out.x = m[0][0]*in.x + m[0][1]*in.y + m[0][2]*in.z + m[0][3];
    out.y = m[1][0]*in.x + m[1][1]*in.y + m[1][2]*in.z + m[1][3];
    out.z = m[2][0]*in.x + m[2][1]*in.y + m[2][2]*in.z + m[2][3];

    // write the normalized result
    dst->x = out.x * rcpW;
    dst->y = out.y * rcpW;
    dst->z = out.z * rcpW;
}
89
90 /*---------------------------------------------------------------------*
91 Paired-Single assembler version
92 *---------------------------------------------------------------------*
93 Note that NO error checking is performed.
94 *---------------------------------------------------------------------*/
95 #ifdef GEKKO
// Paired-single version of MTX44MultVec: treats *src as (x, y, z, 1),
// multiplies it by all four rows of m and scales x, y, z by 1/w.
// Both loads from src are issued before anything is stored to dst.
// In the register notes below, "a, b" means ps0 = a, ps1 = b.
PSMTX44MultVec(const register Mtx44 m,const register Vec * src,register Vec * dst)96 asm void PSMTX44MultVec (
97 const register Mtx44 m,
98 const register Vec *src,
99 register Vec *dst
100 )
101 {
102 nofralloc;
103 psq_l fp0, 0(src), 0, 0; // fp0 <- src.x, src.y
104 psq_l fp2, 48(m), 0, 0; // fp2 <- m30, m31
105 psq_l fp1, 8(src), 1, 0; // fp1 <- src.z, 1.0
106 ps_mul fp4, fp0, fp2; // fp4 <- m30*x, m31*y
107 psq_l fp3, 56(m), 0, 0; // fp3 <- m32, m33
108 ps_madd fp5, fp1, fp3, fp4; // fp5 <- m32*z + m30*x, m33 + m31*y
109 ps_merge11 fp12, fp1, fp1; // fp12 = 1.0, 1.0
110 ps_sum0 fp13, fp5, fp5, fp5; // fp13.ps0 <- w (sum of both halves of fp5)
111 psq_l fp4, 0(m), 0, 0; // fp4 <- m00, m01
112 ps_merge00 fp13, fp13, fp13; // fp13 <- w, w
113 psq_l fp5, 8(m), 0, 0; // fp5 <- m02, m03
114 ps_div fp13, fp12, fp13; // fp13 <- 1/w
115 psq_l fp6, 16(m), 0, 0; // fp6 <- m10, m11
116 psq_l fp7, 24(m), 0, 0; // fp7 <- m12, m13
117 psq_l fp8, 32(m), 0, 0; // fp8 <- m20, m21
118 psq_l fp9, 40(m), 0, 0; // fp9 <- m22, m23
119 ps_mul fp4, fp0, fp4; // fp4 <- m00*x, m01*y
120 ps_madd fp2, fp1, fp5, fp4; // fp2 <- m02*z + m00*x, m03 + m01*y
121 ps_mul fp6, fp0, fp6; // fp6 <- m10*x, m11*y
122 ps_madd fp3, fp1, fp7, fp6; // fp3 <- m12*z + m10*x, m13 + m11*y
123 ps_mul fp8, fp0, fp8; // fp8 <- m20*x, m21*y
124 ps_sum0 fp2, fp2, fp2, fp2; // fp2 <- dst.x, --   (not yet divided by w)
125 ps_madd fp9, fp1, fp9, fp8; // fp9 <- m22*z + m20*x, m23 + m21*y
126 ps_sum1 fp2, fp3, fp2, fp3; // fp2 <- dst.x, dst.y   (not yet divided by w)
127 ps_sum0 fp3, fp9, fp9, fp9; // fp3.ps0 <- dst.z   (not yet divided by w)
128 ps_mul fp2, fp2, fp13; // fp2 <- x/w, y/w
129 psq_st fp2, 0(dst), 0, 0; // store dst.x, dst.y
130 ps_mul fp3, fp3, fp13; // fp3.ps0 <- z/w
131 psq_st fp3, 8(dst), 1, 0; // store dst.z only (single-value store)
132 blr;
133 }
134 #endif // GEKKO
135
136 /*---------------------------------------------------------------------*
137 Name: MTX44MultVecArray
138
139 Description: multiplies an array of vectors by a matrix.
140
141
142 Arguments: m matrix.
143 srcBase start of source vector array.
144 dstBase start of resultant vector array.
145 note: ok if srcBase == dstBase.
146 count number of vectors in srcBase, dstBase arrays
147 note: cannot check for array overflow
148
149 Return : none
150 *---------------------------------------------------------------------*/
151 /*---------------------------------------------------------------------*
152 C version
153 *---------------------------------------------------------------------*/
void C_MTX44MultVecArray ( const Mtx44 m, const Vec *srcBase, Vec *dstBase, u32 count )
{
    u32 n;
    Vec in;     // private copy of the current source element
    Vec out;    // transformed x, y, z before the divide by w
    f32 rcpW;   // holds w first, then its reciprocal

    ASSERTMSG( (m != 0), MTX44_MULTVECARRAY_1 );
    ASSERTMSG( (srcBase != 0), MTX44_MULTVECARRAY_2 );
    ASSERTMSG( (dstBase != 0), MTX44_MULTVECARRAY_3 );

    for ( n = 0; n < count; n++ )
    {
        // Copy the element before writing, so srcBase == dstBase is safe.
        in = srcBase[n];

        // Each element is treated as (x, y, z, 1); row 3 produces w.
        rcpW = m[3][0]*in.x + m[3][1]*in.y + m[3][2]*in.z + m[3][3];
        rcpW = 1.0f / rcpW;

        out.x = m[0][0]*in.x + m[0][1]*in.y + m[0][2]*in.z + m[0][3];
        out.y = m[1][0]*in.x + m[1][1]*in.y + m[1][2]*in.z + m[1][3];
        out.z = m[2][0]*in.x + m[2][1]*in.y + m[2][2]*in.z + m[2][3];

        // write the normalized result
        dstBase[n].x = out.x * rcpW;
        dstBase[n].y = out.y * rcpW;
        dstBase[n].z = out.z * rcpW;
    }
}
182
183 /*---------------------------------------------------------------------*
184 Paired-Single assembler version
185 *---------------------------------------------------------------------*
186 Note that NO error checking is performed.
187 *---------------------------------------------------------------------*/
188 #ifdef GEKKO
// Paired-single version of MTX44MultVecArray.  The loop is software
// pipelined: the prologue starts vector 0, each pass of @loop finishes and
// stores one vector while loading and starting the next, and the code after
// the loop finishes the last vector.
// In the register notes below, "a, b" means ps0 = a, ps1 = b.
// NOTE(review): CTR is loaded with count - 1 and bdnz decrements CTR before
// testing it, so count == 0 or count == 1 does not appear to leave the loop
// after the intended number of passes; callers seem to need count >= 2 --
// confirm.
PSMTX44MultVecArray(const register Mtx44 m,const register Vec * srcBase,register Vec * dstBase,register u32 count)189 asm void PSMTX44MultVecArray (
190 const register Mtx44 m,
191 const register Vec *srcBase,
192 register Vec *dstBase,
193 register u32 count
194 )
195 {
196 nofralloc;
// Disabled guard: it would have sent count == 1 to PSMTX44MultVec.
197 // cmpwi count, 1;
198 // bne @array
199 // b PSMTX44MultVec
200 //@array:
201 stwu rsp, -16(rsp); // open a 16-byte stack frame (used to save fp14)
202 addi count, count, -1; // loop passes = count - 1; last vector is done after the loop
203 psq_l fp6, 48(m), 0, 0; // fp6 <- m30, m31
204 mtctr count;
205 psq_l fp8, 0(srcBase), 0, 0; // fp8 <- src.x, src.y
206 addi dstBase, dstBase, -4; // pre-bias so the first psq_stu 4(dstBase) hits dst[0].x
207 psq_l fp7, 56(m), 0, 0; // fp7 <- m32, m33
208 psq_lu fp9, 8(srcBase), 1, 0; // fp9 <- src.z, 1.0   (srcBase += 8)
209 ps_mul fp13, fp6, fp8; // fp13 <- m30*x, m31*y
210 psq_l fp0, 0(m), 0, 0; // fp0 <- m00, m01
211 stfd fp14, 8(rsp); // save fp14 before it is overwritten below
212 ps_madd fp13, fp7, fp9, fp13; // fp13 <- m32*z + m30*x, m33 + m31*y
213 psq_l fp2, 16(m), 0, 0; // fp2 <- m10, m11
214 ps_merge11 fp14, fp9, fp9; // fp14 = 1.0F, 1.0F
215 ps_mul fp10, fp0, fp8; // fp10 <- m00*x, m01*y
216 psq_l fp4, 32(m), 0, 0; // fp4 <- m20, m21
217 ps_mul fp11, fp2, fp8; // fp11 <- m10*x, m11*y
218 psq_l fp1, 8(m), 0, 0; // fp1 <- m02, m03
219 ps_mul fp12, fp4, fp8; // fp12 <- m20*x, m21*y
220 psq_l fp3, 24(m), 0, 0; // fp3 <- m12, m13
221 ps_sum0 fp13, fp13, fp13, fp13; // fp13 <- w
222 psq_l fp5, 40(m), 0, 0; // fp5 <- m22, m23
223
224 @loop:
225 ps_madd fp10, fp1, fp9, fp10; // fp10 <- m02*z + m00*x, m03 + m01*y
226 ps_madd fp11, fp3, fp9, fp11; // fp11 <- m12*z + m10*x, m13 + m11*y
227 ps_madd fp12, fp5, fp9, fp12; // fp12 <- m22*z + m20*x, m23 + m21*y
228 ps_sum0 fp10, fp10, fp10, fp10; // fp10 <- x
229 ps_sum0 fp11, fp11, fp11, fp11; // fp11 <- y
230 ps_sum0 fp12, fp12, fp12, fp12; // fp12 <- z
231 ps_div fp13, fp14, fp13; // fp13.ps0 <- 1/w
232
// Load the next source vector before the current result is stored.
233 psq_lu fp8, 4(srcBase), 0, 0; // fp8 <- next src.x, src.y   (srcBase += 4)
234 psq_lu fp9, 8(srcBase), 1, 0; // fp9 <- next src.z, 1.0   (srcBase += 8)
235
// Scale by 1/w and store one float at a time; each psq_stu advances dstBase by 4.
236 ps_mul fp10, fp10, fp13;
237 psq_stu fp10, 4(dstBase), 1, 0; // store dst.x
238 ps_mul fp11, fp11, fp13;
239 psq_stu fp11, 4(dstBase), 1, 0; // store dst.y
240 ps_mul fp12, fp12, fp13;
241 psq_stu fp12, 4(dstBase), 1, 0; // store dst.z
242
// Start the products for the vector just loaded.
243 ps_mul fp13, fp6, fp8; // fp13 <- m30*x, m31*y
244
245 ps_mul fp10, fp0, fp8; // fp10 <- m00*x, m01*y
246 ps_mul fp11, fp2, fp8; // fp11 <- m10*x, m11*y
247 ps_madd fp13, fp7, fp9, fp13; // fp13 <- m32*z + m30*x, m33 + m31*y
248 ps_mul fp12, fp4, fp8; // fp12 <- m20*x, m21*y
249 ps_sum0 fp13, fp13, fp13, fp13; // fp13 <- w
250
251 bdnz+ @loop
252
// Finish the last vector (already loaded and partly multiplied above).
253 ps_madd fp10, fp1, fp9, fp10;
254 ps_madd fp11, fp3, fp9, fp11;
255 ps_madd fp12, fp5, fp9, fp12;
256 ps_sum0 fp10, fp10, fp10, fp10; // fp10 <- x
257 ps_sum0 fp11, fp11, fp11, fp11; // fp11 <- y
258 ps_sum0 fp12, fp12, fp12, fp12; // fp12 <- z
259 ps_div fp13, fp14, fp13; // fp13.ps0 <- 1/w
260
// Non-updating stores: offsets 4/8/12 are relative to the biased dstBase.
261 ps_mul fp10, fp10, fp13;
262 psq_st fp10, 4(dstBase), 1, 0; // store dst.x
263 ps_mul fp11, fp11, fp13;
264 psq_st fp11, 8(dstBase), 1, 0; // store dst.y
265 ps_mul fp12, fp12, fp13;
266 psq_st fp12, 12(dstBase), 1, 0; // store dst.z
267
268 lfd fp14, 8(rsp); // restore the saved fp14
269 addi rsp, rsp, 16; // release the stack frame
270 blr;
271 }
272 #endif // GEKKO
273
274
275 /*---------------------------------------------------------------------*
276 Name: MTX44MultVecSR
277
278 Description: multiplies a vector by a matrix 3x3 (Scaling and Rotation)
279 component.
280
281 m x src = dst.
282
283 Arguments: m matrix.
284 src source vector for multiply.
285 dst resultant vector from multiply.
286 note: ok if src == dst.
287
288 Return : none
289 *---------------------------------------------------------------------*/
void C_MTX44MultVecSR ( const Mtx44 m, const Vec *src, Vec *dst )
{
    Vec in;     // private copy of *src, so that src == dst is safe

    ASSERTMSG( (m != 0), MTX44_MULTVECSR_1 );
    ASSERTMSG( (src != 0), MTX44_MULTVECSR_2 );
    ASSERTMSG( (dst != 0), MTX44_MULTVECSR_3 );

    in = *src;

    // Only the upper-left 3x3 block of m is applied: column 3 and row 3
    // are never read, and no divide by w takes place.
    dst->x = m[0][0]*in.x + m[0][1]*in.y + m[0][2]*in.z;
    dst->y = m[1][0]*in.x + m[1][1]*in.y + m[1][2]*in.z;
    dst->z = m[2][0]*in.x + m[2][1]*in.y + m[2][2]*in.z;
}
308
309 /*---------------------------------------------------------------------*
310 Paired-Single assembler version
311 *---------------------------------------------------------------------*
312 Note that this performs NO error checking.
313 *---------------------------------------------------------------------*/
314 #ifdef GEKKO
// Paired-single version of MTX44MultVecSR: dst = (upper-left 3x3 of m) * src.
// Only ps0 of each result register is stored, so the m[r][3] values that are
// loaded into the ps1 halves never reach dst.
// Both loads from src are issued before anything is stored to dst.
PSMTX44MultVecSR(const register Mtx44 m,const register Vec * src,register Vec * dst)315 asm void PSMTX44MultVecSR(
316 const register Mtx44 m,
317 const register Vec *src,
318 register Vec *dst
319 )
320 {
321
322 nofralloc
323 psq_l fp0, 0(m), 0, 0 // m[0][0], m[0][1] GQR0 = 0
324
325 // fp6 <- src.x, src.y
326 psq_l fp6, 0(src), 0, 0
327
328 psq_l fp2, 16(m), 0, 0 // m[1][0], m[1][1]
329
330
331 // fp8 = m00x m01y // next X
332 ps_mul fp8, fp0, fp6
333 psq_l fp4, 32(m), 0, 0 // m[2][0], m[2][1]
334
335 // fp10 = m10x m11y // next Y
336 ps_mul fp10, fp2, fp6
337 psq_l fp7, 8(src), 1, 0 // fp7 <- src.z, 1.0
338
339 // fp12 = m20x m21y // next Z
340 ps_mul fp12, fp4, fp6 // last use of fp6
341 psq_l fp3, 24(m), 0, 0 // m[1][2], m[1][3]
342
343 ps_sum0 fp8, fp8, fp8, fp8 // fp8.ps0 <- m00x + m01y
344 psq_l fp5, 40(m), 0, 0 // m[2][2], m[2][3]
345
346 ps_sum0 fp10, fp10, fp10, fp10 // fp10.ps0 <- m10x + m11y
347 psq_l fp1, 8(m), 0, 0 // m[0][2], m[0][3]
348
349 ps_sum0 fp12, fp12, fp12, fp12 // fp12.ps0 <- m20x + m21y
350 ps_madd fp9, fp1, fp7, fp8 // fp9.ps0 <- m02z + m00x + m01y
351 psq_st fp9, 0(dst), 1, 0 // store X
352
353 ps_madd fp11, fp3, fp7, fp10 // fp11.ps0 <- m12z + m10x + m11y
354 psq_st fp11, 4(dst), 1, 0 // store Y
355
356 ps_madd fp13, fp5, fp7, fp12 // fp13.ps0 <- m22z + m20x + m21y
357 psq_st fp13, 8(dst), 1, 0 // store Z
358
359 blr
360
361 }
362 #endif // GEKKO
363
364 /*---------------------------------------------------------------------*
365 Name: MTX44MultVecArraySR
366
367 Description: multiplies an array of vectors by a matrix 3x3
368 (Scaling and Rotation) component.
369
370 Arguments: m matrix.
371 srcBase start of source vector array.
372 dstBase start of resultant vector array.
373 note: ok if srcBase == dstBase.
374
375 count number of vectors in srcBase, dstBase arrays
376 note: cannot check for array overflow
377
378 Return : none
379 *---------------------------------------------------------------------*/
void C_MTX44MultVecArraySR ( const Mtx44 m, const Vec *srcBase, Vec *dstBase, u32 count )
{
    u32 n;
    Vec in;     // private copy of the current source element

    ASSERTMSG( (m != 0), MTX44_MULTVECARRAYSR_1 );
    ASSERTMSG( (srcBase != 0), MTX44_MULTVECARRAYSR_2 );
    ASSERTMSG( (dstBase != 0), MTX44_MULTVECARRAYSR_3 );

    for ( n = 0; n < count; n++ )
    {
        // Copy the element before writing, so srcBase == dstBase is safe.
        in = srcBase[n];

        // Only the upper-left 3x3 block of m is applied: column 3 and
        // row 3 are never read, and no divide by w takes place.
        dstBase[n].x = m[0][0]*in.x + m[0][1]*in.y + m[0][2]*in.z;
        dstBase[n].y = m[1][0]*in.x + m[1][1]*in.y + m[1][2]*in.z;
        dstBase[n].z = m[2][0]*in.x + m[2][1]*in.y + m[2][2]*in.z;
    }
}
405
406 /*---------------------------------------------------------------------*
407 Paired-Single assembler version
408 *---------------------------------------------------------------------*
409 Note that this performs NO error checking.
410 *---------------------------------------------------------------------*/
411 #ifdef GEKKO
// Paired-single version of MTX44MultVecArraySR.  The loop is software
// pipelined: the prologue loads vector 0 and starts its products, each pass
// of @loop finishes and stores one vector while loading and starting the
// next, and the code after the loop finishes the last vector.
// In the register notes below, "a, b" means ps0 = a, ps1 = b.
// NOTE(review): CTR is loaded with count - 1 and bdnz decrements CTR before
// testing it, so count == 0 or count == 1 does not appear to leave the loop
// after the intended number of passes; callers seem to need count >= 2 --
// confirm.
PSMTX44MultVecArraySR(const register Mtx44 m,const register Vec * srcBase,register Vec * dstBase,register u32 count)412 asm void PSMTX44MultVecArraySR(
413 const register Mtx44 m,
414 const register Vec *srcBase,
415 register Vec *dstBase,
416 register u32 count
417 )
418 {
419 nofralloc;
420 psq_l fp0, 0(m), 0, 0; // fp0 <- m00, m01
421 addi count, count, -1; // loop passes = count - 1; last vector is done after the loop
422 psq_l fp6, 0(srcBase), 0, 0; // fp6 <- src.x, src.y
423 ps_mul fp8, fp0, fp6; // fp8 <- m00*x, m01*y
424 psq_l fp2, 16(m), 0, 0; // fp2 <- m10, m11
425 ps_mul fp9, fp2, fp6; // fp9 <- m10*x, m11*y
426 psq_l fp4, 32(m), 0, 0; // fp4 <- m20, m21
427 psq_lu fp7, 8(srcBase), 1, 0; // fp7 <- src.z, 1.0   (srcBase += 8)
428 ps_mul fp10, fp4, fp6; // fp10 <- m20*x, m21*y
429 psq_l fp1, 8(m), 1, 0; // fp1 <- m02, 1.0
430 mtctr count;
431 psq_l fp3, 24(m), 1, 0; // fp3 <- m12, 1.0
432 addi dstBase, dstBase, -4; // pre-bias so the first psq_stu 4(dstBase) hits dst[0].x
433 psq_l fp5, 40(m), 1, 0; // fp5 <- m22, 1.0
434
435 @loop:
436 ps_madd fp11, fp1, fp7, fp8; // fp11 <- m02*z + m00*x, 1.0 + m01*y
437 psq_lu fp6, 4(srcBase), 0, 0; // fp6 <- next src.x, src.y   (srcBase += 4)
438 ps_madd fp12, fp3, fp7, fp9; // fp12 <- m12*z + m10*x, 1.0 + m11*y
439 ps_madd fp13, fp5, fp7, fp10; // fp13 <- m22*z + m20*x, 1.0 + m21*y
440 psq_lu fp7, 8(srcBase), 1, 0; // fp7 <- next src.z, 1.0   (srcBase += 8)
441 ps_sum0 fp11, fp11, fp8, fp8; // fp11.ps0 <- fp11.ps0 + m01*y   = dst.x
442 psq_stu fp11, 4(dstBase), 1, 0; // store dst.x   (dstBase += 4)
443 ps_sum0 fp12, fp12, fp9, fp9; // fp12.ps0 <- fp12.ps0 + m11*y   = dst.y
444 psq_stu fp12, 4(dstBase), 1, 0; // store dst.y   (dstBase += 4)
445 ps_sum0 fp13, fp13, fp10, fp10; // fp13.ps0 <- fp13.ps0 + m21*y   = dst.z
446 psq_stu fp13, 4(dstBase), 1, 0; // store dst.z   (dstBase += 4)
447 ps_mul fp8, fp0, fp6; // start next vector: fp8 <- m00*x, m01*y
448 ps_mul fp9, fp2, fp6; // fp9 <- m10*x, m11*y
449 ps_mul fp10, fp4, fp6; // fp10 <- m20*x, m21*y
450 bdnz+ @loop
451
// Finish the last vector (already loaded and partly multiplied above).
452 ps_madd fp11, fp1, fp7, fp8;
453 ps_madd fp12, fp3, fp7, fp9;
454 ps_madd fp13, fp5, fp7, fp10;
455 ps_sum0 fp11, fp11, fp8, fp8; // fp11.ps0 <- dst.x
456 psq_stu fp11, 4(dstBase), 1, 0; // store dst.x
457 ps_sum0 fp12, fp12, fp9, fp9; // fp12.ps0 <- dst.y
458 psq_stu fp12, 4(dstBase), 1, 0; // store dst.y
459 ps_sum0 fp13, fp13, fp10, fp10; // fp13.ps0 <- dst.z
460 psq_stu fp13, 4(dstBase), 1, 0; // store dst.z
461 blr;
462 }
463 #endif // GEKKO
464
465
466 /*===========================================================================*/
467