/*---------------------------------------------------------------------------*
  Project:  Matrix vector Library
  File:     mtx44vec.c

  Copyright 1998 - 2001 Nintendo.  All rights reserved.

  These coded instructions, statements, and computer programs contain
  proprietary information of Nintendo of America Inc. and/or Nintendo
  Company Ltd., and are protected by Federal copyright law.  They may
  not be disclosed to third parties or copied or duplicated in any form,
  in whole or in part, without the prior written consent of Nintendo.


  $Log: mtx44vec.c,v $
  Revision 1.4  2007/08/30 10:41:22  hirose
  Updated PSMTX44MultVecArray to make it Broadway EABI compliant.

  Revision 1.3  2007/01/11 00:45:26  aka
  Removed win32.h.

  Revision 1.2  2006/02/20 04:25:42  mitu
  Changed include path from dolphin/ to revolution/.

  Revision 1.1.1.1  2005/05/12 02:15:49  yasuh-to
  Ported from dolphin sheath tree.


    2     2002/04/11 13:11 Hirose
    const type specifier support. (by Hiratsu@IRD)

    1     2001/07/30 10:51p Hirose
    Initial check in.

  $NoKeywords: $
 *---------------------------------------------------------------------------*/
36
#include <math.h>
#include <revolution/mtx.h>
#include <revolution/mtx/mtx44ext.h>
#include "mtx44extAssert.h"
41
42
43
/*---------------------------------------------------------------------*



                             MODEL SECTION



 *---------------------------------------------------------------------*/
/* NOTE: Prototypes for these functions are declared in "mtx44ext.h".  */
54
55 /*---------------------------------------------------------------------*
56 Name: MTX44MultVec
57
58 Description: Multiplies a vector by a matrix.
59 m x src = dst.
60
61 Arguments: m Matrix.
62 src Source vector for multiply.
63 dst Resultant vector from multiply.
64 Note: OK if src == dst.
65
66 Return : None.
67 *---------------------------------------------------------------------*/
68 /*---------------------------------------------------------------------*
69 C version
70 *---------------------------------------------------------------------*/
C_MTX44MultVec(const Mtx44 m,const Vec * src,Vec * dst)71 void C_MTX44MultVec ( const Mtx44 m, const Vec *src, Vec *dst )
72 {
73 Vec vTmp;
74 f32 w;
75
76 ASSERTMSG( (m != 0), MTX44_MULTVEC_1 );
77 ASSERTMSG( (src != 0), MTX44_MULTVEC_2 );
78 ASSERTMSG( (dst != 0), MTX44_MULTVEC_3 );
79
80 // A Vec has a 4th implicit 'w' coordinate of 1
81 vTmp.x = m[0][0]*src->x + m[0][1]*src->y + m[0][2]*src->z + m[0][3];
82 vTmp.y = m[1][0]*src->x + m[1][1]*src->y + m[1][2]*src->z + m[1][3];
83 vTmp.z = m[2][0]*src->x + m[2][1]*src->y + m[2][2]*src->z + m[2][3];
84 w = m[3][0]*src->x + m[3][1]*src->y + m[3][2]*src->z + m[3][3];
85 w = 1.0f/w;
86
87 // Copy back
88 dst->x = vTmp.x * w;
89 dst->y = vTmp.y * w;
90 dst->z = vTmp.z * w;
91 }
92
/*---------------------------------------------------------------------*
    Paired-Single assembler version
 *---------------------------------------------------------------------*
    Note that NO error checking is performed.

    Every psq_l / psq_st uses GQR index 0; presumably GQR0 is set up
    for plain, unscaled f32 data -- confirm against the platform
    initialisation.  A load with W = 1 fetches a single float into
    ps0 and sets ps1 to 1.0; a store with W = 1 writes ps0 only.

    All reads of src are issued before the first store to dst, so
    src == dst is handled.
 *---------------------------------------------------------------------*/
#ifdef GEKKO
asm void PSMTX44MultVec (
    const register Mtx44  m,
    const register Vec   *src,
          register Vec   *dst
)
{
    nofralloc;
    psq_l       fp0,   0(src), 0, 0;    // fp0  <- src.x, src.y
    psq_l       fp2,  48(m),   0, 0;    // fp2  <- m30, m31
    psq_l       fp1,   8(src), 1, 0;    // fp1  <- src.z, 1.0
    ps_mul      fp4,  fp0, fp2;         // fp4  <- m30*x, m31*y
    psq_l       fp3,  56(m),   0, 0;    // fp3  <- m32, m33
    ps_madd     fp5,  fp1, fp3, fp4;    // fp5  <- m32*z + m30*x, m33 + m31*y
    ps_merge11  fp12, fp1, fp1;         // fp12 <- 1.0, 1.0
    ps_sum0     fp13, fp5, fp5, fp5;    // fp13 <- w, --
    psq_l       fp4,   0(m),   0, 0;    // fp4  <- m00, m01
    ps_merge00  fp13, fp13, fp13;       // fp13 <- w, w
    psq_l       fp5,   8(m),   0, 0;    // fp5  <- m02, m03
    ps_div      fp13, fp12, fp13;       // fp13 <- 1/w, 1/w
    psq_l       fp6,  16(m),   0, 0;    // fp6  <- m10, m11
    psq_l       fp7,  24(m),   0, 0;    // fp7  <- m12, m13
    psq_l       fp8,  32(m),   0, 0;    // fp8  <- m20, m21
    psq_l       fp9,  40(m),   0, 0;    // fp9  <- m22, m23
    ps_mul      fp4,  fp0, fp4;         // fp4  <- m00*x, m01*y
    ps_madd     fp2,  fp1, fp5, fp4;    // fp2  <- m02*z + m00*x, m03 + m01*y
    ps_mul      fp6,  fp0, fp6;         // fp6  <- m10*x, m11*y
    ps_madd     fp3,  fp1, fp7, fp6;    // fp3  <- m12*z + m10*x, m13 + m11*y
    ps_mul      fp8,  fp0, fp8;         // fp8  <- m20*x, m21*y
    ps_sum0     fp2,  fp2, fp2, fp2;    // fp2  <- x', --
    ps_madd     fp9,  fp1, fp9, fp8;    // fp9  <- m22*z + m20*x, m23 + m21*y
    ps_sum1     fp2,  fp3, fp2, fp3;    // fp2  <- x', y'
    ps_sum0     fp3,  fp9, fp9, fp9;    // fp3  <- z', --
    ps_mul      fp2,  fp2, fp13;        // fp2  <- x'/w, y'/w
    psq_st      fp2,   0(dst), 0, 0;    // dst.x, dst.y
    ps_mul      fp3,  fp3, fp13;        // fp3  <- z'/w, --
    psq_st      fp3,   8(dst), 1, 0;    // dst.z
    blr;
}
#endif // GEKKO
138
139 /*---------------------------------------------------------------------*
140 Name: MTX44MultVecArray
141
142 Description: Multiplies an array of vectors by a matrix.
143
144
145 Arguments: m Matrix.
146 srcBase Start of source vector array.
147 dstBase Start of resultant vector array.
148 Note: OK if srcBase == dstBase.
149 count Number of vectors in srcBase, dstBase arrays
150 Note: Cannot check for array overflow
151
152 Return : None.
153 *---------------------------------------------------------------------*/
154 /*---------------------------------------------------------------------*
155 C version
156 *---------------------------------------------------------------------*/
C_MTX44MultVecArray(const Mtx44 m,const Vec * srcBase,Vec * dstBase,u32 count)157 void C_MTX44MultVecArray ( const Mtx44 m, const Vec *srcBase, Vec *dstBase, u32 count )
158 {
159 u32 i;
160 Vec vTmp;
161 f32 w;
162
163 ASSERTMSG( (m != 0), MTX44_MULTVECARRAY_1 );
164 ASSERTMSG( (srcBase != 0), MTX44_MULTVECARRAY_2 );
165 ASSERTMSG( (dstBase != 0), MTX44_MULTVECARRAY_3 );
166
167 for(i=0; i< count; i++)
168 {
169 // Vec has a 4th implicit 'w' coordinate of 1
170 vTmp.x = m[0][0]*srcBase->x + m[0][1]*srcBase->y + m[0][2]*srcBase->z + m[0][3];
171 vTmp.y = m[1][0]*srcBase->x + m[1][1]*srcBase->y + m[1][2]*srcBase->z + m[1][3];
172 vTmp.z = m[2][0]*srcBase->x + m[2][1]*srcBase->y + m[2][2]*srcBase->z + m[2][3];
173 w = m[3][0]*srcBase->x + m[3][1]*srcBase->y + m[3][2]*srcBase->z + m[3][3];
174 w = 1.0f/w;
175
176 // Copy back
177 dstBase->x = vTmp.x * w;
178 dstBase->y = vTmp.y * w;
179 dstBase->z = vTmp.z * w;
180
181 srcBase++;
182 dstBase++;
183 }
184 }
185
/*---------------------------------------------------------------------*
    Paired-Single assembler version
 *---------------------------------------------------------------------*
    Note that NO error checking is performed.

    The loop is software-pipelined: the code before @loop loads the
    matrix and starts the first vector, each pass through @loop
    finishes one vector while starting the next, and the code after
    the loop finishes the last vector.

    NOTE(review): CTR is loaded with count - 1 and is only tested by
    bdnz after a full pass through @loop, which also pre-loads the
    following source element.  With count == 0 or count == 1 CTR
    therefore does not reach zero until it has wrapped, so the routine
    appears to require count >= 2.  The disabled code at the top looks
    like an abandoned special case for count == 1.  Confirm the
    intended contract with callers.

    fp14 is saved and restored through a 24-byte stack frame; the
    revision log attributes this to Broadway EABI compliance.
 *---------------------------------------------------------------------*/
#ifdef GEKKO
asm void PSMTX44MultVecArray (
    const register Mtx44  m,
    const register Vec   *srcBase,
          register Vec   *dstBase,
          register u32    count
)
{
    nofralloc;
    // Disabled in the original source:
//  cmpwi       count, 1;
//  bne         @array
//  b           PSMTX44MultVec
//@array:
    stwu        rsp,  -24(rsp);             // open a 24-byte frame
    addi        count, count, -1;
    psq_l       fp6,  48(m), 0, 0;          // fp6  <- m30, m31
    mtctr       count;                      // CTR  <- count - 1
    psq_l       fp8,   0(srcBase), 0, 0;    // fp8  <- src.x, src.y
    addi        dstBase, dstBase, -4;       // pre-bias for psq_stu 4(dstBase)
    stfd        fp14,  8(rsp);              // save fp14 as a double
    psq_l       fp7,  56(m), 0, 0;          // fp7  <- m32, m33
    psq_lu      fp9,   8(srcBase), 1, 0;    // fp9  <- src.z, 1.0 ; srcBase += 8
    ps_mul      fp13, fp6, fp8;             // fp13 <- m30*x, m31*y
    psq_l       fp0,   0(m), 0, 0;          // fp0  <- m00, m01
    psq_st      fp14, 16(rsp), 0, 0;        // save fp14 as a paired single
    ps_madd     fp13, fp7, fp9, fp13;       // fp13 <- m32*z + m30*x, m33 + m31*y
    psq_l       fp2,  16(m), 0, 0;          // fp2  <- m10, m11
    ps_merge11  fp14, fp9, fp9;             // fp14 <- 1.0F, 1.0F
    ps_mul      fp10, fp0, fp8;             // fp10 <- m00*x, m01*y
    psq_l       fp4,  32(m), 0, 0;          // fp4  <- m20, m21
    ps_mul      fp11, fp2, fp8;             // fp11 <- m10*x, m11*y
    psq_l       fp1,   8(m), 0, 0;          // fp1  <- m02, m03
    ps_mul      fp12, fp4, fp8;             // fp12 <- m20*x, m21*y
    psq_l       fp3,  24(m), 0, 0;          // fp3  <- m12, m13
    ps_sum0     fp13, fp13, fp13, fp13;     // fp13 <- w
    psq_l       fp5,  40(m), 0, 0;          // fp5  <- m22, m23

@loop:
    // Finish the current vector.
    ps_madd     fp10, fp1, fp9, fp10;       // fp10 <- m02*z + m00*x, m03 + m01*y
    ps_madd     fp11, fp3, fp9, fp11;       // fp11 <- m12*z + m10*x, m13 + m11*y
    ps_madd     fp12, fp5, fp9, fp12;       // fp12 <- m22*z + m20*x, m23 + m21*y
    ps_sum0     fp10, fp10, fp10, fp10;     // fp10 <- x
    ps_sum0     fp11, fp11, fp11, fp11;     // fp11 <- y
    ps_sum0     fp12, fp12, fp12, fp12;     // fp12 <- z
    ps_div      fp13, fp14, fp13;           // fp13.ps0 <- 1/w

    // Fetch the next vector before the current result is stored.
    psq_lu      fp8,   4(srcBase), 0, 0;    // fp8  <- next src.x, src.y
    psq_lu      fp9,   8(srcBase), 1, 0;    // fp9  <- next src.z, 1.0

    ps_mul      fp10, fp10, fp13;
    psq_stu     fp10,  4(dstBase), 1, 0;    // dst.x <- x/w
    ps_mul      fp11, fp11, fp13;
    psq_stu     fp11,  4(dstBase), 1, 0;    // dst.y <- y/w
    ps_mul      fp12, fp12, fp13;
    psq_stu     fp12,  4(dstBase), 1, 0;    // dst.z <- z/w

    // Start the next vector.
    ps_mul      fp13, fp6, fp8;             // fp13 <- m30*x, m31*y

    ps_mul      fp10, fp0, fp8;             // fp10 <- m00*x, m01*y
    ps_mul      fp11, fp2, fp8;             // fp11 <- m10*x, m11*y
    ps_madd     fp13, fp7, fp9, fp13;       // fp13 <- m32*z + m30*x, m33 + m31*y
    ps_mul      fp12, fp4, fp8;             // fp12 <- m20*x, m21*y
    ps_sum0     fp13, fp13, fp13, fp13;     // fp13 <- w

    bdnz+       @loop

    // Finish the last vector; nothing further is loaded.
    ps_madd     fp10, fp1, fp9, fp10;
    ps_madd     fp11, fp3, fp9, fp11;
    ps_madd     fp12, fp5, fp9, fp12;
    ps_sum0     fp10, fp10, fp10, fp10;     // fp10 <- x
    ps_sum0     fp11, fp11, fp11, fp11;     // fp11 <- y
    ps_sum0     fp12, fp12, fp12, fp12;     // fp12 <- z
    ps_div      fp13, fp14, fp13;           // fp13.ps0 <- 1/w

    ps_mul      fp10, fp10, fp13;
    psq_st      fp10,  4(dstBase), 1, 0;    // dst.x <- x/w
    ps_mul      fp11, fp11, fp13;
    psq_st      fp11,  8(dstBase), 1, 0;    // dst.y <- y/w
    ps_mul      fp12, fp12, fp13;
    psq_st      fp12, 12(dstBase), 1, 0;    // dst.z <- z/w

    // Restore fp14 and close the frame.
    psq_l       fp14, 16(rsp), 0, 0;
    lfd         fp14,  8(rsp);
    addi        rsp, rsp, 24;
    blr;
}
#endif // GEKKO
278
279
280 /*---------------------------------------------------------------------*
281 Name: MTX44MultVecSR
282
283 Description: Multiplies a vector by a matrix 3x3 (Scaling and Rotation)
284 component.
285
286 m x src = dst.
287
288 Arguments: m Matrix.
289 src Source vector for multiply.
290 dst Resultant vector from multiply.
291 Note: OK if src == dst.
292
293 Return : None.
294 *---------------------------------------------------------------------*/
C_MTX44MultVecSR(const Mtx44 m,const Vec * src,Vec * dst)295 void C_MTX44MultVecSR ( const Mtx44 m, const Vec *src, Vec *dst )
296 {
297 Vec vTmp;
298
299 ASSERTMSG( (m != 0), MTX44_MULTVECSR_1 );
300 ASSERTMSG( (src != 0), MTX44_MULTVECSR_2 );
301 ASSERTMSG( (dst != 0), MTX44_MULTVECSR_3 );
302
303 // A Vec has a 4th implicit 'w' coordinate of 1
304 vTmp.x = m[0][0]*src->x + m[0][1]*src->y + m[0][2]*src->z;
305 vTmp.y = m[1][0]*src->x + m[1][1]*src->y + m[1][2]*src->z;
306 vTmp.z = m[2][0]*src->x + m[2][1]*src->y + m[2][2]*src->z;
307
308 // Copy back
309 dst->x = vTmp.x;
310 dst->y = vTmp.y;
311 dst->z = vTmp.z;
312 }
313
/*---------------------------------------------------------------------*
    Paired-Single assembler version
 *---------------------------------------------------------------------*
    Note that this performs NO error checking.

    Matrix rows are loaded as full pairs, so m[r][3] is fetched too.
    Its product only ever lands in ps1 of the result registers, and
    every store uses W = 1 (ps0 only), so it never reaches dst.

    All reads of src are issued before the first store to dst, so
    src == dst is handled.
 *---------------------------------------------------------------------*/
#ifdef GEKKO
asm void PSMTX44MultVecSR(
    const register Mtx44  m,
    const register Vec   *src,
          register Vec   *dst
)
{

    nofralloc
    psq_l       fp0,   0(m),   0, 0     // m[0][0], m[0][1]  GQR0 = 0

    // fp6 - x y
    psq_l       fp6,   0(src), 0, 0

    psq_l       fp2,  16(m),   0, 0     // m[1][0], m[1][1]


    // fp8 = m00x m01y  // next X
    ps_mul      fp8,  fp0, fp6
    psq_l       fp4,  32(m),   0, 0     // m[2][0], m[2][1]

    // fp10 = m10x m11y  // next Y
    ps_mul      fp10, fp2, fp6
    psq_l       fp7,   8(src), 1, 0     // fp7 - z,1.0

    // fp12 = m20x m21y  // next Z
    ps_mul      fp12, fp4, fp6          // last use of fp6
    psq_l       fp3,  24(m),   0, 0     // m[1][2], m[1][3]

    ps_sum0     fp8,  fp8, fp8, fp8     // fp8.ps0  = m00x + m01y
    psq_l       fp5,  40(m),   0, 0     // m[2][2], m[2][3]

    ps_sum0     fp10, fp10, fp10, fp10  // fp10.ps0 = m10x + m11y
    psq_l       fp1,   8(m),   0, 0     // m[0][2], m[0][3]

    ps_sum0     fp12, fp12, fp12, fp12  // fp12.ps0 = m20x + m21y
    ps_madd     fp9,  fp1, fp7, fp8     // fp9.ps0  = m02z + m00x + m01y
    psq_st      fp9,   0(dst), 1, 0     // Store X

    ps_madd     fp11, fp3, fp7, fp10    // fp11.ps0 = m12z + m10x + m11y
    psq_st      fp11,  4(dst), 1, 0     // Store Y

    ps_madd     fp13, fp5, fp7, fp12    // fp13.ps0 = m22z + m20x + m21y
    psq_st      fp13,  8(dst), 1, 0     // Store Z

    blr

}
#endif // GEKKO
368
369 /*---------------------------------------------------------------------*
370 Name: MTX44MultVecArraySR
371
372 Description: Multiplies an array of vectors by a matrix 3x3
373 (Scaling and Rotation) component.
374
375 Arguments: m Matrix.
376 srcBase Start of source vector array.
377 dstBase Start of resultant vector array.
378 Note: OK if srcBase == dstBase.
379
380 count Number of vectors in srcBase, dstBase arrays
381 Note: Cannot check for array overflow
382
383 Return : None.
384 *---------------------------------------------------------------------*/
C_MTX44MultVecArraySR(const Mtx44 m,const Vec * srcBase,Vec * dstBase,u32 count)385 void C_MTX44MultVecArraySR ( const Mtx44 m, const Vec *srcBase, Vec *dstBase, u32 count )
386 {
387 u32 i;
388 Vec vTmp;
389
390 ASSERTMSG( (m != 0), MTX44_MULTVECARRAYSR_1 );
391 ASSERTMSG( (srcBase != 0), MTX44_MULTVECARRAYSR_2 );
392 ASSERTMSG( (dstBase != 0), MTX44_MULTVECARRAYSR_3 );
393
394 for ( i = 0; i < count; i ++ )
395 {
396 // Vec has a 4th implicit 'w' coordinate of 1
397 vTmp.x = m[0][0]*srcBase->x + m[0][1]*srcBase->y + m[0][2]*srcBase->z;
398 vTmp.y = m[1][0]*srcBase->x + m[1][1]*srcBase->y + m[1][2]*srcBase->z;
399 vTmp.z = m[2][0]*srcBase->x + m[2][1]*srcBase->y + m[2][2]*srcBase->z;
400
401 // Copy back
402 dstBase->x = vTmp.x;
403 dstBase->y = vTmp.y;
404 dstBase->z = vTmp.z;
405
406 srcBase++;
407 dstBase++;
408 }
409 }
410
/*---------------------------------------------------------------------*
    Paired-Single assembler version
 *---------------------------------------------------------------------*
    Note that this performs NO error checking.

    The loop is software-pipelined: the code before @loop loads the
    matrix and starts the first vector, each pass through @loop
    finishes one vector while starting the next, and the code after
    the loop finishes the last vector.

    The third-column loads use W = 1, so fp1 / fp3 / fp5 hold
    (m?2, 1.0) and m?3 is never read.  ps1 of the intermediate
    results is scratch; only ps0 is stored.

    NOTE(review): CTR is loaded with count - 1 and is only tested by
    bdnz after a full pass through @loop, which also pre-loads the
    following source element.  With count == 0 or count == 1 CTR
    therefore does not reach zero until it has wrapped, so the routine
    appears to require count >= 2.  Confirm the intended contract with
    callers.
 *---------------------------------------------------------------------*/
#ifdef GEKKO
asm void PSMTX44MultVecArraySR(
    const register Mtx44  m,
    const register Vec   *srcBase,
          register Vec   *dstBase,
          register u32    count
)
{
    nofralloc;
    psq_l       fp0,   0(m), 0, 0;          // fp0  <- m00, m01
    addi        count, count, -1;
    psq_l       fp6,   0(srcBase), 0, 0;    // fp6  <- src.x, src.y
    ps_mul      fp8,  fp0, fp6;             // fp8  <- m00*x, m01*y
    psq_l       fp2,  16(m), 0, 0;          // fp2  <- m10, m11
    ps_mul      fp9,  fp2, fp6;             // fp9  <- m10*x, m11*y
    psq_l       fp4,  32(m), 0, 0;          // fp4  <- m20, m21
    psq_lu      fp7,   8(srcBase), 1, 0;    // fp7  <- src.z, 1.0 ; srcBase += 8
    ps_mul      fp10, fp4, fp6;             // fp10 <- m20*x, m21*y
    psq_l       fp1,   8(m), 1, 0;          // fp1  <- m02, 1.0
    mtctr       count;                      // CTR  <- count - 1
    psq_l       fp3,  24(m), 1, 0;          // fp3  <- m12, 1.0
    addi        dstBase, dstBase, -4;       // pre-bias for psq_stu 4(dstBase)
    psq_l       fp5,  40(m), 1, 0;          // fp5  <- m22, 1.0

@loop:
    ps_madd     fp11, fp1, fp7, fp8;        // fp11.ps0 <- m02*z + m00*x
    psq_lu      fp6,   4(srcBase), 0, 0;    // fp6  <- next src.x, src.y
    ps_madd     fp12, fp3, fp7, fp9;        // fp12.ps0 <- m12*z + m10*x
    ps_madd     fp13, fp5, fp7, fp10;       // fp13.ps0 <- m22*z + m20*x
    psq_lu      fp7,   8(srcBase), 1, 0;    // fp7  <- next src.z, 1.0
    ps_sum0     fp11, fp11, fp8, fp8;       // fp11.ps0 += m01*y  -> x
    psq_stu     fp11,  4(dstBase), 1, 0;    // dst.x
    ps_sum0     fp12, fp12, fp9, fp9;       // fp12.ps0 += m11*y  -> y
    psq_stu     fp12,  4(dstBase), 1, 0;    // dst.y
    ps_sum0     fp13, fp13, fp10, fp10;     // fp13.ps0 += m21*y  -> z
    psq_stu     fp13,  4(dstBase), 1, 0;    // dst.z
    ps_mul      fp8,  fp0, fp6;             // start next: m00*x, m01*y
    ps_mul      fp9,  fp2, fp6;             //             m10*x, m11*y
    ps_mul      fp10, fp4, fp6;             //             m20*x, m21*y
    bdnz+       @loop

    // Finish the last vector; nothing further is loaded.
    ps_madd     fp11, fp1, fp7, fp8;
    ps_madd     fp12, fp3, fp7, fp9;
    ps_madd     fp13, fp5, fp7, fp10;
    ps_sum0     fp11, fp11, fp8, fp8;       // x
    psq_stu     fp11,  4(dstBase), 1, 0;    // dst.x
    ps_sum0     fp12, fp12, fp9, fp9;       // y
    psq_stu     fp12,  4(dstBase), 1, 0;    // dst.y
    ps_sum0     fp13, fp13, fp10, fp10;     // z
    psq_stu     fp13,  4(dstBase), 1, 0;    // dst.z
    blr;
}
#endif // GEKKO
469
470
/*===========================================================================*/
472