1 /*---------------------------------------------------------------------------*
2 Project: matrix vector Library
3 File: mtx44vec.c
4
5 Copyright 1998-2011 Nintendo. All rights reserved.
6
7 These coded instructions, statements, and computer programs contain
8 proprietary information of Nintendo of America Inc. and/or Nintendo
9 Company Ltd., and are protected by Federal copyright law. They may
10 not be disclosed to third parties or copied or duplicated in any form,
11 in whole or in part, without the prior written consent of Nintendo.
12
13 *---------------------------------------------------------------------------*/
14
15 #include <math.h>
16 #include <stdio.h>
17 #include <cafe/mtx.h>
18 #include <cafe/mtx/mtx44.h>
19 #include "mtxAssert.h"
20 #include "mtx44Assert.h"
21
22
23
24 /*---------------------------------------------------------------------*
25
26 MODEL SECTION
27
28 *---------------------------------------------------------------------*/
29 /* NOTE: Prototypes for these functions are defined in "mtx44ext.h". */
30
31 /*---------------------------------------------------------------------*
32 Name: MTX44MultVec
33
34 Description: multiplies a vector by a matrix.
35 m x src = dst.
36
37 Arguments: m matrix.
38 src source vector for multiply.
39 dst resultant vector from multiply.
40 note: ok if src == dst.
41
42 Return: none
43 *---------------------------------------------------------------------*/
44 /*---------------------------------------------------------------------*
45 C version
46 *---------------------------------------------------------------------*/
void C_MTX44MultVec ( MTX_CONST Mtx44 m, const Vec *src, Vec *dst )
{
    f32 sx, sy, sz;         // snapshot of the source components
    f32 px, py, pz;         // transformed components before the divide by w
    f32 w;
    f32 invW;

    ASSERTMSG( (m != 0), MTX44_MULTVEC_1 );
    ASSERTMSG( (src != 0), MTX44_MULTVEC_2 );
    ASSERTMSG( (dst != 0), MTX44_MULTVEC_3 );

    // Read the whole source first so that dst may be the same object as src.
    sx = src->x;
    sy = src->y;
    sz = src->z;

    // The source is treated as (x, y, z, 1): column 3 of each row is added as-is.
    px = m[0][0]*sx + m[0][1]*sy + m[0][2]*sz + m[0][3];
    py = m[1][0]*sx + m[1][1]*sy + m[1][2]*sz + m[1][3];
    pz = m[2][0]*sx + m[2][1]*sy + m[2][2]*sz + m[2][3];
    w  = m[3][0]*sx + m[3][1]*sy + m[3][2]*sz + m[3][3];

    // One reciprocal, three multiplies. w is not checked against zero.
    invW = 1.0f/w;

    dst->x = px * invW;
    dst->y = py * invW;
    dst->z = pz * invW;
}
68
69 #if !defined(WIN32) && !defined(WIN64)
70 /*---------------------------------------------------------------------*
71 Paired-Single intrinsics version
72 *---------------------------------------------------------------------*
73 Note that NO error checking is performed.
74 *---------------------------------------------------------------------*/
void PSMTX44MultVec ( MTX_CONST Mtx44 m, const Vec *src, Vec *dst )
{
    // Each f32x2 holds a pair of floats, written below as (lane0, lane1).
    // Statement order is kept interleaved (loads between arithmetic) exactly
    // as in the hand-scheduled sequence this routine was derived from.
    f32x2 srcXY, srcZ1;                 // (src.x, src.y) and (src.z, 1.0)
    f32x2 row0Lo, row0Hi;               // (m[0][0], m[0][1]) and (m[0][2], m[0][3])
    f32x2 row1Lo, row1Hi;               // (m[1][0], m[1][1]) and (m[1][2], m[1][3])
    f32x2 row2Lo, row2Hi;               // (m[2][0], m[2][1]) and (m[2][2], m[2][3])
    f32x2 row3Lo, row3Hi;               // (m[3][0], m[3][1]) and (m[3][2], m[3][3])
    f32x2 accX, accY, accZ, accW;       // per-row partial products
    f32x2 ones, invW;                   // (1.0, 1.0) and, eventually, (1/w, 1/w)
    f32x2 outXY, outZ;                  // values written to dst

    // (src.x, src.y)
    srcXY = __PSQ_LX(src, 0, 0, 0);

    // (m[3][0], m[3][1])
    row3Lo = __PSQ_LX(m, 48, 0, 0);

    // (src.z, 1.0): single-element load, second lane is filled with 1.0
    srcZ1 = __PSQ_LX(src, 8, 1, 0);

    // row 3 partial: x and y terms
    accW = __PS_MUL(srcXY, row3Lo);

    // (m[3][2], m[3][3])
    row3Hi = __PSQ_LX(m, 56, 0, 0);

    // row 3 partial: add z term and the m[3][3]*1.0 term
    accW = __PS_MADD(srcZ1, row3Hi, accW);

    // (1.0, 1.0), taken from the second lane of srcZ1
    ones = __PS_MERGE11(srcZ1, srcZ1);

    // lane 0 <- w (sum of both lanes of accW)
    invW = __PS_SUM0(accW, accW, accW);

    // (m[0][0], m[0][1])
    row0Lo = __PSQ_LX(m, 0, 0, 0);

    // (w, w)
    invW = __PS_MERGE00(invW, invW);

    // (m[0][2], m[0][3])
    row0Hi = __PSQ_LX(m, 8, 0, 0);

    // (1/w, 1/w)
    invW = __PS_DIV(ones, invW);

    // (m[1][0], m[1][1])
    row1Lo = __PSQ_LX(m, 16, 0, 0);

    // (m[1][2], m[1][3])
    row1Hi = __PSQ_LX(m, 24, 0, 0);

    // (m[2][0], m[2][1])
    row2Lo = __PSQ_LX(m, 32, 0, 0);

    // (m[2][2], m[2][3])
    row2Hi = __PSQ_LX(m, 40, 0, 0);

    // row 0 partials
    accX = __PS_MUL(srcXY, row0Lo);
    accX = __PS_MADD(srcZ1, row0Hi, accX);

    // row 1 partials
    accY = __PS_MUL(srcXY, row1Lo);
    accY = __PS_MADD(srcZ1, row1Hi, accY);

    // row 2 partial: x and y terms
    accZ = __PS_MUL(srcXY, row2Lo);

    // lane 0 <- x before the divide by w
    accX = __PS_SUM0(accX, accX, accX);

    // row 2 partial: add z term and the m[2][3]*1.0 term
    accZ = __PS_MADD(srcZ1, row2Hi, accZ);

    // (x, y) before the divide by w
    outXY = __PS_SUM1(accY, accX, accY);

    // lane 0 <- z before the divide by w
    outZ = __PS_SUM0(accZ, accZ, accZ);

    // dst->x, dst->y
    outXY = __PS_MUL(outXY, invW);
    __PSQ_STX(dst, 0, outXY, 0, 0);

    // dst->z (single-element store of lane 0)
    outZ = __PS_MUL(outZ, invW);
    __PSQ_STX(dst, 8, outZ, 1, 0);
}
189 #endif
190
191 /*---------------------------------------------------------------------*
192 Name: MTX44MultVecArray
193
194 Description: multiplies an array of vectors by a matrix.
195
196
197 Arguments: m matrix.
198 srcBase start of source vector array.
199 dstBase start of resultant vector array.
200 note: ok if srcBase == dstBase.
201 count number of vectors in srcBase, dstBase arrays
202 note: cannot check for array overflow
203
204 Return: none
205 *---------------------------------------------------------------------*/
206 /*---------------------------------------------------------------------*
207 C version
208 *---------------------------------------------------------------------*/
void C_MTX44MultVecArray ( MTX_CONST Mtx44 m, const Vec *srcBase, Vec *dstBase, u32 count )
{
    const Vec *in;          // next vector to read
    Vec       *out;         // next slot to write
    u32        remaining;   // vectors still to process
    f32        sx, sy, sz;
    f32        px, py, pz;
    f32        w;
    f32        invW;

    ASSERTMSG( (m != 0), MTX44_MULTVECARRAY_1 );
    ASSERTMSG( (srcBase != 0), MTX44_MULTVECARRAY_2 );
    ASSERTMSG( (dstBase != 0), MTX44_MULTVECARRAY_3 );

    in  = srcBase;
    out = dstBase;

    // The arrays are walked in lock step; count is trusted, there is no way
    // to detect an overrun from here.
    for ( remaining = count; remaining != 0; remaining-- )
    {
        // Read the element completely before writing, so in-place use
        // (srcBase == dstBase) works.
        sx = in->x;
        sy = in->y;
        sz = in->z;

        // Each source is treated as (x, y, z, 1).
        px = m[0][0]*sx + m[0][1]*sy + m[0][2]*sz + m[0][3];
        py = m[1][0]*sx + m[1][1]*sy + m[1][2]*sz + m[1][3];
        pz = m[2][0]*sx + m[2][1]*sy + m[2][2]*sz + m[2][3];
        w  = m[3][0]*sx + m[3][1]*sy + m[3][2]*sz + m[3][3];

        // w is not checked against zero.
        invW = 1.0f/w;

        out->x = px * invW;
        out->y = py * invW;
        out->z = pz * invW;

        in++;
        out++;
    }
}
237
238 #if !defined(WIN32) && !defined(WIN64)
239 /*---------------------------------------------------------------------*
240 Paired-Single intrinsics version
241 *---------------------------------------------------------------------*
242 Note that NO error checking is performed.
243 *---------------------------------------------------------------------*/
void PSMTX44MultVecArray ( MTX_CONST Mtx44 m, const Vec *srcBase, Vec *dstBase, u32 count )
{
    u32 idx;

    // Apply the single-vector routine to each element pair in turn.
    // No argument checking is done here.
    for ( idx = 0; idx < count; ++idx )
    {
        PSMTX44MultVec(m, &srcBase[idx], &dstBase[idx]);
    }
}
256 #endif
257
258
259 /*---------------------------------------------------------------------*
260 Name: MTX44MultVecSR
261
262 Description: multiplies a vector by a matrix 3x3 (Scaling and Rotation)
263 component.
264
265 m x src = dst.
266
267 Arguments: m matrix.
268 src source vector for multiply.
269 dst resultant vector from multiply.
270 note: ok if src == dst.
271
272 Return: none
273 *---------------------------------------------------------------------*/
void C_MTX44MultVecSR ( MTX_CONST Mtx44 m, const Vec *src, Vec *dst )
{
    f32 sx, sy, sz;         // snapshot of the source components
    f32 rx, ry, rz;         // results, held until all reads are done

    ASSERTMSG( (m != 0), MTX44_MULTVECSR_1 );
    ASSERTMSG( (src != 0), MTX44_MULTVECSR_2 );
    ASSERTMSG( (dst != 0), MTX44_MULTVECSR_3 );

    // Read the whole source first so that dst may be the same object as src.
    sx = src->x;
    sy = src->y;
    sz = src->z;

    // Only the upper-left 3x3 of m is used: column 3 (translation) and
    // row 3 are never read, and there is no divide by w.
    rx = m[0][0]*sx + m[0][1]*sy + m[0][2]*sz;
    ry = m[1][0]*sx + m[1][1]*sy + m[1][2]*sz;
    rz = m[2][0]*sx + m[2][1]*sy + m[2][2]*sz;

    dst->x = rx;
    dst->y = ry;
    dst->z = rz;
}
292
293 #if !defined(WIN32) && !defined(WIN64)
294 /*---------------------------------------------------------------------*
295 Paired-Single intrinsics version
296 *---------------------------------------------------------------------*
297 Note that this performs NO error checking.
298 *---------------------------------------------------------------------*/
void PSMTX44MultVecSR ( MTX_CONST Mtx44 m, const Vec *src, Vec *dst )
{
    // Each f32x2 holds a pair of floats, written below as (lane0, lane1).
    // Statement order is kept interleaved (loads between arithmetic) exactly
    // as in the hand-scheduled sequence this routine was derived from.
    f32x2 srcXY, srcZ1;                 // (src.x, src.y) and (src.z, 1.0)
    f32x2 row0Lo, row0Hi;               // (m[0][0], m[0][1]) and (m[0][2], m[0][3])
    f32x2 row1Lo, row1Hi;               // (m[1][0], m[1][1]) and (m[1][2], m[1][3])
    f32x2 row2Lo, row2Hi;               // (m[2][0], m[2][1]) and (m[2][2], m[2][3])
    f32x2 accX, accY, accZ;             // x/y contributions of each row
    f32x2 outX, outY, outZ;             // lane 0 of each is the stored result

    // (m[0][0], m[0][1])
    row0Lo = __PSQ_LX(m, 0, 0, 0);

    // (src.x, src.y)
    srcXY = __PSQ_LX(src, 0, 0, 0);

    // (m[1][0], m[1][1])
    row1Lo = __PSQ_LX(m, 16, 0, 0);

    // (m[0][0]*x, m[0][1]*y)
    accX = __PS_MUL(row0Lo, srcXY);

    // (m[2][0], m[2][1])
    row2Lo = __PSQ_LX(m, 32, 0, 0);

    // (m[1][0]*x, m[1][1]*y)
    accY = __PS_MUL(row1Lo, srcXY);

    // (src.z, 1.0): single-element load, second lane is filled with 1.0
    srcZ1 = __PSQ_LX(src, 8, 1, 0);

    // (m[2][0]*x, m[2][1]*y)
    accZ = __PS_MUL(row2Lo, srcXY);

    // (m[1][2], m[1][3])
    row1Hi = __PSQ_LX(m, 24, 0, 0);

    // lane 0 <- m[0][0]*x + m[0][1]*y
    accX = __PS_SUM0(accX, accX, accX);

    // (m[2][2], m[2][3])
    row2Hi = __PSQ_LX(m, 40, 0, 0);

    // lane 0 <- m[1][0]*x + m[1][1]*y
    accY = __PS_SUM0(accY, accY, accY);

    // (m[0][2], m[0][3])
    row0Hi = __PSQ_LX(m, 8, 0, 0);

    // lane 0 <- m[2][0]*x + m[2][1]*y
    accZ = __PS_SUM0(accZ, accZ, accZ);

    // Lane 0 gains the z term. Only lane 0 is stored, so whatever the
    // multiply-add leaves in lane 1 never reaches dst.
    outX = __PS_MADD(row0Hi, srcZ1, accX);

    // dst->x (single-element store of lane 0)
    __PSQ_STX(dst, 0, outX, 1, 0);

    outY = __PS_MADD(row1Hi, srcZ1, accY);

    // dst->y (single-element store of lane 0)
    __PSQ_STX(dst, 4, outY, 1, 0);

    outZ = __PS_MADD(row2Hi, srcZ1, accZ);

    // dst->z (single-element store of lane 0)
    __PSQ_STX(dst, 8, outZ, 1, 0);
}
386 #endif
387
388 /*---------------------------------------------------------------------*
389 Name: MTX44MultVecArraySR
390
391 Description: multiplies an array of vectors by a matrix 3x3
392 (Scaling and Rotation) component.
393
394 Arguments: m matrix.
395 srcBase start of source vector array.
396 dstBase start of resultant vector array.
397 note: ok if srcBase == dstBase.
398
399 count number of vectors in srcBase, dstBase arrays
400 note: cannot check for array overflow
401
402 Return: none
403 *---------------------------------------------------------------------*/
void C_MTX44MultVecArraySR ( MTX_CONST Mtx44 m, const Vec *srcBase, Vec *dstBase, u32 count )
{
    const Vec *in;          // next vector to read
    Vec       *out;         // next slot to write
    u32        remaining;   // vectors still to process
    f32        sx, sy, sz;
    f32        rx, ry, rz;

    ASSERTMSG( (m != 0), MTX44_MULTVECARRAYSR_1 );
    ASSERTMSG( (srcBase != 0), MTX44_MULTVECARRAYSR_2 );
    ASSERTMSG( (dstBase != 0), MTX44_MULTVECARRAYSR_3 );

    in  = srcBase;
    out = dstBase;

    // The arrays are walked in lock step; count is trusted, there is no way
    // to detect an overrun from here.
    for ( remaining = count; remaining != 0; remaining-- )
    {
        // Read the element completely before writing, so in-place use
        // (srcBase == dstBase) works.
        sx = in->x;
        sy = in->y;
        sz = in->z;

        // Only the upper-left 3x3 of m is used: column 3 (translation) and
        // row 3 are never read, and there is no divide by w.
        rx = m[0][0]*sx + m[0][1]*sy + m[0][2]*sz;
        ry = m[1][0]*sx + m[1][1]*sy + m[1][2]*sz;
        rz = m[2][0]*sx + m[2][1]*sy + m[2][2]*sz;

        out->x = rx;
        out->y = ry;
        out->z = rz;

        in++;
        out++;
    }
}
429
430 #if !defined(WIN32) && !defined(WIN64)
431 /*---------------------------------------------------------------------*
432 Paired-Single intrinsics version
433 *---------------------------------------------------------------------*
434 Note that this performs NO error checking.
435 *---------------------------------------------------------------------*/
void PSMTX44MultVecArraySR ( MTX_CONST Mtx44 m, const Vec *srcBase, Vec *dstBase, u32 count )
{
    u32 idx;

    // Apply the single-vector 3x3 routine to each element pair in turn.
    // No argument checking is done here.
    for ( idx = 0; idx < count; ++idx )
    {
        PSMTX44MultVecSR(m, &srcBase[idx], &dstBase[idx]);
    }
}
447 #endif
448
449
450 /*===========================================================================*/
451