1 /*---------------------------------------------------------------------------*
2 Project: matrix vector Library
3 File: quat.c
4
5 Copyright 1998-2011 Nintendo. All rights reserved.
6
7 These coded instructions, statements, and computer programs contain
8 proprietary information of Nintendo of America Inc. and/or Nintendo
9 Company Ltd., and are protected by Federal copyright law. They may
10 not be disclosed to third parties or copied or duplicated in any form,
11 in whole or in part, without the prior written consent of Nintendo.
12
13 *---------------------------------------------------------------------------*/
14
15 #include <math.h>
16 #include <stdio.h>
17 #include <cafe/mtx.h>
18 #include <cafe/mtx/mtx44.h>
19 #include "mtxAssert.h"
20 #include "mtx44Assert.h"
21
22 /*---------------------------------------------------------------------*
23 Constants
24 *---------------------------------------------------------------------*/
25 static const f32x2 c00 = {0.0F, 0.0F};
26 //static const f32x2 c01 = {0.0F, 1.0F};
27 //static const f32x2 c10 = {1.0F, 0.0F};
28 static const f32x2 c11 = {1.0F, 1.0F};
29 static const f32x2 c22 = {2.0F, 2.0F};
30 static const f32x2 c33 = {3.0F, 3.0F};
31 static const f32x2 c0505 = {0.5F, 0.5F};
32 static const f32x2 epsilon = {QUAT_EPSILON, QUAT_EPSILON};
33
34 /*---------------------------------------------------------------------------*
35 Name: QUATAdd
36
37 Description: Returns the sum of two quaternions.
38
39 Arguments: p - first quaternion
40 q - second quaternion
41 r - resultant quaternion p+q
42
43 Returns: none
44 *---------------------------------------------------------------------------*/
45 /*---------------------------------------------------------------------------*
46 C version
47 *---------------------------------------------------------------------------*/
C_QUATAdd(const Quaternion * p,const Quaternion * q,Quaternion * r)48 void C_QUATAdd( const Quaternion *p, const Quaternion *q, Quaternion *r )
49 {
50 ASSERTMSG( ( p != 0 ), QUAT_ADD_1 );
51 ASSERTMSG( ( q != 0 ), QUAT_ADD_2 );
52 ASSERTMSG( ( r != 0 ), QUAT_ADD_3 );
53
54 r->x = p->x + q->x;
55 r->y = p->y + q->y;
56 r->z = p->z + q->z;
57 r->w = p->w + q->w;
58 }
59
60 #if !defined(WIN32) && !defined(WIN64)
61 /*---------------------------------------------------------------------------*
62 Paired-Single intrinsics version
63 *---------------------------------------------------------------------------*
64 Note that this performs NO error checking.
65 *---------------------------------------------------------------------------*/
PSQUATAdd(const Quaternion * p,const Quaternion * q,Quaternion * r)66 void PSQUATAdd( const Quaternion *p, const Quaternion *q, Quaternion *r )
67 {
68 f32x2 pxy, qxy, rxy, pzw, qzw, rzw;
69
70 //psq_l pxy, 0(p), 0, 0
71 //pxy[0] = p->x;
72 //pxy[1] = p->y;
73 pxy = __PSQ_LX(p, 0, 0, 0);
74
75 //psq_l qxy, 0(q), 0, 0
76 //qxy[0] = q->x;
77 //qxy[1] = q->y;
78 qxy = __PSQ_LX(q, 0, 0, 0);
79
80 //ps_add rxy, pxy, qxy
81 rxy = __PS_ADD(pxy, qxy);
82
83 //psq_st rxy, 0(r), 0, 0
84 //r->x = rxy[0];
85 //r->y = rxy[1];
86 __PSQ_STX(r, 0, rxy, 0, 0);
87
88 //psq_l pzw, 8(p), 0, 0
89 //pzw[0] = p->z;
90 //pzw[1] = p->w;
91 pzw = __PSQ_LX(p, 8, 0, 0);
92
93 //psq_l qzw, 8(q), 0, 0
94 //qzw[0] = q->z;
95 //qzw[1] = q->w;
96 qzw = __PSQ_LX(q, 8, 0, 0);
97
98 //ps_add rzw, pzw, qzw
99 rzw = __PS_ADD(pzw, qzw);
100
101 //psq_st rzw, 8(r), 0, 0
102 //r->z = rzw[0];
103 //r->w = rzw[1];
104 __PSQ_STX(r, 8, rzw, 0, 0);
105 }
106 #endif
107
108 /*---------------------------------------------------------------------------*
109 Name: QUATSubtract
110
111 Description: Returns the difference of two quaternions p-q.
112
113 Arguments: p - left quaternion
114 q - right quaternion
115 r - resultant quaternion difference p-q
116
117 Returns: none
118 *---------------------------------------------------------------------------*/
119 /*---------------------------------------------------------------------------*
120 C version
121 *---------------------------------------------------------------------------*/
C_QUATSubtract(const Quaternion * p,const Quaternion * q,Quaternion * r)122 void C_QUATSubtract( const Quaternion *p, const Quaternion *q, Quaternion *r )
123 {
124 ASSERTMSG( ( p != 0 ), QUAT_SUBTRACT_1 );
125 ASSERTMSG( ( q != 0 ), QUAT_SUBTRACT_2 );
126 ASSERTMSG( ( r != 0 ), QUAT_SUBTRACT_3 );
127
128 r->x = p->x - q->x;
129 r->y = p->y - q->y;
130 r->z = p->z - q->z;
131 r->w = p->w - q->w;
132 }
133
134 #if !defined(WIN32) && !defined(WIN64)
135 /*---------------------------------------------------------------------------*
136 Paired-Single intrinsics version
137 *---------------------------------------------------------------------------*
138 Note that this performs NO error checking.
139 *---------------------------------------------------------------------------*/
PSQUATSubtract(const Quaternion * p,const Quaternion * q,Quaternion * r)140 void PSQUATSubtract( const Quaternion *p, const Quaternion *q, Quaternion *r )
141 {
142 f32x2 pxy, qxy, rxy, pzw, qzw, rzw;
143
144 //psq_l pxy, 0(p), 0, 0
145 //pxy[0] = p->x;
146 //pxy[1] = p->y;
147 pxy = __PSQ_LX(p, 0, 0, 0);
148
149 //psq_l qxy, 0(q), 0, 0
150 //qxy[0] = q->x;
151 //qxy[1] = q->y;
152 qxy = __PSQ_LX(q, 0, 0, 0);
153
154 //ps_sub rxy, pxy, qxy
155 rxy = __PS_SUB(pxy, qxy);
156
157 //psq_st rxy, 0(r), 0, 0
158 //r->x = rxy[0];
159 //r->y = rxy[1];
160 __PSQ_STX(r, 0, rxy, 0, 0);
161
162 //psq_l pzw, 8(p), 0, 0
163 //pzw[0] = p->z;
164 //pzw[1] = p->w;
165 pzw = __PSQ_LX(p, 8, 0, 0);
166
167 //psq_l qzw, 8(q), 0, 0
168 //qzw[0] = q->z;
169 //qzw[1] = q->w;
170 qzw = __PSQ_LX(q, 8, 0, 0);
171
172 //ps_sub rzw, pzw, qzw
173 rzw = __PS_SUB(pzw, qzw);
174
175 //psq_st rzw, 8(r), 0, 0
176 //r->z = rzw[0];
177 //r->w = rzw[1];
178 __PSQ_STX(r, 8, rzw, 0, 0);
179
180 }
181 #endif
182
183 /*---------------------------------------------------------------------------*
184 Name: QUATMultiply
185
186 Description: Returns the product of two quaternions. The order of
187 multiplication is important. (p*q != q*p)
188
189 Arguments: p - left quaternion
190 q - right quaternion
191 pq - resultant quaternion product p*q
192
193 Returns: none
194 *---------------------------------------------------------------------------*/
195 /*---------------------------------------------------------------------------*
196 C version
197 *---------------------------------------------------------------------------*/
C_QUATMultiply(const Quaternion * p,const Quaternion * q,Quaternion * pq)198 void C_QUATMultiply( const Quaternion *p, const Quaternion *q, Quaternion *pq )
199 {
200 Quaternion *r;
201 Quaternion pqTmp;
202
203 ASSERTMSG( ( p != 0 ), QUAT_MULTIPLY_1 );
204 ASSERTMSG( ( q != 0 ), QUAT_MULTIPLY_2 );
205 ASSERTMSG( ( pq != 0 ), QUAT_MULTIPLY_3 );
206
207 if ( p == pq || q == pq )
208 {
209 r = &pqTmp;
210 }
211 else
212 {
213 r = pq;
214 }
215
216 r->w = p->w*q->w - p->x*q->x - p->y*q->y - p->z*q->z;
217 r->x = p->w*q->x + p->x*q->w + p->y*q->z - p->z*q->y;
218 r->y = p->w*q->y + p->y*q->w + p->z*q->x - p->x*q->z;
219 r->z = p->w*q->z + p->z*q->w + p->x*q->y - p->y*q->x;
220
221 if ( r == &pqTmp )
222 {
223 *pq = pqTmp;
224 }
225 }
226
227 #if !defined(WIN32) && !defined(WIN64)
228 /*---------------------------------------------------------------------------*
229 Paired-Single intrinsics version
230 *---------------------------------------------------------------------------*
231 Note that this performs NO error checking.
232 *---------------------------------------------------------------------------*/
233
PSQUATMultiply(const Quaternion * p,const Quaternion * q,Quaternion * pq)234 void PSQUATMultiply( const Quaternion *p, const Quaternion *q, Quaternion *pq )
235 {
236 f32x2 pxy, pzw, qxy, qzw;
237 f32x2 pnxy, pnzw, pnxny, pnznw;
238 f32x2 rxy, rzw, sxy, szw;
239
240 // [px][py] : Load
241 //psq_l pxy, 0(p), 0, 0
242 //pxy[0] = p->x;
243 //pxy[1] = p->y;
244 pxy = __PSQ_LX(p, 0, 0, 0);
245
246 // [pz][pw] : Load
247 //psq_l pzw, 8(p), 0, 0
248 //pzw[0] = p->z;
249 //pzw[1] = p->w;
250 pzw = __PSQ_LX(p, 8, 0, 0);
251
252 // [qx][qy] : Load
253 //psq_l qxy, 0(q), 0, 0
254 //qxy[0] = q->x;
255 //qxy[1] = q->y;
256 qxy = __PSQ_LX(q, 0, 0, 0);
257
258 // [-px][-py]
259 //ps_neg pnxny, pxy
260 pnxny = __PS_NEG(pxy);
261
262 // [qz][qw] : Load
263 //psq_l qzw, 8(q), 0, 0
264 //qzw[0] = q->z;
265 //qzw[1] = q->w;
266 qzw = __PSQ_LX(q, 8, 0, 0);
267
268 // [-pz][-pw]
269 //ps_neg pnznw, pzw
270 pnznw = __PS_NEG(pzw);
271
272 // [-px][py]
273 //ps_merge01 pnxy, pnxny, pxy
274 pnxy = __PS_MERGE01(pnxny, pxy);
275
276 // [pz*qx][pw*qx]
277 //ps_muls0 rxy, pzw, qxy
278 rxy = __PS_MULS0(pzw, qxy);
279
280 // [-px*qx][-py*qx]
281 //ps_muls0 rzw, pnxny, qxy
282 rzw = __PS_MULS0(pnxny, qxy);
283
284 // [-pz][pw]
285 //ps_merge01 pnzw, pnznw, pzw
286 pnzw = __PS_MERGE01(pnznw, pzw);
287
288 // [-px*qy][py*qy]
289 //ps_muls1 szw, pnxy, qxy
290 szw = __PS_MULS1(pnxy, qxy);
291
292 // [pz*qx-px*qz][pw*qx+py*qz]
293 //ps_madds0 rxy, pnxy, qzw, rxy
294 rxy = __PS_MADDS0(pnxy, qzw, rxy);
295
296 // [-pz*qy][pw*qy]
297 //ps_muls1 sxy, pnzw, qxy
298 sxy = __PS_MULS1(pnzw, qxy);
299
300 // [-px*qx-pz*qz][-py*qx+pw*qz]
301 //ps_madds0 rzw, pnzw, qzw, rzw
302 rzw = __PS_MADDS0(pnzw, qzw, rzw);
303
304 // [-px*qy-pz*qw][py*qy-pw*qw]
305 //ps_madds1 szw, pnznw, qzw, szw
306 szw = __PS_MADDS1(pnznw, qzw, szw);
307
308 // [pw*qx+py*qz][pz*qx-px*qz]
309 //ps_merge10 rxy, rxy, rxy
310 rxy = __PS_MERGE10(rxy, rxy);
311
312 // [-pz*qy+px*qw][pw*qy+py*qw]
313 //ps_madds1 sxy, pxy, qzw, sxy
314 sxy = __PS_MADDS1(pxy, qzw, sxy);
315
316 // [-py*qx+pw*qz][-px*qx-pz*qz]
317 //ps_merge10 rzw, rzw, rzw
318 rzw = __PS_MERGE10(rzw, rzw);
319
320 // [pw*qx+py*qz-pz*qy+px*qw][pz*qx-px*qz+pw*qy+py*qw] : [pqx][pqy]
321 //ps_add rxy, rxy, sxy
322 rxy = __PS_ADD(rxy, sxy);
323
324 // [pqx][pqy] : Store
325 //psq_st rxy, 0(pq), 0, 0
326 //pq->x = rxy[0];
327 //pq->y = rxy[1];
328 __PSQ_STX(pq, 0, rxy, 0, 0);
329
330 // [-py*qx+pw*qz+px*qy+pz*qw][-px*qx-pz*qz-py*qy+pw*qw] : [pqz][pqw]
331 //ps_sub rzw, rzw, szw
332 rzw = __PS_SUB(rzw, szw);
333
334 // [pqz][pqw] : Store
335 //psq_st rzw, 8(pq), 0, 0
336 //pq->z = rzw[0];
337 //pq->w = rzw[1];
338 __PSQ_STX(pq, 8, rzw, 0, 0);
339 }
340 #endif
341
342 /*---------------------------------------------------------------------------*
343 Name: QUATScale
344
345 Description: Scales a quaternion.
346
347 Arguments: q - quaternion
348 r - resultant scaled quaternion
349 scale - float to scale by
350
351 Returns: none
352 *---------------------------------------------------------------------------*/
353 /*---------------------------------------------------------------------------*
354 C version
355 *---------------------------------------------------------------------------*/
C_QUATScale(const Quaternion * q,Quaternion * r,f32 scale)356 void C_QUATScale( const Quaternion *q, Quaternion *r, f32 scale )
357 {
358 ASSERTMSG( ( q != 0 ), QUAT_SCALE_1 );
359 ASSERTMSG( ( r != 0 ), QUAT_SCALE_2 );
360
361 r->x = q->x * scale;
362 r->y = q->y * scale;
363 r->z = q->z * scale;
364 r->w = q->w * scale;
365 }
366
367 #if !defined(WIN32) && !defined(WIN64)
368 /*---------------------------------------------------------------------------*
369 Paired-Single intrinsics version
370 *---------------------------------------------------------------------------*
371 Note that this performs NO error checking.
372 *---------------------------------------------------------------------------*/
373
PSQUATScale(const Quaternion * q,Quaternion * r,f32 scale)374 void PSQUATScale( const Quaternion *q, Quaternion *r, f32 scale )
375 {
376 f32x2 rxy, rzw;
377 f32x2 scale2 = {scale, scale};
378
379 //psq_l rxy, 0(q), 0, 0
380 //rxy[0] = q->x;
381 //rxy[1] = q->y;
382 rxy = __PSQ_LX(q, 0, 0, 0);
383
384 //psq_l rzw, 8(q), 0, 0
385 //rzw[0] = q->z;
386 //rzw[1] = q->w;
387 rzw = __PSQ_LX(q, 8, 0, 0);
388
389 //ps_muls0 rxy, rxy, scale
390 rxy = __PS_MULS0(rxy, scale2);
391
392 //psq_st rxy, 0(r), 0, 0
393 //r->x = rxy[0];
394 //r->y = rxy[1];
395 __PSQ_STX(r, 0, rxy, 0, 0);
396
397 //ps_muls0 rzw, rzw, scale
398 rzw = __PS_MULS0(rzw, scale2);
399
400 //psq_st rzw, 8(r), 0, 0
401 //r->z = rzw[0];
402 //r->w = rzw[1];
403 __PSQ_STX(r, 8, rzw, 0, 0);
404 }
405 #endif
406
407 /*---------------------------------------------------------------------------*
408 Name: QUATDotProduct
409
410 Description: Returns the dot product of the two quaternions.
411
412 Arguments: p - first quaternion
413 q - second quaternion
414
415 Returns: Dot product
416 *---------------------------------------------------------------------------*/
417 /*---------------------------------------------------------------------------*
418 C version
419 *---------------------------------------------------------------------------*/
C_QUATDotProduct(const Quaternion * p,const Quaternion * q)420 f32 C_QUATDotProduct( const Quaternion *p, const Quaternion *q )
421 {
422 ASSERTMSG( ( p != 0 ), QUAT_DOTPRODUCT_1 );
423 ASSERTMSG( ( q != 0 ), QUAT_DOTPRODUCT_2 );
424
425 return (q->x*p->x + q->y*p->y + q->z*p->z + q->w*p->w);
426 }
427
428 #if !defined(WIN32) && !defined(WIN64)
429 /*---------------------------------------------------------------------------*
430 Paired-Single intrinsics version
431 *---------------------------------------------------------------------------*
432 Note that this performs NO error checking.
433 *---------------------------------------------------------------------------*/
434
PSQUATDotProduct(const Quaternion * p,const Quaternion * q)435 f32 PSQUATDotProduct( const Quaternion *p, const Quaternion *q )
436 {
437 f32x2 pxy, pzw, qxy, qzw, dp;
438
439 //psq_l pxy, 0(p), 0, 0
440 //pxy[0] = p->x;
441 //pxy[1] = p->y;
442 pxy = __PSQ_LX(p, 0, 0, 0);
443
444 //psq_l qxy, 0(q), 0, 0
445 //qxy[0] = q->x;
446 //qxy[1] = q->y;
447 qxy = __PSQ_LX(q, 0, 0, 0);
448
449 //ps_mul dp, pxy, qxy
450 dp = __PS_MUL(pxy, qxy);
451
452 //psq_l pzw, 8(p), 0, 0
453 //pzw[0] = p->z;
454 //pzw[1] = p->w;
455 pzw = __PSQ_LX(p, 8, 0, 0);
456
457 //psq_l qzw, 8(q), 0, 0
458 //qzw[0] = q->z;
459 //qzw[1] = q->w;
460 qzw = __PSQ_LX(q, 8, 0, 0);
461
462 //ps_madd dp, pzw, qzw, dp
463 dp = __PS_MADD(pzw, qzw, dp);
464
465 //ps_sum0 dp, dp, dp, dp
466 dp = __PS_SUM0(dp, dp, dp);
467
468 return (f32)dp[0];
469 }
470 #endif
471
472 /*---------------------------------------------------------------------------*
473 Name: QUATNormalize
474
475 Description: Normalizes a quaternion
476
477 Arguments: src - the source quaternion
478 unit - resulting unit quaternion
479
480 Returns: none
481 *---------------------------------------------------------------------------*/
482 /*---------------------------------------------------------------------------*
483 C version
484 *---------------------------------------------------------------------------*/
C_QUATNormalize(const Quaternion * src,Quaternion * unit)485 void C_QUATNormalize( const Quaternion *src, Quaternion *unit )
486 {
487 f32 mag;
488
489 ASSERTMSG( ( src != 0 ), QUAT_NORMALIZE_1 );
490 ASSERTMSG( ( unit != 0 ), QUAT_NORMALIZE_2 );
491
492 mag = (src->x * src->x) + (src->y * src->y) + (src->z * src->z) + (src->w * src->w);
493
494 if ( mag >= QUAT_EPSILON )
495 {
496 mag = 1.0F / sqrtf(mag);
497
498 unit->x = src->x * mag;
499 unit->y = src->y * mag;
500 unit->z = src->z * mag;
501 unit->w = src->w * mag;
502 }
503 else
504 {
505 unit->x = unit->y = unit->z = unit->w = 0.0F;
506 }
507 }
508
509 #if !defined(WIN32) && !defined(WIN64)
510 /*---------------------------------------------------------------------------*
511 Paired-Single intrinsics version
512 *---------------------------------------------------------------------------*
513 Note that this performs NO error checking.
514 *---------------------------------------------------------------------------*/
515
PSQUATNormalize(const Quaternion * src,Quaternion * unit)516 void PSQUATNormalize( const Quaternion *src, Quaternion *unit )
517 {
518 f32x2 sxy, szw;
519 f32x2 mag, rsqmag, diff;
520 f32x2 nwork0, nwork1;
521
522 //psq_l sxy, 0(src), 0, 0
523 //sxy[0] = src->x;
524 //sxy[1] = src->y;
525 sxy = __PSQ_LX(src, 0, 0, 0);
526
527 // mag = [x*x][y*y]
528 //ps_mul mag, sxy, sxy
529 mag = __PS_MUL(sxy, sxy);
530
531 //psq_l szw, 8(src), 0, 0
532 //szw[0] = src->z;
533 //szw[1] = src->w;
534 szw = __PSQ_LX(src, 8, 0, 0);
535
536 // c00 = [0.0F]
537 //ps_sub c00, epsilon, epsilon
538 // mag = [x*x+z*z][y*y+w*w]
539 //ps_madd mag, szw, szw, mag
540 mag = __PS_MADD(szw, szw, mag);
541
542 // mag = [x*x+y*y+z*z+w*w][N/A]
543 //ps_sum0 mag, mag, mag, mag
544 mag = __PS_SUM0(mag, mag, mag);
545
546 // rsqmag = 1.0F / sqrtf(mag) : estimation
547 //frsqrte rsqmag, mag
548 rsqmag = __PS_RSQRTE(mag);
549
550 // diff = mag - epsilon
551 //ps_sub diff, mag, epsilon
552 diff = __PS_SUB(mag, epsilon);
553
554 // Newton-Rapson refinement (x1) : E' = (E/2)(3 - X * E * E)
555 //fmul nwork0, rsqmag, rsqmag
556 nwork0 = __PS_MUL(rsqmag, rsqmag);
557
558 //fmul nwork1, rsqmag, c0505
559 nwork1 = __PS_MUL(rsqmag, c0505);
560
561 //fnmsub nwork0, nwork0, mag, c33
562 nwork0 = __PS_NMSUB(nwork0, mag, c33);
563
564 //fmul rsqmag, nwork0, nwork1
565 rsqmag = __PS_MUL(nwork0, nwork1);
566
567 // rsqmag = ( mag >= epsilon ) ? rsqmag : 0
568 //ps_sel rsqmag, diff, rsqmag, c00
569 rsqmag = __PS_SEL(diff, rsqmag, c00);
570
571 // sxy = [x*rsqmag][y*rsqmag]
572 //ps_muls0 sxy, sxy, rsqmag
573 sxy = __PS_MULS0(sxy, rsqmag);
574
575 // szw = [z*rsqmag][w*rsqmag]
576 //ps_muls0 szw, szw, rsqmag
577 szw = __PS_MULS0(szw, rsqmag);
578
579 //psq_st sxy, 0(unit), 0, 0
580 //unit->x = sxy[0];
581 //unit->y = sxy[1];
582 __PSQ_STX(unit, 0, sxy, 0, 0);
583
584 //psq_st szw, 8(unit), 0, 0
585 //unit->z = szw[0];
586 //unit->w = szw[1];
587 __PSQ_STX(unit, 8, szw, 0, 0);
588 }
589 #endif
590
591 /*---------------------------------------------------------------------------*
592 Name: QUATInverse
593
594 Description: Returns the inverse of the quaternion.
595
596 Arguments: src - the source quaternion
597 inv - resulting inverse quaternion
598
599 Returns: none
600 *---------------------------------------------------------------------------*/
601 /*---------------------------------------------------------------------------*
602 C version
603 *---------------------------------------------------------------------------*/
C_QUATInverse(const Quaternion * src,Quaternion * inv)604 void C_QUATInverse( const Quaternion *src, Quaternion *inv )
605 {
606 f32 mag, norminv;
607
608 ASSERTMSG( ( src != 0 ), QUAT_INVERSE_1 );
609 ASSERTMSG( ( inv != 0 ), QUAT_INVERSE_2 );
610
611 mag = ( src->x*src->x + src->y*src->y + src->z*src->z + src->w*src->w );
612
613 if ( mag == 0.0f )
614 {
615 mag = 1.0f;
616 }
617
618 // [Inverse] = [Conjugate] / [Magnitude]
619 norminv = 1.0f / mag;
620 inv->x = -src->x * norminv;
621 inv->y = -src->y * norminv;
622 inv->z = -src->z * norminv;
623 inv->w = src->w * norminv;
624 }
625
626 #if !defined(WIN32) && !defined(WIN64)
627 /*---------------------------------------------------------------------------*
628 Paired-Single intrinsics version
629 *---------------------------------------------------------------------------*
630 Note that this performs NO error checking.
631 *---------------------------------------------------------------------------*/
632
PSQUATInverse(const Quaternion * src,Quaternion * inv)633 void PSQUATInverse( const Quaternion *src, Quaternion *inv )
634 {
635 f32x2 sxy, szw, izz, iww;
636 f32x2 mag, norminv, nninv, nwork0;
637
638 // load xy
639 //psq_l sxy, 0(src), 0, 0
640 //sxy[0] = src->x;
641 //sxy[1] = src->y;
642 sxy = __PSQ_LX(src, 0, 0, 0);
643
644 // mag = [x*x][y*y]
645 //ps_mul mag, sxy, sxy
646 mag = __PS_MUL(sxy, sxy);
647
648 // c00 = [0.0F]
649 //ps_sub c00, c11, c11
650
651 // load zw
652 //psq_l szw, 8(src), 0, 0
653 //szw[0] = src->z;
654 //szw[1] = src->w;
655 szw = __PSQ_LX(src, 8, 0, 0);
656
657 // mag = [x*x+z*z][y*y+w*w]
658 //ps_madd mag, szw, szw, mag
659 mag = __PS_MADD(szw, szw, mag);
660
661 // c22 = [2.0F]
662 //ps_add c22, c11, c11
663
664 // mag = [x*x+y*y+z*z+w*w][N/A]
665 //ps_sum0 mag, mag, mag, mag
666 mag = __PS_SUM0(mag, mag, mag);
667
668 // zero check
669 if ( mag[0] == 0.0f )
670 {
671 mag[0] = mag[1] = 1.0f;
672 }
673
674 // norminv = 1.0F / mag
675 //fres norminv, mag
676 norminv = __PS_RES(mag);
677
678 // Newton-Rapson refinment (x1) : E' = 2E-X*E*E
679 //ps_nmsub nwork0, mag, norminv, c22
680 nwork0 = __PS_NMSUB(mag, norminv, c22);
681
682 //ps_mul norminv, norminv, nwork0
683 norminv = __PS_MUL(norminv, nwork0);
684
685 // nninv = [ -norminv ]
686 //ps_neg nninv, norminv
687 nninv = __PS_NEG(norminv);
688
689 // iww = [ w*norminv ][ N/A ]
690 //ps_muls1 iww, norminv, szw
691 iww = __PS_MULS1(norminv, szw);
692
693 // sxy = [ -x*norminv ][ -y*norminv ]
694 //ps_muls0 sxy, sxy, nninv
695 sxy = __PS_MULS0(sxy, nninv);
696
697 // store w
698 //psq_st iww, 12(inv), 1, 0
699 //inv->w = iww[0];
700 __PSQ_STX(inv, 12, iww, 1, 0);
701
702 // izz = [ -z*norminv ][ N/A ]
703 //ps_muls0 izz, szw, nninv
704 izz = __PS_MULS0(szw, nninv);
705
706 // store xy
707 //psq_st sxy, 0(inv), 0, 0
708 //inv->x = sxy[0];
709 //inv->y = sxy[1];
710 __PSQ_STX(inv, 0, sxy, 0, 0);
711
712 // store z
713 //psq_st izz, 8(inv), 1, 0
714 //inv->z = izz[0];
715 __PSQ_STX(inv, 8, izz, 1, 0);
716 }
717 #endif
718
719 /*---------------------------------------------------------------------------*
720 Name: QUATDivide
721
722 Description: Returns the ratio of two quaternions. Creates a result
723 r = p/q such that q*r=p (order of multiplication is important).
724
725 Arguments: p - left quaternion
726 q - right quaternion
727 r - resultant ratio quaterion p/q
728
729 Returns: none
730 *---------------------------------------------------------------------------*/
731 /*---------------------------------------------------------------------------*
732 C version
733 *---------------------------------------------------------------------------*/
C_QUATDivide(const Quaternion * p,const Quaternion * q,Quaternion * r)734 void C_QUATDivide( const Quaternion *p, const Quaternion *q, Quaternion *r )
735 {
736 Quaternion qtmp;
737
738 ASSERTMSG( ( p != 0 ), QUAT_DIVIDE_1 );
739 ASSERTMSG( ( q != 0 ), QUAT_DIVIDE_2 );
740 ASSERTMSG( ( r != 0 ), QUAT_DIVIDE_3 );
741
742 C_QUATInverse(q, &qtmp);
743 C_QUATMultiply(&qtmp, p, r);
744 }
745
746 #if !defined(WIN32) && !defined(WIN64)
747 /*---------------------------------------------------------------------------*
748 Paired-Single intrinsics version
749 *---------------------------------------------------------------------------*
750 Note that this performs NO error checking.
751 *---------------------------------------------------------------------------*/
752
PSQUATDivide(const Quaternion * p,const Quaternion * q,Quaternion * r)753 void PSQUATDivide( const Quaternion *p, const Quaternion *q, Quaternion *r )
754 {
755 Quaternion qtmp;
756
757 PSQUATInverse(q, &qtmp);
758 PSQUATMultiply(&qtmp, p, r);
759 }
760 #endif
761
762 /*---------------------------------------------------------------------------*
763 Name: QUATExp
764
765 Description: Exponentiate quaternion (where q.w == 0).
766
767 Arguments: q - pure quaternion
768 r - resultant exponentiate quaternion (an unit quaternion)
769
770 Returns: none
771 *---------------------------------------------------------------------------*/
C_QUATExp(const Quaternion * q,Quaternion * r)772 void C_QUATExp( const Quaternion *q, Quaternion *r )
773 {
774 f32 theta, scale;
775
776 ASSERTMSG( ( q != 0 ), QUAT_EXP_1 );
777 ASSERTMSG( ( r != 0 ), QUAT_EXP_2 );
778
779 // pure quaternion check
780 ASSERTMSG( ( q->w == 0.0F ), QUAT_EXP_3 );
781
782 theta = sqrtf( q->x*q->x + q->y*q->y + q->z*q->z );
783 scale = 1.0F;
784
785 if ( theta > QUAT_EPSILON )
786 scale = (f32)sinf(theta)/theta;
787
788 r->x = scale * q->x;
789 r->y = scale * q->y;
790 r->z = scale * q->z;
791 r->w = (f32)cosf(theta);
792 }
793
794
795 /*---------------------------------------------------------------------------*
796 Name: QUATLogN
797
798 Description: Returns the natural logarithm of a UNIT quaternion
799
800 Arguments: q - unit quaternion
801 r - resultant logarithm quaternion (an pure quaternion)
802
803 Returns: none
804 *---------------------------------------------------------------------------*/
C_QUATLogN(const Quaternion * q,Quaternion * r)805 void C_QUATLogN( const Quaternion *q, Quaternion *r )
806 {
807 f32 theta,scale;
808
809 ASSERTMSG( ( q != 0 ), QUAT_LOGN_1 );
810 ASSERTMSG( ( r != 0 ), QUAT_LOGN_2 );
811
812 scale = q->x*q->x + q->y*q->y + q->z*q->z;
813
814 // unit quaternion check
815 #ifdef _DEBUG
816 {
817 f32 mag;
818 mag = scale + q->z*q->z;
819 if ( mag < 1.0F - QUAT_EPSILON || mag > 1.0F + QUAT_EPSILON )
820 {
821 ASSERT( QUAT_LOGN_3 );
822 }
823 }
824 #endif
825
826 scale = sqrtf(scale);
827 theta = atan2f( scale, q->w );
828
829 if ( scale > 0.0F )
830 scale = theta/scale;
831
832 r->x = scale*q->x;
833 r->y = scale*q->y;
834 r->z = scale*q->z;
835 r->w = 0.0F;
836
837 }
838
839
840 /*---------------------------------------------------------------------------*
841 Name: QUATMakeClosest
842
843 Description: Modify q so it is on the same side of the hypersphere as qto
844
845 Arguments: q - quaternion
846 qto - quaternion to be close to
847 r - resultant modified quaternion
848
849 Returns: NONE
850 *---------------------------------------------------------------------------*/
C_QUATMakeClosest(const Quaternion * q,const Quaternion * qto,Quaternion * r)851 void C_QUATMakeClosest( const Quaternion *q, const Quaternion *qto, Quaternion *r )
852 {
853 f32 dot;
854
855 ASSERTMSG( ( q != 0 ), QUAT_MAKECLOSEST_1 );
856 ASSERTMSG( ( qto != 0 ), QUAT_MAKECLOSEST_2 );
857 ASSERTMSG( ( r != 0 ), QUAT_MAKECLOSEST_3 );
858
859 dot = q->x*qto->x + q->y*qto->y + q->z*qto->z + q->w*qto->w;
860
861 if ( dot < 0.0f )
862 {
863 r->x = -q->x;
864 r->y = -q->y;
865 r->z = -q->z;
866 r->w = -q->w;
867 }
868 else
869 {
870 *r = *q;
871 }
872 }
873
874
875 /*---------------------------------------------------------------------------*
876 Name: QUATRotAxisRad
877
878 Description: Returns rotation quaternion about arbitrary axis.
879
880 Arguments: r - resultant rotation quaternion
881 axis - rotation axis
882 rad - rotation angle in radians
883
884 Returns: NONE
885 *---------------------------------------------------------------------------*/
C_QUATRotAxisRad(Quaternion * r,const Vec * axis,f32 rad)886 void C_QUATRotAxisRad( Quaternion *r, const Vec *axis, f32 rad )
887 {
888 f32 half, sh, ch;
889 Vec nAxis;
890
891 ASSERTMSG( ( r != 0 ), QUAT_ROTAXISRAD_1 );
892 ASSERTMSG( ( axis != 0 ), QUAT_ROTAXISRAD_2 );
893
894 VECNormalize(axis, &nAxis);
895
896 half = rad * 0.50F;
897 sh = sinf(half);
898 ch = cosf(half);
899
900 r->x = sh * nAxis.x;
901 r->y = sh * nAxis.y;
902 r->z = sh * nAxis.z;
903 r->w = ch;
904 }
905
906
907 /*---------------------------------------------------------------------------*
908 Name: QUATMtx
909
910 Description: Converts a matrix to a unit quaternion.
911
912 Arguments: r - result quaternion
913 m - the matrix
914
915 Returns: NONE
916 *---------------------------------------------------------------------------*/
C_QUATMtx(Quaternion * r,MTX_CONST Mtx m)917 void C_QUATMtx( Quaternion *r, MTX_CONST Mtx m )
918 {
919 f32 tr,s;
920 s32 i,j,k;
921 s32 nxt[3] = {1,2,0};
922 f32 q[3];
923
924 ASSERTMSG( ( r != 0 ), QUAT_MTX_1 );
925 ASSERTMSG( ( m != 0 ), QUAT_MTX_2 );
926
927 tr = m[0][0] + m[1][1] + m[2][2];
928 if( tr > 0.0f )
929 {
930 s = (f32)sqrtf(tr + 1.0f);
931 r->w = s * 0.5f;
932 s = 0.5f / s;
933 r->x = (m[2][1] - m[1][2]) * s;
934 r->y = (m[0][2] - m[2][0]) * s;
935 r->z = (m[1][0] - m[0][1]) * s;
936 }
937 else
938 {
939 i = 0;
940 if (m[1][1] > m[0][0]) i = 1;
941 if (m[2][2] > m[i][i]) i = 2;
942 j = nxt[i];
943 k = nxt[j];
944 s = (f32)sqrtf( (m[i][i] - (m[j][j] + m[k][k])) + 1.0f );
945 q[i] = s * 0.5f;
946
947 if (s!=0.0f)
948 s = 0.5f / s;
949
950 r->w = (m[k][j] - m[j][k]) * s;
951 q[j] = (m[i][j] + m[j][i]) * s;
952 q[k] = (m[i][k] + m[k][i]) * s;
953
954 r->x = q[0];
955 r->y = q[1];
956 r->z = q[2];
957 }
958 }
959
960
961 /*---------------------------------------------------------------------------*
962 Name: QUATLerp
963
964 Description: Linear interpolation of two quaternions.
965
966 Arguments: p - first quaternion
967 q - second quaternion
968 r - resultant interpolated quaternion
969 t - interpolation parameter
970
971 Returns: NONE
972 *---------------------------------------------------------------------------*/
C_QUATLerp(const Quaternion * p,const Quaternion * q,Quaternion * r,f32 t)973 void C_QUATLerp( const Quaternion *p, const Quaternion *q, Quaternion *r, f32 t )
974 {
975 ASSERTMSG( ( p != 0 ), QUAT_LERP_1 );
976 ASSERTMSG( ( q != 0 ), QUAT_LERP_2 );
977 ASSERTMSG( ( r != 0 ), QUAT_LERP_3 );
978
979 r->x = t * ( q->x - p->x ) + p->x;
980 r->y = t * ( q->y - p->y ) + p->y;
981 r->z = t * ( q->z - p->z ) + p->z;
982 r->w = t * ( q->w - p->w ) + p->w;
983 }
984
985
986 /*---------------------------------------------------------------------------*
987 Name: QUATSlerp
988
989 Description: Spherical linear interpolation of two quaternions.
990
991 Arguments: p - first quaternion
992 q - second quaternion
993 r - resultant interpolated quaternion
994 t - interpolation parameter
995
996 Returns: NONE
997 *---------------------------------------------------------------------------*/
C_QUATSlerp(const Quaternion * p,const Quaternion * q,Quaternion * r,f32 t)998 void C_QUATSlerp( const Quaternion *p, const Quaternion *q, Quaternion *r, f32 t )
999 {
1000 f32 theta, sin_th, cos_th, tp, tq;
1001
1002 ASSERTMSG( ( p != 0 ), QUAT_SLERP_1 );
1003 ASSERTMSG( ( q != 0 ), QUAT_SLERP_2 );
1004 ASSERTMSG( ( r != 0 ), QUAT_SLERP_3 );
1005
1006 cos_th = p->x * q->x + p->y * q->y + p->z * q->z + p->w * q->w;
1007 tq = 1.0F;
1008
1009 if ( cos_th < 0.0F )
1010 {
1011 cos_th = -cos_th;
1012 tq = -tq;
1013 }
1014
1015 if ( cos_th <= 1.0F - QUAT_EPSILON )
1016 {
1017 theta = acosf(cos_th);
1018 sin_th = sinf(theta);
1019 tp = sinf((1.0F - t) * theta) / sin_th;
1020 tq *= sinf( t * theta ) / sin_th;
1021 }
1022 else
1023 {
1024 // cos(theta) is close to 1.0F -> linear
1025 tp = 1.0F - t;
1026 tq = tq * t;
1027 }
1028
1029 r->x = tp * p->x + tq * q->x;
1030 r->y = tp * p->y + tq * q->y;
1031 r->z = tp * p->z + tq * q->z;
1032 r->w = tp * p->w + tq * q->w;
1033
1034 }
1035
1036
1037 /*---------------------------------------------------------------------------*
1038 Name: QUATSquad
1039
1040 Description: Spherical cubic quadrangle interpolation of two quaternions
1041 with derrived inner-quadrangle quaternions.
1042
1043 This will be used with the function QUATCompA().
1044
1045 Arguments: p - first quaternion
1046 a - derrived inner-quadrangle quaternion
1047 b - derrived inner-quadrangle quaternion
1048 q - second quaternion
1049 r - resultant interpolated quaternion
1050 t - interpolation parameter
1051
1052 Returns: NONE
1053 *---------------------------------------------------------------------------*/
C_QUATSquad(const Quaternion * p,const Quaternion * a,const Quaternion * b,const Quaternion * q,Quaternion * r,f32 t)1054 void C_QUATSquad( const Quaternion *p, const Quaternion *a, const Quaternion *b,
1055 const Quaternion *q, Quaternion *r, f32 t )
1056 {
1057 Quaternion pq, ab;
1058 f32 t2;
1059
1060 ASSERTMSG( ( p != 0 ), QUAT_SQUAD_1 );
1061 ASSERTMSG( ( a != 0 ), QUAT_SQUAD_2 );
1062 ASSERTMSG( ( b != 0 ), QUAT_SQUAD_3 );
1063 ASSERTMSG( ( q != 0 ), QUAT_SQUAD_4 );
1064 ASSERTMSG( ( r != 0 ), QUAT_SQUAD_5 );
1065
1066 t2 = 2 * t * ( 1.0F - t );
1067 C_QUATSlerp(p, q, &pq, t);
1068 C_QUATSlerp(a, b, &ab, t);
1069 C_QUATSlerp(&pq, &ab, r, t2);
1070 }
1071
1072
1073 /*---------------------------------------------------------------------------*
1074 Name: QUATCompA
1075
1076 Description: Compute a, the term used in Boehm-type interpolation
1077 a[n] = q[n]* qexp(-(1/4)*( ln(qinv(q[n])*q[n+1]) +
1078 ln( qinv(q[n])*q[n-1] )))
1079
1080 Arguments: qprev - previous quaternion
1081 q - current quaternion
1082 qnext - next quaternion
1083 a - result quaternion A
1084
1085 Returns: none
1086 ---------------------------------------------------------------------------*/
C_QUATCompA(const Quaternion * qprev,const Quaternion * q,const Quaternion * qnext,Quaternion * a)1087 void C_QUATCompA( const Quaternion *qprev, const Quaternion *q, const Quaternion *qnext, Quaternion *a )
1088 {
1089 Quaternion qm, qp, lqm, lqp, qpqm, exq;
1090
1091 ASSERTMSG( ( qprev != 0 ), QUAT_COMPA_1 );
1092 ASSERTMSG( ( q != 0 ), QUAT_COMPA_2 );
1093 ASSERTMSG( ( qnext != 0 ), QUAT_COMPA_3 );
1094 ASSERTMSG( ( a != 0 ), QUAT_COMPA_4 );
1095
1096 C_QUATDivide(qprev, q, &qm);
1097 C_QUATLogN(&qm, &lqm);
1098 C_QUATDivide(qnext, q, &qp);
1099 C_QUATLogN(&qp, &lqp);
1100
1101 C_QUATAdd(&lqp, &lqm, &qpqm);
1102 C_QUATScale(&qpqm, &qpqm, -0.25F);
1103
1104 C_QUATExp(&qpqm, &exq);
1105 C_QUATMultiply(q, &exq, a);
1106 }
1107
1108 /*---------------------------------------------------------------------*
1109
1110 Name: MTXQuat
1111
1112 Description: sets a rotation matrix from a quaternion.
1113
1114
1115 Arguments: m matrix to be set
1116
1117 q ptr to quaternion.
1118
1119 Return: none
1120
1121 *---------------------------------------------------------------------*/
1122 /*---------------------------------------------------------------------*
1123 C version
1124 *---------------------------------------------------------------------*/
C_MTXQuat(Mtx m,const Quaternion * q)1125 void C_MTXQuat ( Mtx m, const Quaternion *q )
1126 {
1127 f32 s,xs,ys,zs,wx,wy,wz,xx,xy,xz,yy,yz,zz;
1128
1129 ASSERTMSG( (m != 0), MTX_QUAT_1 );
1130 ASSERTMSG( (q != 0), MTX_QUAT_2 );
1131 ASSERTMSG( ( q->x || q->y || q->z || q->w ), MTX_QUAT_3 );
1132
1133 s = 2.0f / ( (q->x * q->x) + (q->y * q->y) + (q->z * q->z) + (q->w * q->w) );
1134
1135 xs = q->x * s; ys = q->y * s; zs = q->z * s;
1136 wx = q->w * xs; wy = q->w * ys; wz = q->w * zs;
1137 xx = q->x * xs; xy = q->x * ys; xz = q->x * zs;
1138 yy = q->y * ys; yz = q->y * zs; zz = q->z * zs;
1139
1140 m[0][0] = 1.0f - (yy + zz);
1141 m[0][1] = xy - wz;
1142 m[0][2] = xz + wy;
1143 m[0][3] = 0.0f;
1144
1145 m[1][0] = xy + wz;
1146 m[1][1] = 1.0f - (xx + zz);
1147 m[1][2] = yz - wx;
1148 m[1][3] = 0.0f;
1149
1150 m[2][0] = xz - wy;
1151 m[2][1] = yz + wx;
1152 m[2][2] = 1.0f - (xx + yy);
1153 m[2][3] = 0.0f;
1154 }
1155
1156 #if !defined(WIN32) && !defined(WIN64)
1157 /*---------------------------------------------------------------------*
1158 Paired-Single intrinsics version
1159 *---------------------------------------------------------------------*
1160 Note that this performs NO error checking.
1161 *---------------------------------------------------------------------*/
PSMTXQuat(Mtx m,const Quaternion * q)1162 void PSMTXQuat ( Mtx m, const Quaternion *q )
1163 {
1164 f32x2 scale;
1165 f32x2 V_XY;
1166 f32x2 V_ZW;
1167 f32x2 V_YX;
1168
1169 f32x2 D0;
1170 f32x2 D1;
1171 f32x2 tmp2, tmp3, tmp4; //tmp1,
1172 f32x2 tmp5, tmp6, tmp7, tmp8, tmp9;
1173
1174 // V_XY = [qx][qy] : LOAD
1175 //V_XY[0] = q->x;
1176 //V_XY[1] = q->y;
1177 V_XY = __PSQ_LX(q, 0, 0, 0);
1178
1179 // V_ZW = [qz][qw] : LOAD
1180 //V_ZW[0] = q->z;
1181 //V_ZW[1] = q->w;
1182 V_ZW = __PSQ_LX(q, 8, 0, 0);
1183
1184 // tmp2 = [qx*qx][qy*qy]
1185 tmp2 = __PS_MUL(V_XY, V_XY);
1186
1187 // V_YX = [qy][qx]
1188 V_YX = __PS_MERGE10(V_XY, V_XY);
1189
1190 // tmp4 = [qx*qx+qz*qz][qy*qy+qw*qw]
1191 tmp4 = __PS_MADD(V_ZW, V_ZW, tmp2);
1192
1193 // tmp3 = [qz*qz][qw*qw]
1194 tmp3 = __PS_MUL(V_ZW, V_ZW);
1195
1196 // scale = [qx*qx+qy*qy+qz*qz+qw*qw]
1197 scale = __PS_SUM0(tmp4, tmp4, tmp4);
1198 scale[1] = scale[0];
1199
1200 // tmp7 = [qy*qw][qx*qw]
1201 tmp7 = __PS_MULS1(V_YX, V_ZW);
1202
1203 // Newton-Rapson refinment (1/X) : E' = 2E-X*E*E
1204 // tmp9 = [E = Est.(1/X)]
1205 tmp9 = __PS_RES(scale);
1206
1207 // tmp4 = [qx*qx+qz*qz][qy*qy+qz*qz]
1208 tmp4 = __PS_SUM1(tmp3, tmp4, tmp2);
1209
1210 // scale = [2-X*E]
1211 scale = __PS_NMSUB(scale, tmp9, c22);
1212
1213 // tmp6 = [qz*qw][?]
1214 tmp6 = __PS_MULS1(V_ZW, V_ZW);
1215
1216 // scale = [E(2-scale*E) = E']
1217 scale = __PS_MUL(tmp9, scale);
1218
1219 // tmp2 = [qx*qx+qy*qy]
1220 tmp2 = __PS_SUM0(tmp2, tmp2, tmp2);
1221
1222 // scale = [s = 2E' = 2.0F/(qx*qx+qy*qy+qz*qz+qw*qw)]
1223 scale = __PS_MUL(scale, c22);
1224
1225 // tmp8 = [qx*qy+qz*qw][?]
1226 tmp8 = __PS_MADD(V_XY, V_YX, tmp6);
1227
1228 // tmp6 = [qx*qy-qz*qw][?]
1229 tmp6 = __PS_MSUB(V_XY, V_YX, tmp6);
1230
1231 // c00 [m03] : STORE
1232 //m[0][3] = c00[0];
1233 __PSQ_STX(m, 12, c00, 1, 0);
1234
1235 // tmp2 = [1-s(qx*qx+qy*qy)] : [m22]
1236 tmp2 = __PS_NMSUB(tmp2, scale, c11);
1237
1238 // tmp4 = [1-s(qx*qx+qz*qz)][1-s(qy*qy+qz*qz)] : [m11][m00]
1239 tmp4 = __PS_NMSUB(tmp4, scale, c11);
1240
1241 // c00 [m23] : STORE
1242 //m[2][3] = c00[0];
1243 __PSQ_STX(m, 44, c00, 1, 0);
1244
1245 // tmp8 = [s(qx*qy+qz*qw)][?] : [m10]
1246 tmp8 = __PS_MUL(tmp8, scale);
1247
1248 // tmp6 = [s(qx*qy-qz*qw)][?] : [m01]
1249 tmp6 = __PS_MUL(tmp6, scale);
1250
1251 // tmp2 [m22] : STORE
1252 //m[2][2] = tmp2[0];
1253 __PSQ_STX(m, 40, tmp2, 1, 0);
1254
1255 // tmp5 = [qx*qz+qy*qw][qy*qz+qx*qw]
1256 tmp5 = __PS_MADDS0(V_XY, V_ZW, tmp7);
1257
1258 // D1 = [m10][m11]
1259 D1 = __PS_MERGE00(tmp8, tmp4);
1260
1261 // tmp7 = [qx*qz-qy*qw][qy*qz-qx*qw]
1262 tmp7 = __PS_NMSUB(tmp7, c22, tmp5);
1263
1264 // D1 [m10][m11] : STORE
1265 //m[1][0] = D1[0];
1266 //m[1][1] = D1[1];
1267 __PSQ_STX(m, 16, D1, 0, 0);
1268
1269 // D0 = [m00][m01]
1270 D0 = __PS_MERGE10(tmp4, tmp6);
1271
1272 // tmp5 = [s(qx*qz+qy*qw)][s(qy*qz+qx*qw)] : [m02][m21]
1273 tmp5 = __PS_MUL(tmp5, scale);
1274
1275 // tmp7 = [s(qx*qz-qy*qw)][s(qy*qz-qx*qw)] : [m20][m12]
1276 tmp7 = __PS_MUL(tmp7, scale);
1277
1278 // D0 [m00][m01] : STORE
1279 //m[0][0] = D0[0];
1280 //m[0][1] = D0[1];
1281 __PSQ_STX(m, 0, D0, 0, 0);
1282
1283 // tmp5 [m02] : STORE
1284 //m[0][2] = tmp5[0];
1285 __PSQ_STX(m, 8, tmp5, 1, 0);
1286
1287 // tmp3 = [m12][m13]
1288 D1 = __PS_MERGE10(tmp7, c00);
1289
1290 // tmp9 = [m20][m21]
1291 D0 = __PS_MERGE01(tmp7, tmp5);
1292
1293 // tmp3 [m12][m13] : STORE
1294 //m[1][2] = D1[0];
1295 //m[1][3] = D1[1];
1296 __PSQ_STX(m, 24, D1, 0, 0);
1297
1298 // tmp9 [m20][m21] : STORE
1299 //m[2][0] = D0[0];
1300 //m[2][1] = D0[1];
1301 __PSQ_STX(m, 32, D0, 0, 0);
1302 }
1303 #endif
1304
1305 /*===========================================================================*/
1306