1 /*---------------------------------------------------------------------------*
2   Project: matrix vector Library
3   File:    quat.c
4 
5   Copyright (C) Nintendo.  All rights reserved.
6 
7   These coded instructions, statements, and computer programs contain
8   proprietary information of Nintendo of America Inc. and/or Nintendo
9   Company Ltd., and are protected by Federal copyright law.  They may
10   not be disclosed to third parties or copied or duplicated in any form,
11   in whole or in part, without the prior written consent of Nintendo.
12 
13  *---------------------------------------------------------------------------*/
14 
15 #include <math.h>
16 #include <stdio.h>
17 #include <cafe/mtx.h>
18 #include <cafe/mtx/mtx44.h>
19 #include "mtxAssert.h"
20 #include "mtx44Assert.h"
21 
22 /*---------------------------------------------------------------------*
23     Constants
24  *---------------------------------------------------------------------*/
25 static const f32x2 c00 = {0.0F, 0.0F};
26 //static const f32x2 c01 = {0.0F, 1.0F};
27 //static const f32x2 c10 = {1.0F, 0.0F};
28 static const f32x2 c11 = {1.0F, 1.0F};
29 static const f32x2 c22 = {2.0F, 2.0F};
30 static const f32x2 c33 = {3.0F, 3.0F};
31 static const f32x2 c0505 = {0.5F, 0.5F};
32 static const f32x2    epsilon = {QUAT_EPSILON, QUAT_EPSILON};
33 
34 /*---------------------------------------------------------------------------*
35   Name:         QUATAdd
36 
37   Description:  Returns the sum of two quaternions.
38 
39   Arguments:    p - first quaternion
40                 q - second quaternion
41                 r - resultant quaternion p+q
42 
43   Returns:      none
44  *---------------------------------------------------------------------------*/
45 /*---------------------------------------------------------------------------*
46     C version
47  *---------------------------------------------------------------------------*/
C_QUATAdd(const Quaternion * p,const Quaternion * q,Quaternion * r)48 void C_QUATAdd( const Quaternion *p, const Quaternion *q, Quaternion *r )
49 {
50     ASSERTMSG( ( p != 0 ), QUAT_ADD_1 );
51     ASSERTMSG( ( q != 0 ), QUAT_ADD_2 );
52     ASSERTMSG( ( r != 0 ), QUAT_ADD_3 );
53 
54     r->x = p->x + q->x;
55     r->y = p->y + q->y;
56     r->z = p->z + q->z;
57     r->w = p->w + q->w;
58 }
59 
60 #if !defined(WIN32) && !defined(WIN64)
61 /*---------------------------------------------------------------------------*
62     Paired-Single intrinsics version
63  *---------------------------------------------------------------------------*
64             Note that this performs NO error checking.
65  *---------------------------------------------------------------------------*/
PSQUATAdd(const Quaternion * p,const Quaternion * q,Quaternion * r)66 void PSQUATAdd( const Quaternion *p, const Quaternion *q, Quaternion *r )
67 {
68     f32x2    pxy, qxy, rxy, pzw, qzw, rzw;
69 
70     //psq_l     pxy,  0(p), 0, 0
71     //pxy[0] = p->x;
72     //pxy[1] = p->y;
73     pxy = __PSQ_LX(p, 0, 0, 0);
74 
75     //psq_l     qxy,  0(q), 0, 0
76     //qxy[0] = q->x;
77     //qxy[1] = q->y;
78     qxy = __PSQ_LX(q, 0, 0, 0);
79 
80     //ps_add  rxy, pxy, qxy
81     rxy = __PS_ADD(pxy, qxy);
82 
83     //psq_st    rxy,  0(r), 0, 0
84     //r->x = rxy[0];
85     //r->y = rxy[1];
86     __PSQ_STX(r, 0, rxy, 0, 0);
87 
88     //psq_l     pzw,  8(p), 0, 0
89     //pzw[0] = p->z;
90     //pzw[1] = p->w;
91     pzw = __PSQ_LX(p, 8, 0, 0);
92 
93     //psq_l     qzw,  8(q), 0, 0
94     //qzw[0] = q->z;
95     //qzw[1] = q->w;
96     qzw = __PSQ_LX(q, 8, 0, 0);
97 
98     //ps_add  rzw, pzw, qzw
99     rzw = __PS_ADD(pzw, qzw);
100 
101     //psq_st    rzw,  8(r), 0, 0
102     //r->z = rzw[0];
103     //r->w = rzw[1];
104     __PSQ_STX(r, 8, rzw, 0, 0);
105 }
106 #endif
107 
108 /*---------------------------------------------------------------------------*
109   Name:         QUATSubtract
110 
111   Description:  Returns the difference of two quaternions p-q.
112 
113   Arguments:    p - left quaternion
114                 q - right quaternion
115                 r - resultant quaternion difference p-q
116 
117   Returns:      none
118  *---------------------------------------------------------------------------*/
119 /*---------------------------------------------------------------------------*
120     C version
121  *---------------------------------------------------------------------------*/
C_QUATSubtract(const Quaternion * p,const Quaternion * q,Quaternion * r)122 void C_QUATSubtract( const Quaternion *p, const Quaternion *q, Quaternion *r )
123 {
124     ASSERTMSG( ( p != 0 ), QUAT_SUBTRACT_1 );
125     ASSERTMSG( ( q != 0 ), QUAT_SUBTRACT_2 );
126     ASSERTMSG( ( r != 0 ), QUAT_SUBTRACT_3 );
127 
128     r->x = p->x - q->x;
129     r->y = p->y - q->y;
130     r->z = p->z - q->z;
131     r->w = p->w - q->w;
132 }
133 
134 #if !defined(WIN32) && !defined(WIN64)
135 /*---------------------------------------------------------------------------*
136     Paired-Single intrinsics version
137  *---------------------------------------------------------------------------*
138             Note that this performs NO error checking.
139  *---------------------------------------------------------------------------*/
PSQUATSubtract(const Quaternion * p,const Quaternion * q,Quaternion * r)140 void PSQUATSubtract( const Quaternion *p, const Quaternion *q, Quaternion *r )
141 {
142     f32x2    pxy, qxy, rxy, pzw, qzw, rzw;
143 
144     //psq_l     pxy,  0(p), 0, 0
145     //pxy[0] = p->x;
146     //pxy[1] = p->y;
147     pxy = __PSQ_LX(p, 0, 0, 0);
148 
149     //psq_l     qxy,  0(q), 0, 0
150     //qxy[0] = q->x;
151     //qxy[1] = q->y;
152     qxy = __PSQ_LX(q, 0, 0, 0);
153 
154     //ps_sub  rxy, pxy, qxy
155     rxy = __PS_SUB(pxy, qxy);
156 
157     //psq_st    rxy,  0(r), 0, 0
158     //r->x = rxy[0];
159     //r->y = rxy[1];
160     __PSQ_STX(r, 0, rxy, 0, 0);
161 
162     //psq_l     pzw,  8(p), 0, 0
163     //pzw[0] = p->z;
164     //pzw[1] = p->w;
165     pzw = __PSQ_LX(p, 8, 0, 0);
166 
167     //psq_l     qzw,  8(q), 0, 0
168     //qzw[0] = q->z;
169     //qzw[1] = q->w;
170     qzw = __PSQ_LX(q, 8, 0, 0);
171 
172     //ps_sub  rzw, pzw, qzw
173     rzw = __PS_SUB(pzw, qzw);
174 
175     //psq_st    rzw,  8(r), 0, 0
176     //r->z = rzw[0];
177     //r->w = rzw[1];
178     __PSQ_STX(r, 8, rzw, 0, 0);
179 
180 }
181 #endif
182 
183 /*---------------------------------------------------------------------------*
184   Name:         QUATMultiply
185 
186   Description:  Returns the product of two quaternions. The order of
187                 multiplication is important. (p*q != q*p)
188 
189   Arguments:    p - left quaternion
190                 q - right quaternion
191                 pq - resultant quaternion product p*q
192 
193   Returns:      none
194  *---------------------------------------------------------------------------*/
195 /*---------------------------------------------------------------------------*
196     C version
197  *---------------------------------------------------------------------------*/
C_QUATMultiply(const Quaternion * p,const Quaternion * q,Quaternion * pq)198 void C_QUATMultiply( const Quaternion *p, const Quaternion *q, Quaternion *pq )
199 {
200     Quaternion *r;
201     Quaternion  pqTmp;
202 
203     ASSERTMSG( ( p  != 0 ), QUAT_MULTIPLY_1 );
204     ASSERTMSG( ( q  != 0 ), QUAT_MULTIPLY_2 );
205     ASSERTMSG( ( pq != 0 ), QUAT_MULTIPLY_3 );
206 
207     if ( p == pq || q == pq )
208     {
209         r = &pqTmp;
210     }
211     else
212     {
213         r = pq;
214     }
215 
216     r->w = p->w*q->w - p->x*q->x - p->y*q->y - p->z*q->z;
217     r->x = p->w*q->x + p->x*q->w + p->y*q->z - p->z*q->y;
218     r->y = p->w*q->y + p->y*q->w + p->z*q->x - p->x*q->z;
219     r->z = p->w*q->z + p->z*q->w + p->x*q->y - p->y*q->x;
220 
221     if ( r == &pqTmp )
222     {
223         *pq = pqTmp;
224     }
225 }
226 
227 #if !defined(WIN32) && !defined(WIN64)
228 /*---------------------------------------------------------------------------*
229     Paired-Single intrinsics version
230  *---------------------------------------------------------------------------*
231             Note that this performs NO error checking.
232  *---------------------------------------------------------------------------*/
233 
PSQUATMultiply(const Quaternion * p,const Quaternion * q,Quaternion * pq)234 void PSQUATMultiply( const Quaternion *p, const Quaternion *q, Quaternion *pq )
235 {
236     f32x2    pxy, pzw, qxy, qzw;
237     f32x2    pnxy, pnzw, pnxny, pnznw;
238     f32x2    rxy, rzw, sxy, szw;
239 
240     // [px][py] : Load
241     //psq_l       pxy, 0(p), 0, 0
242     //pxy[0] = p->x;
243     //pxy[1] = p->y;
244     pxy = __PSQ_LX(p, 0, 0, 0);
245 
246     // [pz][pw] : Load
247     //psq_l       pzw, 8(p), 0, 0
248     //pzw[0] = p->z;
249     //pzw[1] = p->w;
250     pzw = __PSQ_LX(p, 8, 0, 0);
251 
252     // [qx][qy] : Load
253     //psq_l       qxy, 0(q), 0, 0
254     //qxy[0] = q->x;
255     //qxy[1] = q->y;
256     qxy = __PSQ_LX(q, 0, 0, 0);
257 
258     // [-px][-py]
259     //ps_neg      pnxny, pxy
260     pnxny = __PS_NEG(pxy);
261 
262     // [qz][qw] : Load
263     //psq_l       qzw, 8(q), 0, 0
264     //qzw[0] = q->z;
265     //qzw[1] = q->w;
266     qzw = __PSQ_LX(q, 8, 0, 0);
267 
268     // [-pz][-pw]
269     //ps_neg      pnznw, pzw
270     pnznw = __PS_NEG(pzw);
271 
272     // [-px][py]
273     //ps_merge01  pnxy, pnxny, pxy
274     pnxy = __PS_MERGE01(pnxny, pxy);
275 
276     // [pz*qx][pw*qx]
277     //ps_muls0    rxy, pzw, qxy
278     rxy = __PS_MULS0(pzw, qxy);
279 
280     // [-px*qx][-py*qx]
281     //ps_muls0    rzw, pnxny, qxy
282     rzw = __PS_MULS0(pnxny, qxy);
283 
284     // [-pz][pw]
285     //ps_merge01  pnzw, pnznw, pzw
286     pnzw = __PS_MERGE01(pnznw, pzw);
287 
288     // [-px*qy][py*qy]
289     //ps_muls1    szw, pnxy, qxy
290     szw = __PS_MULS1(pnxy, qxy);
291 
292     // [pz*qx-px*qz][pw*qx+py*qz]
293     //ps_madds0   rxy, pnxy, qzw, rxy
294     rxy = __PS_MADDS0(pnxy, qzw, rxy);
295 
296     // [-pz*qy][pw*qy]
297     //ps_muls1    sxy, pnzw, qxy
298     sxy = __PS_MULS1(pnzw, qxy);
299 
300     // [-px*qx-pz*qz][-py*qx+pw*qz]
301     //ps_madds0   rzw, pnzw, qzw, rzw
302     rzw = __PS_MADDS0(pnzw, qzw, rzw);
303 
304     // [-px*qy-pz*qw][py*qy-pw*qw]
305     //ps_madds1   szw, pnznw, qzw, szw
306     szw = __PS_MADDS1(pnznw, qzw, szw);
307 
308     // [pw*qx+py*qz][pz*qx-px*qz]
309     //ps_merge10  rxy, rxy, rxy
310     rxy = __PS_MERGE10(rxy, rxy);
311 
312     // [-pz*qy+px*qw][pw*qy+py*qw]
313     //ps_madds1   sxy, pxy, qzw, sxy
314     sxy = __PS_MADDS1(pxy, qzw, sxy);
315 
316     // [-py*qx+pw*qz][-px*qx-pz*qz]
317     //ps_merge10  rzw, rzw, rzw
318     rzw = __PS_MERGE10(rzw, rzw);
319 
320     // [pw*qx+py*qz-pz*qy+px*qw][pz*qx-px*qz+pw*qy+py*qw] : [pqx][pqy]
321     //ps_add      rxy, rxy, sxy
322     rxy = __PS_ADD(rxy, sxy);
323 
324     // [pqx][pqy] : Store
325     //psq_st      rxy, 0(pq), 0, 0
326     //pq->x = rxy[0];
327     //pq->y = rxy[1];
328     __PSQ_STX(pq, 0, rxy, 0, 0);
329 
330     // [-py*qx+pw*qz+px*qy+pz*qw][-px*qx-pz*qz-py*qy+pw*qw] : [pqz][pqw]
331     //ps_sub      rzw, rzw, szw
332     rzw = __PS_SUB(rzw, szw);
333 
334     // [pqz][pqw] : Store
335     //psq_st      rzw, 8(pq), 0, 0
336     //pq->z = rzw[0];
337     //pq->w = rzw[1];
338     __PSQ_STX(pq, 8, rzw, 0, 0);
339 }
340 #endif
341 
342 /*---------------------------------------------------------------------------*
343   Name:         QUATScale
344 
345   Description:  Scales a quaternion.
346 
347   Arguments:    q     - quaternion
348                 r     - resultant scaled quaternion
349                 scale - float to scale by
350 
351   Returns:      none
352  *---------------------------------------------------------------------------*/
353 /*---------------------------------------------------------------------------*
354     C version
355  *---------------------------------------------------------------------------*/
C_QUATScale(const Quaternion * q,Quaternion * r,f32 scale)356 void C_QUATScale( const Quaternion *q, Quaternion *r, f32 scale )
357 {
358     ASSERTMSG( ( q  != 0 ), QUAT_SCALE_1 );
359     ASSERTMSG( ( r  != 0 ), QUAT_SCALE_2 );
360 
361     r->x = q->x * scale;
362     r->y = q->y * scale;
363     r->z = q->z * scale;
364     r->w = q->w * scale;
365 }
366 
367 #if !defined(WIN32) && !defined(WIN64)
368 /*---------------------------------------------------------------------------*
369     Paired-Single intrinsics version
370  *---------------------------------------------------------------------------*
371             Note that this performs NO error checking.
372  *---------------------------------------------------------------------------*/
373 
PSQUATScale(const Quaternion * q,Quaternion * r,f32 scale)374 void PSQUATScale( const Quaternion *q, Quaternion *r, f32 scale )
375 {
376     f32x2    rxy, rzw;
377     f32x2    scale2 = {scale, scale};
378 
379     //psq_l       rxy, 0(q), 0, 0
380     //rxy[0] = q->x;
381     //rxy[1] = q->y;
382     rxy = __PSQ_LX(q, 0, 0, 0);
383 
384     //psq_l       rzw, 8(q), 0, 0
385     //rzw[0] = q->z;
386     //rzw[1] = q->w;
387     rzw = __PSQ_LX(q, 8, 0, 0);
388 
389     //ps_muls0    rxy, rxy, scale
390     rxy = __PS_MULS0(rxy, scale2);
391 
392     //psq_st      rxy, 0(r), 0, 0
393     //r->x = rxy[0];
394     //r->y = rxy[1];
395     __PSQ_STX(r, 0, rxy, 0, 0);
396 
397     //ps_muls0    rzw, rzw, scale
398     rzw = __PS_MULS0(rzw, scale2);
399 
400     //psq_st      rzw, 8(r), 0, 0
401     //r->z = rzw[0];
402     //r->w = rzw[1];
403     __PSQ_STX(r, 8, rzw, 0, 0);
404 }
405 #endif
406 
407 /*---------------------------------------------------------------------------*
408   Name:         QUATDotProduct
409 
410   Description:  Returns the dot product of the two quaternions.
411 
412   Arguments:    p - first quaternion
413                 q - second quaternion
414 
415   Returns:      Dot product
416  *---------------------------------------------------------------------------*/
417 /*---------------------------------------------------------------------------*
418     C version
419  *---------------------------------------------------------------------------*/
C_QUATDotProduct(const Quaternion * p,const Quaternion * q)420 f32 C_QUATDotProduct( const Quaternion *p, const Quaternion *q )
421 {
422     ASSERTMSG( ( p  != 0 ), QUAT_DOTPRODUCT_1 );
423     ASSERTMSG( ( q  != 0 ), QUAT_DOTPRODUCT_2 );
424 
425     return (q->x*p->x + q->y*p->y + q->z*p->z + q->w*p->w);
426 }
427 
428 #if !defined(WIN32) && !defined(WIN64)
429 /*---------------------------------------------------------------------------*
430     Paired-Single intrinsics version
431  *---------------------------------------------------------------------------*
432             Note that this performs NO error checking.
433  *---------------------------------------------------------------------------*/
434 
PSQUATDotProduct(const Quaternion * p,const Quaternion * q)435 f32 PSQUATDotProduct( const Quaternion *p, const Quaternion *q )
436 {
437     f32x2    pxy, pzw, qxy, qzw, dp;
438 
439     //psq_l       pxy, 0(p), 0, 0
440     //pxy[0] = p->x;
441     //pxy[1] = p->y;
442     pxy = __PSQ_LX(p, 0, 0, 0);
443 
444     //psq_l       qxy, 0(q), 0, 0
445     //qxy[0] = q->x;
446     //qxy[1] = q->y;
447     qxy = __PSQ_LX(q, 0, 0, 0);
448 
449     //ps_mul      dp, pxy, qxy
450     dp = __PS_MUL(pxy, qxy);
451 
452     //psq_l       pzw, 8(p), 0, 0
453     //pzw[0] = p->z;
454     //pzw[1] = p->w;
455     pzw = __PSQ_LX(p, 8, 0, 0);
456 
457     //psq_l       qzw, 8(q), 0, 0
458     //qzw[0] = q->z;
459     //qzw[1] = q->w;
460     qzw = __PSQ_LX(q, 8, 0, 0);
461 
462     //ps_madd     dp, pzw, qzw, dp
463     dp = __PS_MADD(pzw, qzw, dp);
464 
465     //ps_sum0     dp, dp, dp, dp
466     dp = __PS_SUM0(dp, dp, dp);
467 
468     return (f32)dp[0];
469 }
470 #endif
471 
472 /*---------------------------------------------------------------------------*
473   Name:         QUATNormalize
474 
475   Description:  Normalizes a quaternion
476 
477   Arguments:    src - the source quaternion
478                 unit - resulting unit quaternion
479 
480   Returns:      none
481  *---------------------------------------------------------------------------*/
482 /*---------------------------------------------------------------------------*
483     C version
484  *---------------------------------------------------------------------------*/
C_QUATNormalize(const Quaternion * src,Quaternion * unit)485 void C_QUATNormalize( const Quaternion *src, Quaternion *unit )
486 {
487     f32 mag;
488 
489     ASSERTMSG( ( src  != 0 ), QUAT_NORMALIZE_1 );
490     ASSERTMSG( ( unit != 0 ), QUAT_NORMALIZE_2 );
491 
492     mag = (src->x * src->x) + (src->y * src->y) + (src->z * src->z) + (src->w * src->w);
493 
494     if ( mag >= QUAT_EPSILON )
495     {
496         mag = 1.0F / sqrtf(mag);
497 
498         unit->x = src->x * mag;
499         unit->y = src->y * mag;
500         unit->z = src->z * mag;
501         unit->w = src->w * mag;
502     }
503     else
504     {
505         unit->x = unit->y = unit->z = unit->w = 0.0F;
506     }
507 }
508 
509 #if !defined(WIN32) && !defined(WIN64)
510 /*---------------------------------------------------------------------------*
511     Paired-Single intrinsics version
512  *---------------------------------------------------------------------------*
513             Note that this performs NO error checking.
514  *---------------------------------------------------------------------------*/
515 
PSQUATNormalize(const Quaternion * src,Quaternion * unit)516 void PSQUATNormalize( const Quaternion *src, Quaternion *unit )
517 {
518     f32x2    sxy, szw;
519     f32x2    mag, rsqmag, diff;
520     f32x2    nwork0, nwork1;
521 
522     //psq_l       sxy, 0(src), 0, 0
523     //sxy[0] = src->x;
524     //sxy[1] = src->y;
525     sxy = __PSQ_LX(src, 0, 0, 0);
526 
527     // mag = [x*x][y*y]
528     //ps_mul      mag, sxy, sxy
529     mag = __PS_MUL(sxy, sxy);
530 
531     //psq_l       szw, 8(src), 0, 0
532     //szw[0] = src->z;
533     //szw[1] = src->w;
534     szw = __PSQ_LX(src, 8, 0, 0);
535 
536     // c00 = [0.0F]
537     //ps_sub      c00, epsilon, epsilon
538     // mag = [x*x+z*z][y*y+w*w]
539     //ps_madd     mag, szw, szw, mag
540     mag = __PS_MADD(szw, szw, mag);
541 
542     // mag = [x*x+y*y+z*z+w*w][N/A]
543     //ps_sum0     mag, mag, mag, mag
544     mag = __PS_SUM0(mag, mag, mag);
545 
546     // rsqmag = 1.0F / sqrtf(mag) : estimation
547     //frsqrte     rsqmag, mag
548     rsqmag = __PS_RSQRTE(mag);
549 
550     // diff = mag - epsilon
551     //ps_sub      diff, mag, epsilon
552     diff = __PS_SUB(mag, epsilon);
553 
554     // Newton-Rapson refinement (x1) : E' = (E/2)(3 - X * E * E)
555     //fmul        nwork0, rsqmag, rsqmag
556     nwork0 = __PS_MUL(rsqmag, rsqmag);
557 
558     //fmul        nwork1, rsqmag, c0505
559     nwork1 = __PS_MUL(rsqmag, c0505);
560 
561     //fnmsub      nwork0, nwork0, mag, c33
562     nwork0 = __PS_NMSUB(nwork0, mag, c33);
563 
564     //fmul        rsqmag, nwork0, nwork1
565     rsqmag = __PS_MUL(nwork0, nwork1);
566 
567     // rsqmag = ( mag >= epsilon ) ? rsqmag : 0
568     //ps_sel      rsqmag, diff, rsqmag, c00
569     rsqmag = __PS_SEL(diff, rsqmag, c00);
570 
571     // sxy = [x*rsqmag][y*rsqmag]
572     //ps_muls0    sxy, sxy, rsqmag
573     sxy = __PS_MULS0(sxy, rsqmag);
574 
575     // szw = [z*rsqmag][w*rsqmag]
576     //ps_muls0    szw, szw, rsqmag
577     szw = __PS_MULS0(szw, rsqmag);
578 
579     //psq_st      sxy, 0(unit), 0, 0
580     //unit->x = sxy[0];
581     //unit->y = sxy[1];
582     __PSQ_STX(unit, 0, sxy, 0, 0);
583 
584     //psq_st      szw, 8(unit), 0, 0
585     //unit->z = szw[0];
586     //unit->w = szw[1];
587     __PSQ_STX(unit, 8, szw, 0, 0);
588 }
589 #endif
590 
591 /*---------------------------------------------------------------------------*
592   Name:         QUATInverse
593 
594   Description:  Returns the inverse of the quaternion.
595 
596   Arguments:    src - the source quaternion
597                 inv - resulting inverse quaternion
598 
599   Returns:      none
600  *---------------------------------------------------------------------------*/
601 /*---------------------------------------------------------------------------*
602     C version
603  *---------------------------------------------------------------------------*/
C_QUATInverse(const Quaternion * src,Quaternion * inv)604 void C_QUATInverse( const Quaternion *src, Quaternion *inv )
605 {
606     f32 mag, norminv;
607 
608     ASSERTMSG( ( src != 0 ), QUAT_INVERSE_1 );
609     ASSERTMSG( ( inv != 0 ), QUAT_INVERSE_2 );
610 
611     mag = ( src->x*src->x + src->y*src->y + src->z*src->z + src->w*src->w );
612 
613     if ( mag == 0.0f )
614     {
615         mag = 1.0f;
616     }
617 
618     // [Inverse] = [Conjugate] / [Magnitude]
619     norminv = 1.0f / mag;
620     inv->x = -src->x * norminv;
621     inv->y = -src->y * norminv;
622     inv->z = -src->z * norminv;
623     inv->w =  src->w * norminv;
624 }
625 
626 #if !defined(WIN32) && !defined(WIN64)
627 /*---------------------------------------------------------------------------*
628     Paired-Single intrinsics version
629  *---------------------------------------------------------------------------*
630             Note that this performs NO error checking.
631  *---------------------------------------------------------------------------*/
632 
PSQUATInverse(const Quaternion * src,Quaternion * inv)633 void PSQUATInverse( const Quaternion *src, Quaternion *inv )
634 {
635     f32x2    sxy, szw, izz, iww;
636     f32x2    mag, norminv, nninv, nwork0;
637 
638     // load xy
639     //psq_l       sxy, 0(src), 0, 0
640     //sxy[0] = src->x;
641     //sxy[1] = src->y;
642     sxy = __PSQ_LX(src, 0, 0, 0);
643 
644     // mag = [x*x][y*y]
645     //ps_mul      mag, sxy, sxy
646     mag = __PS_MUL(sxy, sxy);
647 
648     // c00 = [0.0F]
649     //ps_sub      c00, c11, c11
650 
651     // load zw
652     //psq_l       szw, 8(src), 0, 0
653     //szw[0] = src->z;
654     //szw[1] = src->w;
655     szw = __PSQ_LX(src, 8, 0, 0);
656 
657     // mag = [x*x+z*z][y*y+w*w]
658     //ps_madd     mag, szw, szw, mag
659     mag = __PS_MADD(szw, szw, mag);
660 
661     // c22 = [2.0F]
662     //ps_add      c22, c11, c11
663 
664     // mag = [x*x+y*y+z*z+w*w][N/A]
665     //ps_sum0     mag, mag, mag, mag
666     mag = __PS_SUM0(mag, mag, mag);
667 
668     // zero check
669     if ( mag[0] == 0.0f )
670     {
671         mag[0] = mag[1] = 1.0f;
672     }
673 
674     // norminv = 1.0F / mag
675     //fres        norminv, mag
676     norminv = __PS_RES(mag);
677 
678     // Newton-Rapson refinment (x1) : E' = 2E-X*E*E
679     //ps_nmsub    nwork0, mag, norminv, c22
680     nwork0 = __PS_NMSUB(mag, norminv, c22);
681 
682     //ps_mul      norminv, norminv, nwork0
683     norminv = __PS_MUL(norminv, nwork0);
684 
685     // nninv = [ -norminv ]
686     //ps_neg      nninv, norminv
687     nninv = __PS_NEG(norminv);
688 
689     // iww = [ w*norminv ][ N/A ]
690     //ps_muls1    iww, norminv, szw
691     iww = __PS_MULS1(norminv, szw);
692 
693     // sxy = [ -x*norminv ][ -y*norminv ]
694     //ps_muls0    sxy, sxy, nninv
695     sxy = __PS_MULS0(sxy, nninv);
696 
697     // store w
698     //psq_st      iww, 12(inv), 1, 0
699     //inv->w = iww[0];
700     __PSQ_STX(inv, 12, iww, 1, 0);
701 
702     // izz = [ -z*norminv ][ N/A ]
703     //ps_muls0    izz, szw, nninv
704     izz = __PS_MULS0(szw, nninv);
705 
706     // store xy
707     //psq_st      sxy, 0(inv), 0, 0
708     //inv->x = sxy[0];
709     //inv->y = sxy[1];
710     __PSQ_STX(inv, 0, sxy, 0, 0);
711 
712     // store z
713     //psq_st      izz, 8(inv), 1, 0
714     //inv->z = izz[0];
715     __PSQ_STX(inv, 8, izz, 1, 0);
716 }
717 #endif
718 
719 /*---------------------------------------------------------------------------*
720   Name:         QUATDivide
721 
722   Description:  Returns the ratio of two quaternions.  Creates a result
723                 r = p/q such that q*r=p (order of multiplication is important).
724 
725   Arguments:    p - left quaternion
726                 q - right quaternion
727                 r - resultant ratio quaterion p/q
728 
729   Returns:      none
730  *---------------------------------------------------------------------------*/
731 /*---------------------------------------------------------------------------*
732     C version
733  *---------------------------------------------------------------------------*/
C_QUATDivide(const Quaternion * p,const Quaternion * q,Quaternion * r)734 void C_QUATDivide( const Quaternion *p, const Quaternion *q, Quaternion *r )
735 {
736     Quaternion qtmp;
737 
738     ASSERTMSG( ( p != 0 ), QUAT_DIVIDE_1 );
739     ASSERTMSG( ( q != 0 ), QUAT_DIVIDE_2 );
740     ASSERTMSG( ( r != 0 ), QUAT_DIVIDE_3 );
741 
742     C_QUATInverse(q, &qtmp);
743     C_QUATMultiply(&qtmp, p, r);
744 }
745 
746 #if !defined(WIN32) && !defined(WIN64)
747 /*---------------------------------------------------------------------------*
748     Paired-Single intrinsics version
749  *---------------------------------------------------------------------------*
750             Note that this performs NO error checking.
751  *---------------------------------------------------------------------------*/
752 
PSQUATDivide(const Quaternion * p,const Quaternion * q,Quaternion * r)753 void PSQUATDivide( const Quaternion *p, const Quaternion *q, Quaternion *r )
754 {
755     Quaternion qtmp;
756 
757     PSQUATInverse(q, &qtmp);
758     PSQUATMultiply(&qtmp, p, r);
759 }
760 #endif
761 
762 /*---------------------------------------------------------------------------*
763   Name:         QUATExp
764 
765   Description:  Exponentiate quaternion (where q.w == 0).
766 
767   Arguments:    q - pure quaternion
768                 r - resultant exponentiate quaternion (an unit quaternion)
769 
770   Returns:      none
771  *---------------------------------------------------------------------------*/
C_QUATExp(const Quaternion * q,Quaternion * r)772 void C_QUATExp( const Quaternion *q, Quaternion *r )
773 {
774     f32 theta, scale;
775 
776     ASSERTMSG( ( q != 0 ), QUAT_EXP_1 );
777     ASSERTMSG( ( r != 0 ), QUAT_EXP_2 );
778 
779     // pure quaternion check
780     ASSERTMSG( ( q->w == 0.0F ), QUAT_EXP_3 );
781 
782     theta = sqrtf( q->x*q->x + q->y*q->y + q->z*q->z );
783     scale = 1.0F;
784 
785     if ( theta > QUAT_EPSILON )
786         scale = (f32)sinf(theta)/theta;
787 
788     r->x = scale * q->x;
789     r->y = scale * q->y;
790     r->z = scale * q->z;
791     r->w = (f32)cosf(theta);
792 }
793 
794 
795 /*---------------------------------------------------------------------------*
796   Name:         QUATLogN
797 
798   Description:  Returns the natural logarithm of a UNIT quaternion
799 
800   Arguments:    q - unit quaternion
801                 r - resultant logarithm quaternion (an pure quaternion)
802 
803   Returns:      none
804  *---------------------------------------------------------------------------*/
C_QUATLogN(const Quaternion * q,Quaternion * r)805 void C_QUATLogN( const Quaternion *q, Quaternion *r )
806 {
807     f32 theta,scale;
808 
809     ASSERTMSG( ( q != 0 ), QUAT_LOGN_1 );
810     ASSERTMSG( ( r != 0 ), QUAT_LOGN_2 );
811 
812     scale = q->x*q->x + q->y*q->y + q->z*q->z;
813 
814     // unit quaternion check
815 #ifdef _DEBUG
816     {
817         f32 mag;
818         mag = scale + q->z*q->z;
819         if ( mag < 1.0F - QUAT_EPSILON || mag > 1.0F + QUAT_EPSILON )
820         {
821             ASSERT( QUAT_LOGN_3 );
822         }
823     }
824 #endif
825 
826     scale = sqrtf(scale);
827     theta = atan2f( scale, q->w );
828 
829     if ( scale > 0.0F )
830         scale = theta/scale;
831 
832     r->x = scale*q->x;
833     r->y = scale*q->y;
834     r->z = scale*q->z;
835     r->w = 0.0F;
836 
837 }
838 
839 
840 /*---------------------------------------------------------------------------*
841   Name:         QUATMakeClosest
842 
843   Description:  Modify q so it is on the same side of the hypersphere as qto
844 
845   Arguments:    q   - quaternion
846                 qto - quaternion to be close to
847                 r   - resultant modified quaternion
848 
849   Returns:      NONE
850  *---------------------------------------------------------------------------*/
C_QUATMakeClosest(const Quaternion * q,const Quaternion * qto,Quaternion * r)851 void C_QUATMakeClosest( const Quaternion *q, const Quaternion *qto, Quaternion *r )
852 {
853     f32 dot;
854 
855     ASSERTMSG( ( q   != 0 ), QUAT_MAKECLOSEST_1 );
856     ASSERTMSG( ( qto != 0 ), QUAT_MAKECLOSEST_2 );
857     ASSERTMSG( ( r   != 0 ), QUAT_MAKECLOSEST_3 );
858 
859     dot =  q->x*qto->x + q->y*qto->y + q->z*qto->z + q->w*qto->w;
860 
861     if ( dot < 0.0f )
862     {
863         r->x = -q->x;
864         r->y = -q->y;
865         r->z = -q->z;
866         r->w = -q->w;
867     }
868     else
869     {
870         *r = *q;
871     }
872 }
873 
874 
875 /*---------------------------------------------------------------------------*
876   Name:         QUATRotAxisRad
877 
878   Description:  Returns rotation quaternion about arbitrary axis.
879 
880   Arguments:    r    - resultant rotation quaternion
881                 axis - rotation axis
882                 rad  - rotation angle in radians
883 
884   Returns:      NONE
885  *---------------------------------------------------------------------------*/
C_QUATRotAxisRad(Quaternion * r,const Vec * axis,f32 rad)886 void C_QUATRotAxisRad( Quaternion *r, const Vec *axis, f32 rad )
887 {
888     f32 half, sh, ch;
889     Vec nAxis;
890 
891     ASSERTMSG( ( r    != 0 ), QUAT_ROTAXISRAD_1 );
892     ASSERTMSG( ( axis != 0 ), QUAT_ROTAXISRAD_2 );
893 
894     VECNormalize(axis, &nAxis);
895 
896     half = rad * 0.50F;
897     sh   = sinf(half);
898     ch   = cosf(half);
899 
900     r->x = sh * nAxis.x;
901     r->y = sh * nAxis.y;
902     r->z = sh * nAxis.z;
903     r->w = ch;
904 }
905 
906 
907 /*---------------------------------------------------------------------------*
908   Name:         QUATMtx
909 
910   Description:  Converts a matrix to a unit quaternion.
911 
912   Arguments:    r   - result quaternion
913                 m   - the matrix
914 
915   Returns:      NONE
916  *---------------------------------------------------------------------------*/
C_QUATMtx(Quaternion * r,MTX_CONST Mtx m)917 void C_QUATMtx( Quaternion *r, MTX_CONST Mtx m )
918 {
919     f32 tr,s;
920     s32 i,j,k;
921     s32 nxt[3] = {1,2,0};
922     f32 q[3];
923 
924     ASSERTMSG( ( r != 0 ), QUAT_MTX_1 );
925     ASSERTMSG( ( m != 0 ), QUAT_MTX_2 );
926 
927     tr = m[0][0] + m[1][1] + m[2][2];
928     if( tr > 0.0f )
929     {
930         s = (f32)sqrtf(tr + 1.0f);
931         r->w = s * 0.5f;
932         s = 0.5f / s;
933         r->x = (m[2][1] - m[1][2]) * s;
934         r->y = (m[0][2] - m[2][0]) * s;
935         r->z = (m[1][0] - m[0][1]) * s;
936     }
937     else
938     {
939         i = 0;
940         if (m[1][1] > m[0][0]) i = 1;
941         if (m[2][2] > m[i][i]) i = 2;
942         j = nxt[i];
943         k = nxt[j];
944         s = (f32)sqrtf( (m[i][i] - (m[j][j] + m[k][k])) + 1.0f );
945         q[i] = s * 0.5f;
946 
947         if (s!=0.0f)
948             s = 0.5f / s;
949 
950         r->w = (m[k][j] - m[j][k]) * s;
951         q[j] = (m[i][j] + m[j][i]) * s;
952         q[k] = (m[i][k] + m[k][i]) * s;
953 
954         r->x = q[0];
955         r->y = q[1];
956         r->z = q[2];
957     }
958 }
959 
960 
961 /*---------------------------------------------------------------------------*
962   Name:         QUATLerp
963 
964   Description:  Linear interpolation of two quaternions.
965 
966   Arguments:    p - first quaternion
967                 q - second quaternion
968                 r - resultant interpolated quaternion
969                 t - interpolation parameter
970 
971   Returns:      NONE
972  *---------------------------------------------------------------------------*/
C_QUATLerp(const Quaternion * p,const Quaternion * q,Quaternion * r,f32 t)973 void C_QUATLerp( const Quaternion *p, const Quaternion *q, Quaternion *r, f32 t )
974 {
975     ASSERTMSG( ( p != 0 ), QUAT_LERP_1 );
976     ASSERTMSG( ( q != 0 ), QUAT_LERP_2 );
977     ASSERTMSG( ( r != 0 ), QUAT_LERP_3 );
978 
979     r->x = t * ( q->x - p->x ) + p->x;
980     r->y = t * ( q->y - p->y ) + p->y;
981     r->z = t * ( q->z - p->z ) + p->z;
982     r->w = t * ( q->w - p->w ) + p->w;
983 }
984 
985 
986 /*---------------------------------------------------------------------------*
987   Name:         QUATSlerp
988 
989   Description:  Spherical linear interpolation of two quaternions.
990 
991   Arguments:    p - first quaternion
992                 q - second quaternion
993                 r - resultant interpolated quaternion
994                 t - interpolation parameter
995 
996   Returns:      NONE
997  *---------------------------------------------------------------------------*/
C_QUATSlerp(const Quaternion * p,const Quaternion * q,Quaternion * r,f32 t)998 void C_QUATSlerp( const Quaternion *p, const Quaternion *q, Quaternion *r, f32 t )
999 {
1000     f32 theta, sin_th, cos_th, tp, tq;
1001 
1002     ASSERTMSG( ( p != 0 ), QUAT_SLERP_1 );
1003     ASSERTMSG( ( q != 0 ), QUAT_SLERP_2 );
1004     ASSERTMSG( ( r != 0 ), QUAT_SLERP_3 );
1005 
1006     cos_th = p->x * q->x + p->y * q->y + p->z * q->z + p->w * q->w;
1007     tq     = 1.0F;
1008 
1009     if ( cos_th < 0.0F )
1010     {
1011         cos_th = -cos_th;
1012         tq     = -tq;
1013     }
1014 
1015     if ( cos_th <= 1.0F - QUAT_EPSILON )
1016     {
1017         theta  = acosf(cos_th);
1018         sin_th = sinf(theta);
1019         tp     = sinf((1.0F - t) * theta) / sin_th;
1020         tq    *= sinf( t * theta ) / sin_th;
1021     }
1022     else
1023     {
1024         // cos(theta) is close to 1.0F -> linear
1025         tp = 1.0F - t;
1026         tq = tq * t;
1027     }
1028 
1029     r->x = tp * p->x + tq * q->x;
1030     r->y = tp * p->y + tq * q->y;
1031     r->z = tp * p->z + tq * q->z;
1032     r->w = tp * p->w + tq * q->w;
1033 
1034 }
1035 
1036 
1037 /*---------------------------------------------------------------------------*
1038   Name:         QUATSquad
1039 
1040   Description:  Spherical cubic quadrangle interpolation of two quaternions
1041                 with derrived inner-quadrangle quaternions.
1042 
1043                 This will be used with the function QUATCompA().
1044 
1045   Arguments:    p - first quaternion
1046                 a - derrived inner-quadrangle quaternion
1047                 b - derrived inner-quadrangle quaternion
1048                 q - second quaternion
1049                 r - resultant interpolated quaternion
1050                 t - interpolation parameter
1051 
1052   Returns:      NONE
1053  *---------------------------------------------------------------------------*/
C_QUATSquad(const Quaternion * p,const Quaternion * a,const Quaternion * b,const Quaternion * q,Quaternion * r,f32 t)1054 void C_QUATSquad( const Quaternion *p, const Quaternion *a, const Quaternion *b,
1055                   const Quaternion *q, Quaternion *r, f32 t )
1056 {
1057     Quaternion pq, ab;
1058     f32 t2;
1059 
1060     ASSERTMSG( ( p != 0 ), QUAT_SQUAD_1 );
1061     ASSERTMSG( ( a != 0 ), QUAT_SQUAD_2 );
1062     ASSERTMSG( ( b != 0 ), QUAT_SQUAD_3 );
1063     ASSERTMSG( ( q != 0 ), QUAT_SQUAD_4 );
1064     ASSERTMSG( ( r != 0 ), QUAT_SQUAD_5 );
1065 
1066     t2 = 2 * t * ( 1.0F - t );
1067     C_QUATSlerp(p, q, &pq, t);
1068     C_QUATSlerp(a, b, &ab, t);
1069     C_QUATSlerp(&pq, &ab, r, t2);
1070 }
1071 
1072 
1073 /*---------------------------------------------------------------------------*
1074   Name:         QUATCompA
1075 
1076   Description:  Compute a, the term used in Boehm-type interpolation
1077                 a[n] = q[n]* qexp(-(1/4)*( ln(qinv(q[n])*q[n+1]) +
1078                                            ln( qinv(q[n])*q[n-1] )))
1079 
1080   Arguments:    qprev - previous quaternion
1081                 q     - current quaternion
1082                 qnext - next quaternion
1083                 a     - result quaternion A
1084 
1085   Returns:      none
1086 ---------------------------------------------------------------------------*/
C_QUATCompA(const Quaternion * qprev,const Quaternion * q,const Quaternion * qnext,Quaternion * a)1087 void C_QUATCompA( const Quaternion *qprev, const Quaternion *q, const Quaternion *qnext, Quaternion *a )
1088 {
1089     Quaternion qm, qp, lqm, lqp, qpqm, exq;
1090 
1091     ASSERTMSG( ( qprev != 0 ), QUAT_COMPA_1 );
1092     ASSERTMSG( ( q     != 0 ), QUAT_COMPA_2 );
1093     ASSERTMSG( ( qnext != 0 ), QUAT_COMPA_3 );
1094     ASSERTMSG( ( a     != 0 ), QUAT_COMPA_4 );
1095 
1096     C_QUATDivide(qprev, q, &qm);
1097     C_QUATLogN(&qm, &lqm);
1098     C_QUATDivide(qnext, q, &qp);
1099     C_QUATLogN(&qp, &lqp);
1100 
1101     C_QUATAdd(&lqp, &lqm, &qpqm);
1102     C_QUATScale(&qpqm, &qpqm, -0.25F);
1103 
1104     C_QUATExp(&qpqm, &exq);
1105     C_QUATMultiply(q, &exq, a);
1106 }
1107 
1108 /*---------------------------------------------------------------------*
1109 
1110 Name:            MTXQuat
1111 
1112 Description:     sets a rotation matrix from a quaternion.
1113 
1114 
1115 Arguments:       m        matrix to be set
1116 
1117                  q        ptr to quaternion.
1118 
1119 Return:          none
1120 
1121  *---------------------------------------------------------------------*/
1122 /*---------------------------------------------------------------------*
1123     C version
1124  *---------------------------------------------------------------------*/
C_MTXQuat(Mtx m,const Quaternion * q)1125 void C_MTXQuat ( Mtx m, const Quaternion *q )
1126 {
1127     f32 s,xs,ys,zs,wx,wy,wz,xx,xy,xz,yy,yz,zz;
1128 
1129     ASSERTMSG( (m != 0),                         MTX_QUAT_1     );
1130     ASSERTMSG( (q != 0),                         MTX_QUAT_2     );
1131     ASSERTMSG( ( q->x || q->y || q->z || q->w ), MTX_QUAT_3     );
1132 
1133     s = 2.0f / ( (q->x * q->x) + (q->y * q->y) + (q->z * q->z) + (q->w * q->w) );
1134 
1135     xs = q->x *  s;     ys = q->y *  s;  zs = q->z *  s;
1136     wx = q->w * xs;     wy = q->w * ys;  wz = q->w * zs;
1137     xx = q->x * xs;     xy = q->x * ys;  xz = q->x * zs;
1138     yy = q->y * ys;     yz = q->y * zs;  zz = q->z * zs;
1139 
1140     m[0][0] = 1.0f - (yy + zz);
1141     m[0][1] = xy   - wz;
1142     m[0][2] = xz   + wy;
1143     m[0][3] = 0.0f;
1144 
1145     m[1][0] = xy   + wz;
1146     m[1][1] = 1.0f - (xx + zz);
1147     m[1][2] = yz   - wx;
1148     m[1][3] = 0.0f;
1149 
1150     m[2][0] = xz   - wy;
1151     m[2][1] = yz   + wx;
1152     m[2][2] = 1.0f - (xx + yy);
1153     m[2][3] = 0.0f;
1154 }
1155 
1156 #if !defined(WIN32) && !defined(WIN64)
1157 /*---------------------------------------------------------------------*
1158     Paired-Single intrinsics version
1159  *---------------------------------------------------------------------*
1160                 Note that this performs NO error checking.
1161  *---------------------------------------------------------------------*/
PSMTXQuat(Mtx m,const Quaternion * q)1162 void PSMTXQuat ( Mtx m, const Quaternion *q )
1163 {
1164     f32x2    scale;
1165     f32x2    V_XY;
1166     f32x2    V_ZW;
1167     f32x2    V_YX;
1168 
1169     f32x2    D0;
1170     f32x2    D1;
1171     f32x2    tmp2, tmp3, tmp4; //tmp1,
1172     f32x2    tmp5, tmp6, tmp7, tmp8, tmp9;
1173 
1174     // V_XY = [qx][qy] : LOAD
1175     //V_XY[0] = q->x;
1176     //V_XY[1] = q->y;
1177     V_XY = __PSQ_LX(q, 0, 0, 0);
1178 
1179     // V_ZW = [qz][qw] : LOAD
1180     //V_ZW[0] = q->z;
1181     //V_ZW[1] = q->w;
1182     V_ZW = __PSQ_LX(q, 8, 0, 0);
1183 
1184     // tmp2 = [qx*qx][qy*qy]
1185     tmp2 = __PS_MUL(V_XY, V_XY);
1186 
1187     // V_YX = [qy][qx]
1188     V_YX = __PS_MERGE10(V_XY, V_XY);
1189 
1190     // tmp4 = [qx*qx+qz*qz][qy*qy+qw*qw]
1191     tmp4 = __PS_MADD(V_ZW, V_ZW, tmp2);
1192 
1193     // tmp3 = [qz*qz][qw*qw]
1194     tmp3 = __PS_MUL(V_ZW, V_ZW);
1195 
1196     // scale = [qx*qx+qy*qy+qz*qz+qw*qw]
1197     scale = __PS_SUM0(tmp4, tmp4, tmp4);
1198     scale[1] = scale[0];
1199 
1200     // tmp7 = [qy*qw][qx*qw]
1201     tmp7 = __PS_MULS1(V_YX, V_ZW);
1202 
1203     // Newton-Rapson refinment (1/X) : E' = 2E-X*E*E
1204     // tmp9 = [E = Est.(1/X)]
1205     tmp9 = __PS_RES(scale);
1206 
1207     // tmp4 = [qx*qx+qz*qz][qy*qy+qz*qz]
1208     tmp4 = __PS_SUM1(tmp3, tmp4, tmp2);
1209 
1210     // scale = [2-X*E]
1211     scale = __PS_NMSUB(scale, tmp9, c22);
1212 
1213     // tmp6 = [qz*qw][?]
1214     tmp6 = __PS_MULS1(V_ZW, V_ZW);
1215 
1216     // scale = [E(2-scale*E) = E']
1217     scale = __PS_MUL(tmp9, scale);
1218 
1219     // tmp2 = [qx*qx+qy*qy]
1220     tmp2 = __PS_SUM0(tmp2, tmp2, tmp2);
1221 
1222     // scale = [s = 2E' = 2.0F/(qx*qx+qy*qy+qz*qz+qw*qw)]
1223     scale = __PS_MUL(scale, c22);
1224 
1225     // tmp8 = [qx*qy+qz*qw][?]
1226     tmp8 = __PS_MADD(V_XY, V_YX, tmp6);
1227 
1228     // tmp6 = [qx*qy-qz*qw][?]
1229     tmp6 = __PS_MSUB(V_XY, V_YX, tmp6);
1230 
1231     // c00 [m03] : STORE
1232     //m[0][3] = c00[0];
1233     __PSQ_STX(m, 12, c00, 1, 0);
1234 
1235     // tmp2 = [1-s(qx*qx+qy*qy)]   : [m22]
1236     tmp2 = __PS_NMSUB(tmp2, scale, c11);
1237 
1238     // tmp4 = [1-s(qx*qx+qz*qz)][1-s(qy*qy+qz*qz)] : [m11][m00]
1239     tmp4 = __PS_NMSUB(tmp4, scale, c11);
1240 
1241     // c00 [m23] : STORE
1242     //m[2][3] = c00[0];
1243     __PSQ_STX(m, 44, c00, 1, 0);
1244 
1245     // tmp8 = [s(qx*qy+qz*qw)][?]  : [m10]
1246     tmp8 = __PS_MUL(tmp8, scale);
1247 
1248     // tmp6 = [s(qx*qy-qz*qw)][?]  : [m01]
1249     tmp6  = __PS_MUL(tmp6, scale);
1250 
1251     // tmp2 [m22] : STORE
1252     //m[2][2] = tmp2[0];
1253     __PSQ_STX(m, 40, tmp2, 1, 0);
1254 
1255     // tmp5 = [qx*qz+qy*qw][qy*qz+qx*qw]
1256     tmp5 = __PS_MADDS0(V_XY, V_ZW, tmp7);
1257 
1258     // D1 = [m10][m11]
1259     D1 = __PS_MERGE00(tmp8, tmp4);
1260 
1261     // tmp7 = [qx*qz-qy*qw][qy*qz-qx*qw]
1262     tmp7 = __PS_NMSUB(tmp7, c22, tmp5);
1263 
1264     // D1 [m10][m11] : STORE
1265     //m[1][0] = D1[0];
1266     //m[1][1] = D1[1];
1267     __PSQ_STX(m, 16, D1, 0, 0);
1268 
1269     // D0 = [m00][m01]
1270     D0 = __PS_MERGE10(tmp4, tmp6);
1271 
1272     // tmp5 = [s(qx*qz+qy*qw)][s(qy*qz+qx*qw)] : [m02][m21]
1273     tmp5 = __PS_MUL(tmp5, scale);
1274 
1275     // tmp7 = [s(qx*qz-qy*qw)][s(qy*qz-qx*qw)] : [m20][m12]
1276     tmp7 = __PS_MUL(tmp7, scale);
1277 
1278     // D0 [m00][m01] : STORE
1279     //m[0][0] = D0[0];
1280     //m[0][1] = D0[1];
1281     __PSQ_STX(m, 0, D0, 0, 0);
1282 
1283     // tmp5 [m02] : STORE
1284     //m[0][2] = tmp5[0];
1285     __PSQ_STX(m, 8, tmp5, 1, 0);
1286 
1287     // tmp3 = [m12][m13]
1288     D1 = __PS_MERGE10(tmp7, c00);
1289 
1290     // tmp9 = [m20][m21]
1291     D0 = __PS_MERGE01(tmp7, tmp5);
1292 
1293     // tmp3 [m12][m13] : STORE
1294     //m[1][2] = D1[0];
1295     //m[1][3] = D1[1];
1296     __PSQ_STX(m, 24, D1, 0, 0);
1297 
1298     // tmp9 [m20][m21] : STORE
1299     //m[2][0] = D0[0];
1300     //m[2][1] = D0[1];
1301     __PSQ_STX(m, 32, D0, 0, 0);
1302 }
1303 #endif
1304 
1305 /*===========================================================================*/
1306