1/*---------------------------------------------------------------------------*
2  Project:  Horizon
3  File:     math_Matrix44.ipp
4  Copyright (C)2009-2010 Nintendo Co., Ltd.  All rights reserved.
5  These coded instructions, statements, and computer programs contain
6  proprietary information of Nintendo of America Inc. and/or Nintendo
7  Company Ltd., and are protected by Federal copyright law. They may
8  not be disclosed to third parties or copied or duplicated in any form,
9  in whole or in part, without the prior written consent of Nintendo.
10  $Revision: 13623 $
11 *---------------------------------------------------------------------------
12
13
14*/
15
16#include <cmath>
17#include <nn/math/math_Vector3.h>
18
19namespace nn {
20namespace math {
21namespace ARMv6 {
22/* Please see man pages for details
23
24
25*/
26
27/*
28
29
30
31
32
33
34
35*/
36NN_MATH_INLINE VEC4*
37VEC3TransformC(VEC4* pOut, const MTX44* pM, const VEC3* pV)
38{
39    VEC4 tmp;
40    tmp.x = pM->f._00 * pV->x + pM->f._01 * pV->y + pM->f._02 * pV->z + pM->f._03;
41    tmp.y = pM->f._10 * pV->x + pM->f._11 * pV->y + pM->f._12 * pV->z + pM->f._13;
42    tmp.z = pM->f._20 * pV->x + pM->f._21 * pV->y + pM->f._22 * pV->z + pM->f._23;
43    tmp.w = pM->f._30 * pV->x + pM->f._31 * pV->y + pM->f._32 * pV->z + pM->f._33;
44
45    pOut->x = tmp.x;
46    pOut->y = tmp.y;
47    pOut->z = tmp.z;
48    pOut->w = tmp.w;
49
50    return pOut;
51}
52
53/*
54
55*/
56
57/* ------------------------------------------------------------------------
58        MTX44
59   ------------------------------------------------------------------------ */
60
61/* Please see man pages for details
62
63
64*/
65
66
67
68/*
69
70
71
72
73
74
75*/
76NN_MATH_INLINE MTX44*
77MTX44CopyC(MTX44* pOut, const MTX44* p)
78{
79    if (pOut != p)
80    {
81        *pOut = *p;
82    }
83
84    return pOut;
85}
86
87/*
88
89
90
91
92
93
94
95*/
96NN_MATH_INLINE MTX44*
97MTX44AddC(MTX44* pOut, const MTX44* p1, const MTX44* p2)
98{
99    pOut->f._00 = p1->f._00 + p2->f._00;
100    pOut->f._01 = p1->f._01 + p2->f._01;
101    pOut->f._02 = p1->f._02 + p2->f._02;
102    pOut->f._03 = p1->f._03 + p2->f._03;
103
104    pOut->f._10 = p1->f._10 + p2->f._10;
105    pOut->f._11 = p1->f._11 + p2->f._11;
106    pOut->f._12 = p1->f._12 + p2->f._12;
107    pOut->f._13 = p1->f._13 + p2->f._13;
108
109    pOut->f._20 = p1->f._20 + p2->f._20;
110    pOut->f._21 = p1->f._21 + p2->f._21;
111    pOut->f._22 = p1->f._22 + p2->f._22;
112    pOut->f._23 = p1->f._23 + p2->f._23;
113
114    pOut->f._30 = p1->f._30 + p2->f._30;
115    pOut->f._31 = p1->f._31 + p2->f._31;
116    pOut->f._32 = p1->f._32 + p2->f._32;
117    pOut->f._33 = p1->f._33 + p2->f._33;
118
119    return pOut;
120}
121
122
123
124/*
125
126
127
128
129
130
131
132*/
133NN_MATH_INLINE MTX44*
134MTX44MultC(MTX44* pOut, const MTX44* p, f32 f)
135{
136    pOut->f._00 = p->f._00 * f;
137    pOut->f._01 = p->f._01 * f;
138    pOut->f._02 = p->f._02 * f;
139    pOut->f._03 = p->f._03 * f;
140
141    pOut->f._10 = p->f._10 * f;
142    pOut->f._11 = p->f._11 * f;
143    pOut->f._12 = p->f._12 * f;
144    pOut->f._13 = p->f._13 * f;
145
146    pOut->f._20 = p->f._20 * f;
147    pOut->f._21 = p->f._21 * f;
148    pOut->f._22 = p->f._22 * f;
149    pOut->f._23 = p->f._23 * f;
150
151    pOut->f._30 = p->f._30 * f;
152    pOut->f._31 = p->f._31 * f;
153    pOut->f._32 = p->f._32 * f;
154    pOut->f._33 = p->f._33 * f;
155
156    return pOut;
157}
158
159
160/*
161
162
163
164
165
166
167*/
168NN_MATH_INLINE MTX44*
169MTX44TransposeC(MTX44* pOut, const MTX44 *pSrc)
170{
171    MTX44 tmp;
172    const MTX44 *pMtx;
173
174    if (pOut != pSrc)
175    {
176        pMtx = pSrc;
177        pOut->f._00 = pSrc->f._00;
178        pOut->f._11 = pSrc->f._11;
179        pOut->f._22 = pSrc->f._22;
180        pOut->f._33 = pSrc->f._33;
181    }
182    else
183    {
184        pMtx = &tmp;
185        tmp.f._01 = pSrc->f._01;
186        tmp.f._02 = pSrc->f._02;
187        tmp.f._03 = pSrc->f._03;
188        tmp.f._12 = pSrc->f._12;
189        tmp.f._13 = pSrc->f._13;
190        tmp.f._23 = pSrc->f._23;
191    }
192
193    pOut->f._01 = pSrc->f._10;
194    pOut->f._02 = pSrc->f._20;
195    pOut->f._03 = pSrc->f._30;
196    pOut->f._12 = pSrc->f._21;
197    pOut->f._13 = pSrc->f._31;
198    pOut->f._23 = pSrc->f._32;
199
200    pOut->f._10 = pMtx->f._01;
201    pOut->f._20 = pMtx->f._02;
202    pOut->f._30 = pMtx->f._03;
203    pOut->f._21 = pMtx->f._12;
204    pOut->f._31 = pMtx->f._13;
205    pOut->f._32 = pMtx->f._23;
206
207    return pOut;
208}
209
210
211/*
212
213
214
215
216
217
218
219
220
221*/
222NN_MATH_INLINE MTX44*
223MTX44PerspectiveRadC(MTX44* pOut, f32 fovy, f32 aspect, f32 n, f32 f)
224{
225    NN_NULL_ASSERT(pOut);
226
227    f32 (*const m)[4] = pOut->m;
228
229    // find the cotangent of half the (YZ) field of view
230
231    const f32 angle = fovy * 0.5f;
232
233    const f32 cot = 1.0f / ::std::tanf(angle);
234
235    m[0][0] =  cot / aspect;
236    m[0][1] =  0.0f;
237    m[0][2] =  0.0f;
238    m[0][3] =  0.0f;
239
240    m[1][0] =  0.0f;
241    m[1][1] =   cot;
242    m[1][2] =  0.0f;
243    m[1][3] =  0.0f;
244
245    m[2][0] =  0.0f;
246    m[2][1] =  0.0f;
247
248    const f32 tmp = 1.0f / (f - n);
249    m[2][2] = f * tmp;
250    m[2][3] = f * n * tmp;
251
252    m[3][0] =  0.0f;
253    m[3][1] =  0.0f;
254    m[3][2] = -1.0f;
255    m[3][3] =  0.0f;
256
257    return pOut;
258}
259NN_MATH_INLINE MTX44*
260MTX44PerspectiveRadC_FAST(MTX44* pOut, f32 fovy, f32 aspect, f32 n, f32 f)
261{
262    NN_NULL_ASSERT(pOut);
263
264    f32 (*const m)[4] = pOut->m;
265
266    // find the cotangent of half the (YZ) field of view
267
268    const f32 angle = fovy * 0.5f;
269#if (MTX44PERSPECTIVERAD_CONFIG == D_FAST_C_ALGO)
270    f32 sin, cos;
271    SinCosFIdx(&sin, &cos, NN_MATH_RAD_TO_FIDX(angle));
272    const f32 cot = cos/sin;
273#else
274    const f32 cot = 1.0f / ::std::tanf(angle);
275#endif
276    const f32 tmp = 1.0f / (f - n);
277
278    register f32 m00, m11, m22, m23;
279
280    m00 =  cot / aspect;
281    m11 =  cot;
282    m22 = f * tmp;
283    m23 = f * n * tmp;
284
285    m[0][1] =  0.0f;
286    m[0][2] =  0.0f;
287    m[0][3] =  0.0f;
288    m[1][0] =  0.0f;
289    m[1][2] =  0.0f;
290    m[1][3] =  0.0f;
291    m[2][0] =  0.0f;
292    m[2][1] =  0.0f;
293    m[3][0] =  0.0f;
294    m[3][1] =  0.0f;
295    m[3][2] = -1.0f;
296    m[3][3] =  0.0f;
297
298    m[0][0] = m00;
299    m[1][1] = m11;
300    m[2][2] = m22;
301    m[2][3] = m23;
302
303
304    return pOut;
305}
306
307/*
308
309
310
311
312
313
314
315
316
317
318
319
320*/
321NN_MATH_INLINE MTX44*
322MTX44FrustumC(MTX44* pOut, f32 l, f32 r, f32 b, f32 t, f32 n, f32 f)
323{
324    NN_NULL_ASSERT( pOut );
325
326    // Note: Be careful about "l" vs. "1" below!!!
327
328    f32 (*const m)[4] = pOut->m;
329    f32 tmp     =  1.0f / (r - l);
330    m[0][0] =  (2*n) * tmp;
331    m[0][1] =  0.0f;
332    m[0][2] =  (r + l) * tmp;
333    m[0][3] =  0.0f;
334
335    tmp     =  1.0f / (t - b);
336    m[1][0] =  0.0f;
337    m[1][1] =  (2*n) * tmp;
338    m[1][2] =  (t + b) * tmp;
339    m[1][3] =  0.0f;
340
341    m[2][0] =  0.0f;
342    m[2][1] =  0.0f;
343
344    tmp = 1.0f / (f - n);
345
346    m[2][2] = f * tmp;
347    m[2][3] = f * n * tmp;
348
349    m[3][0] =  0.0f;
350    m[3][1] =  0.0f;
351    m[3][2] = -1.0f;
352    m[3][3] =  0.0f;
353
354    return pOut;
355}
356NN_MATH_INLINE MTX44*
357MTX44FrustumC_FAST(MTX44* pOut, f32 l, f32 r, f32 b, f32 t, f32 n, f32 f)
358{
359    NN_NULL_ASSERT( pOut );
360
361    // Note: Be careful about "l" vs. "1" below!!!
362
363    f32 (*const m)[4] = pOut->m;
364    f32 tmp1 =  1.0f / (r - l);
365    f32 tmp3 =  1.0f / (f - n);
366    f32 tmp2 =  1.0f / (t - b);
367
368    register f32 m00, m02, m11, m12, m22, m23;
369
370    m00 =  (2*n) * tmp1;
371    m02 =  (r + l) * tmp1;
372
373    m11 =  (2*n) * tmp2;
374    m12 =  (t + b) * tmp2;
375
376    m22 = f * tmp3;
377    m23 = f * n * tmp3;
378
379    m[0][1] =  0.0f;
380    m[0][3] =  0.0f;
381
382    m[1][0] =  0.0f;
383    m[1][3] =  0.0f;
384
385    m[2][0] =  0.0f;
386    m[2][1] =  0.0f;
387
388    m[3][0] =  0.0f;
389    m[3][1] =  0.0f;
390    m[3][2] = -1.0f;
391    m[3][3] =  0.0f;
392
393    m[0][0] =  m00;
394    m[0][2] =  m02;
395
396    m[1][1] =  m11;
397    m[1][2] =  m12;
398
399    m[2][2] =  m22;
400    m[2][3] =  m23;
401
402    return pOut;
403}
404
405
406/*
407
408
409
410
411
412
413
414
415
416
417
418
419*/
420NN_MATH_INLINE MTX44*
421MTX44OrthoC(MTX44* pOut, f32 l, f32 r, f32 b, f32 t, f32 n, f32 f)
422{
423    NN_NULL_ASSERT( pOut );
424
425    // Note: Be careful about "l" vs. "1" below!!!
426
427    f32 (*const m)[4] = pOut->m;
428    f32 tmp     =  1.0f / (r - l);
429    m[0][0] =  2.0f * tmp;
430    m[0][1] =  0.0f;
431    m[0][2] =  0.0f;
432    m[0][3] = -(r + l) * tmp;
433
434    tmp     =  1.0f / (t - b);
435    m[1][0] =  0.0f;
436    m[1][1] =  2.0f * tmp;
437    m[1][2] =  0.0f;
438    m[1][3] = -(t + b) * tmp;
439
440    m[2][0] =  0.0f;
441    m[2][1] =  0.0f;
442
443    tmp     =  1.0f / (f - n);
444
445    m[2][2] = tmp;
446    m[2][3] = n * tmp;
447
448    m[3][0] =  0.0f;
449    m[3][1] =  0.0f;
450    m[3][2] =  0.0f;
451    m[3][3] =  1.0f;
452
453    return pOut;
454}
455NN_MATH_INLINE MTX44*
456MTX44OrthoC_FAST(MTX44* pOut, f32 l, f32 r, f32 b, f32 t, f32 n, f32 f)
457{
458    NN_NULL_ASSERT( pOut );
459
460    // Note: Be careful about "l" vs. "1" below!!!
461
462    f32 (*const m)[4] = pOut->m;
463    register f32 tmp1     =  1.0f / (r - l);
464    register f32 tmp2     =  1.0f / (t - b);
465    register f32 tmp3     =  1.0f / (f - n);
466    register f32 m00, m03, m11, m13, m22, m23;
467
468    m00 =  2.0f * tmp1;
469    m03 = -(r + l) * tmp1;
470
471    m11 =  2.0f * tmp2;
472    m13 = -(t + b) * tmp2;
473
474    m22 = tmp3;
475    m23 = n * tmp3;
476
477    m[0][1] =  0.0f;
478    m[0][2] =  0.0f;
479
480    m[1][0] =  0.0f;
481    m[1][2] =  0.0f;
482
483    m[2][0] =  0.0f;
484    m[2][1] =  0.0f;
485
486    m[3][0] =  0.0f;
487    m[3][1] =  0.0f;
488    m[3][2] =  0.0f;
489    m[3][3] =  1.0f;
490
491    m[0][0] =  m00;
492    m[0][3] =  m03;
493    m[1][1] =  m11;
494    m[1][3] =  m13;
495    m[2][2] =  m22;
496    m[2][3] =  m23;
497
498    return pOut;
499}
500
501
    /*---------------------------------------------------------------------------*
      Description:  Rotates the projection matrix, in place, to match the
                    orientation of the screen.
      Arguments:    pOut    Pointer to the matrix to rotate.
                    pivot   Direction that selects which rotation is applied.
      Returns:      pOut.
     *---------------------------------------------------------------------------*/
510    inline MTX44*
511    MTX44PivotC( MTX44* pOut, PivotDirection pivot )
512    {
513        // TODO: Need to optimize the routine.
514
515        const f32 PIVOT_ROTATION_SIN_COS[ PIVOT_NUM ][ 2 ] =
516        {
517        #ifdef NN_PLATFORM_CTR
518            { 0.0f,  1.0f }, // NONE
519            { -1.0f, 0.0f }, // TO_UP
520            { 0.0f, -1.0f }, // TO_RIGHT
521            { 1.0f,  0.0f }, // TO_BOTTOM
522            { 0.0f,  1.0f }, // TO_LEFT
523        #else
524            { 0.0f,  1.0f }, // NONE
525            { 0.0f,  1.0f }, // TO_UP
526            { -1.0f, 0.0f }, // TO_RIGHT
527            { 0.0f, -1.0f }, // TO_BOTTOM
528            { 1.0f,  0.0f }, // TO_LEFT
529        #endif
530        };
531
532        if ( pivot == PIVOT_NONE )
533        {
534            return pOut;
535        }
536
537        f32 sin = PIVOT_ROTATION_SIN_COS[ pivot ][ 0 ];
538        f32 cos = PIVOT_ROTATION_SIN_COS[ pivot ][ 1 ];
539
540        f32 (*const m)[4] = pOut->m;
541
542        if ( sin == 0.0f )
543        {
544            m[0][0] = cos * m[0][0];
545            m[0][1] = cos * m[0][1];
546            m[0][2] = cos * m[0][2];
547            m[0][3] = cos * m[0][3];
548
549            m[1][0] = cos * m[1][0];
550            m[1][1] = cos * m[1][1];
551            m[1][2] = cos * m[1][2];
552            m[1][3] = cos * m[1][3];
553        }
554        else // if ( cos == 0.0f )
555        {
556            f32 tmp = m[0][0];
557            m[0][0] = -sin * m[1][0];
558            m[1][0] = sin * tmp;
559
560            tmp = m[0][1];
561            m[0][1] = -sin * m[1][1];
562            m[1][1] = sin * tmp;
563
564            tmp = m[0][2];
565            m[0][2] = -sin * m[1][2];
566            m[1][2] = sin * tmp;
567
568            tmp = m[0][3];
569            m[0][3] = -sin * m[1][3];
570            m[1][3] = sin * tmp;
571        }
572
573        return pOut;
574
575    }
576    inline MTX44*
577    MTX44PivotC_FAST( MTX44* pOut, PivotDirection pivot )
578    {
579        f32 (*const m)[4] = pOut->m;
580        #ifdef NN_PLATFORM_CTR
581        if ( ( pivot == PIVOT_NONE ) || ( pivot == PIVOT_UPSIDE_TO_LEFT ) )
582        #else
583        if ( ( pivot == PIVOT_NONE ) || ( pivot == PIVOT_UPSIDE_TO_TOP ) )
584        #endif
585        {
586            return pOut;
587        }
588
589        #ifdef NN_PLATFORM_CTR
590        if ( pivot == PIVOT_UPSIDE_TO_RIGHT )
591        #else
592        if ( pivot == PIVOT_UPSIDE_TO_BOTTOM )
593        #endif
594        {
595            register f32 m00, m01, m02, m03, m10, m11, m12, m13;
596
597            m00 = -m[0][0];
598            m01 = -m[0][1];
599            m02 = -m[0][2];
600            m03 = -m[0][3];
601
602            m10 = -m[1][0];
603            m11 = -m[1][1];
604            m12 = -m[1][2];
605            m13 = -m[1][3];
606
607            m[0][0] = m00;
608            m[0][1] = m01;
609            m[0][2] = m02;
610            m[0][3] = m03;
611
612            m[1][0] = m10;
613            m[1][1] = m11;
614            m[1][2] = m12;
615            m[1][3] = m13;
616        }
617        #ifdef NN_PLATFORM_CTR
618        else if ( pivot == PIVOT_UPSIDE_TO_BOTTOM )
619        #else
620        else if ( pivot == PIVOT_UPSIDE_TO_LEFT )
621        #endif
622        {
623            register f32 m00, m01, m02, m03, m10, m11, m12, m13;
624
625            m10 = m[0][0];
626            m11 = m[0][1];
627            m12 = m[0][2];
628            m13 = m[0][3];
629
630            m00 = -m[1][0];
631            m01 = -m[1][1];
632            m02 = -m[1][2];
633            m03 = -m[1][3];
634
635            m[0][0] = m00;
636            m[0][1] = m01;
637            m[0][2] = m02;
638            m[0][3] = m03;
639
640            m[1][0] = m10;
641            m[1][1] = m11;
642            m[1][2] = m12;
643            m[1][3] = m13;
644        }
645        #ifdef NN_PLATFORM_CTR
646        else // if ( pivot == PIVOT_UPSIDE_TO_TOP )
647        #else
648        else // if ( pivot == PIVOT_UPSIDE_TO_RIGHT )
649        #endif
650        {
651            register f32 m00, m01, m02, m03, m10, m11, m12, m13;
652
653            m10 = -m[0][0];
654            m11 = -m[0][1];
655            m12 = -m[0][2];
656            m13 = -m[0][3];
657
658            m00 = m[1][0];
659            m01 = m[1][1];
660            m02 = m[1][2];
661            m03 = m[1][3];
662
663            m[0][0] = m00;
664            m[0][1] = m01;
665            m[0][2] = m02;
666            m[0][3] = m03;
667
668            m[1][0] = m10;
669            m[1][1] = m11;
670            m[1][2] = m12;
671            m[1][3] = m13;
672        }
673        return pOut;
674    }
675
676
677/*
678
679
680
681
682
683
684
685*/
686NN_MATH_INLINE MTX44*
687MTX44MultC(MTX44* pOut, const MTX44* __restrict p1, const MTX44* __restrict p2)
688{
689    NN_NULL_ASSERT( pOut );
690    NN_NULL_ASSERT( p1 );
691    NN_NULL_ASSERT( p2 );
692
693    MTX44 mTmp;
694
695    MTX44* __restrict pDst = ( pOut == p1 || pOut == p2 ) ? &mTmp : pOut;
696
697    pDst->f._00 = p1->f._00 * p2->f._00 + p1->f._01 * p2->f._10 + p1->f._02 * p2->f._20 + p1->f._03 * p2->f._30;
698    pDst->f._01 = p1->f._00 * p2->f._01 + p1->f._01 * p2->f._11 + p1->f._02 * p2->f._21 + p1->f._03 * p2->f._31;
699    pDst->f._02 = p1->f._00 * p2->f._02 + p1->f._01 * p2->f._12 + p1->f._02 * p2->f._22 + p1->f._03 * p2->f._32;
700    pDst->f._03 = p1->f._00 * p2->f._03 + p1->f._01 * p2->f._13 + p1->f._02 * p2->f._23 + p1->f._03 * p2->f._33;
701
702    pDst->f._10 = p1->f._10 * p2->f._00 + p1->f._11 * p2->f._10 + p1->f._12 * p2->f._20 + p1->f._13 * p2->f._30;
703    pDst->f._11 = p1->f._10 * p2->f._01 + p1->f._11 * p2->f._11 + p1->f._12 * p2->f._21 + p1->f._13 * p2->f._31;
704    pDst->f._12 = p1->f._10 * p2->f._02 + p1->f._11 * p2->f._12 + p1->f._12 * p2->f._22 + p1->f._13 * p2->f._32;
705    pDst->f._13 = p1->f._10 * p2->f._03 + p1->f._11 * p2->f._13 + p1->f._12 * p2->f._23 + p1->f._13 * p2->f._33;
706
707    pDst->f._20 = p1->f._20 * p2->f._00 + p1->f._21 * p2->f._10 + p1->f._22 * p2->f._20 + p1->f._23 * p2->f._30;
708    pDst->f._21 = p1->f._20 * p2->f._01 + p1->f._21 * p2->f._11 + p1->f._22 * p2->f._21 + p1->f._23 * p2->f._31;
709    pDst->f._22 = p1->f._20 * p2->f._02 + p1->f._21 * p2->f._12 + p1->f._22 * p2->f._22 + p1->f._23 * p2->f._32;
710    pDst->f._23 = p1->f._20 * p2->f._03 + p1->f._21 * p2->f._13 + p1->f._22 * p2->f._23 + p1->f._23 * p2->f._33;
711
712    pDst->f._30 = p1->f._30 * p2->f._00 + p1->f._31 * p2->f._10 + p1->f._32 * p2->f._20 + p1->f._33 * p2->f._30;
713    pDst->f._31 = p1->f._30 * p2->f._01 + p1->f._31 * p2->f._11 + p1->f._32 * p2->f._21 + p1->f._33 * p2->f._31;
714    pDst->f._32 = p1->f._30 * p2->f._02 + p1->f._31 * p2->f._12 + p1->f._32 * p2->f._22 + p1->f._33 * p2->f._32;
715    pDst->f._33 = p1->f._30 * p2->f._03 + p1->f._31 * p2->f._13 + p1->f._32 * p2->f._23 + p1->f._33 * p2->f._33;
716
717    if ( pDst != pOut )
718    {
719        MTX44Copy( pOut, pDst );
720    }
721
722    return pOut;
723}
724
725namespace {
726    inline void SwapF(f32 &a, f32 &b)
727    {
728        f32 tmp;
729        tmp = a;
730        a = b;
731        b = tmp;
732    }
733} // namespace (unnamed)
734
735
736/*
737
738
739
740
741
742
743*/
744NN_MATH_INLINE u32
745MTX44InverseC(MTX44* pOut, const MTX44* p)
746{
747    MTX44 mTmp;
748    f32 (*src)[4];
749    f32 (*inv)[4];
750    f32   w;
751
752    NN_NULL_ASSERT( p );
753    NN_NULL_ASSERT( pOut );
754
755    MTX44Copy(&mTmp, p);
756    MTX44Identity(pOut);
757
758    src = mTmp.m;
759    inv = pOut->m;
760
761    for (int i = 0; i < 4; ++i)
762    {
763        f32 max = 0.0f;
764        s32 swp = i;
765
766        // ---- partial pivoting -----
767        for(int k = i ; k < 4 ; k++ )
768        {
769            f32 ftmp;
770            ftmp = ::std::fabs(src[k][i]);
771            if ( ftmp > max )
772            {
773                max = ftmp;
774                swp = k;
775            }
776        }
777
778        // check singular matrix
779        //(or can't solve inverse matrix with this algorithm)
780        if ( max == 0.0f )
781        {
782            return 0;
783        }
784
785        // swap row
786        if ( swp != i )
787        {
788            for (int k = 0; k < 4; k++)
789            {
790                SwapF(src[i][k], src[swp][k]);
791                SwapF(inv[i][k], inv[swp][k]);
792            }
793        }
794
795        // ---- pivoting end ----
796
797        w = 1.0f / src[i][i];
798        for (int j = 0; j < 4; ++j)
799        {
800            src[i][j] *= w;
801            inv[i][j] *= w;
802        }
803
804        for (int k = 0; k < 4; ++k )
805        {
806            if ( k == i )
807                continue;
808
809            w = src[k][i];
810            for (int j = 0; j < 4; ++j)
811            {
812                src[k][j] -= src[i][j] * w;
813                inv[k][j] -= inv[i][j] * w;
814            }
815        }
816    }
817
818    return 1;
819}
820NN_MATH_INLINE u32
821MTX44InverseC_FAST_ALGO(MTX44* pOut, const MTX44* p)
822{
823    const f32 (*src)[4];
824    f32 (*inv)[4];
825
826    NN_NULL_ASSERT( p );
827    NN_NULL_ASSERT( pOut );
828
829    src = p->m;
830    inv = pOut->m;
831
832    f32 a11, a12, a13, a14, a21, a22, a23, a24, a31, a32, a33, a34, a41, a42, a43, a44;
833    f32 b11, b12, b13, b14, b21, b22, b23, b24, b31, b32, b33, b34, b41, b42, b43, b44;
834    f32 det;
835
836    a11 = src[0][0];
837    a12 = src[0][1];
838    a13 = src[0][2];
839    a14 = src[0][3];
840
841    a21 = src[1][0];
842    a22 = src[1][1];
843    a23 = src[1][2];
844    a24 = src[1][3];
845
846    a31 = src[2][0];
847    a32 = src[2][1];
848    a33 = src[2][2];
849    a34 = src[2][3];
850
851    a41 = src[3][0];
852    a42 = src[3][1];
853    a43 = src[3][2];
854    a44 = src[3][3];
855
856    det = a11*(a22*a33*a44 + a23*a34*a42 + a24*a32*a43)
857        + a12*(a21*a34*a43 + a23*a31*a44 + a24*a33*a41)
858        + a13*(a21*a32*a44 + a22*a34*a41 + a24*a31*a42)
859        + a14*(a21*a33*a42 + a22*a31*a43 + a23*a32*a41)
860        - a11*(a22*a34*a43 + a23*a32*a44 + a24*a33*a42)
861        - a12*(a21*a33*a44 + a23*a34*a41 + a24*a31*a43)
862        - a13*(a21*a34*a42 + a22*a31*a44 + a24*a32*a41)
863        - a14*(a21*a32*a43 + a22*a33*a41 + a23*a31*a42);
864
865    if(det==0.0f)
866        return 0;
867
868    det = 1.0f / det;
869
870    f32 a33xa44_a34xa43, a32xa44_a34xa42, a33xa42_a32xa43,
871        a33xa41_a31xa43, a31xa44_a34xa41, a32xa41_a31xa42;
872
873    a33xa44_a34xa43 = a33*a44 - a34*a43;
874    a32xa44_a34xa42 = a32*a44 - a34*a42;
875    a33xa42_a32xa43 = a33*a42 - a32*a43;
876    a33xa41_a31xa43 = a33*a41 - a31*a43;
877    a31xa44_a34xa41 = a31*a44 - a34*a41;
878    a32xa41_a31xa42 = a32*a41 - a31*a42;
879
880    f32 a23xa44_a24xa43, a24xa33_a23xa34, a24xa42_a22xa44, a22xa43_a23xa42,
881        a22xa34_a24xa32, a23xa32_a22xa33, a21xa44_a24xa41, a23xa41_a21xa43,
882        a24xa31_a21xa34, a21xa33_a23xa31, a21xa42_a22xa41, a22xa31_a21xa32;
883
884    a23xa44_a24xa43 = a23*a44 - a24*a43;
885    a24xa33_a23xa34 = a24*a33 - a23*a34;
886    a24xa42_a22xa44 = a24*a42 - a22*a44;
887    a22xa43_a23xa42 = a22*a43 - a23*a42;
888    a22xa34_a24xa32 = a22*a34 - a24*a32;
889    a23xa32_a22xa33 = a23*a32 - a22*a33;
890    a21xa44_a24xa41 = a21*a44 - a24*a41;
891    a23xa41_a21xa43 = a23*a41 - a21*a43;
892    a24xa31_a21xa34 = a24*a31 - a21*a34;
893    a21xa33_a23xa31 = a21*a33 - a23*a31;
894    a21xa42_a22xa41 = a21*a42 - a22*a41;
895    a22xa31_a21xa32 = a22*a31 - a21*a32;
896
897    b11 =( a22*a33xa44_a34xa43) - (a23*a32xa44_a34xa42) - (a24*a33xa42_a32xa43);
898    b12 =( a13*a32xa44_a34xa42) + (a14*a33xa42_a32xa43) - (a12*a33xa44_a34xa43);
899    b13 =( a12*a23xa44_a24xa43) + (a13*a24xa42_a22xa44) + (a14*a22xa43_a23xa42);
900    b14 =( a12*a24xa33_a23xa34) + (a13*a22xa34_a24xa32) + (a14*a23xa32_a22xa33);
901    b21 =( a23*a31xa44_a34xa41) + (a24*a33xa41_a31xa43) - (a21*a33xa44_a34xa43);
902    b22 =( a11*a33xa44_a34xa43) - (a13*a31xa44_a34xa41) - (a14*a33xa41_a31xa43);
903    b23 =( a13*a21xa44_a24xa41) + (a14*a23xa41_a21xa43) - (a11*a23xa44_a24xa43);
904    b24 =( a13*a24xa31_a21xa34) + (a14*a21xa33_a23xa31) - (a11*a24xa33_a23xa34);
905    b31 =( a21*a32xa44_a34xa42) - (a22*a31xa44_a34xa41) - (a24*a32xa41_a31xa42);
906    b32 =( a12*a31xa44_a34xa41) + (a14*a32xa41_a31xa42) - (a11*a32xa44_a34xa42);
907    b33 =( a14*a21xa42_a22xa41) - (a11*a24xa42_a22xa44) - (a12*a21xa44_a24xa41);
908    b34 =( a14*a22xa31_a21xa32) - (a11*a22xa34_a24xa32) - (a12*a24xa31_a21xa34);
909    b41 =( a21*a33xa42_a32xa43) - (a22*a33xa41_a31xa43) + (a23*a32xa41_a31xa42);
910    b42 =( a12*a33xa41_a31xa43) - (a13*a32xa41_a31xa42) - (a11*a33xa42_a32xa43);
911    b43 =(-a13*a21xa42_a22xa41) - (a11*a22xa43_a23xa42) - (a12*a23xa41_a21xa43);
912    b44 =(-a13*a22xa31_a21xa32) - (a11*a23xa32_a22xa33) - (a12*a21xa33_a23xa31);
913
914    b11 = b11 * det;
915    b12 = b12 * det;
916    b13 = b13 * det;
917    b14 = b14 * det;
918    b21 = b21 * det;
919    b22 = b22 * det;
920    b23 = b23 * det;
921    b24 = b24 * det;
922    b31 = b31 * det;
923    b32 = b32 * det;
924    b33 = b33 * det;
925    b34 = b34 * det;
926    b41 = b41 * det;
927    b42 = b42 * det;
928    b43 = b43 * det;
929    b44 = b44 * det;
930
931    inv[0][0] = b11;
932    inv[0][1] = b12;
933    inv[0][2] = b13;
934    inv[0][3] = b14;
935
936    inv[1][0] = b21;
937    inv[1][1] = b22;
938    inv[1][2] = b23;
939    inv[1][3] = b24;
940
941    inv[2][0] = b31;
942    inv[2][1] = b32;
943    inv[2][2] = b33;
944    inv[2][3] = b34;
945
946    inv[3][0] = b41;
947    inv[3][1] = b42;
948    inv[3][2] = b43;
949    inv[3][3] = b44;
950
951    return 1;
952}
953
954
955/*
956
957
958
959
960
961
962
963*/
964NN_MATH_INLINE MTX44*
965MTX44RotAxisRad_C( MTX44* pOut, const VEC3 *pAxis, f32 fRad )
966{
967    VEC3 vN;
968    f32 s, c;             // sinTheta, cosTheta
969    f32 t;                // ( 1 - cosTheta )
970    f32 x, y, z;          // x, y, z components of normalized axis
971    f32 xSq, ySq, zSq;    // x, y, z squared
972
973
974    NN_NULL_ASSERT( pOut );
975    NN_NULL_ASSERT( pAxis );
976
977    f32 (*const m)[4] = pOut->m;
978
979    s = ::std::sinf(fRad);
980    c = ::std::cosf(fRad);
981    t = 1.0f - c;
982
983    VEC3Normalize( &vN, pAxis );
984
985    x = vN.x;
986    y = vN.y;
987    z = vN.z;
988
989    xSq = x * x;
990    ySq = y * y;
991    zSq = z * z;
992
993    m[0][0] = ( t * xSq )   + ( c );
994    m[0][1] = ( t * x * y ) - ( s * z );
995    m[0][2] = ( t * x * z ) + ( s * y );
996    m[0][3] = 0.0f;
997
998    m[1][0] = ( t * x * y ) + ( s * z );
999    m[1][1] = ( t * ySq )   + ( c );
1000    m[1][2] = ( t * y * z ) - ( s * x );
1001    m[1][3] = 0.0f;
1002
1003    m[2][0] = ( t * x * z ) - ( s * y );
1004    m[2][1] = ( t * y * z ) + ( s * x );
1005    m[2][2] = ( t * zSq )   + ( c );
1006    m[2][3] = 0.0f;
1007
1008    m[3][0] = 0.0f;
1009    m[3][1] = 0.0f;
1010    m[3][2] = 0.0f;
1011    m[3][3] = 1.0f;
1012
1013    return pOut;
1014}
1015NN_MATH_INLINE MTX44*
1016MTX44RotAxisRad_C_FAST( MTX44* pOut, const VEC3 *pAxis, f32 fRad )
1017{
1018    VEC3 vN;
1019    f32 s, c;             // sinTheta, cosTheta
1020    f32 t;                // ( 1 - cosTheta )
1021    f32 x, y, z;          // x, y, z components of normalized axis
1022    f32 xSq, ySq, zSq;    // x, y, z squared
1023    f32 m00, m01, m02, m10, m11, m12, m20, m21, m22;
1024
1025
1026    NN_NULL_ASSERT( pOut );
1027    NN_NULL_ASSERT( pAxis );
1028
1029    f32 (*const m)[4] = pOut->m;
1030
1031#if (MTX44ROTAXISRAD__CONFIG == D_FAST_C_ALGO)
1032    SinCosFIdx(&s, &c, NN_MATH_RAD_TO_FIDX(fRad));
1033#else
1034    s = ::std::sinf(fRad);
1035    c = ::std::cosf(fRad);
1036#endif
1037    t = 1.0f - c;
1038
1039    VEC3Normalize( &vN, pAxis );
1040
1041    x = vN.x;
1042    y = vN.y;
1043    z = vN.z;
1044
1045    xSq = x * x;
1046    ySq = y * y;
1047    zSq = z * z;
1048
1049    m00 = ( t * xSq )   + ( c );
1050    m01 = ( t * x * y ) - ( s * z );
1051    m02 = ( t * x * z ) + ( s * y );
1052
1053    m10 = ( t * x * y ) + ( s * z );
1054    m11 = ( t * ySq )   + ( c );
1055    m12 = ( t * y * z ) - ( s * x );
1056
1057    m20 = ( t * x * z ) - ( s * y );
1058    m21 = ( t * y * z ) + ( s * x );
1059    m22 = ( t * zSq )   + ( c );
1060
1061
1062    m[0][0] = m00;
1063    m[0][1] = m01;
1064    m[0][2] = m02;
1065    m[0][3] = 0.0f;
1066
1067    m[1][0] = m10;
1068    m[1][1] = m11;
1069    m[1][2] = m12;
1070    m[1][3] = 0.0f;
1071
1072    m[2][0] = m20;
1073    m[2][1] = m21;
1074    m[2][2] = m22;
1075    m[2][3] = 0.0f;
1076
1077    m[3][0] = 0.0f;
1078    m[3][1] = 0.0f;
1079    m[3][2] = 0.0f;
1080    m[3][3] = 1.0f;
1081
1082    return pOut;
1083}
1084
1085
1086/*
1087
1088
1089
1090
1091
1092
1093
1094
1095*/
1096NN_MATH_INLINE MTX44*
1097MTX44RotXYZFIdxC(MTX44* pOut, f32 fIdxX, f32 fIdxY, f32 fIdxZ)
1098{
1099    NN_FLOAT_ASSERT(fIdxX);
1100    NN_FLOAT_ASSERT(fIdxY);
1101    NN_FLOAT_ASSERT(fIdxZ);
1102
1103    f32 sinx, cosx;
1104    f32 siny, cosy;
1105    f32 sinz, cosz;
1106    f32 f1, f2;
1107
1108    SinCosFIdx(&sinx, &cosx, fIdxX);
1109    SinCosFIdx(&siny, &cosy, fIdxY);
1110    SinCosFIdx(&sinz, &cosz, fIdxZ);
1111
1112    pOut->f._20 = -siny;
1113    pOut->f._00 = cosz * cosy;
1114    pOut->f._10 = sinz * cosy;
1115    pOut->f._21 = cosy * sinx;
1116    pOut->f._22 = cosy * cosx;
1117
1118    f1 = cosx * sinz;
1119    f2 = sinx * cosz;
1120
1121    pOut->f._01 = f2 * siny - f1;
1122    pOut->f._12 = f1 * siny - f2;
1123
1124    f1 = sinx * sinz;
1125    f2 = cosx * cosz;
1126    pOut->f._02 = f2 * siny + f1;
1127    pOut->f._11 = f1 * siny + f2;
1128
1129    pOut->f._03 = 0.f;
1130    pOut->f._13 = 0.f;
1131    pOut->f._23 = 0.f;
1132
1133    pOut->f._30 = 0.0f;
1134    pOut->f._31 = 0.0f;
1135    pOut->f._32 = 0.0f;
1136    pOut->f._33 = 1.0f;
1137
1138    return pOut;
1139}
1140NN_MATH_INLINE MTX44*
1141MTX44RotXYZFIdxC_FAST(MTX44* pOut, f32 fIdxX, f32 fIdxY, f32 fIdxZ)
1142{
1143    NN_FLOAT_ASSERT(fIdxX);
1144    NN_FLOAT_ASSERT(fIdxY);
1145    NN_FLOAT_ASSERT(fIdxZ);
1146
1147    f32 sinx, cosx;
1148    f32 siny, cosy;
1149    f32 sinz, cosz;
1150    f32 f1, f2, f3, f4;
1151    f32 f00, f10, f21, f22;
1152    f32 f01, f11, f02, f12;
1153
1154
1155
1156    {
1157
1158        u16 idxx;
1159        f32 abs_fidxx;
1160        f32 rx;
1161
1162        u16 idxy;
1163        f32 abs_fidxy;
1164        f32 ry;
1165
1166        u16 idxz;
1167        f32 abs_fidxz;
1168        f32 rz;
1169
1170        int negx, negy, negz;
1171
1172        negx = (fIdxX < 0.0f) ? 1 : 0;
1173        abs_fidxx = FAbs(fIdxX);
1174        negy = (fIdxY < 0.0f) ? 1 : 0;
1175        abs_fidxy = FAbs(fIdxY);
1176        negz = (fIdxZ < 0.0f) ? 1 : 0;
1177        abs_fidxz = FAbs(fIdxZ);
1178
1179
1180        while ( abs_fidxx >= 65536.0f )
1181        {
1182            abs_fidxx -= 65536.0f;
1183        }
1184        while ( abs_fidxy >= 65536.0f )
1185        {
1186            abs_fidxy -= 65536.0f;
1187        }
1188        while ( abs_fidxz >= 65536.0f )
1189        {
1190            abs_fidxz -= 65536.0f;
1191        }
1192
1193        idxx = F32ToU16(abs_fidxx);
1194        idxy = F32ToU16(abs_fidxy);
1195        idxz = F32ToU16(abs_fidxz);
1196        {
1197            f32 idxxf, idxyf, idxzf;
1198
1199            idxxf = U16ToF32(idxx);
1200            idxyf = U16ToF32(idxy);
1201            idxzf = U16ToF32(idxz);
1202
1203            rx = abs_fidxx - idxxf;
1204            ry = abs_fidxy - idxyf;
1205            rz = abs_fidxz - idxzf;
1206
1207
1208        }
1209
1210        idxx &= 0xff;
1211        idxy &= 0xff;
1212        idxz &= 0xff;
1213
1214        {
1215            f32 sinx_val, sinx_delta, cosx_val, cosx_delta;
1216            f32 siny_val, siny_delta, cosy_val, cosy_delta;
1217            f32 sinz_val, sinz_delta, cosz_val, cosz_delta;
1218
1219            sinx_val = internal::gSinCosTbl[idxx].sin_val;
1220            cosx_val = internal::gSinCosTbl[idxx].cos_val;
1221            sinx_delta = internal::gSinCosTbl[idxx].sin_delta;
1222            cosx_delta = internal::gSinCosTbl[idxx].cos_delta;
1223
1224            sinx = sinx_val + rx * sinx_delta;
1225            cosx = cosx_val + rx * cosx_delta;
1226
1227            siny_val = internal::gSinCosTbl[idxy].sin_val;
1228            cosy_val = internal::gSinCosTbl[idxy].cos_val;
1229            siny_delta = internal::gSinCosTbl[idxy].sin_delta;
1230            cosy_delta = internal::gSinCosTbl[idxy].cos_delta;
1231
1232            siny = siny_val + ry * siny_delta;
1233            cosy = cosy_val + ry * cosy_delta;
1234
1235            sinz_val = internal::gSinCosTbl[idxz].sin_val;
1236            cosz_val = internal::gSinCosTbl[idxz].cos_val;
1237            sinz_delta = internal::gSinCosTbl[idxz].sin_delta;
1238            cosz_delta = internal::gSinCosTbl[idxz].cos_delta;
1239
1240            sinz = sinz_val + rz * sinz_delta;
1241            cosz = cosz_val + rz * cosz_delta;
1242
1243        }
1244
1245        sinx = (negx) ? -sinx : sinx;
1246        siny = (negy) ? -siny : siny;
1247        sinz = (negz) ? -sinz : sinz;
1248
1249    }
1250
1251    f00 = cosz * cosy;
1252    f10 = sinz * cosy;
1253    f21 = sinx * cosy;
1254    f22 = cosx * cosy;
1255
1256    f1 = cosx * sinz;
1257    f2 = sinx * cosz;
1258
1259    f01 = f2 * siny - f1;
1260    f12 = f1 * siny - f2;
1261
1262    f3 = sinx * sinz;
1263    f4 = cosx * cosz;
1264
1265    f02 = f4 * siny + f3;
1266    f11 = f3 * siny + f4;
1267
1268    pOut->f._00 = f00;
1269    pOut->f._10 = f10;
1270    pOut->f._21 = f21;
1271    pOut->f._22 = f22;
1272
1273    pOut->f._01 = f01;
1274    pOut->f._12 = f12;
1275    pOut->f._02 = f02;
1276    pOut->f._11 = f11;
1277    pOut->f._20 = -siny;
1278
1279    *(unsigned int*)&pOut->f._03 = 0x00000000;
1280    *(unsigned int*)&pOut->f._13 = 0x00000000;
1281    *(unsigned int*)&pOut->f._23 = 0x00000000;
1282
1283    *(unsigned int*)&pOut->f._30 = 0x00000000;
1284    *(unsigned int*)&pOut->f._31 = 0x00000000;
1285    *(unsigned int*)&pOut->f._32 = 0x00000000;
1286    *(unsigned int*)&pOut->f._33 = 0x3F800000;
1287
1288    return pOut;
1289}
1290
1291/*
1292
1293
1294
1295
1296
1297
1298*/
1299NN_MATH_INLINE MTX44*
1300MTX44ScaleC(MTX44* pOut, const VEC3* pS)
1301{
1302    NN_NULL_ASSERT( pOut  );
1303    NN_NULL_ASSERT( pS  );
1304
1305    f32 (*const m)[4] = pOut->m;
1306
1307    m[0][0] = pS->x;    m[0][1] = 0.0f;  m[0][2] = 0.0f;  m[0][3] = 0.0f;
1308    m[1][0] = 0.0f;     m[1][1] = pS->y; m[1][2] = 0.0f;  m[1][3] = 0.0f;
1309    m[2][0] = 0.0f;     m[2][1] = 0.0f;  m[2][2] = pS->z; m[2][3] = 0.0f;
1310    m[3][0] = 0.0f;     m[3][1] = 0.0f;  m[3][2] = 0.0f; m[3][3] = 1.0f;
1311
1312    return pOut;
1313}
1314NN_MATH_INLINE MTX44*
1315MTX44ScaleC_FAST(MTX44* pOut, const VEC3* pS)
1316{
1317    NN_NULL_ASSERT( pOut  );
1318    NN_NULL_ASSERT( pS  );
1319
1320    unsigned int *m = reinterpret_cast<unsigned int *>(pOut->m);
1321
1322    f32 f1 = 1.0f;
1323    f32 f0 = 0.0f;
1324    const unsigned int f32_1 = *(reinterpret_cast<unsigned int*>(&f1));
1325    const unsigned int f32_0 = *(reinterpret_cast<unsigned int*>(&f0));
1326    const unsigned int *p = reinterpret_cast<const unsigned int*>(pS);
1327
1328    m[ 0] =  p[0];  m[ 1] = f32_0;  m[ 2] = f32_0;  m[ 3] = f32_0;
1329    m[ 4] = f32_0;  m[ 5] =  p[1];  m[ 6] = f32_0;  m[ 7] = f32_0;
1330    m[ 8] = f32_0;  m[ 9] = f32_0;  m[10] =  p[2];  m[11] = f32_0;
1331    m[12] = f32_0;  m[13] = f32_0;  m[14] = f32_0;  m[15] = f32_1;
1332
1333    return pOut;
1334}
1335
1336
1337/*
1338
1339
1340
1341
1342
1343
1344
1345*/
1346NN_MATH_INLINE MTX44*
1347MTX44MultScaleC(MTX44* pOut, const MTX44* pM, const VEC3* pS)
1348{
1349    // Version where the scale matrix is applied from the right.
1350    pOut->f._00 = pM->f._00 * pS->x;
1351    pOut->f._10 = pM->f._10 * pS->x;
1352    pOut->f._20 = pM->f._20 * pS->x;
1353
1354    pOut->f._01 = pM->f._01 * pS->y;
1355    pOut->f._11 = pM->f._11 * pS->y;
1356    pOut->f._21 = pM->f._21 * pS->y;
1357
1358    pOut->f._02 = pM->f._02 * pS->z;
1359    pOut->f._12 = pM->f._12 * pS->z;
1360    pOut->f._22 = pM->f._22 * pS->z;
1361
1362    if (pOut != pM)
1363    {
1364        pOut->f._03 = pM->f._03;
1365        pOut->f._13 = pM->f._13;
1366        pOut->f._23 = pM->f._23;
1367    }
1368
1369    return pOut;
1370}
1371
1372
1373/*
1374
1375
1376
1377
1378
1379
1380
1381*/
1382NN_MATH_INLINE MTX44*
1383MTX44MultScaleC(MTX44* pOut, const VEC3* pS, const MTX44* pM)
1384{
1385    NN_NULL_ASSERT( pOut  );
1386    NN_NULL_ASSERT( pS  );
1387    NN_NULL_ASSERT( pM  );
1388
1389    const f32 (*const src)[4] = pM->m;
1390    f32 (*const dst)[4] = pOut->m;
1391
1392    dst[0][0] = src[0][0] * pS->x;     dst[0][1] = src[0][1] * pS->x;
1393    dst[0][2] = src[0][2] * pS->x;     dst[0][3] = src[0][3] * pS->x;
1394
1395    dst[1][0] = src[1][0] * pS->y;     dst[1][1] = src[1][1] * pS->y;
1396    dst[1][2] = src[1][2] * pS->y;     dst[1][3] = src[1][3] * pS->y;
1397
1398    dst[2][0] = src[2][0] * pS->z;     dst[2][1] = src[2][1] * pS->z;
1399    dst[2][2] = src[2][2] * pS->z;     dst[2][3] = src[2][3] * pS->z;
1400
1401    return pOut;
1402}
1403
1404
1405/*
1406
1407
1408
1409
1410
1411
1412*/
1413NN_MATH_INLINE MTX44*
1414MTX44TranslateC(MTX44* pOut, const VEC3* pT)
1415{
1416    NN_NULL_ASSERT( pOut );
1417    NN_NULL_ASSERT( pT );
1418
1419    f32 (*const m)[4] = pOut->m;
1420
1421    m[0][0] = 1.0f;  m[0][1] = 0.0f;  m[0][2] = 0.0f;  m[0][3] = pT->x;
1422    m[1][0] = 0.0f;  m[1][1] = 1.0f;  m[1][2] = 0.0f;  m[1][3] = pT->y;
1423    m[2][0] = 0.0f;  m[2][1] = 0.0f;  m[2][2] = 1.0f;  m[2][3] = pT->z;
1424    m[3][0] = 0.0f;  m[3][1] = 0.0f;  m[3][2] = 0.0f;  m[3][3] = 1.0f;
1425
1426    return pOut;
1427}
1428NN_MATH_INLINE MTX44*
1429MTX44TranslateC_FAST(MTX44* pOut, const VEC3* pT)
1430{
1431    NN_NULL_ASSERT( pOut );
1432    NN_NULL_ASSERT( pT );
1433
1434    unsigned int *m = reinterpret_cast<unsigned int *>(pOut->m);
1435
1436    f32 f1 = 1.0f;
1437    f32 f0 = 0.0f;
1438    const unsigned int f32_1 = *(reinterpret_cast<unsigned int*>(&f1));
1439    const unsigned int f32_0 = *(reinterpret_cast<unsigned int*>(&f0));
1440    const unsigned int *p = reinterpret_cast<const unsigned int*>(pT);
1441
1442    m[ 0] = f32_1;  m[ 1] = f32_0;  m[ 2] = f32_0;  m[ 3] = p[0];
1443    m[ 4] = f32_0;  m[ 5] = f32_1;  m[ 6] = f32_0;  m[ 7] = p[1];
1444    m[ 8] = f32_0;  m[ 9] = f32_0;  m[10] = f32_1;  m[11] = p[2];
1445    m[12] = f32_0;  m[13] = f32_0;  m[14] = f32_0;  m[15] = f32_1;
1446
1447    return pOut;
1448}
1449
1450
1451/*
1452
1453
1454
1455
1456
1457
1458
1459*/
1460NN_MATH_INLINE MTX44*
1461MTX44MultTranslateC(MTX44* pOut, const VEC3* pT, const MTX44* pM)
1462{
1463    NN_NULL_ASSERT(pOut);
1464    NN_NULL_ASSERT(pT);
1465    NN_NULL_ASSERT(pM);
1466
1467    const f32 (*const src)[4] = pM->m;
1468    f32 (*const dst)[4] = pOut->m;
1469
1470    if ( src != dst )
1471    {
1472        dst[0][0] = src[0][0];    dst[0][1] = src[0][1];    dst[0][2] = src[0][2];
1473        dst[1][0] = src[1][0];    dst[1][1] = src[1][1];    dst[1][2] = src[1][2];
1474        dst[2][0] = src[2][0];    dst[2][1] = src[2][1];    dst[2][2] = src[2][2];
1475    }
1476
1477    dst[0][3] = src[0][3] + pT->x;
1478    dst[1][3] = src[1][3] + pT->y;
1479    dst[2][3] = src[2][3] + pT->z;
1480
1481    return pOut;
1482}
1483
1484/*
1485
1486
1487
1488
1489
1490
1491
1492*/
1493NN_MATH_INLINE MTX44*
1494MTX44MultTranslateC(MTX44* pOut, const MTX44* pM, const VEC3* pT)
1495{
1496    NN_NULL_ASSERT(pOut);
1497    NN_NULL_ASSERT(pT);
1498    NN_NULL_ASSERT(pM);
1499
1500    // pOut = pM * pT
1501    if (pOut != pM)
1502    {
1503        (void)MTX44Copy(pOut, pM);
1504    }
1505
1506    VEC4 tmp;
1507    VEC3Transform(&tmp, pM, pT);
1508
1509    pOut->f._03 = tmp.x;
1510    pOut->f._13 = tmp.y;
1511    pOut->f._23 = tmp.z;
1512
1513    return pOut;
1514}
1515
1516/*
1517
1518*/
1519
1520}  // namespace ARMv6
1521}  // namespace math
1522}  // namespace nn
1523