1 /*---------------------------------------------------------------------------*
2   Project:  Horizon
3   File:     math_Matrix43.cpp
4 
5   Copyright (C)2009-2012 Nintendo Co., Ltd.  All rights reserved.
6 
7   These coded instructions, statements, and computer programs contain
8   proprietary information of Nintendo of America Inc. and/or Nintendo
9   Company Ltd., and are protected by Federal copyright law.  They may
10   not be disclosed to third parties or copied or duplicated in any form,
11   in whole or in part, without the prior written consent of Nintendo.
12 
13   $Rev: 46347 $
14  *---------------------------------------------------------------------------*/
15 
16 #include <nn/math.h>
17 
18 #include <nn/math/math_Matrix43.h>
19 #include <nn/math/ARMv6/math_Matrix43.h>
20 
21 #if !defined(NN_MATH_AS_INLINE)
22 #include <nn/math/inline/math_Matrix43.ipp>
23 #include <nn/math/ARMv6/inline/math_Matrix43.ipp>
24 #endif
25 
26 
27 namespace nn {
28 namespace math {
29 namespace ARMv6 {
30 
31 #include <nn/hw/ARM/code32.h>
32 
// Copies one MTX43 into another through the VFP register file.
//   r0 : destination matrix (first argument, "pOut" in the comments below)
//   r1 : source matrix      (second argument, "p" in the comments below)
// Twelve single-precision words are moved via s0-s11. r0 is never modified,
// so the destination pointer is still in r0 when the function returns.
33 NN_FUNC_ATTR_PRIVATE_SECTION
34 asm MTX43*
MTX43CopyAsm(MTX43 *,const MTX43 *)35 MTX43CopyAsm(MTX43* , const MTX43*)
36 {
37     CMP         r1,r0           // Do p and pOut point at the same matrix?
38     BXEQ        lr              // Yes: nothing to copy, return immediately
39     VLDMIA      r1,{s0-s11}     // Load all 12 elements of p into s0-s11
40     VSTMIA      r0,{s0-s11}     // Store s0-s11 into pOut
41     BX          lr
42 }
43 
// Element-wise sum of two MTX43 matrices: out = p1 + p2.
//   r0 : destination matrix (first argument)
//   r1 : first operand  ("p1" in the comments below)
//   r2 : second operand ("p2" in the comments below)
// p2 is loaded whole into s0-s11, which also serve as the accumulators.
// p1 is brought in through s12-s19 in two steps (8 elements, then 4).
// Every load is issued before the single store at the end, so the
// destination may be the same matrix as either operand.
// r0 is never modified and still holds the destination pointer on return.
44 NN_FUNC_ATTR_PRIVATE_SECTION
45 asm MTX43*
MTX43AddAsm(MTX43 *,const MTX43 *,const MTX43 *)46 MTX43AddAsm(MTX43* , const MTX43* , const MTX43* )
47 {
48     VPUSH       {d8-d9}             // Save d8-d9 (s16-s19); they are overwritten below
49     VLDMIA      r2,{s0-s11}         // Load all 12 elements of p2 into s0-s11
50     VLDMIA      r1!,{s12-s19}       // Load the first 8 elements of p1 into s12-s19; r1 advances past them
51
    // Elements 0-3: s0-s3 = p1 + p2
52     VADD.F32    s0,s12,s0
53     VADD.F32    s1,s13,s1
54     VADD.F32    s2,s14,s2
55     VADD.F32    s3,s15,s3
56     VLDMIA      r1,{s12-s15}        // s12-s15 are free again: load the last 4 elements of p1
57
    // Elements 4-7, using the p1 values still held in s16-s19
58     VADD.F32    s4,s16,s4
59     VADD.F32    s5,s17,s5
60     VADD.F32    s6,s18,s6
61     VADD.F32    s7,s19,s7
62
    // Elements 8-11, using the reloaded s12-s15
63     VADD.F32    s8,s12,s8
64     VADD.F32    s9,s13,s9
65     VADD.F32    s10,s14,s10
66     VADD.F32    s11,s15,s11
67
68     VPOP        {d8-d9}             // Restore d8-d9
69     VSTMIA      r0,{s0-s11}         // Store the 12 sums to the destination
70     BX          lr                  // Return
71 }
72 
// Product of two MTX43 matrices: out = p1 * p2.
//   r0 : destination matrix (first argument)
//   r1 : left operand  ("p1" in the comments below)
//   r2 : right operand ("p2" in the comments below)
// Elements are addressed as 4 rows of 3 floats (row r starts at byte r*3*4).
//   rows 0-2 : out[i][j] = sum over k=0..2 of p1[i][k] * p2[k][j]
//   row 3    : out[3][j] = p2[3][j] + sum over k=0..2 of p1[3][k] * p2[k][j]
// Register plan:
//   s0-s11  : result (s0-s2 row 0, s3-s5 row 1, s6-s8 row 2, s9-s11 row 3)
//   s12-s23 : p1 (three registers per row)
//   s24-s26 : p2 row 0, later reloaded with p2 row 2
//   s27-s29 : p2 row 1
// Every load is issued before the single store at the end, so the
// destination may be the same matrix as either operand.
// r0 is never modified and still holds the destination pointer on return.
73 NN_FUNC_ATTR_PRIVATE_SECTION
74 asm MTX43*
MTX43MultAsm(MTX43 *,const MTX43 *,const MTX43 *)75 MTX43MultAsm(MTX43*, const MTX43*, const MTX43*)
76 {
77     VPUSH       {d8-d15}            // Save d8-d15 (s16-s31); s16-s29 are overwritten below
78
    // Seed result row 3 with row 3 of p2; the VMLAs below accumulate onto it.
79     VLDR.F32    s9,[r2,#3*3*4]      // s9  = p2[3][0]
80     VLDR.F32    s10,[r2,#4+3*3*4]   // s10 = p2[3][1]
81     VLDR.F32    s11,[r2,#8+3*3*4]   // s11 = p2[3][2]
82
83     VLDMIA      r1,{s12-s23}        // Load all 12 elements of p1 into s12-s23
84     VLDMIA      r2!,{s24-s29}       // Load rows 0-1 of p2 into s24-s29; r2 advances to row 2
85
    // k = 0 term: p1[i][0] * p2[0][j]. Rows 0-2 are initialised with VMUL.
86     VMUL.F32    s0,s12,s24
87     VMUL.F32    s1,s12,s25
88     VMUL.F32    s2,s12,s26
89
90     VMUL.F32    s3,s15,s24
91     VMUL.F32    s4,s15,s25
92     VMUL.F32    s5,s15,s26
93
94     VMUL.F32    s6,s18,s24
95     VMUL.F32    s7,s18,s25
96     VMUL.F32    s8,s18,s26
97
    // Row 3 accumulates onto the p2[3][j] seed instead of starting from zero.
98     VMLA.F32    s9,s21,s24
99     VMLA.F32    s10,s21,s25
100     VMLA.F32    s11,s21,s26
101
    // k = 1 term: p1[i][1] * p2[1][j], with p2 row 1 in s27-s29.
102     VMLA.F32    s0,s13,s27
103     VMLA.F32    s1,s13,s28
104     VMLA.F32    s2,s13,s29
105
106     VMLA.F32    s3,s16,s27
107     VMLA.F32    s4,s16,s28
108     VMLA.F32    s5,s16,s29
109
110     VLDMIA      r2,{s24-s26}        // Row 0 of p2 is no longer needed: reload s24-s26 with row 2
111
112     VMLA.F32    s6,s19,s27
113     VMLA.F32    s7,s19,s28
114     VMLA.F32    s8,s19,s29
115
116     VMLA.F32    s9,s22,s27
117     VMLA.F32    s10,s22,s28
118     VMLA.F32    s11,s22,s29
119
    // k = 2 term: p1[i][2] * p2[2][j], with p2 row 2 now in s24-s26.
120     VMLA.F32    s0,s14,s24
121     VMLA.F32    s1,s14,s25
122     VMLA.F32    s2,s14,s26
123
124     VMLA.F32    s3,s17,s24
125     VMLA.F32    s4,s17,s25
126     VMLA.F32    s5,s17,s26
127
128     VMLA.F32    s6,s20,s24
129     VMLA.F32    s7,s20,s25
130     VMLA.F32    s8,s20,s26
131
132     VMLA.F32    s9,s23,s24
133     VMLA.F32    s10,s23,s25
134     VMLA.F32    s11,s23,s26
135
136     VPOP        {d8-d15}            // Restore d8-d15
137     VSTMIA      r0,{s0-s11}         // Store the 12 result elements to the destination
138     BX          lr                  // Return
139
140 }
141 
// Scales every element of an MTX43 by a scalar: out = p * f.
//   r0 : destination matrix (first argument)
//   r1 : source matrix ("p" in the comments below)
//   s0 : scale factor. The code reads s0 without loading it, so the f32
//        argument is expected to arrive in s0.
//        NOTE(review): presumably the VFP register-argument calling
//        convention - confirm against the build's ABI settings.
// The matrix is loaded into s1-s12 so that s0 keeps the scalar.
// The load completes before the store, so the destination may be the same
// matrix as the source.
// r0 is never modified and still holds the destination pointer on return.
142 NN_FUNC_ATTR_PRIVATE_SECTION
143 asm MTX43*
MTX43MultAsm(MTX43 *,const MTX43 *,f32)144 MTX43MultAsm(MTX43*, const MTX43*, f32)
145 {
146     VLDMIA      r1,{s1-s12}        // Load all 12 elements of p into s1-s12 (s0 holds the scalar)
147
    // Multiply each element in place by the scalar in s0.
148     VMUL.F32    s1,s1,s0
149     VMUL.F32    s2,s2,s0
150     VMUL.F32    s3,s3,s0
151     VMUL.F32    s4,s4,s0
152
153     VMUL.F32    s5,s5,s0
154     VMUL.F32    s6,s6,s0
155     VMUL.F32    s7,s7,s0
156     VMUL.F32    s8,s8,s0
157
158     VMUL.F32    s9,s9,s0
159     VMUL.F32    s10,s10,s0
160     VMUL.F32    s11,s11,s0
161     VMUL.F32    s12,s12,s0
162
163     VSTMIA      r0,{s1-s12}         // Store the 12 scaled elements to the destination
164     BX          lr                  // Return
165
166 }
167 
168 #include <nn/hw/ARM/codereset.h>
169 
170 }  // namespace ARMv6
171 }  // namespace math
172 }  // namespace nn
173